When the correlation between the two securities temporarily weakens, i.e. one stock moves up while the other moves down, the pairs trade is to short the outperforming stock and go long the underperforming one, betting that the “spread” between the two will eventually converge. The divergence within a pair can be caused by temporary supply/demand changes, large buy/sell orders for one security, a reaction to important news about one of the companies, and so on.

# Import the necessary modules
% matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from sklearn import linear_model

# Strategy parameters
# Backtest window; these date strings are used to slice the price data
start_date = '2013-01-01'
end_date = '2017-10-02'
# Spread z-score thresholds: enter long at or below the first,
# enter short at or above the second
entry_condition_long = -1
entry_condition_short = 1
# Exit an open position once |z-score| falls to this level
abs_condition_exit = 0.5

# Load the price history of both instruments, indexed by date
# GLD: the CSV dates are parsed as YYYY-MM-DD
gld_df = pd.read_csv('GLD.csv', index_col='Date')
gld_df.index = pd.to_datetime(gld_df.index, format='%Y-%m-%d')
gld_df = gld_df[start_date:end_date]
# GDX: the CSV dates are parsed as MM/DD/YYYY
gdx_df = pd.read_csv('GDX.csv', index_col='Date')
gdx_df.index = pd.to_datetime(gdx_df.index, format='%m/%d/%Y')
gdx_df = gdx_df[start_date:end_date]
# Put both frames on one shared daily calendar; dates without a row in the
# CSV become all-NaN rows
calendar_dates = pd.date_range(start=start_date, end=end_date, freq='D', tz=None)
gld_df, gdx_df = gld_df.reindex(calendar_dates), gdx_df.reindex(calendar_dates)

# Chart the GLD closing prices over the backtest window
plt.figure(figsize=(14, 4))
plt.plot(gld_df['Close'])
plt.title("GLD Prices")
plt.xlabel("Date")
plt.ylabel("Price");

# Chart the GDX closing prices over the backtest window
plt.figure(figsize=(14, 4))
plt.plot(gdx_df['Close'])
plt.title("GDX Prices")
plt.xlabel("Date")
plt.ylabel("Price");

# Estimate the hedge ratio beta by regressing GLD closes on GDX closes with
# Ordinary Least Squares. LinearRegression() is used with its defaults, so an
# intercept is fitted as well; only the slope is kept as beta.
# See: http://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html

# Closing prices without the NaN rows, restricted to the dates both series share
gld_cl = gld_df['Close'].dropna()
gdx_cl = gdx_df['Close'].dropna()
gld_gdx_index_intersection = gld_cl.index.intersection(gdx_cl.index)
gld_cl_y = gld_cl[gld_gdx_index_intersection]
gdx_cl_x = gdx_cl[gld_gdx_index_intersection]
gld_cl = gld_cl_y
gdx_cl = gdx_cl_x

# Chronological split: the last `train_size` rows form the test set and
# everything before them is the training set (despite the variable's name)
train_size = len(gdx_cl) // 2
gld_cl_y_train, gld_cl_y_test = gld_cl_y[:-train_size], gld_cl_y[-train_size:]
gdx_cl_x_train, gdx_cl_x_test = gdx_cl_x[:-train_size], gdx_cl_x[-train_size:]

# Remember which dates fall in each half before the index is dropped below
dates_train = gld_cl_y_train.index
dates_test = gld_cl_y_test.index

# Reshape each series into a single-column 2-D array before fitting
gld_cl_y_train = gld_cl_y_train.values.reshape(-1, 1)
gdx_cl_x_train = gdx_cl_x_train.values.reshape(-1, 1)
gld_cl_y_test = gld_cl_y_test.values.reshape(-1, 1)
gdx_cl_x_test = gdx_cl_x_test.values.reshape(-1, 1)

# Fit on the training half (GDX is the feature, GLD the target), then predict
# GLD from GDX on the test half
regr = linear_model.LinearRegression()
regr.fit(gdx_cl_x_train, gld_cl_y_train)
gld_cl_y_pred = regr.predict(gdx_cl_x_test)
# Slope of the fitted line
beta = regr.coef_[0][0]

# Scatter of the test observations with the fitted line on top
plt.figure(figsize=(14, 4))
plt.scatter(gdx_cl_x_test, gld_cl_y_test, color='black')
plt.plot(gdx_cl_x_test, gld_cl_y_pred, color='blue', linewidth=3)
plt.ylabel("GLD");
plt.xlabel("GDX")
plt.show()

# Overlay GLD with GDX scaled by beta so the two series share one axis
plt.figure(figsize=(14, 4))
plt.plot(gld_df['Close'], 'y')
plt.plot(gdx_df['Close'] * beta, 'c')
plt.legend(['GLD', 'GDX'])
plt.show()

# Estimated spread between the pair:
# spread = GLD - beta * GDX
spread = gld_cl - beta * gdx_cl
plt.figure(figsize=(14, 4))
plt.plot(spread, color='blue', linewidth=1)
plt.title("Spread (Spread = GLD - beta * GDX)")

# Standardise the spread into a z-score and plot it with the entry thresholds:
# spread_zscore = (spread - spread_mean) / spread_std
spread_mean = spread.mean()
spread_std = spread.std()
spread_zscore = (spread - spread_mean) / spread_std

plt.figure(figsize=(14, 4))
plt.plot(spread_zscore, color='blue', linewidth=1)
# Reference lines: the z-score's own mean, then the short and long entry levels
plt.axhline(spread_zscore.mean(), color='black')
plt.axhline(entry_condition_short, color='green', linestyle='--')
plt.axhline(entry_condition_long, color='red', linestyle='--')
plt.legend([
    'Spread z-score',
    'Spread z-score Mean',
    '%s' % entry_condition_short,
    '%s' % entry_condition_long,
])
plt.title("Spread z-score")

# Derive boolean trading signals (one value per date) from the spread's z-score
# Buy the spread when the z-score is at or below entry_condition_long
longs = spread_zscore <= entry_condition_long
# Short the spread when the z-score is at or above entry_condition_short
shorts = spread_zscore >= entry_condition_short
# Exit any spread position once the z-score is back within abs_condition_exit
# standard deviations of zero. |z| is never negative, so a single upper bound
# is the same inclusive test as between(-exit, exit), and it avoids the
# `inclusive=True` argument that newer pandas releases reject.
exits = spread_zscore.abs() <= abs_condition_exit

Going back to GDX and GLD above, which follow GLD = ⍺ GDX + e such that the ratio (GLD/GDX) moves around its mean value ⍺, we make money on the ratio of the two reverting to the mean. In order to do this we’ll watch for when GDX and GLD are far apart, i.e. when ⍺ is too high or too low:

Going Long the Ratio: This is when the ratio ⍺ is smaller than usual and we expect it to increase. In the above example, we place a bet on this by buying GLD and selling GDX.

Going Short the Ratio: This is when the ratio ⍺ is large and we expect it to become smaller. In the above example, we place a bet on this by selling GLD and buying GDX.

# Plot the z-score together with the buy and sell signals
plt.figure(figsize=(14, 4))
# Marker series: the z-score on signal days and NaN everywhere else, so that
# only signal days get a marker. Masking with where() avoids using 0.0 as a
# "no signal" placeholder, which would also erase a real signal whose z-score
# happens to be exactly 0.0.
buyR = spread_zscore.where(longs)
sellR = spread_zscore.where(shorts)
plt.plot(spread_zscore, color='blue', linewidth=1)
plt.plot(buyR, color='g', linestyle='None', marker='^')
plt.plot(sellR, color='r', linestyle='None', marker='^')
# Exit band, z-score mean and the two entry thresholds
plt.axhline(abs_condition_exit, color='c', linestyle='-')
plt.axhline(-abs_condition_exit, color='c', linestyle='-')
plt.axhline(spread_zscore.mean(), color='black')
plt.axhline(entry_condition_short, color='green', linestyle='--')
plt.axhline(entry_condition_long, color='red', linestyle='--')
plt.legend([
    'Spread z-score',
    'Buy Signal',
    'Sell Signal',
    'Exit',
    'Exit',
    'Spread z-score Mean',
    '%s' % entry_condition_short,
    '%s' % entry_condition_long,
])
plt.title("Spread z-score")
plt.show()

# Plot the prices together with the buy and sell signals
# Each marker series holds a price on its signal days and NaN elsewhere.
# A long-spread signal buys GLD and sells GDX; a short-spread signal does the
# opposite. GDX is scaled by beta to sit on the same axis as GLD.
gdx_scaled = gdx_cl * beta
buy_gld_prices = gld_cl.where(longs)
sell_gld_prices = gld_cl.where(shorts)
buy_gdx_prices = gdx_scaled.where(shorts)
sell_gdx_prices = gdx_scaled.where(longs)

plt.figure(figsize=(14, 4))
plt.plot(gld_df['Close'], 'y')
plt.plot(gdx_df['Close'] * beta, 'c')
# Green markers are buys, red markers are sells
plt.plot(buy_gld_prices, color='g', linestyle='None', marker='^')
plt.plot(sell_gld_prices, color='r', linestyle='None', marker='^')
plt.plot(buy_gdx_prices, color='g', linestyle='None', marker='^')
plt.plot(sell_gdx_prices, color='r', linestyle='None', marker='^')
plt.legend(['GLD', 'GDX', 'Buy Signal', 'Sell Signal'])
plt.show()

Simulate Trading

# Walk through the signal series one day at a time, keeping a simple ledger of
# cash (`money`) and share counts; a negative count is a short position.
# Per-day share counts, initialised to zero on the same index as the prices
positions_gld = 0 * gld_cl.copy()
positions_gdx = positions_gld.copy()
# Initial amount of money
money = 5000
print('Initial money: %s (%s)' % (money, start_date))
# Do not short more than leverage amount
max_debt = 1.5 * money
debt = 0
count_gld = 0
count_gdx = 0
going_long = False
going_short = False
for day in gld_cl.index:
    # Whole number of GDX shares traded against each GLD share
    ratio = gld_cl[day] // gdx_cl[day]
    if longs[day]:
        # Buy GLD, Sell GDX
        if money > 0:
            quantity = money // gld_cl[day]
            # Only trade if the new short keeps total short exposure under the cap
            if (debt + gdx_cl[day] * ratio * quantity) < max_debt:
                money -= gld_cl[day] * quantity
                money += gdx_cl[day] * ratio * quantity
                count_gld += quantity
                count_gdx -= ratio * quantity
                going_long = True
    elif shorts[day]:
        # Sell GLD, Buy GDX
        if money > 0:
            # NOTE(review): quantity is sized from the GDX price but is then
            # used as the number of GLD shares sold - confirm this is intended.
            quantity = money // gdx_cl[day]
            if (debt + gld_cl[day] * quantity) < max_debt:
                money += gld_cl[day] * quantity
                money -= gdx_cl[day] * ratio * quantity
                count_gld -= quantity
                count_gdx += ratio * quantity
                going_short = True
    elif exits[day] and (going_long or going_short):
        # Clear positions: sell longs / buy back shorts at today's prices
        money += gld_cl[day] * count_gld
        money += gdx_cl[day] * count_gdx
        count_gld = 0
        count_gdx = 0
        going_long = False
        going_short = False
    # Record the holdings at the end of every day. Doing this once, outside
    # the branches, also carries positions forward on signal days where no
    # trade was executed (no cash left or the debt cap was hit).
    positions_gld[day] = count_gld
    positions_gdx[day] = count_gdx
    # Re-mark the short exposure at today's prices for tomorrow's cap check
    debt = 0
    if count_gld < 0:
        debt = abs(count_gld) * gld_cl[day]
    if count_gdx < 0:
        debt += abs(count_gdx) * gdx_cl[day]
print('Final money: %s (%s)' % (money, end_date))
print("Exiting any remaining positions...")
# Liquidate whatever is still held at the last day's prices
money += gld_cl[day] * count_gld
money += gdx_cl[day] * count_gdx
print('Final Money: %s' % money)

Initial money : 5000 ( 2013 - 01 - 01 )
Final money : 8024.5562 ( 2017 - 10 - 02 )
Exiting any remaining positions ...
Final Money : 8024.5562

# Compute the portfolio market value. Trading positions are lagged by one day,
# since the trades made on the current day have not happened yet.
market_value_gld = gld_cl * positions_gld.shift(1)
market_value_gdx = gdx_cl * positions_gdx.shift(1)
market_value_total = market_value_gld + market_value_gdx
# Plot the full equity curve
# NOTE(review): this plots the running sum of daily market values rather than
# the market value itself - confirm that is the intended "equity curve".
plt.figure(figsize=(14, 4))
plt.plot(market_value_total.cumsum())
plt.xlabel("Time")
plt.ylabel("Portfolio Value in USD");
plt.show()

# Day-over-day percentage change of the total market value. Undefined values
# (NaN) and infinite values are both replaced by a zero return.
returns = (
    market_value_total.pct_change()
    .fillna(0.0)
    .replace([np.inf, -np.inf], 0.0)
)

# Sharpe ratio
def _annualized_sharpe(daily_returns):
    """Return mean / std of *daily_returns*, scaled by sqrt(252)."""
    return (daily_returns.mean() * math.sqrt(252)) / daily_returns.std()


# Computed separately for the test dates, the training dates and the full period
sharpe_ratio_test = _annualized_sharpe(returns[dates_test])
sharpe_ratio_train = _annualized_sharpe(returns[dates_train])
sharpe_ratio = _annualized_sharpe(returns)
print("sharpe_ratio: %s sharpe_ratio_train: %s sharpe_ratio_test: %s" % (sharpe_ratio, sharpe_ratio_train, sharpe_ratio_test))

sharpe_ratio : - 1.36390315681 sharpe_ratio_train : - 2.13182148448 sharpe_ratio_test : - 0.511747503253