import numpy as np
import pandas as pd
import datetime as dt
pd.set_option('display.max_rows', 16)
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (16.0, 9.0)
import seaborn as sns
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
import gc
plt.rcParams['figure.figsize'] = (16.0, 9.0)
START = '2007-01-01'
END = '2024-04-15'
# Security Id
stk_info = DataAPI.SecIDGet(assetClass="E",pandas="1")
cond1 = (stk_info['exchangeCD'] == 'XSHE') | (stk_info['exchangeCD'] == 'XSHG')
cond2 = (stk_info['listStatusCD'] == 'L') | (stk_info['listStatusCD'] == 'DE')
cond3 = stk_info['transCurrCD']=='CNY'
stk_info = stk_info[cond1 & cond2 & cond3].copy()
stk_id = stk_info['secID']
stk_info
st_df = DataAPI.SecSTGet(beginDate=START,endDate=END,secID=stk_id,field=['secID','tradeDate','STflg'],pandas="1")
st_df['tradeDate'] = pd.to_datetime(st_df['tradeDate'],format="%Y-%m-%d")
DataAPI.MktIborGet(secID="Shibor1M.IRCN",beginDate=START,endDate=END,field=['secID','tradeDate','rate'],pandas="1")
shibor_df = DataAPI.MktIborGet(secID="Shibor1M.IRCN",beginDate=START,endDate=END,field=['secID','tradeDate','rate'],pandas="1")
shibor_df['rate'] = shibor_df['rate']*0.01/12
shibor_df['tradeDate'] = pd.to_datetime(shibor_df['tradeDate'])
shibor_df.drop('secID',axis=1,inplace=True)
shibor_df.rename(columns={'rate':'rf'},inplace=True)
shibor_df['ym'] = shibor_df['tradeDate'].dt.to_period('M')
shibor_df.sort_values('tradeDate',inplace=True)
shibor_df_m = shibor_df.groupby('ym',as_index=False).last()
shibor_df_m.drop('tradeDate',axis=1,inplace=True)
shibor_df_m
beta_df = pd.read_pickle('./data/beta_df.pkl')
beta_df['tradeDate'] = pd.to_datetime(beta_df['tradeDate'], format="%Y-%m-%d")
beta_df['ym'] = beta_df['tradeDate'].dt.to_period('M')
beta_df.drop(['Beta60','Beta120'],axis=1,inplace=True)
beta_df['Beta252'] = pd.to_numeric(beta_df['Beta252'])
# Winsorization
# up_q = 0.99999
# lower_q = 0.00001
# beta_df['Beta252_winsor'] = beta_df['Beta252'].clip(lower=beta_df['Beta252'].quantile(lower_q),upper=beta_df['Beta252'].quantile(up_q))
# Monthly
beta_df_m = beta_df.groupby(['secID','ym'],as_index=False)['Beta252'].last()
beta_df_m.rename(columns={'Beta252':'beta'},inplace=True)
beta_df_m
stk_df = pd.read_pickle('./data/stk_df.pkl')
stk_df['tradeDate'] = pd.to_datetime(stk_df['tradeDate'], format='%Y-%m-%d')
stk_df['ym'] = stk_df['tradeDate'].dt.to_period('M')
stk_df.sort_values(['secID','tradeDate'],inplace=True)
# drop ST stocks
print(stk_df.shape)
stk_df = pd.merge(stk_df, st_df, on=['secID','tradeDate'],how='left')
stk_df = stk_df[stk_df['STflg'].isna()].copy()
stk_df.drop('STflg',axis=1,inplace=True)
print(stk_df.shape)
# Monthly
stk_df_m = stk_df.groupby(['secID','ym'],as_index=False).last()
stk_df_m
pd.__version__
Momentum 介绍:
Momentum即动量,指的是过去一段时间表现较好的股票,在未来一段时间内也会较好。“过去”和“未来”有很多定义,比较常见的:
也即,中间跳过了一个月,t-1。 这是因为,短期发现了反转,reversal:上个月表现好的,这个月表现差
Momentum 的计算涉及到pandas groupby rolling。如果是自定义的函数,apply会比较慢。但新版本(pandas version >= 1)中,apply中可以指定用numba作为计算引擎,速度会快非常多。由于优矿的pandas版本很低, 没有这个选项。
另外,按照月来做rolling时,pandas rolling的选项不可以用"MonthEnd"这样长度不固定的时间作为window size。因此,如果想做得很精确的话,需要用一些其他的办法。一种比较容易的思路是把停牌的日期(用MultiIndex)填上,对应的ret值留空。窗口长度就可以固定(通过指定observation个数,而不是月份数)。
例:
stk_df.loc[(stk_df['secID']=='000001.XSHE') & (stk_df['tradeDate']>='2010-06-20') & (stk_df['tradeDate']<='2010-07-10')]
stk_df[(stk_df['secID']=='000007.XSHE') & (stk_df['tradeDate']>='2021-04-20')&(stk_df['tradeDate']<='2022-07-05')]
stk_df_m['ret_mom'] = stk_df_m.groupby('secID')['closePrice'].apply(lambda x: x / x.shift() - 1) #这个ret_mom不用作后面ret的计算,后面仍保留monthly ret
stk_df_m[(stk_df_m['secID']=='000007.XSHE') & (stk_df_m['tradeDate']>='2021-04-20')&(stk_df_m['tradeDate']<='2022-09-05')]
stk_df_m.sort_values(['secID','ym'],inplace=True)
stk_df_m
stk_df_m['1+ret_mom'] = stk_df_m['ret_mom'] + 1
def fill_missing(df, full_dates, id_col='secID', date_col='ym'):
"""
This function fills the missing dates for stocks.
Parameters:
df: The dataframe. Could be a sub-dataframe created by "groupby".
The dataframe must be sorted on the "date_col".
full_dates: the unique dates covering all securities in the full dataframe.
Need to be sorted.
id_col: the security id.
date_col: the dates column for the security
Returns:
A dataframe with the missing dates filled with NA.
"""
one_stk_id = df[id_col].unique()
date_start = np.where(full_dates == df[date_col].min())[0][0]
date_end = np.where(full_dates == df[date_col].max())[0][0]
dates = full_dates[date_start:date_end+1]
idx = pd.MultiIndex.from_product([one_stk_id,dates],
names=(id_col,date_col))
df = df.set_index([id_col,date_col]).reindex(idx).reset_index()
return df
%%time
full_dates = np.sort(stk_df['ym'].unique())
stk_df_m = stk_df_m.groupby('secID').apply(fill_missing, full_dates=full_dates)
stk_df_m.reset_index(drop=True, inplace=True)
stk_df_m.loc[stk_df_m['1+ret_mom'].isna()]
stk_df_m.loc[(stk_df_m['secID']=='000001.XSHE') & (stk_df_m['ym']>='2010-06') & (stk_df_m['ym']<='2010-11')]
stk_df_m[(stk_df_m['secID']=='000007.XSHE')&(stk_df_m['ym']>='2021-02')]
stk_df_m[(stk_df_m['secID']=='000007.XSHE')&(stk_df_m['ym']>='2021-02')&(~stk_df_m['tradeDate'].isna())]
stk_df_m.loc[stk_df_m['1+ret_mom'].isna(),'1+ret_mom'] = 1 # 缺失位置填充为1,以便连乘。
stk_df_m['mom'] = stk_df_m.groupby('secID').rolling(11,min_periods=11)['1+ret_mom'].apply(np.prod, raw=True).values - 1
stk_df_m['mom_6m'] = stk_df_m.groupby('secID').rolling(6,min_periods=6)['1+ret_mom'].apply(np.prod, raw=True).values - 1
# 当只用numpy function时可以选raw=True,只用ndarray格式的数据,运算速度快很多。
stk_df_m
stk_df_m['ret'] = stk_df_m.groupby('secID')['closePrice'].apply(lambda x: x / x.shift() - 1)
stk_df_m['size'] = np.log(stk_df_m['negMarketValue'])
stk_df_m.drop(['tradeDate','closePrice'],axis=1,inplace=True)
stk_df_m = pd.merge(stk_df_m, shibor_df_m, on='ym')
stk_df_m['exret'] = stk_df_m['ret'] - stk_df_m['rf']
# 把日期对齐。
# 例:
# ret_date == 2020.03
# size_date == 2020.02
# cumret_date == 2020.01
stk_df_m['exret'] = stk_df_m.groupby(['secID'])['exret'].shift(-1)
stk_df_m['ret_date'] = stk_df_m.groupby('secID')['ym'].shift(-1)
stk_df_m['mom'] = stk_df_m.groupby(['secID'])['mom'].shift()
stk_df_m['mom_6m'] = stk_df_m.groupby(['secID'])['mom_6m'].shift()
stk_df_m['mom_date'] = stk_df_m.groupby('secID')['ym'].shift()
stk_df_m.dropna(inplace=True)
stk_df_m = stk_df_m[['secID','ret_date','exret','mom_date','mom','mom_6m','ym','negMarketValue','size']]
stk_df_m.rename(columns={'negMarketValue':'mktcap'},inplace=True)
stk_df_m.sort_values(['secID','ym'],inplace=True)
stk_df_m
stk_df_m[['mom','mom_6m']].describe()
pb_df = pd.read_pickle('./data/pb_df.pkl')
pb_df['tradeDate'] = pd.to_datetime(pb_df['tradeDate'])
pb_df['PB'] = pd.to_numeric(pb_df['PB'])
pb_df['ym'] = pb_df['tradeDate'].dt.to_period('M')
pb_df.sort_values(['secID','tradeDate'],inplace=True)
pb_df = pb_df.groupby(['secID','ym'],as_index=False).last()
pb_df['bm'] = 1 / pb_df['PB']
pb_df.drop(['tradeDate','PB'],axis=1,inplace=True)
pb_df = pb_df[pb_df['bm'] >= 0]
pb_df
stk_df_m
beta_df_m
ret_df = pd.merge(stk_df_m, beta_df_m, on=['secID','ym'],how='left') # beta 的 na 值不管它,不是重点,保留左边的dataframe
ret_df = pd.merge(ret_df, pb_df,on=['secID','ym'] ,how='left')
ret_df
# ret_df_full = ret_df.copy()
# ret_df = ret_df[ret_df['ret_date']>='2015'].copy()
# ret_df = ret_df_full.copy()
q = dict()
keys = ['q'+str(i) for i in range(1, 10)]
values = np.arange(0.1, 1.0, 0.1)
q.update(zip(keys,values))
quantile_df = pd.DataFrame()
for key, value in q.items():
quantile_df[key] = ret_df.groupby(['mom_date'])['mom'].quantile(value)
ret_df_q = pd.merge(ret_df, quantile_df, on='mom_date')
ret_df_q
portfolios = dict()
drop_cols = [col for col in ret_df_q.columns if col[0]=='q']
portfolios['p1'] = ret_df_q.loc[ret_df_q['mom'] <= ret_df_q['q1']].copy().drop(drop_cols, axis=1)
for i in range(2,10):
idx = (ret_df_q[f'q{i-1}'] <= ret_df_q['mom']) & (ret_df_q['mom'] <= ret_df_q[f'q{i}'])
portfolios[f'p{i}'] = ret_df_q.loc[idx].copy().drop(drop_cols, axis=1)
portfolios['p10'] = ret_df_q.loc[ret_df_q['mom'] >= ret_df_q['q9']].copy().drop(drop_cols, axis=1)
portfolios_crs_mean = dict()
for k in portfolios.keys():
portfolios_crs_mean[k] = portfolios[k].groupby(['ret_date'])['exret'].mean()
mean_values = {}
t_values = {}
for k in portfolios_crs_mean.keys():
y = portfolios_crs_mean[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
# Portfolio 10-1
y = portfolios_crs_mean['p10'] - portfolios_crs_mean['p1']
const = np.full(shape=len(y), fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values['p10-p1'] = reg.params[0]
t_values['p10-p1'] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['mean','t-value'],
columns=mean_values.keys())
q_size = dict()
keys = ['q_size_1']
values = [0.5]
q_size.update(zip(keys,values))
q_mom = dict()
keys = ['q_mom_1','q_mom_2']
values = [0.3, 0.7]
q_mom.update(zip(keys,values))
q_size_df = pd.DataFrame()
for key, value in q_size.items():
q_size_df[key] = ret_df.groupby(['ym'])['size'].quantile(value)
q_mom_df = pd.DataFrame()
for key, value in q_mom.items():
q_mom_df[key] = ret_df.groupby(['mom_date'])['mom'].quantile(value)
ret_df_q = pd.merge(ret_df, q_size_df, on='ym')
ret_df_q = pd.merge(ret_df_q, q_mom_df, on='mom_date')
portfolios_size = dict()
portfolios_size['size1'] = ret_df_q.loc[ret_df_q['size'] <= ret_df_q['q_size_1'],
['secID','ym','ret_date','exret','size','mktcap']]
portfolios_size['size2'] = ret_df_q.loc[ret_df_q['size'] >= ret_df_q['q_size_1'],
['secID','ym','ret_date','exret','size','mktcap']]
portfolios_mom = dict()
portfolios_mom['mom1'] = ret_df_q.loc[ret_df_q['mom'] <= ret_df_q['q_mom_1'],
['secID','ym','ret_date','exret','mom']]
portfolios_mom['mom2'] = ret_df_q.loc[(ret_df_q['mom'] >= ret_df_q['q_mom_1']) & \
(ret_df_q['mom'] <= ret_df_q['q_mom_2']),
['secID','ym','ret_date','exret','mom']]
portfolios_mom['mom3'] = ret_df_q.loc[ret_df_q['mom'] >= ret_df_q['q_mom_2'],
['secID','ym','ret_date','exret','mom']]
portfolios = dict()
for mom_group in portfolios_mom.keys():
for size_group in portfolios_size.keys():
portfolios[f'{mom_group}_{size_group}'] = pd.merge(portfolios_size[size_group],
portfolios_mom[mom_group][['secID','ret_date','mom']],
on=['secID','ret_date'])
mean_portfolios_ret = dict()
for pf in portfolios.keys():
mean_portfolios_ret[pf] = portfolios[pf].groupby('ret_date')['exret'].mean()
print(mean_portfolios_ret[pf].shape) # print 看一下会不会存在某个月份上没有mom和size分组没有任何交叉
# Fast merge by stacking
mean_portfolios_ret_df = pd.DataFrame(np.vstack([pf for pf in mean_portfolios_ret.values()])).T
mean_portfolios_ret_df.columns = mean_portfolios_ret.keys()
mean_portfolios_ret_df.index = mean_portfolios_ret['mom1_size1'].index
# Newey-West adjustment
mean_values = {}
t_values = {}
for k in mean_portfolios_ret.keys():
y = mean_portfolios_ret[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=4)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['ret_mean','t_values'],columns=mean_values.keys())
ret_df['exret100'] = ret_df['exret'] * 100
def fm_reg(df,cols):
df_ = df.dropna()
if df_.shape[0] < 15:
return [None]*(len(cols)+1)
reg = LinearRegression(fit_intercept=True).fit(y=df_.loc[:,'exret100'], X=df_.loc[:,cols])
return np.insert(reg.coef_, 0, reg.intercept_)
cols = ['mom']
temp = ret_df.groupby('ret_date').apply(fm_reg, cols=cols)
reg_result_df = pd.DataFrame(temp.values.tolist())
reg_result_df.index=temp.index
reg_result_df.columns = ['intercept'] + cols
reg_result_df.dropna(inplace=True)
# Mean of coefs with NW adjustment
mean_values = {}
t_values = {}
for k in reg_result_df.columns:
y = reg_result_df[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['ret_mean','t_values'],columns=mean_values.keys())
cols = ['beta','size','bm','mom']
temp = ret_df.groupby('ret_date').apply(fm_reg, cols=cols)
reg_result_df = pd.DataFrame(temp.values.tolist())
reg_result_df.index=temp.index
reg_result_df.columns = ['intercept'] + cols
reg_result_df.dropna(inplace=True)
# Mean of coefs with NW adjustment
mean_values = {}
t_values = {}
for k in reg_result_df.columns:
y = reg_result_df[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['ret_mean','t_values'],columns=mean_values.keys())
ret_df['rev'] = ret_df.groupby('secID')['exret'].shift()
ret_df
q = dict()
keys = ['q'+str(i) for i in range(1, 10)]
values = np.arange(0.1, 1.0, 0.1)
q.update(zip(keys,values))
quantile_df = pd.DataFrame()
for key, value in q.items():
quantile_df[key] = ret_df.groupby(['ym'])['rev'].quantile(value)
ret_df_q = pd.merge(ret_df, quantile_df, on='ym')
portfolios = dict()
drop_cols = [col for col in ret_df_q.columns if col[0]=='q']
portfolios['p1'] = ret_df_q.loc[ret_df_q['rev'] <= ret_df_q['q1']].copy().drop(drop_cols, axis=1)
for i in range(2,10):
idx = (ret_df_q[f'q{i-1}'] <= ret_df_q['rev']) & (ret_df_q['rev'] <= ret_df_q[f'q{i}'])
portfolios[f'p{i}'] = ret_df_q.loc[idx].copy().drop(drop_cols, axis=1)
portfolios['p10'] = ret_df_q.loc[ret_df_q['rev'] >= ret_df_q['q9']].copy().drop(drop_cols, axis=1)
portfolios_crs_mean = dict()
for k in portfolios.keys():
portfolios_crs_mean[k] = portfolios[k].groupby(['ret_date'])['exret'].mean()
mean_values = {}
t_values = {}
for k in portfolios_crs_mean.keys():
y = portfolios_crs_mean[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
# Portfolio 10-1
y = portfolios_crs_mean['p10'] - portfolios_crs_mean['p1']
const = np.full(shape=len(y), fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values['p10-p1'] = reg.params[0]
t_values['p10-p1'] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['mean','t-value'],
columns=mean_values.keys())
q_size = dict()
keys = ['q_size_1']
values = [0.5]
q_size.update(zip(keys,values))
q_rev = dict()
keys = ['q_rev_1','q_rev_2']
values = [0.3, 0.7]
q_rev.update(zip(keys,values))
q_size_df = pd.DataFrame()
for key, value in q_size.items():
q_size_df[key] = ret_df.groupby(['ym'])['size'].quantile(value)
q_rev_df = pd.DataFrame()
for key, value in q_rev.items():
q_rev_df[key] = ret_df.groupby(['ym'])['rev'].quantile(value)
ret_df_q = pd.merge(ret_df, q_size_df, on='ym')
ret_df_q = pd.merge(ret_df_q, q_rev_df, on='ym')
portfolios_size = dict()
portfolios_size['size1'] = ret_df_q.loc[ret_df_q['size'] <= ret_df_q['q_size_1'],
['secID','ym','ret_date','exret','size','mktcap']]
portfolios_size['size2'] = ret_df_q.loc[ret_df_q['size'] >= ret_df_q['q_size_1'],
['secID','ym','ret_date','exret','size','mktcap']]
portfolios_rev = dict()
portfolios_rev['rev1'] = ret_df_q.loc[ret_df_q['rev'] <= ret_df_q['q_rev_1'],
['secID','ym','ret_date','exret','rev']]
portfolios_rev['rev2'] = ret_df_q.loc[(ret_df_q['rev'] >= ret_df_q['q_rev_1']) & \
(ret_df_q['rev'] <= ret_df_q['q_rev_2']),
['secID','ym','ret_date','exret','rev']]
portfolios_rev['rev3'] = ret_df_q.loc[ret_df_q['rev'] >= ret_df_q['q_rev_2'],
['secID','ym','ret_date','exret','rev']]
portfolios = dict()
for rev_group in portfolios_rev.keys():
for size_group in portfolios_size.keys():
portfolios[f'{rev_group}_{size_group}'] = pd.merge(portfolios_size[size_group],
portfolios_rev[rev_group][['secID','ret_date','rev']],
on=['secID','ret_date'])
mean_portfolios_ret = dict()
for pf in portfolios.keys():
mean_portfolios_ret[pf] = portfolios[pf].groupby('ret_date')['exret'].mean()
print(mean_portfolios_ret[pf].shape) # print 看一下会不会存在某个月份上没有rev和size分组没有任何交叉
# Fast merge by stacking
mean_portfolios_ret_df = pd.DataFrame(np.vstack([pf for pf in mean_portfolios_ret.values()])).T
mean_portfolios_ret_df.columns = mean_portfolios_ret.keys()
mean_portfolios_ret_df.index = mean_portfolios_ret['rev1_size1'].index
# Newey-West adjustment
mean_values = {}
t_values = {}
for k in mean_portfolios_ret.keys():
y = mean_portfolios_ret[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=4)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['ret_mean','t_values'],columns=mean_values.keys())
cols = ['beta','size','bm','mom','rev']
temp = ret_df.groupby('ret_date').apply(fm_reg, cols=cols)
reg_result_df = pd.DataFrame(temp.values.tolist())
reg_result_df.index=temp.index
reg_result_df.columns = ['intercept'] + cols
reg_result_df.dropna(inplace=True)
# Mean of coefs with NW adjustment
mean_values = {}
t_values = {}
for k in reg_result_df.columns:
y = reg_result_df[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['ret_mean','t_values'],columns=mean_values.keys())
portfolios_vwret = {}
for pf in portfolios.keys():
temp = portfolios[pf].groupby('ret_date')['mktcap'].agg({'mktcap_sum':np.sum})
portfolios[pf] = pd.merge(portfolios[pf], temp, on='ret_date')
portfolios[pf]['weight'] = portfolios[pf]['mktcap'] / portfolios[pf]['mktcap_sum']
portfolios[pf]['weighted_exret'] = portfolios[pf]['exret'] * portfolios[pf]['weight']
portfolios_vwret[pf] = portfolios[pf].groupby('ret_date')['weighted_exret'].sum()
portfolios_vwret_df = pd.DataFrame(np.vstack([pf for pf in portfolios_vwret.values()])).T
portfolios_vwret_df.index = portfolios_vwret['rev1_size1'].index
portfolios_vwret_df.columns = portfolios_vwret.keys()
portfolios_vwret_df.rename(columns={"rev1_size1": "Small_LowRet",
"rev2_size1": "Small_MedRet",
"rev3_size1": "Small_HighRet",
"rev1_size2": "Big_LowRet",
"rev2_size2": "Big_MedRet",
"rev3_size2": "Big_HighRet"},
inplace=True)
portfolios_vwret_df
rev_df = (portfolios_vwret_df['Small_LowRet'] + portfolios_vwret_df['Big_LowRet']) / 2 - \
(portfolios_vwret_df['Small_HighRet'] + portfolios_vwret_df['Big_HighRet']) / 2
rev_df = rev_df.reset_index()
rev_df.columns = ['ret_date','rev']
rev_df
factors_df = pd.read_csv('./output_data/factors/ff3.csv')
factors_df
factors_df['ret_date'] = pd.to_datetime(factors_df['ret_date'])
factors_df['ret_date'] = factors_df['ret_date'].dt.to_period('M')
factors_df = pd.merge(factors_df, rev_df, on='ret_date')
factors_df['ret_date'] = factors_df['ret_date'].dt.to_timestamp(freq='day',how='end').dt.normalize()
factors_df.set_index('ret_date',inplace=True)
factors_df
factors_df.to_csv('./output_data/factors/ff3_rev.csv')
factors_df.to_pickle('./output_data/factors/ff3_rev.pkl')
((1 + factors_df).cumprod()*100).plot()
((1 + factors_df['2018':]).cumprod()*100).plot()
rev_long_df = (portfolios_vwret_df['Small_LowRet'] + portfolios_vwret_df['Big_LowRet']) / 2
rev_long_df = rev_long_df.reset_index()
rev_long_df.columns=['ret_date','rev_long']
rev_long_df
factors_long_df = pd.read_csv('./output_data/factors/ff3_long_only.csv')
factors_long_df['ret_date'] = pd.to_datetime(factors_long_df['ret_date'])
factors_long_df['ret_date'] = factors_long_df['ret_date'].dt.to_period('M')
factors_long_df = pd.merge(factors_long_df, rev_long_df, on='ret_date')
factors_long_df['ret_date'] = factors_long_df['ret_date'].dt.to_timestamp(freq='day',how='end').dt.normalize()
factors_long_df.set_index('ret_date',inplace=True)
factors_long_df
((1 + factors_long_df).cumprod()*100).plot()
((1 + factors_long_df['2018':]).cumprod()*100).plot()
factors_long_df.to_csv('./output_data/factors/ff3_rev_long_only.csv')
factors_long_df.to_pickle('./output_data/factors/ff3_rev_long_only.pkl')