import numpy as np
import pandas as pd
import datetime as dt
pd.set_option('display.max_rows', 16)
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (16.0, 9.0)
import seaborn as sns
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
import gc
START = '2007-01-01'
END = '2024-04-15'
# Security Id
stk_info = DataAPI.SecIDGet(assetClass="E",pandas="1")
cond1 = (stk_info['exchangeCD'] == 'XSHE') | (stk_info['exchangeCD'] == 'XSHG')
cond2 = (stk_info['listStatusCD'] == 'L') | (stk_info['listStatusCD'] == 'DE')
cond3 = stk_info['transCurrCD']=='CNY'
stk_info = stk_info[cond1 & cond2 & cond3].copy()
stk_id = stk_info['secID']
stk_id.nunique()
st_df = DataAPI.SecSTGet(beginDate=START,endDate=END,secID=stk_id,field=['secID','tradeDate','STflg'],pandas="1")
st_df['tradeDate'] = pd.to_datetime(st_df['tradeDate'],format="%Y-%m-%d")
shibor_df = DataAPI.MktIborGet(secID="Shibor1M.IRCN",beginDate=START,endDate=END,field=['secID','tradeDate','rate'],pandas="1")
shibor_df['rate'] = shibor_df['rate']*0.01/12  # annualized percentage rate -> monthly decimal rate
shibor_df['tradeDate'] = pd.to_datetime(shibor_df['tradeDate'])
shibor_df.drop('secID',axis=1,inplace=True)
shibor_df.rename(columns={'rate':'rf'},inplace=True)
shibor_df['ym'] = shibor_df['tradeDate'].dt.to_period('M')
shibor_df.sort_values('tradeDate',inplace=True)
shibor_df_m = shibor_df.groupby('ym',as_index=False).last()
shibor_df_m.drop('tradeDate',axis=1,inplace=True)
shibor_df_m
beta_df = pd.read_pickle('./data/beta_df.pkl')
beta_df['tradeDate'] = pd.to_datetime(beta_df['tradeDate'], format="%Y-%m-%d")
beta_df['ym'] = beta_df['tradeDate'].dt.to_period('M')
beta_df.drop(['Beta60','Beta120'],axis=1,inplace=True)
beta_df['Beta252'] = pd.to_numeric(beta_df['Beta252'])
# Winsorization
# up_q = 0.99999
# lower_q = 0.00001
# beta_df['Beta252_winsor'] = beta_df['Beta252'].clip(lower=beta_df['Beta252'].quantile(lower_q),upper=beta_df['Beta252'].quantile(up_q))
# Monthly
beta_df_m = beta_df.groupby(['secID','ym'],as_index=False)['Beta252'].last()
beta_df_m.rename(columns={'Beta252':'beta'},inplace=True)
beta_df_m
pb_df = pd.read_pickle('./data/pb_df.pkl')
pb_df['tradeDate'] = pd.to_datetime(pb_df['tradeDate'])
pb_df['PB'] = pd.to_numeric(pb_df['PB'])
pb_df['ym'] = pb_df['tradeDate'].dt.to_period('M')
pb_df.sort_values(['secID','tradeDate'],inplace=True)
pb_df = pb_df.groupby(['secID','ym'],as_index=False).last()
pb_df['bm'] = 1 / pb_df['PB']  # book-to-market is the reciprocal of price-to-book
pb_df.drop(['tradeDate','PB'],axis=1,inplace=True)
pb_df = pb_df[pb_df['bm'] >= 0]
pb_df
# %%time
# # Takes about 10 mins
# # Download daily stock data from Uqer (DataAPI); this takes a while. Due to Uqer's limits, download the data a few years at a time.
# stk_dict = {}
# begin_ = dt.datetime.strptime(START, '%Y-%m-%d').year
# end_ = dt.datetime.strptime(START, '%Y-%m-%d').year+3
# field = ['secID','tradeDate','preClosePrice','closePrice','negMarketValue','turnoverValue','turnoverRate']
# while begin_ <= 2024:
# if begin_ == 2024:
# yesterday = dt.datetime.today() - dt.timedelta(days=1)
#         yesterday = yesterday.strftime('%Y%m%d')
# stk_dict[begin_] = DataAPI.MktEqudAdjAfGet(secID=stk_id,
# beginDate=f'{begin_}0101',
# endDate=yesterday,
# field=field,pandas="1")
# else:
# stk_dict[begin_] = DataAPI.MktEqudAdjAfGet(secID=stk_id,
# beginDate=f'{begin_}0101',
# endDate=f'{end_}1231',
# field=field,pandas="1")
# begin_ = end_ + 1
# end_ = begin_ + 3
# stk_df = pd.DataFrame(np.vstack([_df for _df in stk_dict.values()]), columns=field)  # stack all chunks into one frame
# stk_df.to_pickle('./data/stk_df.pkl')
# stk_df = pd.read_pickle('./data/stk_df.pkl')
# stk_df['tradeDate'] = pd.to_datetime(stk_df['tradeDate'], format='%Y-%m-%d')
# stk_df.sort_values(['secID','tradeDate'],inplace=True)
# # drop ST stocks
# print(stk_df.shape)
# stk_df = pd.merge(stk_df, st_df, on=['secID','tradeDate'],how='left')
# stk_df = stk_df[stk_df['STflg'].isna()].copy()
# stk_df.drop('STflg',axis=1,inplace=True)
# print(stk_df.shape)
# num_cols = ['preClosePrice','closePrice','negMarketValue','turnoverValue','turnoverRate']
# for col in num_cols:
# stk_df[col] = pd.to_numeric(stk_df[col])
# stk_df.info()
# # If trading days are required to be consecutive, fill the missing days first. With daily data, this can
# # produce a much larger dataframe if some securities have many missing dates.
# def fill_missing(df, full_dates, id_col='secID', date_col='tradeDate'):
# """
# This function fills the missing dates for stocks.
# Parameters:
# df: The dataframe. Could be a sub-dataframe created by "groupby".
# The dataframe must be sorted on the "date_col".
# full_dates: the unique dates covering all securities in the full dataframe.
# Need to be sorted.
# id_col: the security id.
# date_col: the dates column for the security
# Returns:
# A dataframe with the missing dates filled with NA.
# """
# stk_id = df[id_col].unique()
# # Newer version of pandas will allow comparison between "Timestamp" and "datetime64"
# # date_start = np.where(full_dates == df[date_col].min())[0][0]
# # date_end = np.where(full_dates == df[date_col].max())[0][0]
# date_start = np.where(full_dates == df[date_col].min().to_datetime64())[0][0]
# date_end = np.where(full_dates == df[date_col].max().to_datetime64())[0][0]
# dates = full_dates[date_start:date_end+1]
# idx = pd.MultiIndex.from_product([stk_id,dates],
# names=(id_col,date_col))
# df = df.set_index([id_col,date_col]).reindex(idx).reset_index()
# return df
# full_dates = np.sort(stk_df['tradeDate'].unique())
# full_dates
# %%time
# stk_df = stk_df.groupby('secID').apply(fill_missing, full_dates=full_dates)
# # About 1min
Amihud's (2002) illiquidity measure is perhaps the most widely known.
$$ illiq_i = \frac{1}{D} \sum^{D}_{d=1} \frac{|R_{i,d}|}{VOLD_{i,d}} $$

where $D$ is the number of trading days in the window and $VOLD_{i,d}$ is the dollar volume of stock $i$ on day $d$, measured as close price times shares traded.
Intuition: the measure captures the price impact of trading volume. If a small volume moves the price significantly, the stock is quite illiquid; if even a large volume barely moves the price, it is liquid.
Period choice: Amihud (2002) uses daily data from month $t-11$ through month $t$ (12 months) to construct the measure for month $t$.
We construct two versions of the measure, one over 1 month and one over 12 months, using the actual dollar volume within each day.
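A minimal sketch of the monthly computation on made-up numbers (the column names mirror those used below):
import numpy as np
import pandas as pd
toy = pd.DataFrame({
    'secID': ['A']*3 + ['B']*3,
    'tradeDate': pd.to_datetime(['2020-01-02','2020-01-03','2020-01-06']*2),
    'ret_daily': [0.01, -0.02, 0.005, 0.03, -0.01, 0.02],  # made-up daily returns
    'turnoverValue': [1e8, 2e8, 1.5e8, 5e6, 4e6, 6e6],     # made-up daily dollar volume
})
toy['illiq_daily'] = toy['ret_daily'].abs() / toy['turnoverValue']  # |R|/VOLD
toy['ym'] = toy['tradeDate'].dt.to_period('M')
# Monthly Amihud measure: within-month mean of |R|/VOLD (scaled by 1e6, as below)
print(toy.groupby(['secID','ym'])['illiq_daily'].mean() * 1e6)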
# stk_df
# stk_df.reset_index(drop=True, inplace=True)
# stk_df['ret_daily'] = stk_df['closePrice'] / stk_df['preClosePrice'] - 1
# stk_df['illiq_daily'] = abs(stk_df['ret_daily'].values) / stk_df['turnoverValue'].values
# stk_df.loc[np.isinf(stk_df['illiq_daily']),'illiq_daily'] = np.nan  # zero-turnover days give inf
# stk_df['ym'] = stk_df['tradeDate'].dt.to_period('M')
# stk_df.to_pickle('./data/stk_df_filled.pkl')
stk_df = pd.read_pickle('./data/stk_df_filled.pkl')
stk_df.loc[np.isinf(stk_df['illiq_daily'])]
stk_df[(stk_df['secID']=='000001.XSHE') & (stk_df['tradeDate']>='2010-06-01') & (stk_df['tradeDate']<='2010-09-05')]
stk_df_m = stk_df.groupby(['secID','ym'],as_index=False).last()
stk_df_m['ret'] = stk_df_m.groupby('secID')['closePrice'].pct_change()
stk_df_m['size'] = np.log(stk_df_m['negMarketValue'])
stk_df_m.drop(['preClosePrice'],axis=1,inplace=True)
stk_df_m = pd.merge(stk_df_m, shibor_df_m, on='ym')
stk_df_m['exret'] = stk_df_m['ret'] - stk_df_m['rf']
stk_df_m.sort_values(['secID','ym'],inplace=True)
stk_df_m.rename(columns={'negMarketValue':'mktcap'},inplace=True)
stk_df_m
stk_unfilled_df = pd.read_pickle('./data/stk_df.pkl')
stk_unfilled_df['tradeDate'] = pd.to_datetime(stk_unfilled_df['tradeDate'], format='%Y-%m-%d')
stk_unfilled_df['ym'] = stk_unfilled_df['tradeDate'].dt.to_period('M')
stk_unfilled_df.sort_values(['secID','tradeDate'],inplace=True)
# drop ST stocks
print(stk_unfilled_df.shape)
stk_unfilled_df = pd.merge(stk_unfilled_df, st_df, on=['secID','tradeDate'],how='left')
stk_unfilled_df = stk_unfilled_df[stk_unfilled_df['STflg'].isna()].copy()
stk_unfilled_df.drop('STflg',axis=1,inplace=True)
print(stk_unfilled_df.shape)
# Monthly
stk_unfilled_df_m = stk_unfilled_df.groupby(['secID','ym'],as_index=False).last()
stk_unfilled_df_m['ret_mom'] = stk_unfilled_df_m.groupby('secID')['closePrice'].pct_change() # ret_mom is only used for the momentum calculation; the monthly ret computed above is kept for returns
stk_unfilled_df_m.sort_values(['secID','ym'],inplace=True)
stk_unfilled_df_m['1+ret_mom'] = stk_unfilled_df_m['ret_mom'] + 1
stk_unfilled_df_m
stk_unfilled_df_m['mom'] = stk_unfilled_df_m.groupby('secID').rolling(11,min_periods=9)['1+ret_mom'].apply(np.prod, raw=True).values - 1
stk_unfilled_df_m
stk_df_m = pd.merge(stk_df_m, stk_unfilled_df_m[['secID','ym','1+ret_mom']],on=['secID','ym'],how='left')
stk_df_m[(stk_df_m['secID']=='000001.XSHE') & (stk_df_m['ym']>='2010-06') & (stk_df_m['ym']<='2010-10')]
stk_df_m[(stk_df_m['secID']=='000007.XSHE')&(stk_df_m['ym']>='2021-02')]
stk_df_m.loc[stk_df_m['1+ret_mom'].isna(),'1+ret_mom'] = 1 # fill missing months with 1 so the cumulative product is unaffected
stk_df_m['mom'] = stk_df_m.groupby('secID').rolling(11,min_periods=11)['1+ret_mom'].apply(np.prod, raw=True).values - 1
stk_df_m
stk_df_m['rev'] = stk_df_m['exret'].values  # reversal: this month's excess return
stk_df_m['ret'] = stk_df_m.groupby(['secID'])['ret'].shift(-1)      # next month's return
stk_df_m['rf'] = stk_df_m.groupby(['secID'])['rf'].shift(-1)
stk_df_m['exret'] = stk_df_m.groupby(['secID'])['exret'].shift(-1)  # LHS variable: next month's excess return
stk_df_m['ret_date'] = stk_df_m.groupby('secID')['ym'].shift(-1)
stk_df_m['mom'] = stk_df_m.groupby(['secID'])['mom'].shift()        # momentum through month t-1, skipping the most recent month
stk_df_m['mom_date'] = stk_df_m.groupby('secID')['ym'].shift()
stk_df_m.drop(['ret_daily','turnoverValue','turnoverRate','illiq_daily','1+ret_mom'],axis=1,inplace=True)
stk_df_m
MIN_NOBS = 15
illiq_df = stk_df.groupby(['secID','ym'],as_index=False)['illiq_daily'].mean()
idx = stk_df.groupby(['secID','ym'],as_index=False)['ret_daily'].count()['ret_daily'] < MIN_NOBS  # months with too few valid daily observations
illiq_df.loc[idx, 'illiq_daily'] = np.nan
illiq_df.rename(columns={'illiq_daily':'illiq_1m'},inplace=True)
illiq_df
illiq_df['illiq_1m'] = illiq_df['illiq_1m'] * 1e6  # scale by 1e6: average |R| per million yuan of volume
illiq_df
illiq_df.loc[illiq_df['illiq_1m'].isna()]
illiq_df['illiq_1m'].dropna().describe()
# # This code chooses 12 calendar months, instead of a fixed number of trading days.
# def amihud_illiq(df, len_periods='12m', min_nobs=200):
# """
#     df: year_month is set as the index; this is faster than boolean masking.
# """
# year_months = df.index.unique()
# n = len(year_months)
# illiq = np.full(shape=n, fill_value=np.nan)
# start_notnan_month = int(len_periods[:-1])-1
# for i in range(start_notnan_month,n):
# df_ = df[str(year_months[i]-int(len_periods[:-1])+1) : str(year_months[i])].to_numpy()
# if len(df_) < min_nobs:
# continue
# else:
# illiq[i] = df_[:,-1].mean()
# illiq_df = pd.DataFrame({'year_month': year_months, f'illiq{len_periods}': illiq})
# return illiq_df
stk_df['illiq_240d'] = stk_df.groupby(['secID']).rolling(240, min_periods=200)['illiq_daily'].mean().values  # 240 trading days, roughly 12 months
illiq_df['illiq_12m'] = stk_df.groupby(['secID','ym'])['illiq_240d'].last().values
illiq_df['illiq_12m'] = illiq_df['illiq_12m']*1e6
illiq_df
ret_df = pd.merge(stk_df_m[['secID','ret_date','tradeDate','ret','rf','exret','ym','mktcap','size','rev','mom_date','mom']],
beta_df_m[['secID','ym','beta']], on=['secID','ym'],how='left')
ret_df = pd.merge(ret_df, pb_df, on=['secID','ym'],how='left')
ret_df = pd.merge(ret_df, illiq_df, on=['secID','ym'],how='left')
ret_df.head()
gc.collect()
q = dict()
keys = ['q'+str(i) for i in range(1, 10)]
values = np.arange(0.1, 1.0, 0.1)
q.update(zip(keys,values))
quantile_df = pd.DataFrame()
for key, value in q.items():
quantile_df[key] = ret_df.groupby(['ym'])['illiq_12m'].quantile(value)
# quantile_df[key] = ret_df.dropna().groupby(['ym'])['illiq_12m'].quantile(value)
ret_df_q = pd.merge(ret_df, quantile_df, on='ym')
# ret_df_q = pd.merge(ret_df.dropna(), quantile_df, on='ym')
portfolios = dict()
drop_cols = [col for col in ret_df_q.columns if col[0]=='q']
portfolios['p1'] = ret_df_q.loc[ret_df_q['illiq_12m'] <= ret_df_q['q1']].copy().drop(drop_cols, axis=1)
for i in range(2,10):
idx = (ret_df_q[f'q{i-1}'] <= ret_df_q['illiq_12m']) & (ret_df_q['illiq_12m'] <= ret_df_q[f'q{i}'])
portfolios[f'p{i}'] = ret_df_q.loc[idx].copy().drop(drop_cols, axis=1)
portfolios['p10'] = ret_df_q.loc[ret_df_q['illiq_12m'] >= ret_df_q['q9']].copy().drop(drop_cols, axis=1)
portfolios_crs_mean = dict()
for k in portfolios.keys():
portfolios_crs_mean[k] = portfolios[k].groupby(['ret_date'])['exret'].mean()
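# Newey-West mean/t-stat: regressing a return series on a constant makes the intercept its time-series mean; HAC standard errors give a robust t-value.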
mean_values = {}
t_values = {}
for k in portfolios_crs_mean.keys():
y = portfolios_crs_mean[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
# Portfolio 10-1
y = portfolios_crs_mean['p10'] - portfolios_crs_mean['p1']
const = np.full(shape=len(y), fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values['p10-p1'] = reg.params[0]
t_values['p10-p1'] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['mean','t-value'],
columns=mean_values.keys())
q = dict()
keys = ['q'+str(i) for i in range(1, 10)]
values = np.arange(0.1, 1.0, 0.1)
q.update(zip(keys,values))
quantile_df = pd.DataFrame()
for key, value in q.items():
quantile_df[key] = ret_df.groupby(['ym'])['illiq_1m'].quantile(value)
ret_df_q = pd.merge(ret_df, quantile_df, on='ym')
portfolios = dict()
drop_cols = [col for col in ret_df_q.columns if col[0]=='q']
portfolios['p1'] = ret_df_q.loc[ret_df_q['illiq_1m'] <= ret_df_q['q1']].copy().drop(drop_cols, axis=1)
for i in range(2,10):
idx = (ret_df_q[f'q{i-1}'] <= ret_df_q['illiq_1m']) & (ret_df_q['illiq_1m'] <= ret_df_q[f'q{i}'])
portfolios[f'p{i}'] = ret_df_q.loc[idx].copy().drop(drop_cols, axis=1)
portfolios['p10'] = ret_df_q.loc[ret_df_q['illiq_1m'] >= ret_df_q['q9']].copy().drop(drop_cols, axis=1)
portfolios_crs_mean = dict()
for k in portfolios.keys():
portfolios_crs_mean[k] = portfolios[k].groupby(['ret_date'])['exret'].mean()
mean_values = {}
t_values = {}
for k in portfolios_crs_mean.keys():
y = portfolios_crs_mean[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
# Portfolio 10-1
y = portfolios_crs_mean['p10'] - portfolios_crs_mean['p1']
const = np.full(shape=len(y), fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values['p10-p1'] = reg.params[0]
t_values['p10-p1'] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['mean','t-value'],
columns=mean_values.keys())
pd.DataFrame(portfolios_crs_mean)[['p1','p10']].plot()
ret_df.rename(columns={'illiq_1m':'illiq'},inplace=True)
from myutils.factor_func import double_sort, factor, daily_factor
double_sort?
portfolios = double_sort(ret_df, sort1='illiq')
portfolios
mean_portfolios_ret = dict()
for pf in portfolios.keys():
mean_portfolios_ret[pf] = portfolios[pf].groupby('ret_date')['exret'].mean()
    # print(mean_portfolios_ret[pf].shape)  # check whether some month has no stocks in an illiq-size intersection
# Fast merge by stacking
mean_portfolios_ret_df = pd.DataFrame(np.vstack([pf for pf in mean_portfolios_ret.values()])).T
mean_portfolios_ret_df.columns = mean_portfolios_ret.keys()
mean_portfolios_ret_df.index = mean_portfolios_ret['illiq1_size1'].index
# Newey-West adjustment
mean_values = {}
t_values = {}
for k in mean_portfolios_ret.keys():
y = mean_portfolios_ret[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=4)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['ret_mean','t_values'],columns=mean_values.keys())
mean_portfolios_ret_df.plot()
# Within mktcap1, any difference in illiq groups?
pfs = mean_portfolios_ret_df.columns
cols = list(pfs[pfs.str[-5:] == 'size1'])
mean_portfolios_ret_df[cols].plot()
# Within mktcap2, any difference in illiq groups?
pfs = mean_portfolios_ret_df.columns
cols = list(pfs[pfs.str[-5:] == 'size2'])
mean_portfolios_ret_df[cols].plot()
def fm_reg(df,cols):
    # Cross-sectional (Fama-MacBeth) regression for one month: returns [intercept] + factor coefficients
df_ = df.dropna()
if df_.shape[0] < 15:
return [None]*(len(cols)+1)
reg = LinearRegression(fit_intercept=True).fit(y=df_.loc[:,'exret'], X=df_.loc[:,cols])
return np.insert(reg.coef_, 0, reg.intercept_)
cols = ['illiq']
temp = ret_df.groupby('ret_date').apply(fm_reg, cols=cols)
reg_result_df = pd.DataFrame(temp.values.tolist())
reg_result_df.index=temp.index
reg_result_df.columns = ['intercept'] + cols
reg_result_df.dropna(inplace=True)
# Mean of coefs with NW adjustment
mean_values = {}
t_values = {}
for k in reg_result_df.columns:
y = reg_result_df[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['ret_mean','t_values'],columns=mean_values.keys())
cols = ['beta','size','bm','mom','rev','illiq']
temp = ret_df.groupby('ret_date').apply(fm_reg, cols=cols)
reg_result_df = pd.DataFrame(temp.values.tolist())
reg_result_df.index=temp.index
reg_result_df.columns = ['intercept'] + cols
reg_result_df.dropna(inplace=True)
# Mean of coefs with NW adjustment
mean_values = {}
t_values = {}
for k in reg_result_df.columns:
y = reg_result_df[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['ret_mean','t_values'],columns=mean_values.keys())
illiq_df = factor(ret_df, sort1='illiq', long_only=False)
illiq_df
factors_df = pd.read_pickle('./output_data/factors/ff3_rev.pkl')
factors_df.index = factors_df.index.to_period('M')
factors_df = pd.merge(factors_df,illiq_df,on='ret_date')
factors_df
rf_df = ret_df[['ret_date','rf']].drop_duplicates().sort_values('ret_date').dropna()
factors_df = pd.merge(rf_df,factors_df,on='ret_date').set_index('ret_date')
((factors_df+1).cumprod()*100).plot()
((factors_df['2015':]+1).cumprod()*100).plot()
(np.log(1 + factors_df).cumsum()*100).plot()
illiq_long_df = factor(ret_df, sort1='illiq')
illiq_long_df
factors_long_df = pd.read_pickle('./output_data/factors/ff3_rev_long_only.pkl')
factors_long_df.index = factors_long_df.index.to_period('M')
factors_long_df = pd.merge(factors_long_df,illiq_long_df,on='ret_date')
factors_long_df = pd.merge(rf_df,factors_long_df,on='ret_date').set_index('ret_date')
factors_long_df.rename(columns={'illiq':'illiq_long'},inplace=True)
factors_long_df
((factors_long_df+1).cumprod()*100).plot()
((1 + factors_long_df['2020':]).cumprod()*100).plot()
stk_df
vol_df = stk_df.groupby(['secID','ym'])['ret_daily'].std()
vol_df
vol_df = vol_df.to_frame()
vol_df.columns = ['vol']
vol_df.reset_index(inplace=True)
MIN_NOBS = 15
idx = stk_df.groupby(['secID','ym'],as_index=False)['tradeDate'].count()['tradeDate'] < MIN_NOBS
vol_df.loc[idx, 'vol'] = np.nan
vol_df
The next part requires daily factor returns, so run 5-factors.ipynb first.
Idiosyncratic volatility (ivol) is the volatility of the residual after regressing returns on existing factor returns. Traditional finance theory treats the residual as diversifiable risk that should not be priced. Even if one believes the factor model is incomplete and the residual still contains some unexplained systematic risk, the risk premium on that part (the factor risk premium) should be positive. Yet US data show that lower volatility predicts higher future returns, which is a very interesting anomaly.
The best-known reference is Ang, Hodrick, Xing, and Zhang (2006).
Each month, we compute ivol from the factor model and use it for sorting.
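A minimal sketch of one month's ivol on simulated data (a stripped-down version of the idiovol function defined below; the second factor name is just a placeholder):
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
rng = np.random.default_rng(0)
n = 21  # roughly one month of trading days
fac = pd.DataFrame(rng.normal(0, 0.01, (n, 2)), columns=['exmktret','smb'])  # simulated factor returns
exret = 0.8*fac['exmktret'] + 0.3*fac['smb'] + rng.normal(0, 0.005, n)       # simulated stock excess returns
reg = LinearRegression().fit(X=fac, y=exret)
res = exret - reg.predict(fac)  # residual relative to the factor model
print(np.std(res))              # idiosyncratic volatility for the month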
pd.read_pickle('./output_data/factors/factors_daily.pkl')
shibor1d = DataAPI.MktIborGet(secID="Shibor1D.IRCN",beginDate=START,endDate=END,field=['tradeDate','rate'],pandas="1")
shibor1d['tradeDate'] = pd.to_datetime(shibor1d['tradeDate'])
shibor1d['rate'] = shibor1d['rate'] * 0.01 / 365  # annualized percentage rate -> daily decimal rate
shibor1d.rename(columns={'rate':'rf'},inplace=True)
factors_daily = pd.read_pickle('./output_data/factors/factors_daily.pkl')
illiq_daily = daily_factor(ret_df, stock_df=stk_df,sort1='illiq',long_only=False)
illiq_daily
illiq_daily.name = 'illiq'
factors_daily = pd.merge(factors_daily, illiq_daily, on='tradeDate')
factors_daily
def idiovol_np(df, factor_cols, len_periods='1m', min_nobs=15):
"""
df: year_month is set as index
"""
year_months = df.index.unique()
n = len(year_months)
idiovol = np.full(shape=n, fill_value=np.nan)
start_notnan_month = int(len_periods[:-1])-1
for i in range(start_notnan_month,n):
df_ = df.loc[str(year_months[i]-int(len_periods[:-1])+1) : str(year_months[i]),['exret']+factor_cols].dropna().to_numpy()
if len(df_) < min_nobs:
continue
else:
            y_ = df_[:,0]  # first column is exret
X_ = df_[:,1:]
reg = LinearRegression().fit(y=y_, X=X_)
res = y_ - reg.predict(X_)
idiovol[i] = np.std(res)
idiovol_df = pd.DataFrame({'ym': year_months, f'idiovol{len_periods}': idiovol})
return idiovol_df
def idiovol(df,factor_cols,min_nobs=15):
df = df.dropna()
y = df.loc[:,'exret']
X = df.loc[:,factor_cols]
nobs = len(X)
if nobs < min_nobs:
idiovol = np.nan
else:
reg = LinearRegression().fit(y=y,X=X)
res = y - reg.predict(X)
idiovol = np.std(res)
return idiovol
reg_df = pd.merge(stk_df[['secID','tradeDate','ret_daily']],factors_daily,
on=['tradeDate'])
reg_df
reg_df.sort_values(['secID','tradeDate'],inplace=True)
reg_df
reg_df['exret'] = reg_df['ret_daily'] - reg_df['rf']
reg_df['ym'] = reg_df['tradeDate'].dt.to_period('M')
# Timing test: compare the two implementations on a random sample of 50 stocks
ids = reg_df['secID'].unique()
ids_ = np.random.choice(ids, 50, replace=False)  # sample without replacement
temp = reg_df[reg_df['secID'].isin(ids_)]
temp.set_index('ym',inplace=True)
temp
%%time
result1 = temp.groupby(['secID','ym']).apply(idiovol, factor_cols=['exmktret','size','bm','mom','rev','illiq'])
%%time
result2 = temp.groupby(['secID']).apply(idiovol_np, factor_cols=['exmktret','size','bm','mom','rev','illiq'])
result1
result2
result1 = result1.to_frame().reset_index()
result1.columns = ['secID','ym','r1']
result2 = result2.reset_index().drop('level_1',axis=1)
result = pd.merge(result1, result2, on=['secID','ym'])
result
(result['r1'] - result['idiovol1m']).sum()
result['r1'].to_numpy()
result['idiovol1m'].to_numpy()
np.array_equal(result['r1'].to_numpy(), result['idiovol1m'].to_numpy())
np.allclose(result['r1'].to_numpy(), result['idiovol1m'].to_numpy())
result.fillna(0, inplace=True)
np.array_equal(result['r1'].to_numpy(), result['idiovol1m'].to_numpy())
np.allclose(result['r1'].to_numpy(), result['idiovol1m'].to_numpy())
reg_df.set_index('ym',inplace=True)
reg_df
reg_df['secID'].nunique()
# %%time
# ivol = reg_df.groupby(['secID']).apply(idiovol_np, factor_cols=['exmktret','size','bm','mom','rev','illiq'])
# # Takes about 32min to execute
# ivol = ivol.reset_index().drop('level_1',axis=1)
# ivol
# ivol.to_pickle('./data/idiovol1m.pkl')
ivol = pd.read_pickle('./data/idiovol1m.pkl')
vol_df = pd.merge(vol_df,ivol,on=['secID','ym'])
vol_df
vol_df.loc[vol_df['ym']>='2018',['ym','vol']].boxplot(by='ym',showmeans=True)
vol_df['vol_clip'] = vol_df['vol'].clip(lower=vol_df['vol'].quantile(0.001),
upper=vol_df['vol'].quantile(0.999))
vol_df['ivol_clip'] = vol_df['idiovol1m'].clip(lower=vol_df['idiovol1m'].quantile(0.001),
upper=vol_df['idiovol1m'].quantile(0.999))
vol_df.loc[vol_df['ym']>='2018',['ym','vol_clip']].boxplot(by='ym',showmeans=True,rot=45)
vol_df.loc[vol_df['ym']>='2018',['ym','ivol_clip']].boxplot(by='ym',showmeans=True,rot=45)
vol_df.loc[:,'vol':'ivol_clip'].describe()
vol_df.rename(columns={'idiovol1m':'ivol'},inplace=True)
ret_df = pd.merge(ret_df,vol_df,
on=['secID','ym'])
ret_df
def single_sort(df, sort):
    """Decile-sort on `sort` within each month; report mean excess returns and NW t-stats, including the 10-1 portfolio."""
q = dict()
keys = ['q'+str(i) for i in range(1, 10)]
values = np.arange(0.1, 1.0, 0.1)
q.update(zip(keys,values))
quantile_df = pd.DataFrame()
for key, value in q.items():
quantile_df[key] = df.groupby(['ym'])[sort].quantile(value)
df_q = pd.merge(df, quantile_df, on='ym')
portfolios = dict()
drop_cols = [col for col in df_q.columns if col[0]=='q']
portfolios['p1'] = df_q.loc[df_q[sort] <= df_q['q1']].copy().drop(drop_cols, axis=1)
for i in range(2,10):
idx = (df_q[f'q{i-1}'] <= df_q[sort]) & (df_q[sort] <= df_q[f'q{i}'])
portfolios[f'p{i}'] = df_q.loc[idx].copy().drop(drop_cols, axis=1)
portfolios['p10'] = df_q.loc[df_q[sort] >= df_q['q9']].copy().drop(drop_cols, axis=1)
portfolios_crs_mean = dict()
for k in portfolios.keys():
portfolios_crs_mean[k] = portfolios[k].groupby(['ret_date'])['exret'].mean()
mean_values = {}
t_values = {}
for k in portfolios_crs_mean.keys():
y = portfolios_crs_mean[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
# Portfolio 10-1
y = portfolios_crs_mean['p10'] - portfolios_crs_mean['p1']
const = np.full(shape=len(y), fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values['p10-p1'] = reg.params[0]
t_values['p10-p1'] = reg.tvalues[0]
display(pd.DataFrame([mean_values.values(),t_values.values()],index=['mean','t-value'],
columns=mean_values.keys()))
single_sort(ret_df, 'vol')
portfolios = double_sort(ret_df, sort1='vol')
mean_portfolios_ret = dict()
for pf in portfolios.keys():
mean_portfolios_ret[pf] = portfolios[pf].groupby('ret_date')['exret'].mean()
    print(mean_portfolios_ret[pf].shape)  # check whether some month has no stocks in a vol-size intersection
# Fast merge by stacking
mean_portfolios_ret_df = pd.DataFrame(np.vstack([pf for pf in mean_portfolios_ret.values()])).T
mean_portfolios_ret_df.columns = mean_portfolios_ret.keys()
mean_portfolios_ret_df.index = mean_portfolios_ret['vol1_size1'].index
# Newey-West adjustment
mean_values = {}
t_values = {}
for k in mean_portfolios_ret.keys():
y = mean_portfolios_ret[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=4)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['ret_mean','t_values'],columns=mean_values.keys())
cols = ['beta','size','bm','mom','rev','illiq','vol']
temp = ret_df.groupby('ret_date').apply(fm_reg, cols=cols)
reg_result_df = pd.DataFrame(temp.values.tolist())
reg_result_df.index=temp.index
reg_result_df.columns = ['intercept'] + cols
reg_result_df.dropna(inplace=True)
# Mean of coefs with NW adjustment
mean_values = {}
t_values = {}
for k in reg_result_df.columns:
y = reg_result_df[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['ret_mean','t_values'],columns=mean_values.keys())
cols = ['beta','size','bm','mom','rev','illiq','vol_clip']
temp = ret_df.groupby('ret_date').apply(fm_reg, cols=cols)
reg_result_df = pd.DataFrame(temp.values.tolist())
reg_result_df.index=temp.index
reg_result_df.columns = ['intercept'] + cols
reg_result_df.dropna(inplace=True)
# Mean of coefs with NW adjustment
mean_values = {}
t_values = {}
for k in reg_result_df.columns:
y = reg_result_df[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['ret_mean','t_values'],columns=mean_values.keys())
single_sort(ret_df, 'ivol')
portfolios = double_sort(ret_df, sort1='ivol')
mean_portfolios_ret = dict()
for pf in portfolios.keys():
mean_portfolios_ret[pf] = portfolios[pf].groupby('ret_date')['exret'].mean()
    print(mean_portfolios_ret[pf].shape)  # check whether some month has no stocks in an ivol-size intersection
# Fast merge by stacking
mean_portfolios_ret_df = pd.DataFrame(np.vstack([pf for pf in mean_portfolios_ret.values()])).T
mean_portfolios_ret_df.columns = mean_portfolios_ret.keys()
mean_portfolios_ret_df.index = mean_portfolios_ret['ivol1_size1'].index
# Newey-West adjustment
mean_values = {}
t_values = {}
for k in mean_portfolios_ret.keys():
y = mean_portfolios_ret[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=4)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['ret_mean','t_values'],columns=mean_values.keys())
cols = ['beta','size','bm','mom','rev','illiq','ivol_clip']
temp = ret_df.groupby('ret_date').apply(fm_reg, cols=cols)
reg_result_df = pd.DataFrame(temp.values.tolist())
reg_result_df.index=temp.index
reg_result_df.columns = ['intercept'] + cols
reg_result_df.dropna(inplace=True)
# Mean of coefs with NW adjustment
mean_values = {}
t_values = {}
for k in reg_result_df.columns:
y = reg_result_df[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['ret_mean','t_values'],columns=mean_values.keys())
cols = ['beta','size','bm','mom','rev','illiq','ivol_clip']
temp = ret_df[ret_df['ret_date']>='2015'].groupby('ret_date').apply(fm_reg, cols=cols)
reg_result_df = pd.DataFrame(temp.values.tolist())
reg_result_df.index=temp.index
reg_result_df.columns = ['intercept'] + cols
reg_result_df.dropna(inplace=True)
# Mean of coefs with NW adjustment
mean_values = {}
t_values = {}
for k in reg_result_df.columns:
y = reg_result_df[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['ret_mean','t_values'],columns=mean_values.keys())
cols = ['beta','size','bm','mom','rev','illiq','ivol_clip']
temp = ret_df[ret_df['ret_date']>='2018'].groupby('ret_date').apply(fm_reg, cols=cols)
reg_result_df = pd.DataFrame(temp.values.tolist())
reg_result_df.index=temp.index
reg_result_df.columns = ['intercept'] + cols
reg_result_df.dropna(inplace=True)
# Mean of coefs with NW adjustment
mean_values = {}
t_values = {}
for k in reg_result_df.columns:
y = reg_result_df[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['ret_mean','t_values'],columns=mean_values.keys())
ivol_factor = factor(ret_df, sort1='ivol',long_high=False,long_only=False)
ivol_factor
factors_df = pd.merge(factors_df,ivol_factor,on='ret_date',how='left')
ax = ((factors_df+1).cumprod()*100).plot(grid=True)
ax.set_facecolor("#fcf9eb")
ax.set_xlabel("")
fig = ax.get_figure()
fig.savefig('quant.png',bbox_inches='tight')
((factors_df['2018':]+1).cumprod()*100).plot()
factors_df.to_pickle('./output_data/factors/factors_all.pkl')
ret_df
ret_df.to_pickle('./output_data/factor_exposure/all_exposure.pkl')
cols = ['size','bm','rev','illiq','ivol']
ret_df[['secID','tradeDate','rev']].dropna().pivot(index='tradeDate',
columns='secID',values='rev')
temp = ret_df[['secID','tradeDate','size']].dropna().pivot(index='tradeDate',
columns='secID',values='size').reset_index()
temp['tradeDate'] = temp['tradeDate'].dt.strftime('%Y/%m/%d')
temp.set_index('tradeDate')
for c in cols:
temp = ret_df[['secID','tradeDate',c]].dropna().pivot(index='tradeDate',columns='secID',values=c)
temp.reset_index(inplace=True)
temp['tradeDate'] = temp['tradeDate'].dt.strftime('%Y/%m/%d')
temp.set_index('tradeDate',inplace=True)
temp.to_csv(f'./output_data/factor_exposure/{c}_exposure.csv')
factors_daily = pd.read_pickle('./output_data/factors/factors_daily_long_only.pkl')
illiq_daily = daily_factor(ret_df, stock_df=stk_df,sort1='illiq',long_only=True)
illiq_daily.name = 'illiq_long'
factors_daily = pd.merge(factors_daily, illiq_daily, on='tradeDate')
reg_df = pd.merge(stk_df[['secID','tradeDate','ret_daily']],factors_daily,
on=['tradeDate'])
reg_df.sort_values(['secID','tradeDate'],inplace=True)
reg_df['exret'] = reg_df['ret_daily'] - reg_df['rf']
reg_df['ym'] = reg_df['tradeDate'].dt.to_period('M')
reg_df.set_index('ym',inplace=True)
reg_df
gc.collect()
import sys
def sizeof_fmt(num, suffix='B'):
''' by Fred Cirera, https://stackoverflow.com/a/1094933/1870254, modified'''
for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']:
if abs(num) < 1024.0:
return "%3.1f %s%s" % (num, unit, suffix)
num /= 1024.0
return "%.1f %s%s" % (num, 'Yi', suffix)
for name, size in sorted(((name, sys.getsizeof(value)) for name, value in locals().items()),
key= lambda x: -x[1])[:10]:
print("{:>30}: {:>8}".format(name, sizeof_fmt(size)))
# Free memory: drop large frames and IPython's cached cell outputs (the _N variables)
del stk_unfilled_df, beta_df
del _119
del _121, _144, __, _186, _102
# %%time
# ivol = reg_df.groupby(['secID']).apply(idiovol_np, factor_cols=['exmktret','size_long','bm_long','mom_long','rev_long','illiq_long'])
# # Takes about 32min 25s to execute
# ivol = ivol.reset_index().drop('level_1',axis=1)
# display(ivol)
# ivol.to_pickle('./data/idiovol1m_long_only.pkl')
vol_df = stk_df.groupby(['secID','ym'])['ret_daily'].std()
vol_df = vol_df.to_frame()
vol_df.columns = ['vol']
vol_df.reset_index(inplace=True)
MIN_NOBS = 15
idx = stk_df.groupby(['secID','ym'],as_index=False)['tradeDate'].count()['tradeDate'] < MIN_NOBS
vol_df.loc[idx, 'vol'] = np.nan
ivol = pd.read_pickle('./data/idiovol1m_long_only.pkl')
vol_df = pd.merge(vol_df,ivol,on=['secID','ym'])
vol_df['vol_clip'] = vol_df['vol'].clip(lower=vol_df['vol'].quantile(0.001),
upper=vol_df['vol'].quantile(0.999))
vol_df['ivol_clip'] = vol_df['idiovol1m'].clip(lower=vol_df['idiovol1m'].quantile(0.001),
upper=vol_df['idiovol1m'].quantile(0.999))
vol_df.rename(columns={'idiovol1m':'ivol'},inplace=True)
try:
ret_df.drop(['vol','ivol','vol_clip','ivol_clip'],axis=1,inplace=True)
except KeyError:
pass
ret_df = pd.merge(ret_df,vol_df,
on=['secID','ym'])
ret_df
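# The commented block below is debugging scratch: tracing why some ivol portfolios have no stocks around 2015-10.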
# ivol.loc[ivol['ym']=='2015-09'].describe()
# temp = ivol.loc[ivol['ym']=='2015-09']
# temp.loc[temp['idiovol1m'].idxmin()]
# temp['secID']
# temp.loc[temp['idiovol1m'] <= temp['idiovol1m'].quantile(0.11)]
# ret_df.loc[ret_df['ret_date']=='2015-10']
# q = dict()
# keys = ['q'+str(i) for i in range(1, 10)]
# values = np.arange(0.1, 1.0, 0.1)
# q.update(zip(keys,values))
# quantile_df = pd.DataFrame()
# for key, value in q.items():
# quantile_df[key] = ret_df.groupby(['ym'])['ivol'].quantile(value)
# df_q = pd.merge(ret_df, quantile_df, on='ym')
# portfolios = dict()
# drop_cols = [col for col in df_q.columns if col[0]=='q']
# portfolios['p1'] = df_q.loc[df_q['ivol'] <= df_q['q1']].copy().drop(drop_cols, axis=1)
# for i in range(2,10):
# idx = (df_q[f'q{i-1}'] <= df_q['ivol']) & (df_q['ivol'] <= df_q[f'q{i}'])
# portfolios[f'p{i}'] = df_q.loc[idx].copy().drop(drop_cols, axis=1)
# portfolios['p10'] = df_q.loc[df_q['ivol'] >= df_q['q9']].copy().drop(drop_cols, axis=1)
# pf_n_stks.loc[pd.Period('2015-08'):pd.Period('2015-12')]
# ret_df.loc[ret_df['ym']=='2015-10']
# df_q.loc[df_q['ret_date']=='2015-10']
# pf_n_stks = pd.DataFrame()
# for key, value in portfolios.items():
# pf_n_stks[key] = portfolios[key].groupby('ret_date')['secID'].nunique()
# display(pf_n_stks)
# pf_n_stks.plot()
# portfolios['p1'].loc[portfolios['p1']['ret_date']=='2015-10']
# def single_sort(df, sort):
# q = dict()
# keys = ['q'+str(i) for i in range(1, 10)]
# values = np.arange(0.1, 1.0, 0.1)
# q.update(zip(keys,values))
# quantile_df = pd.DataFrame()
# for key, value in q.items():
# quantile_df[key] = df.groupby(['ym'])[sort].quantile(value)
# df_q = pd.merge(df, quantile_df, on='ym')
# portfolios = dict()
# drop_cols = [col for col in df_q.columns if col[0]=='q']
# portfolios['p1'] = df_q.loc[df_q[sort] <= df_q['q1']].copy().drop(drop_cols, axis=1)
# for i in range(2,10):
# idx = (df_q[f'q{i-1}'] <= df_q[sort]) & (df_q[sort] <= df_q[f'q{i}'])
# portfolios[f'p{i}'] = df_q.loc[idx].copy().drop(drop_cols, axis=1)
# portfolios['p10'] = df_q.loc[df_q[sort] >= df_q['q9']].copy().drop(drop_cols, axis=1)
# portfolios_crs_mean = dict()
# for k in portfolios.keys():
# portfolios_crs_mean[k] = portfolios[k].groupby(['ret_date'])['exret'].mean()
# mean_values = {}
# t_values = {}
# for k in portfolios_crs_mean.keys():
# y = portfolios_crs_mean[k]
# const = np.full(shape=len(y),fill_value=1)
# reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
# mean_values[k] = reg.params[0]
# t_values[k] = reg.tvalues[0]
# # Portfolio 10-1
# y = portfolios_crs_mean['p10'] - portfolios_crs_mean['p1']
# const = np.full(shape=len(y), fill_value=1)
# reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
# mean_values['p10-p1'] = reg.params[0]
# t_values['p10-p1'] = reg.tvalues[0]
# print(y.loc[y.isna()])
# # print(portfolios_crs_mean['p10'].loc[portfolios_crs_mean['p10'].isna()])
# # print(portfolios_crs_mean['p1'].loc[portfolios_crs_mean['p1'].isna()])
# print(portfolios_crs_mean['p10'].loc[pd.Period('2015-10')])
# # print(portfolios_crs_mean['p1'].loc[pd.Period('2015-10')])
# print(portfolios_crs_mean['p1'].loc[pd.Period('2015-09'):pd.Period('2015-12')])
# display(pd.DataFrame([mean_values.values(),t_values.values()],index=['mean','t-value'],
# columns=mean_values.keys()))
single_sort(ret_df, 'ivol_clip')
cols = ['beta','size','bm','mom','rev','illiq','ivol_clip']
temp = ret_df.groupby('ret_date').apply(fm_reg, cols=cols)
reg_result_df = pd.DataFrame(temp.values.tolist())
reg_result_df.index=temp.index
reg_result_df.columns = ['intercept'] + cols
reg_result_df.dropna(inplace=True)
# Mean of coefs with NW adjustment
mean_values = {}
t_values = {}
for k in reg_result_df.columns:
y = reg_result_df[k]
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HAC', maxlags=6)
mean_values[k] = reg.params[0]
t_values[k] = reg.tvalues[0]
pd.DataFrame([mean_values.values(),t_values.values()],index=['ret_mean','t_values'],columns=mean_values.keys())
ivol_factor = factor(ret_df, sort1='ivol',long_high=False,long_only=True)
factors_long_df = pd.merge(factors_long_df,ivol_factor,on='ret_date',how='left')
((factors_long_df+1).cumprod()*100).plot()
((factors_long_df['2018':]+1).cumprod()*100).plot()
factors_long_df.rename(columns={'ivol':'ivol_long'},inplace=True)
factors_long_df
factors_long_df.to_pickle('./output_data/factors/factors_all_long_only.pkl')