import numpy as np
import pandas as pd
import datetime as dt
pd.set_option('display.max_rows', 16)
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (16.0, 9.0)
import seaborn as sns
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
import gc
import matplotlib.ticker as ticker
plt.rcParams['figure.figsize'] = (16.0, 9.0)
# Load the pickled factor-exposure panel. The IC computations that follow read
# its 'ret_date' and 'exret' columns together with one column per factor.
# NOTE: unpickling can execute arbitrary code, so only load files you produced.
exposure_path = './output_data/factor_exposure/all_exposure.pkl'
ret_df = pd.read_pickle(exposure_path)
ret_df
def _factor_ic(exposures, factor, ret_col='exret', date_col='ret_date'):
    """Return the per-period Spearman rank IC of ``factor`` against ``ret_col``.

    Args:
        exposures: DataFrame holding ``date_col``, ``ret_col`` and ``factor``.
        factor: Name of the factor column.
        ret_col: Name of the return column the factor is correlated with.
        date_col: Name of the column that identifies each cross-section.

    Returns:
        A one-column DataFrame named ``factor`` and indexed by ``date_col``.
    """
    pairwise = exposures.groupby(date_col)[[ret_col, factor]].corr(method='spearman')
    # Every period yields a 2x2 correlation matrix; the ``ret_col`` row of the
    # ``factor`` column is its off-diagonal entry, i.e. the IC of that period.
    return pairwise.xs(ret_col, level=1)[[factor]]


def _plot_ic_bars(ic, step=4, rot=45):
    """Draw ``ic`` as a bar chart with a thinned-out x axis.

    Only every ``step``-th bar and the last bar keep a tick and a label. The
    earlier approach moved all unwanted ticks to x=0, which left many duplicate
    ticks there and made the surviving labels depend on how matplotlib resolves
    duplicate tick locations.

    Args:
        ic: DataFrame to plot, one bar per row.
        step: Distance, in bars, between labelled ticks.
        rot: Rotation of the tick labels in degrees.

    Returns:
        The matplotlib Axes the bars were drawn on.
    """
    ax = ic.plot.bar(rot=rot)
    # Read positions and labels before touching the axis: pandas has just set
    # one tick and one label per bar.
    ticks = list(ax.get_xticks())
    labels = [tick_label.get_text() for tick_label in ax.get_xticklabels()]
    keep = list(range(0, len(ticks), step))
    if ticks and keep[-1] != len(ticks) - 1:
        keep.append(len(ticks) - 1)  # always mark the most recent period
    ax.set_xticks([ticks[i] for i in keep])
    ax.set_xticklabels([labels[i] for i in keep], rotation=rot)
    plt.show()
    return ax


# Rank IC of each factor against the excess return, one bar chart per factor.
# (An earlier, commented-out attempt converted the index to timestamps and used
# matplotlib date locators / MultipleLocator instead of thinning the labels.)
ic_illiq = _factor_ic(ret_df, 'illiq')
ax = _plot_ic_bars(ic_illiq)

ic_size = _factor_ic(ret_df, 'size')
ax = _plot_ic_bars(ic_size)

ic_rev = _factor_ic(ret_df, 'rev')
ax = _plot_ic_bars(ic_rev)

# Momentum gets its own name; it used to overwrite ``ic_rev``.
ic_mom = _factor_ic(ret_df, 'mom')
ax = _plot_ic_bars(ic_mom)
cols = ['beta', 'size', 'bm', 'mom', 'rev', 'illiq', 'ivol']

# Per-period Spearman rank IC of every factor against the excess return.
# Each series keeps the date index produced by the groupby, so the columns are
# aligned on the dates themselves rather than by position against a separately
# computed list of unique dates.
ic_by_factor = {}
for col in cols:
    temp = ret_df.groupby('ret_date')[['exret', col]].corr(method='spearman')
    # The 'exret' row of the factor column is the off-diagonal entry of each
    # period's 2x2 correlation matrix.
    ic_by_factor[col] = temp.xs('exret', level=1)[col]
ic_df = pd.DataFrame(ic_by_factor, columns=cols)
ic_df.index.name = None  # the index carried no name before; keep plots unchanged

# Three-period rolling mean of each factor's IC, one subplot per factor.
ic_df[cols].rolling(3).mean().plot(subplots=True, grid=True)
# Information ratio (IR) = IC.mean() / IC.std()
# Information ratio of each factor: mean of its IC series divided by the
# standard deviation of that series (column-wise over ic_df).
ic_df.mean() / ic_df.std()
# Sample window bounds.
START, END = '2007-01-01', '2024-03-31'

# Security-id table from the platform's DataAPI. The code further down reads
# its 'ticker' and 'secShortName' columns.
# NOTE(review): assetClass="F" presumably selects funds -- confirm against the
# DataAPI documentation.
fund_info = DataAPI.SecIDGet(assetClass="F", pandas="1")
fund_info
# Alternative fund samples that were tried earlier (kept for reference):
# fund_id = ['110022', '009550', '001938', '009341',
#            '000751', '004997', '009863', '001714',
#            '001410', '004851', '005827', '110011',
#            '260108', '163402', '320007', '163417',
#            '001511', '161728', '161005', '161131']
# fund_id = ["519212", "519191", "519185", "001678",
#            "004685", "001437", "519198", "002049",
#            "010761", "000398", "011891", "001470",
#            "166301", "008488", "006195", "002601",
#            "001959", "011403", "013142", "001763"]
# fund_id = [id_ + '.OFCN' for id_ in fund_id]

# Tickers of the funds analysed below, kept as strings so leading zeros survive.
fund_id = [
    '014283', '001614', '016325', '001323', '001322',
    '001167', '210009', '014273', '290012', '016307',
    '014277', '210002', '162102', '004890', '620007',
    '014279', '217021', '010383', '005310', '004685',
]
len(fund_id)
# Ticker -> short-name lookup restricted to the selected funds.
in_sample = fund_info['ticker'].isin(fund_id)
fund_name = fund_info.loc[in_sample, ['ticker', 'secShortName']]
fund_name

# Inspection only: rows whose short name does not contain '债' (bond).
# The filter is not applied to fund_name; the line that would do so is kept
# commented out below.
name_has_bond = fund_name['secShortName'].str.contains('债')
fund_name.loc[~name_has_bond]
# fund_name = fund_name.loc[~fund_name['secShortName'].str.contains('债')].copy()

# Inspection only: tickers that occur more than once (all occurrences shown).
repeated_ticker = fund_name.duplicated('ticker', keep=False)
fund_name.loc[repeated_ticker]  # keep='last'

# Keep a single row per ticker, namely the last one listed.
fund_name = fund_name.drop_duplicates('ticker', keep='last')
fund_name
# Fund_NAV_Month.csv is downloaded from CSMAR:
# Fund Market series -> Public Funds -> Fund Performance -> Monthly fund NAV file
# 'Symbol' is read as a string so that leading zeros in the tickers are kept.
fund_df = pd.read_csv('./data/Fund_NAV_Month.csv', dtype={'Symbol': str})
fund_df

# Monthly NAV rows of the selected funds only.
selected_rows = fund_df['Symbol'].isin(fund_id)
top_fund_df = fund_df[selected_rows].copy()
top_fund_df['Symbol'].nunique()

# Rename to the column names used elsewhere in this file, attach the fund
# short name and keep only the columns needed for the regressions.
nav_column_names = {'Symbol': 'ticker', 'TradingMonth': 'ret_date', 'ReturnNAV': 'ret'}
top_fund_df = (
    top_fund_df
    .rename(columns=nav_column_names)
    .merge(fund_name, on='ticker')
    [['ret_date', 'ticker', 'secShortName', 'ret']]
    .copy()
)

# Convert the month column to a monthly Period.
top_fund_df['ret_date'] = pd.to_datetime(top_fund_df['ret_date']).dt.to_period('M')
top_fund_df
# Factor returns. The long-only file is used; the alternative input is kept
# commented out for reference.
# factor_ret = pd.read_pickle('./output_data/factors/factors_all.pkl')
factor_ret = pd.read_pickle('./output_data/factors/factors_all_long_only.pkl')
factor_ret

# Regression panel: one row per (month, fund), factor returns joined to the
# fund return on 'ret_date' (inner join, the pandas default).
reg_df = factor_ret.reset_index().merge(top_fund_df, on='ret_date')
# Fund return in excess of the 'rf' column.
reg_df['exret'] = reg_df['ret'].sub(reg_df['rf'])
reg_df
# Earlier specification, kept for reference (different factor columns):
# results = {}
# for fund in fund_id:
#     df_ = reg_df[reg_df['ticker'] == fund].copy()
#     X = df_[['exmktret','SMB','HML','rev','illiq','ivol']]
#     y = df_['exret']
#     X = sm.add_constant(X)
#     reg = sm.OLS(y, X).fit().get_robustcov_results(cov_type='HAC', maxlags=4)
#     results[fund] = pd.DataFrame([reg.params,reg.tvalues],columns=['alpha','exmktret','SMB','HML','rev','illiq','ivol'],
#                                  index=['coefs','tvalues'])

# Per-fund time-series regression of the fund's excess return on the factor
# returns. results maps ticker -> 2-row DataFrame ('coefs', 'tvalues') whose
# columns are the intercept ('alpha') followed by the factors.
factor_cols = ['exmktret', 'small_only', 'high_only', 'rev_long', 'illiq_long', 'ivol_long']
results = {}
for fund in fund_id:
    df_ = reg_df[reg_df['ticker'] == fund].copy()
    if len(df_) == 0:
        # No observations for this fund in the regression panel: skip it.
        continue
    y = df_['exret']
    X = sm.add_constant(df_[factor_cols])
    # OLS point estimates with HAC standard errors, 4 lags.
    reg = sm.OLS(y, X).fit().get_robustcov_results(cov_type='HAC', maxlags=4)
    results[fund] = pd.DataFrame(
        [reg.params, reg.tvalues],
        columns=['alpha', *factor_cols],
        index=['coefs', 'tvalues'],
    )
# Keep only the funds that appear in the regression panel. Filtering the
# original list (instead of list(set(...).intersection(...))) preserves its
# order, so the two batches below contain the same funds on every run; set
# iteration order for strings can change between interpreter sessions.
funds_in_panel = set(reg_df['ticker'].unique())
fund_id = [fund for fund in dict.fromkeys(fund_id) if fund in funds_in_panel]


def _show_fund_results(tickers, separator=False):
    """Print each fund's short name and display its regression table.

    Args:
        tickers: Fund tickers to show; each must be a key of ``results``.
        separator: When True, print a line of '#' after every fund.
    """
    for fund in tickers:
        print(fund_name.loc[fund_name['ticker'] == fund, 'secShortName'].values)
        display(results[fund])
        if separator:
            print('#################' * 5)


# NOTE(review): the source had lost its indentation; the separator line is
# assumed to belong inside the first loop (printed after every fund of the
# first batch) and to be absent from the second batch -- confirm.
_show_fund_results(fund_id[0:10], separator=True)
_show_fund_results(fund_id[10:20])