import pandas as pd
import numpy as np
from tqdm import tqdm
import gc
import matplotlib.pyplot as plt
import talib as ta
import datetime as dt
pd.set_option('display.max_rows', 16)
import statsmodels.api as sm
plt.rcParams['figure.figsize'] = (16.0, 9.0)
ta.__version__
# Sample window used by the data requests below.
START = '2007-01-01'
END = '2024-12-31'
# NOTE(review): DataAPI is not imported in this file; presumably it is injected by the
# hosting research platform -- confirm before running elsewhere.
# Look up index security IDs and keep one ID per short name for six benchmark indices.
index_info = DataAPI.SecIDGet(assetClass="IDX",pandas="1")
index_id = index_info[index_info['secShortName'].isin(['上证综指','深证综指','创业板指','沪深300','中证500','中证1000'])].drop_duplicates('secShortName').secID.values
# Daily index bars (open/high/low/close, turnover, percentage change) over the sample window.
index_df = DataAPI.MktIdxdGet(indexID=index_id,beginDate=START,endDate=END,field=['indexID','secShortName','tradeDate','openIndex','highestIndex','lowestIndex','closeIndex','turnoverVol','turnoverValue','CHGPct'],pandas="1")
index_df
# Security Id
# Equity universe: exchange code XSHE or XSHG, list status 'L' or 'DE', traded in CNY.
stk_info = DataAPI.SecIDGet(assetClass="E",pandas="1")
cond1 = (stk_info['exchangeCD'] == 'XSHE') | (stk_info['exchangeCD'] == 'XSHG')
cond2 = (stk_info['listStatusCD'] == 'L') | (stk_info['listStatusCD'] == 'DE')
cond3 = stk_info['transCurrCD']=='CNY'
stk_info = stk_info[cond1 & cond2 & cond3].copy()
stk_id = stk_info['secID']
# ST
# ST flag records for the universe; tradeDate is parsed so it can be merged with the
# price panel on (secID, tradeDate) later.
st_df = DataAPI.SecSTGet(beginDate=START,endDate=END,secID=stk_id,field=['secID','tradeDate','STflg'],pandas="1")
st_df['tradeDate'] = pd.to_datetime(st_df['tradeDate'],format="%Y-%m-%d")
# %%time
# # About 8 mins
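# # # Download stock data from Uqer (优矿); this takes a long time. Because of Uqer's limits, only 3 years of data are downloaded per request.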
# stk_dict = {}
# begin_ = dt.datetime.strptime(START, '%Y-%m-%d').year
# end_ = dt.datetime.strptime(START, '%Y-%m-%d').year+3
# field = ["secID","tradeDate",'preClosePrice',"closePrice",'openPrice','highestPrice','lowestPrice',"negMarketValue","turnoverValue",'turnoverRate']
# while begin_ <= 2024:
# if begin_ == 2024:
# yesterday = dt.datetime.today() - dt.timedelta(days=1)
# yesterday.strftime('%Y%m%d')
# stk_dict[begin_] = DataAPI.MktEqudAdjAfGet(secID=stk_id,
# beginDate=f'{begin_}0101',
# endDate=yesterday,
# field=field,pandas="1")
# else:
# stk_dict[begin_] = DataAPI.MktEqudAdjAfGet(secID=stk_id,
# beginDate=f'{begin_}0101',
# endDate=f'{end_}1231',
# field=field,pandas="1")
# begin_ = end_ + 1
# end_ = begin_ + 3
# for i in range(len(stk_dict)):
# stk_df = pd.DataFrame(np.vstack([_df for _df in stk_dict.values()]),columns=field)
# stk_df.to_pickle('./data/stk_df.pkl')
# %%time
# stk_df = DataAPI.MktEqudAdjAfGet(secID=stk_id,beginDate=START,endDate=END,isOpen=1,
# field=["secID","tradeDate",
# 'preClosePrice',"closePrice",
# 'openPrice','highestPrice','lowestPrice',
# "negMarketValue",
# "turnoverValue",'turnoverRate'],pandas="1")
# stk_df.to_pickle('./data/stk_df.pkl')
# Takes about 6 mins
# Load the cached daily stock panel and print its schema.
stk_df = pd.read_pickle('./data/stk_df.pkl')
stk_df.info()

# Coerce every price/volume field to a numeric dtype, one column at a time.
num_cols = [
    'preClosePrice', 'closePrice', 'openPrice', 'highestPrice', 'lowestPrice',
    'negMarketValue', 'turnoverValue', 'turnoverRate',
]
for col in num_cols:
    stk_df[col] = pd.to_numeric(stk_df[col])
stk_df['tradeDate'] = pd.to_datetime(stk_df['tradeDate'], format='%Y-%m-%d')
stk_df = stk_df.sort_values(['secID', 'tradeDate'])

# drop ST stocks
# A left merge leaves STflg missing on rows with no ST record; only those rows are kept.
print(stk_df.shape)
stk_df = stk_df.merge(st_df, on=['secID', 'tradeDate'], how='left')
stk_df = stk_df.loc[stk_df['STflg'].isna()].drop(columns='STflg')
print(stk_df.shape)
不填充停牌值比较合理,因为技术分析只看量价,直接计算量价关系较为合适
# Isolate the index whose short name is '沪深300' and build its return columns.
hs300_df = index_df[index_df['secShortName']=='沪深300'].reset_index(drop=True)
# CHGPct is used as the close-based daily return.
hs300_df.rename(columns={'CHGPct':'close_ret'},inplace=True)
# Open-to-open return: today's open relative to the previous row's open.
hs300_df['open_ret'] = hs300_df['openIndex']/hs300_df['openIndex'].shift()-1
hs300_df
hs300_df.info()
# Demeaned returns: subtract the full-sample mean of each return series.
hs300_df['close_ret_demean'] = hs300_df['close_ret'] - hs300_df['close_ret'].mean()
hs300_df['open_ret_demean'] = hs300_df['open_ret'] - hs300_df['open_ret'].mean()
# Sanity check: raw minus demeaned should equal the subtracted mean on every row.
(hs300_df['close_ret']- hs300_df['close_ret_demean']).describe()
hs300_cols = hs300_df.columns
ta.SMA?
# Compare talib's 5-period simple moving average with the pandas rolling mean of the same window.
ta.SMA(hs300_df['closeIndex'], 5)
hs300_df['closeIndex'].rolling(5).mean()
# Work on a copy so hs300_df itself is not modified by the MA experiment.
MA_df = hs300_df.copy()
MA_df['MA30'] = ta.SMA(MA_df['closeIndex'], 30)
# Plot close vs. 30-period MA for dates up to the end of 2007.
MA_df[['tradeDate','closeIndex','MA30']].set_index('tradeDate').loc[:'2007-12'].plot()
Case 1:
Case 2:
# Signal: 1 when the close is above MA30, 0 when below; it stays NaN when neither
# comparison is true (e.g. while MA30 is still NaN).
MA_df['signal'] = np.nan
MA_df.loc[MA_df['closeIndex'] > MA_df['MA30'], 'signal'] = 1
MA_df.loc[MA_df['closeIndex'] < MA_df['MA30'], 'signal'] = 0
MA_df[~MA_df['signal'].isna()]
MA_df['position_close'] = MA_df['signal'] # the signal generated from day 1's close is treated as a position taken at that same day-1 close
MA_df['position_open'] = MA_df['signal'].shift() # the signal generated from day 1's close becomes a position taken at day 2's open
# Strategy return = position held on the previous row * the current row's return.
# For the open-based series the signal is therefore lagged two rows in total.
MA_df['position_close_ret'] = MA_df['position_close'].shift() * MA_df['close_ret']
MA_df['position_open_ret'] = MA_df['position_open'].shift() * MA_df['open_ret']
MA_df['position_close_ret_demean'] = MA_df['position_close'].shift() * MA_df['close_ret_demean']
MA_df['position_open_ret_demean'] = MA_df['position_open'].shift() * MA_df['open_ret_demean']
# Cumulative gross return of each strategy, compounded from the raw (not demeaned) returns.
MA_df['MA30_close_cumret'] = (MA_df['position_close_ret']+1).cumprod()
MA_df['MA30_open_cumret'] = (MA_df['position_open_ret']+1).cumprod()
MA_df['signal'].unique()
MA_df[MA_df['signal']==0]
MA_df.loc[96:99]
# Hand-computed returns; presumably the figures are read off the rows displayed above -- verify.
3511.43/3803.95 - 1
3407.00 / 3804.96 - 1
## Example
# Re-derive the cumulative returns on a short slice (index labels 99..113) so the
# compounding can be checked by hand.
temp = MA_df.loc[99:113,['tradeDate','openIndex','closeIndex','close_ret','open_ret','signal',
'position_close','position_open','position_close_ret','position_open_ret',
'position_close_ret_demean','position_open_ret_demean']].copy()
temp['MA30_close_cumret'] = (temp['position_close_ret']+1).cumprod()
temp['MA30_open_cumret'] = (temp['position_open_ret']+1).cumprod()
# NOTE(review): display is not imported here; presumably provided by the notebook environment.
display(temp)
# close
print(3877.59 / 3802.30)
# open
print(3804.41 / 3814.19)
# Keep only the columns needed for plotting, indexed by trade date.
MA30_ret_df = MA_df[['tradeDate','openIndex','closeIndex','open_ret','close_ret','MA30',
'signal','position_close','position_open','position_close_ret','position_open_ret',
'position_close_ret_demean','position_open_ret_demean',
'MA30_close_cumret','MA30_open_cumret']].copy()
MA30_ret_df.set_index('tradeDate',inplace=True)
# Close price cumret
# Three stacked panels: price vs. MA30, the position, and the cumulative return.
fig, axes = plt.subplots(3,1)
MA30_ret_df[['closeIndex','MA30']].plot(ax=axes[0],grid=True)
MA30_ret_df[['position_close']].plot(ax=axes[1],grid=True)
MA30_ret_df[['MA30_close_cumret']].plot(ax=axes[2],grid=True)
# open price cumret
fig, axes = plt.subplots(3,1)
MA30_ret_df[['openIndex','MA30']].plot(ax=axes[0],grid=True)
MA30_ret_df[['position_open']].plot(ax=axes[1], grid=True)
MA30_ret_df[['MA30_open_cumret']].plot(ax=axes[2], grid=True)
hs300_df
# Same MA rule as above, with the window length held in ma_length.
MA_df = hs300_df.copy()
ma_length = 20
MA_df[f'MA{ma_length}'] = ta.SMA(MA_df['closeIndex'], ma_length)
# Here the signal defaults to 0 (not NaN), so rows without a valid MA count as "out of market".
MA_df['signal'] = 0
# NOTE(review): ndays is not referenced again in the visible code.
ndays = MA_df.shape[0]
MA_df.loc[MA_df['closeIndex'] > MA_df[f'MA{ma_length}'], 'signal'] = 1
# NOTE(review): open_ret is already present on hs300_df (copied above); this recomputes it
# with the same formula.
MA_df['open_ret'] = MA_df['openIndex']/MA_df['openIndex'].shift()-1
MA_df['position_close'] = MA_df['signal']
MA_df['position_open'] = MA_df['signal'].shift()
# NOTE(review): hs300_df already has CHGPct renamed to close_ret, so this rename presumably
# matches no column; the next line repeats the position_close assignment made above.
MA_df.rename(columns={'CHGPct':'close_ret'},inplace=True)
MA_df['position_close'] = MA_df['signal']
# Strategy return = previous row's position * current row's return.
MA_df['position_close_ret'] = MA_df['position_close'].shift() * MA_df['close_ret']
MA_df['position_open_ret'] = MA_df['position_open'].shift() * MA_df['open_ret']
MA_df['position_close_ret_demean'] = MA_df['position_close'].shift() * MA_df['close_ret_demean']
MA_df['position_open_ret_demean'] = MA_df['position_open'].shift() * MA_df['open_ret_demean']
# Cumulative gross returns compounded from the raw strategy returns.
MA_df[f'MA{ma_length}_close_cumret'] = (MA_df['position_close_ret']+1).cumprod()
MA_df[f'MA{ma_length}_open_cumret'] = (MA_df['position_open_ret']+1).cumprod()
MA_ret_df = MA_df[['tradeDate','openIndex','closeIndex','open_ret','close_ret',f'MA{ma_length}',
'signal','position_close','position_open','position_close_ret','position_open_ret',
'position_close_ret_demean','position_open_ret_demean',
f'MA{ma_length}_close_cumret',f'MA{ma_length}_open_cumret']].copy()
MA_ret_df.set_index('tradeDate',inplace=True)
# Close price cumret
# Three stacked panels: price vs. MA, the position, and the cumulative return.
fig, axes = plt.subplots(3,1)
MA_ret_df[['closeIndex',f'MA{ma_length}']].plot(ax=axes[0],grid=True)
MA_ret_df[['position_close']].plot(ax=axes[1],grid=True)
MA_ret_df[[f'MA{ma_length}_close_cumret']].plot(ax=axes[2],grid=True)
# open price cumret
fig, axes = plt.subplots(3,1)
MA_ret_df[['openIndex',f'MA{ma_length}']].plot(ax=axes[0],grid=True)
MA_ret_df[['position_open']].plot(ax=axes[1],grid=True)
MA_ret_df[[f'MA{ma_length}_open_cumret']].plot(ax=axes[2],grid=True)
ta.EMA?
# NOTE(review): Smoothing is assigned here but never passed to ta.EMA in the visible code.
Smoothing = 2
ema_length = 20
# EMA rule on the CSI 300 copy: long (1) when the close is above its EMA, otherwise 0.
EMA_df = hs300_df.copy()
EMA_df['EMA'] = ta.EMA(EMA_df['closeIndex'], ema_length)
EMA_df[['tradeDate','closeIndex','EMA']].set_index('tradeDate').plot()
# NOTE(review): the EMA column is recomputed with identical arguments.
EMA_df['EMA'] = ta.EMA(EMA_df['closeIndex'], ema_length)
EMA_df['signal'] = 0
EMA_df.loc[EMA_df['closeIndex'] > EMA_df['EMA'], 'signal'] = 1
EMA_df['open_ret'] = EMA_df['openIndex']/EMA_df['openIndex'].shift()-1
EMA_df['position_close'] = EMA_df['signal']
EMA_df['position_open'] = EMA_df['signal'].shift()
# NOTE(review): hs300_df already carries close_ret, so this rename presumably matches no column.
EMA_df.rename(columns={'CHGPct':'close_ret'},inplace=True)
# Strategy return = previous row's position * current row's return.
EMA_df['position_close_ret'] = EMA_df['position_close'].shift() * EMA_df['close_ret']
EMA_df['position_open_ret'] = EMA_df['position_open'].shift() * EMA_df['open_ret']
EMA_df['position_close_ret_demean'] = EMA_df['position_close'].shift() * EMA_df['close_ret_demean']
EMA_df['position_open_ret_demean'] = EMA_df['position_open'].shift() * EMA_df['open_ret_demean']
# Cumulative gross returns compounded from the raw strategy returns.
EMA_df['EMA_close_cumret'] = (EMA_df['position_close_ret']+1).cumprod()
EMA_df['EMA_open_cumret'] = (EMA_df['position_open_ret']+1).cumprod()
EMA_ret_df = EMA_df[['tradeDate','openIndex','closeIndex','open_ret','close_ret','EMA',
'signal','position_close','position_open','position_close_ret','position_open_ret',
'position_close_ret_demean','position_open_ret_demean',
'EMA_close_cumret','EMA_open_cumret']].copy()
EMA_ret_df.set_index('tradeDate',inplace=True)
# open price cumret
# Three stacked panels: open price vs. EMA, the position, and the cumulative return.
fig, axes = plt.subplots(3,1)
EMA_ret_df[['openIndex','EMA']].plot(ax=axes[0], grid=True)
EMA_ret_df[['position_open']].plot(ax=axes[1], grid=True)
EMA_ret_df[['EMA_open_cumret']].plot(ax=axes[2], grid=True)
EMA_ret_df
def rule_return(df, demean=True, open_ret=True):
    """
    Convert a trading signal into daily strategy returns and a cumulative return path.

    df must contain the columns:
        tradeDate: trading date of each row
        signal:    the signal generated by the rule
        close_ret: return calculated by close price
        open_ret:  return calculated by open price

    The function writes these columns onto df itself: close_ret_demean, open_ret_demean
    (each return minus its own mean, used to adjust for the bias created by bullish or
    bearish markets), position_close, position_open, position_{close,open}_ret,
    position_{close,open}_ret_demean, close_cumret and open_cumret.

    Returns a new DataFrame with three columns: tradeDate, the selected strategy return
    (open- or close-based per `open_ret`, demeaned or raw per `demean`) and the matching
    cumulative return, which is always compounded from the raw strategy return.
    """
    sides = ('close', 'open')

    # Demeaned version of each raw return series.
    for side in sides:
        raw = df[f'{side}_ret']
        df[f'{side}_ret_demean'] = raw - raw.mean()

    # Close-based position uses the signal as is; open-based position lags it by one row.
    df['position_close'] = df['signal']
    df['position_open'] = df['signal'].shift()

    # Strategy return = position held on the previous row * current row's return.
    for suffix in ('ret', 'ret_demean'):
        for side in sides:
            held = df[f'position_{side}'].shift()
            df[f'position_{side}_{suffix}'] = held * df[f'{side}_{suffix}']

    for side in sides:
        df[f'{side}_cumret'] = (df[f'position_{side}_ret'] + 1).cumprod()

    chosen = 'open' if open_ret else 'close'
    ret_col = f'position_{chosen}_ret'
    if demean:
        ret_col = f'{ret_col}_demean'
    cum_col = f'{chosen}_cumret'
    return pd.DataFrame({'tradeDate': df['tradeDate'].values,
                         ret_col: df[ret_col].values,
                         cum_col: df[cum_col].values})
ema_length = 20
stk_df
# Per-stock EMA of the close. transform (rather than apply) returns a result indexed like
# stk_df, so the assignment aligns row by row regardless of how the installed pandas
# version treats group keys in apply. The window now comes from ema_length instead of a
# hard-coded 20.
stk_df['EMA'] = stk_df.groupby('secID')['closePrice'].transform(ta.EMA, ema_length)
# Drop rows that are suspended (open price of 0) but still carry a close price.
stk_df.drop(stk_df.loc[stk_df['openPrice']==0].index, inplace=True)
stk_df.loc[stk_df['openPrice']==0]
# Open-to-open return within each stock; the first row of every stock is NaN.
stk_df['open_ret'] = stk_df.groupby('secID')['openPrice'].transform(lambda x: x / x.shift() - 1)
stk_df['close_ret'] = stk_df['closePrice']/stk_df['preClosePrice'] - 1
# Signal: 1 when the close is above its EMA, otherwise 0 (including rows where EMA is NaN).
stk_df['signal'] = 0
stk_df.loc[stk_df['closePrice'] > stk_df['EMA'], 'signal'] = 1
stk_df
%%time
# Run rule_return on each stock separately. With its defaults the result holds, per stock,
# tradeDate, position_open_ret_demean and open_cumret.
rule_ret_df = stk_df.groupby('secID').apply(rule_return)
# Bring the group key back as a column and discard the inner index level.
rule_ret_df.reset_index(inplace=True)
rule_ret_df.drop('level_1',axis=1,inplace=True)
rule_ret_df
# Last (non-null) cumulative return of each stock.
rule_cumret_by_crs = rule_ret_df.groupby('secID')['open_cumret'].last()
rule_cumret_by_crs.describe()
rule_cumret_by_crs.hist(bins=200)
# Regress (cumret - 1) on a constant: the coefficient is the cross-sectional mean and its
# t-value uses HC0 heteroskedasticity-robust standard errors.
rule_cumret_by_crs.dropna(inplace=True)
y = rule_cumret_by_crs.values
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y-const, const).fit().get_robustcov_results(cov_type='HC0')
mean_values = reg.params[0]
t_values = reg.tvalues[0]
pd.DataFrame([mean_values,t_values],index=['ret_mean','t_values'],columns=['rule_cumret'])
# Same question posed differently: regress cumret on a constant and test 'const = 1'
# (this fit uses the default, non-robust covariance).
rule_cumret_by_crs.dropna(inplace=True)
y = rule_cumret_by_crs.values
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit()
print(reg.t_test('const = 1'))
平均年化收益:
# # 1.12/(2022-2007+1)
# 2.12**(1/(2022-2007+1)) - 1
# Geometric annualisation of a cumulative gross return of 1.8689 over 2007..2023 inclusive
# (17 years). NOTE(review): 1.8689 is typed in by hand, presumably from the output above.
1.8689**(1/(2023-2007+1)) - 1
这个统计检验不够好:
# time-series mean of daily return
# One number per stock: the average demeaned open-to-open strategy return.
rule_tsmean_ret_by_crs = rule_ret_df.groupby('secID')['position_open_ret_demean'].mean()
rule_tsmean_ret_by_crs
# Spot check on one randomly chosen stock: rebuild the signal and recompute its mean return.
temp = stk_df[stk_df['secID']==np.random.choice(stk_df['secID'].unique(),1)[0]].copy()
temp['signal'] = 0
temp.loc[temp['closePrice'] > temp['EMA'], 'signal'] = 1
display(temp)
rule_return(temp)['position_open_ret_demean'].mean()
# NOTE(review): the ticker is hard-coded while the stock above is drawn at random;
# presumably it was the stock drawn in the original run -- verify.
rule_tsmean_ret_by_crs['002976.XSHE']
# Cross-sectional mean of the per-stock averages with an HC0-robust t-value
# (regression on a constant only).
rule_tsmean_ret_by_crs.dropna(inplace=True)
y = rule_tsmean_ret_by_crs.values
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HC0')
mean_values = reg.params[0]
t_values = reg.tvalues[0]
pd.DataFrame([mean_values,t_values],index=['ret_mean','t_values'],columns=['rule_daily_ret'])
EMA_ret_df
# Strategy return series under test: position * demeaned raw open-to-open return.
rule_ret_series = EMA_ret_df['position_open_ret_demean'].dropna()
# Recentre the series at zero so resampling is done under H0: the rule's mean return is zero.
rule_ret_series_for_bootstrap = rule_ret_series - rule_ret_series.mean()
n_sample = len(rule_ret_series)
n_boostrap = 1000
n_sample
rule_ret_series.mean()
# Each bootstrap replicate resamples n_sample observations with replacement and records
# the replicate's mean.
rule_ret_mean_distr = pd.Series([
    np.random.choice(rule_ret_series_for_bootstrap, n_sample).mean()
    for _ in range(n_boostrap)
])
rule_ret_mean_distr.hist(bins=50)
# Count, then share, of replicates whose mean exceeds the observed mean (one-sided p-value).
rule_ret_mean_distr.gt(rule_ret_series.mean()).sum()
rule_ret_mean_distr.gt(rule_ret_series.mean()).sum() / n_boostrap
np.mean(rule_ret_series)
def white_reality_test(rule_ret, n_boostrap=1000, min_sample=100, random_state=None):
    """
    One-sided bootstrap p-value for H0: the mean of a rule's return series is zero.

    The series is recentred at zero, resampled with replacement `n_boostrap` times, and the
    p-value is the share of resampled means that are strictly greater than the observed mean.

    Parameters
    ----------
    rule_ret : pandas Series or numpy array of the rule's returns (callers drop NaN first).
    n_boostrap : number of bootstrap replicates.
    min_sample : minimum number of observations required to run the test.
    random_state : optional seed; when given, a private numpy RandomState is used so the
        result is reproducible. When None, numpy's global random state is used.

    Returns
    -------
    The p-value, or None when the series is empty or shorter than `min_sample`.
    """
    n_sample = len(rule_ret)
    # An empty series cannot be resampled, whatever min_sample is.
    if n_sample == 0 or n_sample < min_sample:
        return None

    rng = np.random if random_state is None else np.random.RandomState(random_state)
    mean_rule_ret = np.mean(rule_ret)
    # Centre at zero so the resampled means are distributed as under H0.
    rule_ret_for_bootstrap = rule_ret - mean_rule_ret
    rule_ret_mean_distr = pd.Series(
        [rng.choice(rule_ret_for_bootstrap, n_sample).mean() for _ in range(n_boostrap)]
    )
    pvalue = (rule_ret_mean_distr > mean_rule_ret).sum() / n_boostrap
    return pvalue
# The p value of the rule's return series
white_reality_test(rule_ret_series)
# Repeat the test on one randomly drawn stock's demeaned strategy returns.
one_stk_id = np.random.choice(rule_ret_df['secID'].unique(),1)[0]
rule_ret_series = rule_ret_df.loc[rule_ret_df['secID'] == one_stk_id,'position_open_ret_demean']
rule_ret_series.plot()
rule_ret_series.dropna(inplace=True)
white_reality_test(rule_ret_series)
# Random sample of 300 distinct stocks (drawn without replacement) for the batch test.
stk_id_300 = np.random.choice(rule_ret_df['secID'].unique(),300,replace=False)
temp = rule_ret_df.loc[rule_ret_df['secID'].isin(stk_id_300)].copy()
# Removes every row with a NaN in any column, not only in the return column.
temp.dropna(inplace=True)
temp
%%time
# Bootstrap p-value per sampled stock; white_reality_test returns None for short series.
stk_white_p = temp.groupby('secID')['position_open_ret_demean'].apply(white_reality_test)
stk_white_p
stk_white_p.describe()
stk_white_p.hist(bins=50)
# Stocks whose p-value is below 0.10.
stk_white_p.loc[stk_white_p < 0.10]
good_EMA_stks = stk_white_p.loc[stk_white_p < 0.10].index
# NOTE(review): hard-coded ticker; presumably one of the stocks selected in the original run.
rule_ret_df.loc[rule_ret_df['secID']=='300104.XSHE',['tradeDate','open_cumret']].set_index('tradeDate').plot()
# NOTE(review): set_index(..., inplace=True) is called on the object returned by .loc and
# nothing is assigned, so this line leaves rule_ret_df unchanged.
rule_ret_df.loc[rule_ret_df['secID'].isin(good_EMA_stks[0:5])].set_index('tradeDate',inplace=True)
# Plot the cumulative return paths of the selected stocks, one line per secID.
temp2 = rule_ret_df.loc[rule_ret_df['secID'].isin(good_EMA_stks)].copy()
temp2.pivot(index='tradeDate',columns='secID',values='open_cumret').plot()
Moving Average Convergence Divergence (MACD)
$$MACD = EMA_{\text{fast period}} - EMA_{\text{slow period}}$$

$$EMA_t(\text{Value}) = \text{Value}_t \cdot \frac{\text{Smoothing}}{1+\text{Days}} + EMA_{t-1} \cdot \left(1-\frac{\text{Smoothing}}{1+\text{Days}}\right)$$

$Smoothing = 2$, $\text{fast period} = 12$, $\text{slow period} = 26$. 第一个$EMA$用简单平均。比如,计算$EMA_{\text{12 period}}$,那么就等有12个观测值之后,取简单平均得到$EMA_1$,然后$EMA_2$用上述公式计算。
按照级数展开可发现,越靠近当前的价格,权重越大。Smoothing越大,越靠近当前价格的权重越大。
得到 MACD 以后,再计算 MACD 的 Signal,
$$Signal = EMA_{\text{9 period}}(MACD)$$

判断标准:当 MACD 上穿 Signal 时,处于上升趋势。当 MACD 下穿 Signal 时,处于下降趋势。
# MACD rule on a copy of the CSI 300 frame, with the fast/slow/signal periods set below.
MACD_df = hs300_df.copy()
fastperiod = 12
slowperiod = 26
signalperiod = 9
MACD_df
# ta.MACD returns three series; the first two (MACD line and its signal line) are kept and
# the third is discarded.
MACD_df['MACD'], MACD_df['MACD_signal'], _ = ta.MACD(MACD_df['closeIndex'], fastperiod=fastperiod, slowperiod=slowperiod, signalperiod=signalperiod)
MACD_df.loc[MACD_df['tradeDate']<='2007-12-31',['MACD','MACD_signal']].plot()
# Signal: 1 when MACD is above its signal line, otherwise 0 (including NaN rows).
MACD_df['signal'] = 0
MACD_df.loc[MACD_df['MACD'] > MACD_df['MACD_signal'], 'signal'] = 1
MACD_df['open_ret'] = MACD_df['openIndex']/MACD_df['openIndex'].shift()-1
MACD_df['position_close'] = MACD_df['signal']
MACD_df['position_open'] = MACD_df['signal'].shift()
# NOTE(review): hs300_df already carries close_ret, so this rename presumably matches no column.
MACD_df.rename(columns={'CHGPct':'close_ret'},inplace=True)
# Strategy return = previous row's position * current row's return.
MACD_df['position_close_ret'] = MACD_df['position_close'].shift() * MACD_df['close_ret']
MACD_df['position_open_ret'] = MACD_df['position_open'].shift() * MACD_df['open_ret']
MACD_df['position_close_ret_demean'] = MACD_df['position_close'].shift() * MACD_df['close_ret_demean']
MACD_df['position_open_ret_demean'] = MACD_df['position_open'].shift() * MACD_df['open_ret_demean']
# Cumulative gross returns compounded from the raw strategy returns.
MACD_df['MACD_close_cumret'] = (MACD_df['position_close_ret']+1).cumprod()
MACD_df['MACD_open_cumret'] = (MACD_df['position_open_ret']+1).cumprod()
MACD_ret_df = MACD_df[['tradeDate','openIndex','closeIndex','open_ret','close_ret','MACD','MACD_signal',
'signal','position_close','position_open','position_close_ret','position_open_ret',
'position_close_ret_demean','position_open_ret_demean',
'MACD_close_cumret','MACD_open_cumret']].copy()
MACD_ret_df.set_index('tradeDate',inplace=True)
# open price cumret
# Top panel overlays the open price on a secondary y-axis; the other panels show the
# position and the cumulative return.
fig, axes = plt.subplots(3,1)
MACD_ret_df[['MACD','MACD_signal']].plot(ax=axes[0], grid=True)
MACD_ret_df[['openIndex']].plot(secondary_y=True,ax=axes[0],grid=True)
MACD_ret_df[['position_open']].plot(ax=axes[1], grid=True)
MACD_ret_df[['MACD_open_cumret']].plot(ax=axes[2], grid=True)
# Same three panels restricted to dates up to and including 2009.
fig, axes = plt.subplots(3,1)
MACD_ret_df.loc[:'2009',['MACD','MACD_signal']].plot(ax=axes[0], grid=True)
MACD_ret_df.loc[:'2009',['openIndex']].plot(secondary_y=True,ax=axes[0],grid=True)
MACD_ret_df.loc[:'2009',['position_open']].plot(ax=axes[1], grid=True)
MACD_ret_df.loc[:'2009',['MACD_open_cumret']].plot(ax=axes[2], grid=True)
stk_df
# The EMA column from the previous rule is no longer needed.
stk_df.drop('EMA',axis=1,inplace=True)
# Per-stock MACD line and signal line (talib default periods). transform (rather than
# apply) returns a result indexed like stk_df, so the assignment aligns row by row
# regardless of how the installed pandas version treats group keys in apply.
stk_df['MACD'] = stk_df.groupby('secID')['closePrice'].transform(lambda x: ta.MACD(x)[0])
stk_df['MACD_signal'] = stk_df.groupby('secID')['closePrice'].transform(lambda x: ta.MACD(x)[1])
stk_df.loc[stk_df['secID']=='000001.XSHE',['tradeDate','MACD','MACD_signal']].set_index('tradeDate').loc['2020':].plot()
# Signal: 1 when MACD is above its signal line, otherwise 0 (including NaN rows).
stk_df['signal'] = 0
stk_df.loc[stk_df['MACD'] > stk_df['MACD_signal'], 'signal'] = 1
stk_df
%%time
# Run rule_return on each stock separately. With its defaults the result holds, per stock,
# tradeDate, position_open_ret_demean and open_cumret.
rule_ret_df = stk_df.groupby('secID').apply(rule_return)
# Bring the group key back as a column and discard the inner index level.
rule_ret_df.reset_index(inplace=True)
rule_ret_df.drop('level_1',axis=1,inplace=True)
rule_ret_df
# Last (non-null) cumulative return of each stock.
rule_cumret_by_crs = rule_ret_df.groupby('secID')['open_cumret'].last()
rule_cumret_by_crs
rule_cumret_by_crs.describe()
rule_cumret_by_crs.hist(bins=75)
# Regress (cumret - 1) on a constant: the coefficient is the cross-sectional mean and its
# t-value uses HC0 heteroskedasticity-robust standard errors.
rule_cumret_by_crs.dropna(inplace=True)
y = rule_cumret_by_crs.values
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y-const, const).fit().get_robustcov_results(cov_type='HC0')
mean_values = reg.params[0]
t_values = reg.tvalues[0]
pd.DataFrame([mean_values,t_values],index=['ret_mean','t_values'],columns=['rule_cumret'])
# Same question posed differently: regress cumret on a constant and test 'const = 1'
# (this fit uses the default, non-robust covariance).
rule_cumret_by_crs.dropna(inplace=True)
y = rule_cumret_by_crs.values
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit()
print(reg.t_test('const = 1'))
平均年化收益:
# 2.1293**(1/(2022-2007+1))-1
# Geometric annualisation of a cumulative gross return of 1.9272 over 2007..2023 inclusive
# (17 years). NOTE(review): 1.9272 is typed in by hand, presumably from the output above.
1.9272**(1/(2023-2007+1)) - 1
# time-series mean of daily return
# One number per stock: the average demeaned open-to-open strategy return.
rule_tsmean_ret_by_crs = rule_ret_df.groupby('secID')['position_open_ret_demean'].mean()
rule_tsmean_ret_by_crs
# Cross-sectional mean of the per-stock averages with an HC0-robust t-value
# (regression on a constant only).
rule_tsmean_ret_by_crs.dropna(inplace=True)
y = rule_tsmean_ret_by_crs.values
const = np.full(shape=len(y),fill_value=1)
reg = sm.OLS(y, const).fit().get_robustcov_results(cov_type='HC0')
mean_values = reg.params[0]
t_values = reg.tvalues[0]
pd.DataFrame([mean_values,t_values],index=['ret_mean','t_values'],columns=['rule_daily_ret'])
# Strategy return series under test: position * demeaned raw open-to-open return.
rule_ret_series = MACD_ret_df['position_open_ret_demean'].dropna()
# Recentre the series at zero so resampling is done under H0: the rule's mean return is zero.
rule_ret_series_for_bootstrap = rule_ret_series - rule_ret_series.mean()
n_sample = len(rule_ret_series)
n_boostrap = 1000
n_sample
rule_ret_series.mean()
# Each bootstrap replicate resamples n_sample observations with replacement and records
# the replicate's mean.
rule_ret_mean_distr = pd.Series([
    np.random.choice(rule_ret_series_for_bootstrap, n_sample).mean()
    for _ in range(n_boostrap)
])
rule_ret_mean_distr.hist(bins=50)
# Share of replicates whose mean exceeds the observed mean (one-sided p-value).
rule_ret_mean_distr.gt(rule_ret_series.mean()).sum() / n_boostrap
np.mean(rule_ret_series)
def white_reality_test(rule_ret, n_boostrap=1000, min_sample=100, random_state=None):
    """
    One-sided bootstrap p-value for H0: the mean of a rule's return series is zero.

    The series is recentred at zero, resampled with replacement `n_boostrap` times, and the
    p-value is the share of resampled means that are strictly greater than the observed mean.

    Parameters
    ----------
    rule_ret : pandas Series or numpy array of the rule's returns (callers drop NaN first).
    n_boostrap : number of bootstrap replicates.
    min_sample : minimum number of observations required to run the test.
    random_state : optional seed; when given, a private numpy RandomState is used so the
        result is reproducible. When None, numpy's global random state is used.

    Returns
    -------
    The p-value, or None when the series is empty or shorter than `min_sample`.
    """
    n_sample = len(rule_ret)
    # An empty series cannot be resampled, whatever min_sample is.
    if n_sample == 0 or n_sample < min_sample:
        return None

    rng = np.random if random_state is None else np.random.RandomState(random_state)
    mean_rule_ret = np.mean(rule_ret)
    # Centre at zero so the resampled means are distributed as under H0.
    rule_ret_for_bootstrap = rule_ret - mean_rule_ret
    rule_ret_mean_distr = pd.Series(
        [rng.choice(rule_ret_for_bootstrap, n_sample).mean() for _ in range(n_boostrap)]
    )
    pvalue = (rule_ret_mean_distr > mean_rule_ret).sum() / n_boostrap
    return pvalue
# The p value of the rule's return series
white_reality_test(rule_ret_series)
# Repeat the test on one randomly drawn stock's demeaned strategy returns.
one_stk_id = np.random.choice(rule_ret_df['secID'].unique(),1)[0]
rule_ret_series = rule_ret_df.loc[rule_ret_df['secID'] == one_stk_id,'position_open_ret_demean']
rule_ret_series.dropna(inplace=True)
white_reality_test(rule_ret_series)
# Random sample of 300 distinct stocks (drawn without replacement) for the batch test.
stk_id_300 = np.random.choice(rule_ret_df['secID'].unique(),300,replace=False)
temp = rule_ret_df.loc[rule_ret_df['secID'].isin(stk_id_300)].copy()
# Removes every row with a NaN in any column, not only in the return column.
temp.dropna(inplace=True)
temp
%%time
# Bootstrap p-value per sampled stock; white_reality_test returns None for short series.
stk_white_p = temp.groupby('secID')['position_open_ret_demean'].apply(white_reality_test)
stk_white_p
stk_white_p.describe()
stk_white_p.hist(bins=50)
# Stocks whose p-value is below 0.10.
stk_white_p.loc[stk_white_p < 0.10]
good_MACD_stks = stk_white_p.loc[stk_white_p < 0.10].index
# NOTE(review): hard-coded ticker; presumably one of the stocks selected in the original run.
rule_ret_df.loc[rule_ret_df['secID']=='301296.XSHE','open_cumret']
# NOTE(review): set_index(..., inplace=True) is called on the object returned by .loc and
# nothing is assigned, so this line leaves rule_ret_df unchanged.
rule_ret_df.loc[rule_ret_df['secID'].isin(good_MACD_stks[0:5])].set_index('tradeDate',inplace=True)
# Plot the cumulative return paths of the selected stocks, one line per secID.
temp2 = rule_ret_df.loc[rule_ret_df['secID'].isin(good_MACD_stks)].copy()
temp2.pivot(index='tradeDate',columns='secID',values='open_cumret').plot()
def rule_return_open(df, demean=True):
    """
    Open-to-open strategy return series implied by a trading signal.

    df must contain these columns:
        signal:          the signal generated by the rule
        open_ret:        return calculated by open price
        open_ret_demean: open_ret minus its mean (it is read here, not computed); demeaned
                         returns adjust for the bias created by bullish or bearish markets

    The function writes position_open, position_open_ret and position_open_ret_demean onto
    df itself. The signal is lagged one row to form the position and one more row before
    it is multiplied with the return.

    Returns the demeaned strategy return column when `demean` is truthy, otherwise the raw
    strategy return column.
    """
    df['position_open'] = df['signal'].shift()
    # Position held on the previous row earns the current row's open-to-open return.
    held = df['position_open'].shift()
    df['position_open_ret'] = held * df['open_ret']
    df['position_open_ret_demean'] = held * df['open_ret_demean']
    return df['position_open_ret_demean'] if demean else df['position_open_ret']
# Columns carried through every rule: date, prices, and raw / demeaned returns.
cols = ['tradeDate', 'openIndex', 'closeIndex', 'open_ret', 'close_ret',
        'open_ret_demean', 'close_ret_demean']
ta_rules_df = hs300_df.loc[:, cols].copy()

# MA
ma_params = [30, 20, 10]
for ma_param in ma_params:
    ta_rules_df[f'MA{ma_param}'] = ta.SMA(ta_rules_df['closeIndex'], ma_param)
    # 1 above the average, 0 below it, NaN when neither holds (equal, or average not yet defined).
    ta_rules_df['signal'] = np.select(
        [ta_rules_df['closeIndex'] > ta_rules_df[f'MA{ma_param}'],
         ta_rules_df['closeIndex'] < ta_rules_df[f'MA{ma_param}']],
        [1, 0],
        default=np.nan,
    )
    ta_rules_df[f'ret_MA{ma_param}'] = rule_return_open(ta_rules_df)
# Keep the base columns plus one return column per MA window; helper columns are discarded.
ta_rules_df = ta_rules_df[cols + [f'ret_MA{window}' for window in ma_params]].copy()
ta_rules_df
cols = list(ta_rules_df.columns)

#EMA
ema_params = [30, 20, 10]
for ema_param in ema_params:
    ta_rules_df[f'EMA{ema_param}'] = ta.EMA(ta_rules_df['closeIndex'], ema_param)
    ta_rules_df['signal'] = np.select(
        [ta_rules_df['closeIndex'] > ta_rules_df[f'EMA{ema_param}'],
         ta_rules_df['closeIndex'] < ta_rules_df[f'EMA{ema_param}']],
        [1, 0],
        default=np.nan,
    )
    ta_rules_df[f'ret_EMA{ema_param}'] = rule_return_open(ta_rules_df)
ta_rules_df = ta_rules_df[cols + [f'ret_EMA{window}' for window in ema_params]].copy()
ta_rules_df
cols = list(ta_rules_df.columns)

# MACD
macd_models = {
    'MACD1': {'fastperiod': 12, 'slowperiod': 26, 'signalperiod': 9},
    'MACD2': {'fastperiod': 10, 'slowperiod': 20, 'signalperiod': 5},
}
for macd, param in macd_models.items():
    # The keys of each parameter dict are exactly ta.MACD's keyword names.
    ta_rules_df[macd], ta_rules_df[f'{macd}_signal'], _ = ta.MACD(ta_rules_df['closeIndex'], **param)
    # 1 when the MACD line is above its signal line, otherwise 0.
    ta_rules_df['signal'] = 0
    ta_rules_df.loc[ta_rules_df[macd] > ta_rules_df[f'{macd}_signal'], 'signal'] = 1
    ta_rules_df[f'ret_{macd}'] = rule_return_open(ta_rules_df)
# Drop every row with a NaN in any column, then renumber the rows from 0.
ta_rules_df = ta_rules_df.dropna().reset_index(drop=True)
ta_rules_df
# Collect the per-rule strategy return columns (every column whose name starts with 'ret').
multi_rule_cols = ta_rules_df.columns[ta_rules_df.columns.str.startswith('ret')]
multi_rule_ret_df = ta_rules_df[multi_rule_cols].copy()
multi_rule_ret_df
def multi_white_reality_test(target_rule, multi_rule_ret_df, n_bootstrap=500, progress=True):
    """
    Bootstrap p-value for one rule, judged against the best of several rules.

    Every column of `multi_rule_ret_df` is recentred at zero. In each bootstrap replicate
    the rows are resampled jointly with replacement, each column's mean is taken, and the
    largest of those means is recorded. The p-value is the share of replicates whose
    recorded maximum is strictly greater than the observed mean of `target_rule`.

    Parameters
    ----------
    target_rule : the name of the rule under consideration; must be one column of
        multi_rule_ret_df (a KeyError is raised before any resampling otherwise).
    multi_rule_ret_df : the df of multiple rule returns, one column per rule.
    n_bootstrap : number of bootstrap replicates.
    progress : show a tqdm progress bar over the replicates; pass False to run silently.

    Returns
    -------
    None when there are fewer than 100 rows; otherwise the tuple (pvalue, max_bs_distr),
    where max_bs_distr is the array of the n_bootstrap recorded maxima.
    """
    n_sample = multi_rule_ret_df.shape[0]
    if n_sample < 100:
        return None

    # Looked up before the loop so an unknown rule name fails immediately.
    target_mean = multi_rule_ret_df[target_rule].mean()
    centered = multi_rule_ret_df - multi_rule_ret_df.mean()
    max_bs_distr = np.full(shape=n_bootstrap, fill_value=np.nan)

    replicates = range(n_bootstrap)
    if progress:
        replicates = tqdm(replicates)
    for i in replicates:
        # Resample row positions, not index labels: label-based lookup would return every
        # row sharing a drawn label and distort the sample when the index has duplicates.
        rows = np.random.choice(n_sample, n_sample)
        max_bs_distr[i] = centered.iloc[rows].mean().max()

    pvalue = (max_bs_distr > target_mean).sum() / n_bootstrap
    return pvalue, max_bs_distr
# p-value of the MACD1 rule measured against the maximum over all rules in
# multi_rule_ret_df, plus the bootstrap distribution of that maximum.
p_MACD1, max_bs_distr_MACD1 = multi_white_reality_test(target_rule='ret_MACD1', multi_rule_ret_df=multi_rule_ret_df)
p_MACD1
Compare this with the WRC when there is only one