# _*_coding:utf-8_*_
# 开发团队:阿悦科技
# 开发人员:Administrator
# 开发时间:2020/11/17 18:56
# 文件名称:双分组分析.py
# 开发工具:PyCharm
import pandas as pd
import numpy as np
import math
import datetime
import statsmodels.api as sm
import matplotlib.pyplot as plt
from scipy import linalg
import scipy
# 第一部分,进行数据的预处理
def loadMainTrdData():
    """Load the monthly stock file and attach next-month excess returns.

    Reads ``TRD_Mnth.csv`` and ``Norisk_Rate.xlsx`` from the current working
    directory.

    Returns:
        A DataFrame with the columns ``Stkcd``, ``Trdmnt``, ``Mretnd``,
        ``Msmvosd``, ``rf``, ``Ret_Excess`` and ``F1_Ret``, restricted to
        rows with ``Trdmnt >= '2005-01'``.  ``Ret_Excess`` is
        ``Mretnd - rf``; ``F1_Ret`` is the stock's ``Ret_Excess`` on the next
        row of the month-by-stock panel, with missing values replaced by 0.
    """
    # Read the raw inputs.
    df = pd.read_csv('TRD_Mnth.csv')
    df['Trdmnt'] = pd.to_datetime(df.Trdmnt)
    df1 = pd.read_excel('Norisk_Rate.xlsx')
    # Drop the rows labelled 0 and 1.
    # NOTE(review): Trdmnt is parsed before these rows are dropped; if they
    # are descriptive header rows the parse above would fail -- confirm
    # against the actual file.
    df = df.drop([0, 1])
    # Keep only the columns used downstream.  The explicit copy makes the
    # column assignments below write to an owned frame instead of a slice.
    df = df[['Stkcd', 'Trdmnt', 'Mretnd', 'Msmvosd']].copy()
    df1 = df1[['Trdmnt', 'rf']]
    # Represent stock codes as zero-padded 6-character strings.
    df['Stkcd'] = [str(x).zfill(6) for x in df.Stkcd]
    # Keep only codes whose first character is 0, 3 or 6.
    df = df[df['Stkcd'].str[0].isin(['0', '3', '6'])]
    # Attach the monthly risk-free rate to every stock-month.
    df = pd.merge(df, df1, on=['Trdmnt'], how='left')
    # Excess return over the risk-free rate.
    df['Ret_Excess'] = df['Mretnd'] - df['rf']
    # Reshape to a month x stock matrix of excess returns.
    df_pivot = df.pivot(index='Trdmnt', columns='Stkcd', values='Ret_Excess')
    # Move every row up by one so month t holds the excess return of the
    # following row (the next month present in the panel).
    df_pivot = df_pivot.shift(-1)
    # Back to long format, naming the value column 'F1_Ret'.
    df_pivot = df_pivot.unstack().reset_index().rename(columns={0: 'F1_Ret'})
    # Attach the forward excess return to the main table.
    df = pd.merge(df, df_pivot, on=['Stkcd', 'Trdmnt'], how='left')
    # Keep observations from January 2005 onwards (copy: we assign below).
    df = df[df.Trdmnt >= '2005-01'].copy()
    # Replace missing forward returns with 0.  Plain assignment is used
    # because an in-place fillna on ``df['F1_Ret']`` is a chained write that
    # does not reach ``df`` under pandas Copy-on-Write.
    df['F1_Ret'] = df['F1_Ret'].fillna(0)
    return df
df = loadMainTrdData()
# Month x stock matrix of raw monthly returns, sorted by month so that
# shift/rolling operate in time order.
df_pivot = df.pivot(index='Trdmnt', columns='Stkcd', values='Mretnd')
df_pivot = df_pivot.sort_index()
# Lag by one row so that every window below ends at month t-1, i.e. uses
# only returns realised before the current month.
df1 = df_pivot.shift(1)
# Past 1-month return.
df_pivot = df1.unstack().reset_index().rename(columns={0: 'CumPastRet1'})
df = pd.merge(df, df_pivot, on=['Stkcd', 'Trdmnt'], how='left')
# Past 3-, 6- and 12-month compounded returns.  All horizons roll over the
# lagged matrix ``df1``; the 6- and 12-month versions previously rolled over
# the unlagged matrix, which made them inconsistent with the 1- and 3-month
# columns and broke the 2-12 month formula below.
for _window in (3, 6, 12):
    df_pivot = df1.rolling(window=_window, min_periods=1).apply(
        lambda x: (1 + x).prod() - 1, raw=False)
    df_pivot = df_pivot.unstack().reset_index().rename(
        columns={0: 'CumPastRet' + str(_window)})
    df = pd.merge(df, df_pivot, on=['Stkcd', 'Trdmnt'], how='left')
# Compounded return over months t-12..t-2: strip the most recent month
# (t-1) out of the 12-month compounded return.
df['CumPastRet2-12'] = (1 + df['CumPastRet12']) / (1 + df['CumPastRet1']) - 1
#依赖双分组函数
def cpt_vw_ret(group, avg_name, weight_name):
    """Return the weighted average of one column, weighted by another.

    Args:
        group: DataFrame holding both columns (e.g. one groupby group).
        avg_name: Name of the column to average.
        weight_name: Name of the column holding the weights.

    Returns:
        ``sum(value * weight) / sum(weight)``, or NaN when the weights sum
        to zero (including an empty group).
    """
    d = group[avg_name]
    w = group[weight_name]
    total_weight = w.sum()
    # NumPy/pandas floats never raise ZeroDivisionError (they give inf/nan
    # with a warning), so the zero-weight case has to be tested explicitly.
    if total_weight == 0:
        return np.nan
    return (d * w).sum() / total_weight
def get_stock_groups(data, sortname, groups_num):
    """Assign each row of *data* to a quantile bucket of ``data[sortname]``.

    Args:
        data: DataFrame containing the sorting column; it is not modified.
        sortname: Name of the column to sort on.
        groups_num: Number of equal-frequency buckets.

    Returns:
        A one-column DataFrame named ``'Group'`` holding the string labels
        ``'G01'``, ``'G02'``, ..., sharing the row labels of *data* with the
        index named ``'ID'``.
    """
    frame = data.copy()
    bucket_names = ['G0{}'.format(k) for k in range(1, groups_num + 1)]
    buckets = pd.qcut(frame[sortname], groups_num, labels=bucket_names)
    # Categorical labels -> plain strings, then wrap in a frame.
    groups = buckets.astype(str).to_frame().rename(columns={sortname: 'Group'})
    groups.index.name = 'ID'
    return groups
def dependent_double_sort(data, sortname1, sortname2, TimeName, groups_num1,
                          groups_num2, weighted, ret_name='F1_Ret'):
    """Build dependent (sequential) double-sorted portfolios and their returns.

    Within each ``TimeName`` period rows are first split into ``groups_num1``
    quantile buckets of ``sortname1``; then, inside every (period, first
    bucket) cell, into ``groups_num2`` quantile buckets of ``sortname2``.
    The portfolio label is the concatenation of both bucket labels, e.g.
    ``'G01G03'``.

    Args:
        data: Long-format DataFrame with a unique row index; not modified.
        sortname1: Column used for the first sort.
        sortname2: Column used for the second, conditional sort.
        TimeName: Column identifying the period.
        groups_num1: Number of buckets for the first sort.
        groups_num2: Number of buckets for the second sort.
        weighted: Column supplying the weights (market value in this script).
        ret_name: Column holding the return to aggregate.  Defaults to
            ``'F1_Ret'``, the value that used to be hard-coded.

    Returns:
        ``(vwret, ewret)``: two DataFrames indexed by ``TimeName`` with the
        columns ``'Group'`` and ``'Ret'``, holding the weighted and the
        equal-weighted portfolio return respectively.
    """
    df = data.copy()
    # First sort: bucket by sortname1 within each period.
    PortTag1 = df.groupby([TimeName]).apply(get_stock_groups, sortname1, groups_num1).reset_index().set_index('ID')
    df = pd.merge(df, PortTag1['Group'], left_index=True, right_index=True)
    df = df.rename(columns={'Group': 'G1'})
    # Second sort: bucket by sortname2 within each (period, G1) cell.
    PortTag2 = df.groupby([TimeName, 'G1']).apply(get_stock_groups, sortname2, groups_num2).reset_index().set_index('ID')
    df = pd.merge(df, PortTag2['Group'], left_index=True, right_index=True)
    df = df.rename(columns={'Group': 'G2'})
    # Combined portfolio label.
    df['Group'] = df['G1'] + df['G2']
    # Each row's share of the weighting variable inside its portfolio.
    df['Weight'] = df[weighted] / df.groupby([TimeName, 'Group'])[weighted].transform('sum')
    # Weighted portfolio return per (period, portfolio).
    vwret = df.groupby([TimeName, 'Group']).apply(cpt_vw_ret, ret_name, 'Weight').to_frame().reset_index().rename(columns={0: 'Ret'})
    vwret = vwret.set_index(TimeName)
    # Equal-weighted portfolio return per (period, portfolio).
    ewret = df.groupby([TimeName, 'Group'])[ret_name].mean().to_frame().reset_index().rename(columns={ret_name: 'Ret'})
    ewret = ewret.set_index(TimeName)
    return vwret, ewret
# Dependent double sort: first on market value, then on the past 3-month
# cumulative return.
df_temp3 = df[['Trdmnt', 'Stkcd', 'Msmvosd', 'CumPastRet3', 'F1_Ret']].dropna()
vwret3, ewret3 = dependent_double_sort(df_temp3, 'Msmvosd', 'CumPastRet3', 'Trdmnt', 5, 5, 'Msmvosd')
# Match the double-sorted portfolio returns with the three factors.
FF3 = pd.read_csv('fivefactor_monthly.csv')
# Index the factors by month as real timestamps.  The portfolio frame is
# indexed by datetime Trdmnt, so a string index here would depend on
# implicit coercion during the index merge, which newer pandas does not do.
# NOTE(review): assumes Trdmnt timestamps fall on the first day of the month
# (as produced by parsing 'YYYY-MM' strings) -- confirm against the data.
FF3.index = pd.to_datetime(
    [str(x)[:4] + '-' + str(x)[4:7] for x in FF3.trdmn], format='%Y-%m')
vwret3 = vwret3.reset_index().pivot(index='Trdmnt', columns='Group', values='Ret')
# The portfolio return stored at month t is the forward (next-row) return;
# shift it down one row so it lines up with the factor row it is merged
# with, and drop months with any missing portfolio.
vwret3 = vwret3.shift(1).dropna()
vwret = pd.merge(vwret3, FF3[['mkt_rf', 'smb', 'hml']], left_index=True, right_index=True)
# Time-series regression for portfolio G01G01 as an example.
Y = vwret['G01G01']
X = vwret[['mkt_rf', 'smb', 'hml']]
X = X.assign(const=1)
# HAC (Newey-West) covariance with 2 lags.  ``use_correction`` is a HAC
# option and must travel inside ``cov_kwds``; passed directly to ``fit`` it
# does not reach the covariance estimator.
results = sm.OLS(Y, X).fit(cov_type='HAC', cov_kwds={'maxlags': 2, 'use_correction': True}, use_t=True)
print(results.summary())
# Fama-MacBeth 回归
# 对每一个时间t做OLS回归
def getOLS(group, x1, x2, x3, x4, y):
    """Run one cross-sectional OLS of ``group[y]`` on four regressors.

    Args:
        group: DataFrame for a single period (one groupby group).
        x1, x2, x3, x4: Column selectors for the four explanatory variables.
        y: Column name of the dependent variable.

    Returns:
        The fitted parameter vector (``res.params``) of the regression of
        ``group[y]`` on the stacked regressors with a constant added via
        ``sm.add_constant``.
    """
    # Stack the four regressors side by side into one design matrix.
    regressors = np.column_stack([group[name] for name in (x1, x2, x3, x4)])
    design = sm.add_constant(regressors)
    fitted = sm.OLS(group[y], design).fit()
    return fitted.params
# Assemble the Fama-MacBeth inputs: next-month excess return, log trading
# volume, market value, past 3-month cumulative return and the raw return.
df1 = pd.read_csv('TRD_Mnth1.csv')
# Copy so the column assignments below write to an owned frame, not a slice.
df2 = df1[['Stkcd', 'Trdmnt', 'Mnshrtrd']].copy()
df2['Stkcd'] = [str(x).zfill(6) for x in df2.Stkcd]
df2['Trdmnt'] = pd.to_datetime(df2.Trdmnt)
df = pd.merge(df, df2, on=['Stkcd', 'Trdmnt'], how='left')
df3 = df[['Stkcd', 'Trdmnt', 'Mnshrtrd', 'Msmvosd', 'CumPastRet3', 'F1_Ret', 'Mretnd']].copy()
# Explanatory variable: log of trading volume.
df3['Mnshrtrd'] = df3['Mnshrtrd'].apply(np.log)
# log(0) is -inf, which dropna() would keep and which would corrupt that
# month's regression; treat it as missing instead.
df3['Mnshrtrd'] = df3['Mnshrtrd'].replace([np.inf, -np.inf], np.nan)
df3_pivot = df3.pivot(index='Trdmnt', columns='Stkcd', values='Mretnd')
df3_pivot = df3_pivot.sort_index()
# Volatility: standard deviation of monthly returns over a rolling 12-row
# window ending at the current month, requiring at least 10 observations.
df3_pivot = df3_pivot.rolling(window=12, min_periods=10).std()
df3_pivot = df3_pivot.unstack().reset_index().rename(columns={0: 'Std'})
df3 = pd.merge(df3, df3_pivot, on=['Stkcd', 'Trdmnt'], how='left')
df3 = df3.dropna()
# One cross-sectional OLS per month.
res = df3.groupby(['Trdmnt']).apply(getOLS, ['Mnshrtrd'], ['CumPastRet3'], ['Std'], ['Msmvosd'], 'F1_Ret')
print(res)
def NWttest_1var(Y, L):
'''
Newey and West T检验
Y:ser
评论6