import datetime
import warnings
import numpy as np
import pandas as pd
import requests
from ray import serve
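# Legacy signature, kept for reference (asset_yield and return_holding_weight are no longer parameters):
# def vector_backtest(transfer_position: pd.DataFrame, asset_price=None, asset_yield=None, return_holding_weight=False):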
def vector_backtest(transfer_position: dict, asset_price: dict):
"""
基于矩阵运算的回测算法
Parameters
----------
transfer_position : list or DataFrame
调仓记录,其中日期格式和资产代码格式与传入的asset_yield或asset_price保持一致即可,权重需乘100%;
若有现金部分,需设为'cash',传入现金比例。
若为list,格式为(key名需保持一致)::
[{'date': '2020-12-31',
'value': [{'tradingcode': '000001.SZ', 'weight': 30},
{'tradingcode': '000002.SZ', 'weight': 60},
{'tradingcode': 'cash', 'weight': 10}]
},
{'date': '2021-01-31',
'value': [{'tradingcode': '000001.SZ', 'weight': 20},
{'tradingcode': '000002.SZ', 'weight': 50},
{'tradingcode': '000004.SZ', 'weight': 30}]
}]
若为DataFrame,格式为::
'000001.SZ' '000002.SZ' '000004.SZ' 'cash'
'2020-12-31' 30 60 0.0(或NAN) 10
'2021-01-31' 20 50 30 0.0
asset_price : DataFrame
资产的价格序列,index为日期,columns为资产名称或代码等,即每一列为一个资产的价格序列
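asset_yield : DataFrame
资产的收益率序列,可选择传入收益率序列或价格序列,index为日期,columns为资产名称或代码等,即每一列为一个资产的收益率序列(乘以100%)
NOTE: not accepted by the current signature; it is set to None at the top of the function body.
return_holding_weight : bool
是否返回每日的持仓
NOTE: not accepted by the current signature; it is set to False at the top of the function body.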
Returns
-------
portfolio_nav : pd.Series
组合的净值序列,index为日期
holding_weight : DataFrame
组合每日的持仓权重, index为日期,columns为资产名称或代码。return_holding_weight=False时不返回
"""
transfer_position = pd.DataFrame(transfer_position)
asset_price = pd.DataFrame(asset_price)
asset_yield = None
return_holding_weight = False
if isinstance(transfer_position, list) and transfer_position:
# 将list转换为dataframe格式
transfer_position_df = pd.DataFrame(transfer_position).groupby('date').apply(
lambda x: pd.DataFrame(list(x['value'])[0]))
transfer_position_df = (transfer_position_df.reset_index())[['date', 'tradingcode', 'weight']].copy()
transfer_position = transfer_position_df.pivot(index='date', columns='tradingcode', values='weight')
transfer_position.sort_index(inplace=True)
# transfer_position.fillna(0.0, inplace=True)
# 权重归一化
transfer_position = (transfer_position.div(transfer_position.sum(axis=1), axis=0) * 100).copy()
transfer_position.fillna(0.0, inplace=True)
# 删除有现金的一列,后面会重新计算现金比例
if 'cash' in transfer_position.columns.tolist():
transfer_position.drop(labels='cash', axis=1, inplace=True)
if asset_price is None and asset_yield is None:
raise ValueError("asset_price and asset_yield cannot both be None")
# 收益率序列转换为价格序列
if asset_price is None:
asset_yield.sort_index(inplace=True)
asset_yield.fillna(0.0, inplace=True)
asset_price = (asset_yield / 100 + 1).cumprod(axis=0)
asset_price.sort_index(inplace=True)
# 对价格序列做处理,<= 0的置为NaN
asset_price = asset_price.where(asset_price > 0, np.nan)
asset_price.fillna(method="ffill", inplace=True)
# 调仓日期
transfer_index = np.sort(transfer_position.index)
# 价格序列日期
price_index = np.sort(asset_price.index)
# 判断价格日期是否包含所有调仓日期
if any(transfer_index > price_index[-1]) or any(transfer_index < price_index[0]):
warnings.warn("the earliest and latest date of asset_price or asset_yield is {} and {}, "
"not contain all transfer date {}".format(price_index[0], price_index[-1], transfer_index))
transfer_position = transfer_position.loc[
transfer_index[(transfer_index <= price_index[-1]) & (transfer_index >= price_index[0])]].copy()
transfer_index = np.sort(transfer_position.index)
# 若调仓日期不是交易日,则往后取最近的一个交易日
transfer_index = [np.min(price_index[price_index >= transfer_index[i]]) for i in range(len(transfer_index))]
transfer_position.index = transfer_index
# 若有重复的调仓日,取最新调仓记录进行调仓
transfer_position = transfer_position[~transfer_position.index.duplicated(keep='last')].copy()
transfer_index = np.sort(transfer_position.index)
# 若价格序列的最后日期等于最后的调仓日期,则计算组合净值时无需考虑这次调仓,但返回每日调仓时,需要将该日的持仓权重改为调仓后的权重
is_last_transfer = False
if transfer_index[-1] == price_index[-1]:
is_last_transfer = True
transfer_index = transfer_index[:-1].copy()
# 获取调仓日的后一个交易日,即权重生效日
price_index = list(price_index)
reset_index = [transfer_index[0]] + [price_index[price_index.index(transfer_index[i]) + 1] for i in
range(1, len(transfer_index))]
# 判断价格序列的资产是否包含所有涉及调仓的资产
transfer_assets = transfer_position.columns.tolist()
price_assets = asset_price.columns.tolist()
if not set(transfer_assets) <= set(price_assets):
add_assets = list(set(transfer_assets) - set(price_assets))
warnings.warn("asset_price or asset_yield does not contain data for asset {}".format(add_assets))
for add_asset in add_assets:
asset_price[add_asset] = np.nan
# 使资产价格序列和调仓权重的列名顺序一致
asset_price = asset_price[transfer_assets].copy()
transfer_position = transfer_position[transfer_assets].copy()
# 判断调仓日所有资产是否都有行情值,若没有,相当于未下单成功,则持仓设为0,加入现金部分
transfer_price = (asset_price.loc[transfer_index]).copy()
transfer_position[transfer_price.isnull()] = 0.0
transfer_position['cash'] = 100.0 - transfer_position.sum(axis=1)
asset_price['cash'] = 1.0
# 保证资产价格序列和调仓权重的列名顺序一致
transfer_assets = transfer_position.columns.tolist()
asset_price = asset_price[transfer_assets].copy()
transfer_position = transfer_position[transfer_assets].copy()
# 基准矩阵,计算每两个调仓期区间的累计收益
bench_price = pd.DataFrame(index=asset_price.index, columns=asset_price.columns)
_bench_price = (asset_price.loc[transfer_index]).copy()
_bench_price.index = reset_index
bench_price.loc[reset_index] = _bench_price.loc[reset_index]
bench_price.fillna(method='ffill', inplace=True)
# 权重矩阵
weight_ts = pd.DataFrame(index=asset_price.index, columns=asset_price.columns)
_weight_ts = (transfer_position.loc[transfer_index]).copy()
_weight_ts.index = reset_index
weight_ts.loc[reset_index] = _weight_ts.loc[reset_index]
weight_ts.fillna(method='ffill', inplace=True)
# 计算每两个调仓期区间的组合收益
interval_cum_return = asset_price / bench_price
interval_cum_return = pd.DataFrame((interval_cum_return * weight_ts / 100).sum(axis=1), columns=['returns'])
transfer_cum_return