module fundit::bfiMatcher
use fundit::sqlUtilities;
use fundit::operationDataPuller;
use fundit::performanceDataPuller;
/*
* 返回预设的指标最小值
*
*/
def get_min_threshold(data_name) {
ret = 0;
if(data_name == 'correlation') { ret = 0.64; }
else if(data_name == 'ret_count') { ret = 48; }
return ret;
}
/*
* 取BFI所需要的指数/因子ID
*
* NOTE: Java使用的逻辑如下(除了FA),暂时没有差别
* SELECT a.fund_Id FROM mfdb.fund_performance AS a
LEFT JOIN pfdb.cm_class_asset_index AS b ON a.fund_id = b.index_id
WHERE a.end_date = 'YYYY-MM' AND a.fund_id LIKE 'IN%' AND b.isvalid = 1 ORDER BY a.fund_id ASC
*/
def get_bfi_index_list() {
return ['FA00000VML','FA00000VMM','FA00000VMN','FA00000VMO','FA00000WKG','FA00000WKH','IN00000008','IN0000000D','IN0000000M','IN0000000S',
'IN0000000T','IN0000000U','IN0000000V','IN0000000W','IN0000000X','IN0000000Y','IN0000000Z','IN00000010','IN00000011','IN00000012',
'IN00000013','IN00000014','IN00000015','IN00000016','IN00000017','IN00000077','IN00000078','IN00000079','IN0000007A','IN0000007B',
'IN0000007C','IN0000007D','IN0000007E','IN0000007F','IN0000007G','IN0000007M','IN0000007N','IN0000007O','IN00000080','IN00000088',
'IN0000008O','IN0000009M','IN0000028E','IN000002CM'];
}
defg regressionT(y, x) {
r = SELECT beta, tstat FROM ols(y, x, true, 1) WHERE rowNo(beta) = 1;
return r[0]['tstat'];
}
/*
* 计算 bfi-matching 所需要的数据指标(月度)
*
*
*/
def cal_monthly_closity(ret1, ret2, win) {
t0 = SELECT end_date.month() AS end_date, end_date as price_date, ret1.ret AS ret1, ret2.ret AS ret2, tmoving(count, end_date, end_date, win) AS ret_count
FROM ret1
INNER JOIN ret2 ON ret1.end_date = ret2.end_date
ORDER BY end_date;
t = SELECT end_date, price_date,
tmcorr(t0.end_date, ret1, ret2, win) AS corr,
iif(tmstd(end_date, ret1-ret2, win) == 0, null, tmavg(end_date, ret1-ret2, win)\tmstd(end_date, ret1-ret2, win)) AS info,
tmoving(regressionT, end_date, [ret1, ret2], win) AS t_value,
tmbeta(end_date, ret1, ret2, win) AS beta // 用 ols() 算的值和这个一样
FROM t0
WHERE ret_count >= get_min_threshold('ret_count')
ORDER BY end_date;
UPDATE t SET corr = NULL WHERE corr < get_min_threshold('correlation');
return SELECT end_date.month().last() AS end_date,
corr.last() AS corr,
info.last() * sqrt(get_annulization_multiple('w')) AS info, // annuulized info ratio
t_value.last() AS t_value,
beta.last() AS beta
FROM t
GROUP BY end_date.month();
}
/*
* 计算目标和BFI所用指数因子的相关系数
*
* @param entity_info
: NEED COLUMNS entity_id, inception_date, price_date
*
* TODO: correlation is OK; beta, info, t_value are way off!
*
* NOTE: 与Java把月末日期作为截止日期不同的是,这里用每月最后一个周五作为截止日,所以数值会与MySQL中存储的略为不同
*
*/
def cal_entity_index_coe(entity_type, entity_info) {
// entity_info = get_fund_info(['MF00003PW1', 'MF00003PWC']).join(take(2024.10.31, 2) AS price_date).rename!('fund_id', 'entity_id');
if(entity_info.isVoid() || entity_info.size() == 0) return null;
// 简单起见,取数据集中最新日期作为截止日期
end_day = entity_info.price_date.max();
ret_entity = get_entity_weekly_rets(entity_type, entity_info);
if(ret_entity.isVoid() || ret_entity.size() == 0) return null;
// 取BFI用得到的指数/因子列表
v_indexes = get_bfi_index_list();
// 手搓一个带日期的数据表
index_info = table(v_indexes AS entity_id, take(end_day, v_indexes.size()) AS price_date);
ret_index = get_entity_weekly_rets('MI', index_info);
if(ret_index.isVoid() || ret_index.size() == 0) return null;
// 两次循环遍历所有entity和指数
entity_coe = table(1000:0, ['entity_id', 'index_id', 'end_date', 'coe_1y', 'coe_3y', 'coe_5y', 'info_ratio_1y', 'info_ratio_3y', 'info_ratio_5y',
't_value_1y', 't_value_3y', 't_value_5y', 'beta_1y', 'beta_3y', 'beta_5y'],
[SYMBOL, SYMBOL, MONTH, DOUBLE, DOUBLE, DOUBLE, DOUBLE, DOUBLE, DOUBLE,
DOUBLE, DOUBLE, DOUBLE, DOUBLE, DOUBLE, DOUBLE]);
for(entity in entity_info.entity_id) {
ret1 = SELECT fund_id AS entity_id, price_date.weekEnd(4) AS end_date, price_date.weekEnd(4) AS price_date, ret_1w AS ret
FROM ret_entity WHERE fund_id = entity AND price_date.weekEnd(4) <= end_day;
for(index in v_indexes) {
ret2 = SELECT index_id AS benchmark_id, price_date.weekEnd(4) AS end_date, price_date.weekEnd(4) AS price_date, ret_1w AS ret
FROM ret_index WHERE index_id = index AND price_date.weekEnd(4) <= end_day;
benchmarks = table(take(entity, ret1.size()) AS entity_id, take(index, ret1.size()) AS benchmark_id, ret1.price_date.weekEnd(4) AS end_date);
closity_1y = cal_monthly_closity(ret1, ret2, 1y);
closity_3y = cal_monthly_closity(ret1, ret2, 3y);
closity_5y = cal_monthly_closity(ret1, ret2, 5y);
INSERT INTO entity_coe
SELECT entity, index, c1.end_date, c1.corr AS coe_1y, c3.corr AS coe_3y, c5.corr AS coe_5y,
c1.info AS info_ratio_1y, c3.info AS info_ratio_3y, c5.info AS info_ratio_5y,
c1.t_value AS t_value_1y, c3.t_value AS t_value_3y, c5.t_value AS t_value_5y,
c1.beta AS beta_1y, c3.beta AS beta_3y, c5.beta AS beta_5y
FROM closity_1y c1
LEFT JOIN closity_3y c3 ON c1.end_date = c3.end_date
LEFT JOIN closity_5y c5 ON c1.end_date = c5.end_date;
}
}
return entity_coe;
}