login(`admin, `123456) loadPlugin("ODBC") go clearCachedModules() use fundit::fundCalculator use fundit::dataPuller /* Sample 1: 单基金或组合RBSA ------------------------------ */ IN00000008 = (0.007075 -0.007000 -0.002121 0.008571 -0.025775 0.018936 0.012031 0.005613 0.017171 0.003189 -0.020797 -0.005986); IN00000077 = (-0.002598 0.001488 0.001404 0.001031 0.002389 0.002999 -0.000246 -0.000533 -0.000656 0.001313 0.001352 0.001514); IN0000007G =(0.000373 0.000367 0.000379 0.000272 0.000461 0.000355 0.000349 0.000195 0.000478 0.000331 0.000330 0.000324); IN0000009M = (0.010477 0.010086 0.019914 0.027885 0.061534 0.011593 -0.032239 -0.003192 0.011932 0.024807 0.006475 0.014500); mt0 = table(IN00000008, IN00000077, IN0000007G, IN0000009M); MF00003PW1 = (0.017450 0.002639 -0.026316 -0.005405 -0.013587 -0.001377 0.024828 0.014805 0.011936 -0.003932 -0.021053 0.006721); res = cal_rbsa(MF00003PW1, mt0, false) // lb = 0 时仍有微小负数,估计可以换成0 // 不清除后面的12维数字是干啥的 beta = res[1][0:4] print(beta) /* Sample 1 end --------------------------------------------- */ /* Sample 2 start ------------------------------ */ // pull all historical weekly returns from mysql, then save it to local // tb_all = get_fund_weekly_rets(null, 2021.07.01, null, true) // save_table(tb_all, "mfdb.fund_performance_weekly", false) // pull historical weekly returns from pre-saved local table // NOTE: if the table doesn't exist, run the commented code above tb_all_weekly_ret = load_table_from_local("fundit", "mfdb.fund_performance_weekly") // get index weekly return based on RBSA asset_type def get_standard_rbsa_index_return(asset_type_id, start_date, end_date) { index_ids = "" // asset allocation: stock-bond-cash-gold if(asset_type_id == "AS0000005Q") index_ids = "'IN00000008','IN00000077','IN0000007G','IN0000009M'" // asset allocation: stock-bond-cash-gold else if(asset_type_id == "Large4Assets" ) index_ids = "'IN00000008','IN00000077','IN0000007G','IN0000009M'" // Fundit style: large-small-cash else if(asset_type_id == "Cap3Style" ) index_ids = "'FA00000WKG','FA00000WKH','IN0000007G'" // Fundit sector: 防守-周期-敏感-科技-cash else if(asset_type_id == "CSI5" ) index_ids = "'FA00000VML','FA00000VMM','FA00000VMN','FA00000VMO','IN0000007G'" // CNI style: LG-LV-SG-SV-cash else if(asset_type_id == "CNI5Style" ) index_ids = "'IN0000000S','IN0000000T','IN0000000U','IN0000000V','IN0000007G'" // CNI style: LG-LV-SG-SV-MG-MV-cash else if(asset_type_id == "CNI7Style" ) index_ids = "'IN0000000S','IN0000000T','IN0000000U','IN0000000V','IN0000000W', 'IN0000000X','IN0000007G'" // CSI old sector: 材料-电信-工业-公用-金融-可选-能源-消费-信息-医药-cash else if(asset_type_id == "CSI11" ) index_ids = "'IN0000000Y','IN0000000Z','IN00000010','IN00000011','IN00000012','IN00000013','IN00000014','IN00000015','IN00000016','IN00000017','IN0000007G'" // bond type: gov-cash-cnvt-corp else if(asset_type_id == "BondType" ) index_ids = "'IN0000007A','IN0000007G','IN0000008J','IN000002CM'" ret = get_index_weekly_rets(index_ids, start_date, end_date) return ret } win = 24 step = 24 // pick the fund to be compared with the_fund = 'MF00003PW1' the_fund_ret = (SELECT * FROM tb_all_weekly_ret WHERE year_week >= '202211' AND fund_id = the_fund).ret_1w // get all mutual fund ids for testing fund_ids = (SELECT DISTINCT fund_id FROM tb_all_weekly_ret).fund_id // all rbsa results will be saved here tb_all_rbsa = table(fund_ids.size()*(the_fund_ret.size()):0, "fund_id" "date" "asset_type_id" "weights", [STRING, STRING, STRING, STRING]) // all the distances will be saved here tb_dis = table(fund_ids.size():0, "fund_id" "date" "asset_type_id" "dist", [STRING, STRING, STRING, DOUBLE]) // asset_type_id = "Cap3Style" asset_type = "CSI5" tb_index_raw_ret = get_standard_rbsa_index_return(asset_type, 2021.07.01, 2024.07.26) // transform the data structure for rbsa calculation tb_index_weekly_ret = SELECT ret_1w FROM tb_index_raw_ret PIVOT BY year_week, index_id tb_index_weekly_ret.rename!("year_week", "date") // loop thru the fund list and calculate historical rbsa // NOTE: it takes about 6 minutes for 20,000 mutual funds with 3 year history for(f_id in fund_ids) { ret = SELECT year_week AS date, ret_1w AS ret FROM tb_all_weekly_ret WHERE fund_id = f_id ORDER BY year_week // calculate historical rbsa tb = cal_rolling_rbsa(ret, tb_index_weekly_ret, false, win, step) INSERT INTO tb_all_rbsa SELECT f_id, date, asset_type, weights FROM tb WHERE status = "solved" } SELECT * FROM tb_all_rbsa WHERE fund_id = the_fund // tb_all_weekly_ret = null // save to local db, just for the sake of saving time to run rbsa again save_table(tb_all_rbsa, "mfdb.fund_rbsa_weekly", false) tb_all_rbsa = null asset_type = "CSI5" the_dates = (SELECT DISTINCT date FROM tb_all_rbsa WHERE fund_id = the_fund AND asset_type_id = asset_type).date // loop thru all the dates for(d in the_dates) { // the rbsa weights of target fund of one date the_weights = (SELECT * FROM tb_all_rbsa WHERE fund_id = the_fund AND date = d AND asset_type_id = asset_type).weights[0] // all rbsa results with the same date t = SELECT * FROM tb_all_rbsa WHERE date = d AND asset_type_id = asset_type // calculate the distance between the target fund and any fund for(r in t) { // euclidean distance tb_dis.tableInsert(r.fund_id, d, asset_type, ((the_weights.split(" ")$DOUBLE).euclidean(r.weights.split(" ")$DOUBLE)).round(4)) // tanimoto distance // tb_dis.tableInsert(r.fund_id, d, asset_type, ((the_weights.split(" ")$DOUBLE).tanimoto(r.weights.split(" ")$DOUBLE)).round(4)) } } select * from tb_dis where fund_id = the_fund n = the_dates.size() select fund_id, avg from ( select fund_id, count(dist) as cnt, avg(dist) as avg from tb_dis where fund_id <> the_fund and asset_type_id = "Large4Assets" group by fund_id ) order by avg limit 500 select fund_Id, avg from ( select fund_id, sum(dist) as total_dist, count(dist) as cnt, avg(dist) as avg from tb_dis where fund_id <> the_fund and asset_type_id <> "Cap3Style" group by fund_id ) where cnt = n*2 order by avg limit 500 select * from tb_all_rbsa where fund_id IN ('MF00003PW1', 'MF00006EQ6', 'MF00003QZR','MF000074ZM','MF00006FQ5') t = select ret_1w from tb_all_weekly_ret where fund_id IN ('MF00003PW1', 'MF00006EQ6', 'MF00003QZR','MF000074ZM','MF00006FQ5') and year_week >= '202211' pivot by year_week, fund_id plot([t.MF00003PW1, t.MF00006EQ6], t.year_week, , LINE) plot([t.MF00003PW1, t.MF00003QZR], t.year_week, , LINE) plot([t.MF00003PW1, t.MF000074ZM], t.year_week, , LINE) plot([t.MF00003PW1, t.MF00006FQ5], t.year_week, , LINE) // calculate correlation t = (SELECT ret_1w FROM tb_all_weekly_ret WHERE year_week >= '202211' PIVOT BY year_week, fund_id ).dropColumns!("year_week") v_cols = t.colNames() m = matrix(t) tb_corr = table(fund_ids.size():0, "fund_id" "corr", [STRING, DOUBLE]) i = 0 for(c in m) { if(c.dropna().size() == the_fund_ret.rows()) { tb_corr.tableInsert(v_cols[i], the_fund_ret.corr(c).round(6)) } i += 1 } select * from tb_corr order by corr desc limit 500 /* Sample 2 end --------------------------------------------- */