指数ETF基金的组合分析方法初探

本文在Creative Commons许可证下发布

试想一下，大多数基金“推荐”的配置策略都假设某种股票/债券组合。假设我们想寻找成本最低、收益最高的组合（这里使用 Yahoo Finance 上的数据来分析，因为美国股市的数据更容易获得），那么问题就变成了：什么才算是一个好的组合？指数基金几乎涵盖了所有的股票和债券；几乎涵盖美国股票及债券市场组成的四只ETF是 VTI、VXUS、BND、BNDX。让我们从这些ETF开始数据分析（在下面的代码中，它们构成用于比较的基准组合，而被考察的投资组合由 SPY、SHY、GLD 构成）。分析程序使用R语言完成。

  1 # Load package
  2 library(tidyquant)
  3 library(broom)
  4 
  # Load data for the portfolios ----
  # Tickers of the three assets the candidate portfolios are built from.
  symbols <- c("SPY", "SHY", "GLD")
  symbols_low <- tolower(symbols)

  # getSymbols() is called with auto.assign = TRUE, so the piped value is
  # used as a set of object names: each one is fetched with get(), reduced to
  # its adjusted-close column with Ad(), and the columns are merged into one
  # xts object whose columns are named after the lower-case tickers.
  prices <- getSymbols(symbols, src = "yahoo",
                       from = "1990-01-01",
                       auto.assign = TRUE) %>%
    map(~Ad(get(.))) %>%
    reduce(merge) %>%
    `colnames<-`(symbols_low)

  # Collapse to one observation per month, stamped with the last date.
  prices_monthly <- to.monthly(prices, indexAt = "last", OHLC = FALSE)

  # Monthly returns for 2005-2019. ROC() defaults to continuous (log)
  # returns, but everything downstream combines returns across assets with
  # portfolio weights and compounds them as cumprod(1 + r) / prod(1 + r),
  # which is only valid for simple returns, so request them explicitly.
  ret <- ROC(prices_monthly, type = "discrete")["2005/2019"]
 18 
 # Load benchmark data ----
 # Tickers of the four ETFs that make up the benchmark portfolio.
 bench_sym <- c("VTI", "VXUS", "BND", "BNDX")

 # Same pipeline as for the portfolio assets: fetch each ticker, keep its
 # adjusted close, merge into one xts object with lower-case column names.
 bench <- getSymbols(bench_sym, src = "yahoo",
                     from = "1990-01-01",
                     auto.assign = TRUE) %>%
   map(~Ad(get(.))) %>%
   reduce(merge) %>%
   `colnames<-`(tolower(bench_sym))

 # One observation per month, stamped with the last date.
 bench <- to.monthly(bench, indexAt = "last", OHLC = FALSE)

 # Monthly returns for 2014-2019. Simple (discrete) returns are requested
 # explicitly because ROC() defaults to log returns, while the benchmark is
 # later built as a weighted portfolio and compounded as prod(1 + r).
 bench_ret <- ROC(bench, type = "discrete")["2014/2019"]
 29 
 # Weight vectors and portfolio return series ----
 # Weights follow the column order of `ret` (spy, shy, gld).
 wt1 <- rep(1 / ncol(ret), times = ncol(ret))  # equal weighted
 wt2 <- c(0.9, 0.1, 0)                         # risky portfolio
 wtn <- c(0.5, 0.5, 0)                         # naive portfolio

 # Portfolio return series; no rebalance_on is supplied, so
 # Return.portfolio()'s default rebalancing behaviour applies.
 port1 <- Return.portfolio(ret, weights = wt1)
 colnames(port1) <- "ret"

 port2 <- Return.portfolio(ret, weights = wt2)
 colnames(port2) <- "ret"

 portn <- Return.portfolio(ret, weights = wtn)

 # One row per month, one column per portfolio. Later code addresses these
 # columns by position, so the order date/equal/risky/naive matters.
 port_comp <- data.frame(
   date = index(port1),
   equal = as.numeric(port1),
   risky = as.numeric(port2),
   naive = as.numeric(portn)
 )
 49 
 # Benchmark portfolio ----
 # Fixed weights over the columns of `bench_ret`, rebalanced every quarter;
 # the resulting single return column is named "bench".
 wtb <- c(0.24, 0.21, 0.22, 0.33)
 portb <- Return.portfolio(bench_ret, weights = wtb, rebalance_on = "quarters")
 colnames(portb) <- "bench"
 54 
 # Graph of portfolios vs. benchmark ----
 # Cumulative growth (index starting from 100) of the three portfolios and
 # the benchmark from 2014 onwards.
 port_comp %>%
   filter(date >= "2014-01-01") %>%
   # portb is an xts object; store it as a plain numeric column (as the
   # summary table does) so gather() and cumprod() work on ordinary vectors.
   mutate(bench = as.numeric(portb)) %>%
   gather(key, value, -date) %>%
   group_by(key) %>%
   mutate(value = cumprod(value + 1)) %>%
   ungroup() %>%
   ggplot(aes(date, value * 100, color = key)) +
   geom_line() +
   # Labels and colours are matched to the keys in alphabetical order:
   # bench, equal, naive, risky.
   scale_color_manual("", labels = c("Bench", "Equal", "Naive", "Risky"),
                      values = c("purple", "blue", "black", "red")) +
   labs(x = "",
        y = "Index",
        title = "The three portfolios with a benchmark",
        caption = "Source: Yahoo, OSM estimates") +
   theme(legend.position = "top",
         plot.caption = element_text(hjust = 0))
 72 
 # Summary table ----
 # Annualised performance metrics per portfolio since 2014, computed from
 # monthly returns: mean * 12, volatility * sqrt(12), the ratio of the two,
 # and the compounded growth of 100.
 port_comp %>%
   filter(date >= "2014-01-01") %>%
   mutate(bench = as.numeric(portb)) %>%
   rename("Equal" = equal,
          "Naive" = naive,
          "Risky" = risky,
          "Bench" = bench) %>%
   gather(Asset, value, -date) %>%
   group_by(Asset) %>%
   # Scale first, round last: rounding the monthly mean to three decimals
   # before multiplying by 1200 would quantise the annual figure to steps of
   # 1.2 percentage points.
   summarise(`Mean (%)` = round(mean(value, na.rm = TRUE) * 1200, 1),
             `Volatility (%)` = round(sd(value, na.rm = TRUE) * sqrt(12) * 100, 1),
             `Sharpe` = round(mean(value, na.rm = TRUE) /
                                sd(value, na.rm = TRUE) * sqrt(12), 2),
             `Cumulative (%)` = round(prod(1 + value, na.rm = TRUE) * 100, 1)) %>%
   knitr::kable(caption = "Annualized performance metrics")
 88 
 # Full-period inputs ----
 # Per-asset mean monthly return and covariance matrix, used to evaluate the
 # simulated weightings.
 mean_ret <- apply(ret[, c("spy", "shy", "gld")], MARGIN = 2, FUN = mean)
 cov_port <- cov(ret[, c("spy", "shy", "gld")])

 # Mean and standard deviation of each example portfolio (every column of
 # port_comp except the date). Column order ports/ret/vol is relied on by the
 # plots further down.
 port_exam <- data.frame(
   ports = names(port_comp)[-1],
   ret = unname(vapply(port_comp[-1], mean, numeric(1))),
   vol = unname(vapply(port_comp[-1], sd, numeric(1)))
 )
 96 
 # Benchmark statistics ----
 # Mean and volatility of the weighted benchmark portfolio `portb`. Taking
 # mean()/sd() of `bench_ret` directly would pool the returns of all four
 # ETFs into one sample, which is neither the weighted benchmark return nor
 # a portfolio volatility.
 bench_exam <- data.frame(ports = "bench",
                          ret = mean(as.numeric(portb)),
                          vol = sd(as.numeric(portb)))

 # SPY on its own over the full sample ...
 bench_spy <- data.frame(ports = "sp",
                         ret = mean(ret$spy),
                         vol = sd(ret$spy))

 # ... and restricted to 2014-2019.
 bench_spy_14 <- data.frame(ports = "sp",
                            ret = mean(ret$spy["2014/2019"]),
                            vol = sd(ret$spy["2014/2019"]))
108 
# Inputs restricted to 2014-2019 ----
# Per-asset mean monthly return and covariance matrix for the sub-period.
mean_ret_14 <- apply(ret["2014/2019", c("spy", "shy", "gld")],
                     MARGIN = 2, FUN = mean)

cov_port_14 <- cov(ret["2014/2019", c("spy", "shy", "gld")])

# Mean and standard deviation of each example portfolio since 2014, one row
# per portfolio with columns ports/ret/vol. Rows come out ordered by
# portfolio name; the plots below address them by position.
port_exam_14 <- port_comp %>%
  filter(date >= "2014-01-01") %>%
  select(-date) %>%
  gather(ports, value) %>%
  group_by(ports) %>%
  summarise(ret = mean(value), vol = sd(value)) %>%
  as.data.frame()
120 
121                         
### Random weighting
# Weights for the full period: 1000 rows of three weights each.
# Each row draws a first weight on (0, 1), then two more, each bounded by
# what is still unallocated. Any remainder is split equally across the three
# weights so the row sums to one, and the row is shuffled so no column is
# systematically the largest.
wts <- matrix(nrow = 1000, ncol = 3)
set.seed(123)
for (row in seq_len(nrow(wts))) {
  first <- runif(1, 0, 1)
  rest <- c()
  for (k in 1:2) {
    rest[k] <- runif(1, 0, 1 - sum(first, rest))
  }
  draw <- c(first, rest)
  if (sum(first, rest) < 1) {
    draw <- draw + (1 - sum(first, rest)) / 3
  }
  wts[row, ] <- sample(draw, replace = FALSE)
}
140 
# Weights for the 2014 sub-period: 1000 rows of three weights each.
# The first weight is drawn on (0, 1), the second on what is left, and the
# third is simply the remainder, so every row sums to one. The row is then
# shuffled.
wts1 <- matrix(nrow = 1000, ncol = 3)
set.seed(123)
for (row in seq_len(nrow(wts1))) {
  first <- runif(1, 0, 1)
  second <- runif(1, 0, 1 - first)
  third <- 1 - sum(first, second)
  wts1[row, ] <- sample(c(first, second, third), replace = FALSE)
}
158 
# Simulated portfolios, full period ----
# For every row of `wts`: expected monthly return (weights times mean asset
# returns), monthly risk (square root of w' * cov * w), and their ratio.
port <- data.frame(
  returns = vapply(seq_len(nrow(wts)), function(k) {
    sum(wts[k, ] * mean_ret)
  }, numeric(1)),
  risk = vapply(seq_len(nrow(wts)), function(k) {
    w <- wts[k, ]
    sqrt(drop(w %*% cov_port %*% w))
  }, numeric(1))
)
port$sharpe <- port$returns / port$risk
171 
# Simulated portfolios since 2014 ----
# Same calculation as for the full period, using the `wts1` weightings and
# the 2014-2019 mean returns and covariance matrix.
port_14 <- data.frame(
  returns = vapply(seq_len(nrow(wts1)), function(k) {
    sum(wts1[k, ] * mean_ret_14)
  }, numeric(1)),
  risk = vapply(seq_len(nrow(wts1)), function(k) {
    w <- wts1[k, ]
    sqrt(drop(w %*% cov_port_14 %*% w))
  }, numeric(1))
)
port_14$sharpe <- port_14$returns / port_14$risk
184 
# Graph with Sharpe ratio ----
# Annualised risk/return scatter of the full-period simulated portfolios,
# coloured by Sharpe ratio, with the three example portfolios (rows 1-3 of
# port_exam) drawn on top as large dots.
ggplot(port, aes(x = risk * sqrt(12) * 100, y = returns * 1200)) +
  geom_point(aes(color = sharpe), size = 1.2, alpha = 0.4) +
  geom_point(data = port_exam[1, ],
             aes(x = vol * sqrt(12) * 100, y = ret * 1200),
             color = "red", size = 6) +
  geom_point(data = port_exam[2, ],
             aes(x = vol * sqrt(12) * 100, y = ret * 1200),
             color = "purple", size = 7) +
  geom_point(data = port_exam[3, ],
             aes(x = vol * sqrt(12) * 100, y = ret * 1200),
             color = "black", size = 5) +
  scale_x_continuous(limits = c(0, 14)) +
  scale_color_gradient(low = "red", high = "green") +
  labs(x = "Risk (%)",
       y = "Return (%)",
       title = "Simulated portfolios",
       color = "Sharpe ratio") +
  theme(legend.position = c(0.075, .8),
        legend.key.size = unit(.5, "cm"),
        legend.background = element_rect(fill = NA))
207 
# Graph since 2014 ----
# Same scatter for the 2014-2019 simulation. The example portfolios are
# taken from rows 1, 3 and 2 of port_exam_14 (blue, purple, black).
ggplot(port_14, aes(x = risk * sqrt(12) * 100, y = returns * 1200)) +
  geom_point(aes(color = sharpe), size = 1.2, alpha = 0.4) +
  geom_point(data = port_exam_14[1, ],
             aes(x = vol * sqrt(12) * 100, y = ret * 1200),
             color = "blue", size = 6) +
  geom_point(data = port_exam_14[3, ],
             aes(x = vol * sqrt(12) * 100, y = ret * 1200),
             color = "purple", size = 7) +
  geom_point(data = port_exam_14[2, ],
             aes(x = vol * sqrt(12) * 100, y = ret * 1200),
             color = "black", size = 5) +
  scale_x_continuous(limits = c(0, 14)) +
  scale_color_gradient(low = "red", high = "green") +
  labs(x = "Risk (%)",
       y = "Return (%)",
       title = "Simulated portfolios since 2014",
       color = "Sharpe ratio") +
  theme(legend.position = c(0.075, 0.8),
        legend.background = element_rect(fill = NA),
        legend.key.size = unit(.5, "cm"))
230 
# Portfolios benchmarked vs Vanguard (continuous) ----
# Colour each simulated 2014-2019 portfolio by how far its mean monthly
# return sits above or below the benchmark's (bench_exam$ret).
port_14 %>%
  mutate(Bench = returns - bench_exam$ret) %>%
  ggplot(aes(x = risk * sqrt(12) * 100, y = returns * 1200)) +
  geom_point(aes(color = Bench), size = 1.2, alpha = 0.4) +
  scale_color_gradient(low = "red", high = "green") +
  geom_point(data = port_exam_14[1, ],
             aes(x = vol * sqrt(12) * 100, y = ret * 1200),
             color = "blue", size = 6) +
  geom_point(data = port_exam_14[3, ],
             aes(x = vol * sqrt(12) * 100, y = ret * 1200),
             color = "purple", size = 7) +
  geom_point(data = port_exam_14[2, ],
             aes(x = vol * sqrt(12) * 100, y = ret * 1200),
             color = "black", size = 5) +
  labs(x = "Risk (%)",
       y = "Return (%)",
       title = "Simulated portfolios since 2014") +
  theme(legend.position = c(0.06, 0.8),
        legend.background = element_rect(fill = NA),
        legend.key.size = unit(.5, "cm"))
253 
# Portfolios benchmarked vs Vanguard (binary) ----
# Same scatter, but the colour is a 0/1 flag: 1 when the simulated
# portfolio's mean return exceeds the benchmark's, 0 otherwise.
port_14 %>%
  mutate(Bench = ifelse(returns - bench_exam$ret > 0, 1, 0)) %>%
  ggplot(aes(x = risk * sqrt(12) * 100, y = returns * 1200)) +
  geom_point(aes(color = Bench), size = 1.2, alpha = 0.4) +
  scale_color_gradient(low = "red", high = "green") +
  geom_point(data = port_exam_14[1, ],
             aes(x = vol * sqrt(12) * 100, y = ret * 1200),
             color = "blue", size = 6) +
  geom_point(data = port_exam_14[3, ],
             aes(x = vol * sqrt(12) * 100, y = ret * 1200),
             color = "purple", size = 7) +
  geom_point(data = port_exam_14[2, ],
             aes(x = vol * sqrt(12) * 100, y = ret * 1200),
             color = "black", size = 5) +
  labs(x = "Risk (%)",
       y = "Return (%)",
       title = "Simulated portfolios") +
  theme(legend.position = c(0.05, 0.8),
        legend.background = element_rect(fill = NA),
        legend.key.size = unit(.5, "cm"))
276 
# Share (in %) of simulated 2014-2019 portfolios whose mean return is above
# the benchmark's mean return.
pos_b <- round(
  mean(ifelse(port_14$returns - bench_exam$ret > 0, 1, 0)),
  2
) * 100
283 
# Per-portfolio statistics versus the benchmark, 2014-2019 ----
# For every simulated weighting, build the monthly portfolio return series
# and summarise it: mean return, mean excess return over the benchmark,
# tracking error (sd of the return difference) and risk (sd of returns).
#
# The weightings come from `wts1`, the set generated for the 2014 analysis
# and used for `port_14`, so these statistics describe the same simulated
# portfolios as the other since-2014 results.
bench_14 <- as.numeric(portb$bench)  # loop-invariant, computed once

port_list_14 <- lapply(seq_len(nrow(wts1)), function(i) {
  port_ret <- as.numeric(Return.portfolio(ret["2014/2019"], wts1[i, ]))
  data.frame(returns = mean(port_ret),
             excess_ret = mean(port_ret) - mean(bench_14),
             track_err = sd(port_ret - bench_14),
             risk = sd(port_ret))
})

# One row per simulated portfolio.
port_info <- bind_rows(port_list_14)

# Mean monthly return of SHY over the full sample.
rfr <- mean(ret$shy)
297 
# Graph of the information ratio ----
# Risk/return scatter of the simulated portfolios, coloured by excess return
# over the benchmark divided by tracking error, with the example portfolios
# (rows 1, 3 and 2 of port_exam_14) drawn on top.
port_info %>%
  mutate(info_ratio = excess_ret / track_err) %>%
  ggplot(aes(x = risk * sqrt(12) * 100, y = returns * 1200)) +
  geom_point(aes(color = info_ratio), size = 1.2, alpha = 0.4) +
  geom_point(data = port_exam_14[1, ],
             aes(x = vol * sqrt(12) * 100, y = ret * 1200),
             color = "blue", size = 6) +
  geom_point(data = port_exam_14[3, ],
             aes(x = vol * sqrt(12) * 100, y = ret * 1200),
             color = "purple", size = 7) +
  geom_point(data = port_exam_14[2, ],
             aes(x = vol * sqrt(12) * 100, y = ret * 1200),
             color = "black", size = 5) +
  scale_color_gradient("Information ratio", low = "red", high = "green") +
  labs(x = "Risk (%)",
       y = "Return (%)",
       title = "Simulated portfolios") +
  theme(legend.position = c(0.075, 0.8),
        legend.background = element_rect(fill = NA),
        legend.key.size = unit(.5, "cm"))

总结一下结论：如果您有定义良好的约束条件，那么考察不同的投资组合配置、以获得所需的风险/回报参数是非常有益的。如果没有，那么纳入一个足够广泛、包含尽可能多可投资风险资产的组合是有帮助的。使用调整后的Sharpe比率（即代码中的信息比率 info_ratio：超额回报除以跟踪误差）来观察组合的超额回报率是很有用的，这个比率揭示了一条重要的信息：一个包含大部分相似资产的投资组合，是否因偏离基准而在收益上得到了补偿。在本例中，我们的投资组合并没有得到这样的补偿，但那可能是由于持有黄金（gold exposure）所致。因此，使用不相关资产的投资组合可以降低总投资金额；比如围绕某个特定指数的成分股来制定投资组合，就能够最大限度地利用资金。