diff --git a/25_07_phd_defense/assets/01_common.R b/25_07_phd_defense/assets/01_common.R new file mode 100644 index 0000000..b14e29f --- /dev/null +++ b/25_07_phd_defense/assets/01_common.R @@ -0,0 +1,31 @@ +text_size <- 16 +width <- 12 +height <- 6 + +# col_lightgray <- "#e7e7e7" +# col_blue <- "#F24159" +# col_b_smooth <- "#F7CE14" +# col_p_smooth <- "#58A64A" +# col_pointwise <- "#772395" +# col_b_constant <- "#BF236D" +# col_p_constant <- "#F6912E" +# col_optimum <- "#666666" + +# https://www.schemecolor.com/retro-rainbow-pastels.php +col_lightgray <- "#e7e7e7" +col_blue <- "#F24159" +col_b_smooth <- "#5391AE" +col_p_smooth <- "#85B464" +col_pointwise <- "#E2D269" +col_b_constant <- "#7A4E8A" +col_p_constant <- "#BC677B" +col_optimum <- "#666666" +col_auto <- "#EA915E" + +T_selection <- c(32, 128, 512) + +# Lambda grid +lamgrid <- c(-Inf, 2^(-15:25)) + +# Gamma grid +gammagrid <- sort(1 - sqrt(seq(0, 0.99, .05))) diff --git a/25_07_phd_defense/assets/crps_learning/algos_changing.gif b/25_07_phd_defense/assets/crps_learning/algos_changing.gif new file mode 100644 index 0000000..497b469 Binary files /dev/null and b/25_07_phd_defense/assets/crps_learning/algos_changing.gif differ diff --git a/25_07_phd_defense/assets/crps_learning/algos_constant.gif b/25_07_phd_defense/assets/crps_learning/algos_constant.gif new file mode 100644 index 0000000..0f535a5 Binary files /dev/null and b/25_07_phd_defense/assets/crps_learning/algos_constant.gif differ diff --git a/25_07_phd_defense/assets/crps_learning/pre_vs_post.gif b/25_07_phd_defense/assets/crps_learning/pre_vs_post.gif new file mode 100644 index 0000000..e6d09c8 Binary files /dev/null and b/25_07_phd_defense/assets/crps_learning/pre_vs_post.gif differ diff --git a/25_07_phd_defense/assets/crps_learning/pre_vs_post_lambda.gif b/25_07_phd_defense/assets/crps_learning/pre_vs_post_lambda.gif new file mode 100644 index 0000000..10a3715 Binary files /dev/null and b/25_07_phd_defense/assets/crps_learning/pre_vs_post_lambda.gif differ diff --git a/25_07_phd_defense/assets/crps_learning/uneven_grid.gif b/25_07_phd_defense/assets/crps_learning/uneven_grid.gif new file mode 100644 index 0000000..7f1e566 Binary files /dev/null and b/25_07_phd_defense/assets/crps_learning/uneven_grid.gif differ diff --git a/25_07_phd_defense/assets/crps_learning/weights_lambda.gif b/25_07_phd_defense/assets/crps_learning/weights_lambda.gif new file mode 100644 index 0000000..14a90ea Binary files /dev/null and b/25_07_phd_defense/assets/crps_learning/weights_lambda.gif differ diff --git a/25_07_phd_defense/logos_combined.xcf b/25_07_phd_defense/assets/logos_combined.xcf similarity index 100% rename from 25_07_phd_defense/logos_combined.xcf rename to 25_07_phd_defense/assets/logos_combined.xcf diff --git a/25_07_phd_defense/index.qmd b/25_07_phd_defense/index.qmd index bab3782..315860c 100644 --- a/25_07_phd_defense/index.qmd +++ b/25_07_phd_defense/index.qmd @@ -1,18 +1,18 @@ --- title: "Data Science Methods for Forecasting in Energy and Economics" date: 2025-07-10 -author: +author: - name: Jonathan Berrisch affiliations: - ref: hemf -affiliations: +affiliations: - id: hemf name: University of Duisburg-Essen, House of Energy Markets and Finance -format: +format: revealjs: embed-resources: true footer: "" - logo: logos_combined.png + logo: assets/logos_combined.png theme: [default, clean.scss] smaller: true fig-format: svg @@ -21,6 +21,10 @@ execute: highlight-style: github --- + + ## Outline ::: {.hidden} @@ -103,8 +107,6 @@ xaringanExtra::use_freezeframe(responsive = 
TRUE) --- -class: center, middle, sydney-blue - # Motivation name: motivation @@ -334,11 +336,11 @@ The cumulative regret: Popular loss functions for point forecasting `r Citet(my_bib, "gneiting2011making")`: .pull-left[ - $\ell_2$-loss $\ell_2(x, y) = | x -y|^2$ - - optimal for mean prediction + - optimal for mean prediction ] .pull-right[ -- $\ell_1$-loss $\ell_1(x, y) = | x -y|$ - - optimal for median predictions +- $\ell_1$-loss $\ell_1(x, y) = | x -y|$ + - optimal for median predictions ] @@ -347,7 +349,7 @@ Popular loss functions for point forecasting `r Citet(my_bib, "gneiting2011makin ::: {.column width="48%"} - $\ell_2$-loss $\ell_2(x, y) = | x -y|^2$ - - optimal for mean prediction + - optimal for mean prediction ::: @@ -357,8 +359,8 @@ Popular loss functions for point forecasting `r Citet(my_bib, "gneiting2011makin ::: {.column width="48%"} -- $\ell_1$-loss $\ell_1(x, y) = | x -y|$ - - optimal for median predictions +- $\ell_1$-loss $\ell_1(x, y) = | x -y|$ + - optimal for median predictions ::: @@ -494,7 +496,7 @@ We apply Bernstein Online Aggregation (BOA). It lets us weaken the exp-concavity It's strictly proper `r Citet(my_bib, "gneiting2007strictly")`. -Using the CRPS, we can calculate time-adaptive weight $w_{t,k}$. However, what if the experts' performance is not uniform over all parts of the distribution? +Using the CRPS, we can calculate time-adaptive weight $w_{t,k}$. However, what if the experts' performance is not uniform over all parts of the distribution? The idea: utilize this relation: @@ -550,7 +552,7 @@ The same algorithm satisfies that there exist a $C>0$ such that for $x>0$ it hol \label{eq_boa_opt_select} \end{equation} -if $Y_t$ is bounded, the considered loss $\ell$ is convex $G$-Lipschitz and weak exp-concave in its first coordinate. +if $Y_t$ is bounded, the considered loss $\ell$ is convex $G$-Lipschitz and weak exp-concave in its first coordinate. This is for losses that satisfy **A1** and **A2**. @@ -601,7 +603,7 @@ for all $x_1,x_2 \in \mathbb{R}$ and $t>0$ that Pointwise can outperform constant procedures -QL is convex but not exp-concave: +QL is convex but not exp-concave: `r fontawesome::fa("arrow-right")` Almost optimal convergence w.r.t. *convex aggregation* \eqref{eq_boa_opt_conv} `r fontawesome::fa("check", fill ="#00b02f")`
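+
+To make the CRPS/quantile-loss relation from the slides above concrete, here is a minimal R sketch (not from the paper; the grid and example values are made up) that approximates the CRPS by averaging the pinball loss $\text{QL}_p$ over a dense probability grid:
+
+```{r, eval = FALSE}
+# Pinball loss QL_p(q, y) of quantile forecast q at level p
+ql <- function(q, y, p) (y - q) * (p - (y < q))
+
+# CRPS of a N(0, 1) forecast for observation y_obs, approximated
+# as the average of 2 * QL_p over the probability grid
+p_grid <- seq(0.005, 0.995, by = 0.005)
+y_obs <- 0.7
+crps_approx <- mean(2 * ql(qnorm(p_grid), y_obs, p_grid))
+
+# Cross-check against the closed form: scoringRules::crps_norm(y_obs)
+```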
@@ -655,8 +657,8 @@ The gradient based fully adaptive Bernstein online aggregation (BOAG) applied po $$\widehat{\mathcal{R}}_{t,\pi} = 2\overline{\widehat{\mathcal{R}}}^{\text{QL}}_{t,\pi}.$$ -If $Y_t|\mathcal{F}_{t-1}$ is bounded -and has a pdf $f_t$ satifying $f_t>\gamma >0$ on its +If $Y_t|\mathcal{F}_{t-1}$ is bounded +and has a pdf $f_t$ satifying $f_t>\gamma >0$ on its support $\text{spt}(f_t)$ then \ref{eq_boa_opt_select} holds with $\beta=1$ and $$\widehat{\mathcal{R}}_{t,\min} = 2\overline{\widehat{\mathcal{R}}}^{\text{QL}}_{t,\min}$$. @@ -698,15 +700,785 @@ Simple Example: ::: {.column width="48%"} -foo +::: {.panel-tabset} + +## CDFs + +```{r, echo = FALSE, fig.width=7, fig.height=6, fig.align='center', cache = FALSE} +source("assets/01_common.R") +load("assets/crps_learning/01_motivation_01.RData") +ggplot(df, aes(x = x, y = y, xend = xend, yend = yend)) + + stat_function( + fun = pnorm, n = 10000, + args = list(mean = dev[2], sd = experts_sd[2]), + aes(col = "Expert 2"), size = 1.5 + ) + + stat_function( + fun = pnorm, n = 10000, + args = list(mean = dev[1], sd = experts_sd[1]), + aes(col = "Expert 1"), size = 1.5 + ) + + stat_function( + fun = pnorm, + n = 10000, + size = 1.5, aes(col = "DGP") # , linetype = "dashed" + ) + + geom_point(aes(col = "ECDF"), size = 1.5, show.legend = FALSE) + + geom_segment(aes(col = "ECDF")) + + geom_segment(data = tibble( + x_ = -5, + xend_ = min(y), + y_ = 0, + yend_ = 0 + ), aes(x = x_, xend = xend_, y = y_, yend = yend_)) + + theme_minimal() + + theme( + text = element_text(size = text_size), + legend.position = "bottom", + legend.key.width = unit(1.5, "cm") + ) + + ylab("Probability p") + + xlab("Value") + + scale_colour_manual(NULL, values = c("#969696", "#252525", col_auto, col_blue)) + + guides(color = guide_legend( + nrow = 2, + byrow = FALSE # , + # override.aes = list( + # size = c(1.5, 1.5, 1.5, 1.5) + # ) + )) + + scale_x_continuous(limits = c(-5, 7.5)) +``` + +## Weights + +```{r, echo = FALSE, fig.width=7, fig.height=6, fig.align='center', cache = FALSE} +source("assets/01_common.R") +load("assets/crps_learning/01_motivation_02.RData") +ggplot() + + geom_line(data = weights[weights$var != "1Optimum", ], size = 1.5, aes(x = prob, y = val, col = var)) + + geom_line( + data = weights[weights$var == "1Optimum", ], size = 1.5, aes(x = prob, y = val, col = var) # , linetype = "dashed" + ) + + theme_minimal() + + theme( + text = element_text(size = text_size), + legend.position = "bottom", + legend.key.width = unit(1.5, "cm") + ) + + xlab("Probability p") + + ylab("Weight w") + + scale_colour_manual( + NULL, + values = c("#969696", col_pointwise, col_p_constant, col_p_smooth), + labels = modnames[-c(3, 5)] + ) + + guides(color = guide_legend( + ncol = 3, + byrow = FALSE, + title.hjust = 5, + # override.aes = list( + # linetype = c(rep("solid", 5), "dashed") + # ) + )) + + ylim(c(0, 1)) +``` + +:::: + +::: + +::: + +## The Smoothing Procedure + +:::: {.columns} + +::: {.column width="48%"} + +We are using penalized cubic b-splines: + +Let $\varphi=(\varphi_1,\ldots, \varphi_L)$ be bounded basis functions on $(0,1)$ Then we approximate $w_{t,k}$ by + +\begin{align} +w_{t,k}^{\text{smooth}} = \sum_{l=1}^L \beta_l \varphi_l = \beta'\varphi +\end{align} + +with parameter vector $\beta$. 
The latter is estimated by penalized $L_2$-smoothing, which minimizes
+
+\begin{equation}
+  \| w_{t,k} - \beta' \varphi \|^2_2 + \lambda \| \mathcal{D}^{d} (\beta' \varphi) \|^2_2
+  \label{eq_function_smooth}
+\end{equation}
+
+with differential operator $\mathcal{D}$.
+
+Smoothing can be applied ex-post or inside of the algorithm ( `r fontawesome::fa("arrow-right", fill ="#000000")` [Simulation](#simulation)).
+
+:::
+
+::: {.column width="2%"}
+
+:::
+
+::: {.column width="48%"}
+
+We obtain the constant solution for high values of $\lambda$ when setting $d=1$:
+
+![](assets/crps_learning/weights_lambda.gif)
+
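+
+A minimal R sketch of this smoothing step (basis size, $\lambda$, and the raw weights are made up; a discrete difference penalty on the spline coefficients stands in for the derivative penalty $\mathcal{D}^d$):
+
+```{r, eval = FALSE}
+library(splines)
+
+# Probability grid and an illustrative vector of pointwise weights
+p_grid <- 1:99 / 100
+w_pointwise <- plogis(5 * (p_grid - 0.4))
+
+# Cubic B-spline basis B and difference penalty D of order d
+B <- bs(p_grid, df = 12, degree = 3, intercept = TRUE)
+d <- 1 # d = 1: large lambda shrinks towards the constant solution
+D <- diff(diag(ncol(B)), differences = d)
+
+# Hat matrix H = B (B'B + lambda D'D)^{-1} B' and smoothed weights
+lambda <- 10
+H <- B %*% solve(crossprod(B) + lambda * crossprod(D)) %*% t(B)
+w_smooth <- as.vector(H %*% w_pointwise)
+```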
::: :::: +# The Proposed CRPS-Learning Algorithm + +--- + +## The Proposed CRPS-Learning Algorithm + +:::: {.columns} + +::: {.column width="48%"} + +**Initialization:** + +Array of expert predicitons: $\widehat{X}_{t,k,p}$ + +Vector of Prediction targets: $Y_t$ + +Starting Weights: $w_0=(w_{0,1},\ldots, w_{0,K})$, + +Penalization parameter: $\lambda\geq 0$ + +B-spline and penalty matrices $B$ and $D$ on $\mathcal{P}= (p_1,\ldots,p_M)$ + +Hat matrix: $$\mathcal{H} = B(B'B+ \lambda D'D)^{-1} B'$$ + +Cumulative Regret: $R_{0,k} = 0$ + +Range parameter: $E_{0,k}=0$ + +::: + +::: {.column width="2%"} + +::: + +::: {.column width="48%"} + +**Core**: + +for(t in 1:T) { for(p in $\mathcal{P}$) { + +     $\widetilde{X}_{t,k}(p) = \sum_{k=1}^K w_{t-1,k,p} \widehat{X}_{t,k}(p)$ .grey[\# Prediction] + +     for(k in 1:K){ + +       $r_{t,k,p} = \text{QL}_p^{\nabla}(\widehat{X}_{t,k}(p),Y_t) - \text{QL}_p^{\nabla}(\widetilde{X}_{t}(p),Y_t)$ + +       $E_{t,k,p} = \max(E_{t-1,k,p}, |r_{t,k,p}|)$ + +       $\eta_{t,k,p}=\min\left(1/2E_{t,k,p}, \sqrt{\log(K)/ \sum_{i=1}^t (r^2_{i, k,p})}\right)$ + +       $R_{t,k,p} = R_{t-1,k,p} + \frac{1}{2} \left( r_{t,k,p} \left( 1+ \eta_{t,k,p} r_{t,k,p} \right) + 2E_{t,k,p} \mathbb{1}(\eta_{t,k,p}r_{t,k,p} > \frac{1}{2}) \right)$ + +       $w_{t,k,p} = \eta_{t,k,p} \exp \left(- \eta_{t,k,p} R_{t,k,p} \right) w_{0,k,p} / \left( \frac{1}{K} \sum_{k = 1}^K \eta_{t,k,p} \exp \left( - \eta_{t,k,p} R_{t,k,p}\right) \right)$ + +   }.grey[\#k]}.grey[\#p] + +   for(k in 1:K){ + +     $w_{t,k} = \mathcal{H} w_{t,k}(\mathcal{P})$ .grey[\# Smoothing] + +} .grey[\#k]} .grey[\#t] + +::: + +:::: + +## Simulation Study + +:::: {.columns} + +::: {.column width="48%"} + +Data Generating Process of the [simple probabilistic example](#simple_example) + +- Constant solution $\lambda \rightarrow \infty$ +- Pointwise Solution of the proposed BOAG +- Smoothed Solution of the proposed BOAG + - Weights are smoothed during learning + - Smooth weights are used to calculate Regret, adjust weights, etc. +- Smooth ex-post solution + - Weights are smoothed after the learning + - Algorithm always uses non-smoothed weights + +::: + +::: {.column width="2%"} + +::: + +::: {.column width="48%"} + +::: {.panel-tabset} + +## QL Deviation + +![](assets/crps_learning/pre_vs_post.gif) + +## CRPS vs. Lambda + +CRPS Values for different $\lambda$ (1000 runs) + +![](assets/crps_learning/pre_vs_post_lambda.gif) + +:::: + +::: + +:::: + +## Simulation Study + +The same simulation carried out for different algorithms (1000 runs): + +
+![](assets/crps_learning/algos_constant.gif)
+
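+
+All compared procedures share the same BOA-type update; a compact R sketch of the pointwise step from the algorithm slide above (the state layout and the helper name `boa_update` are mine, not the paper's):
+
+```{r, eval = FALSE}
+# One BOA step at a single probability level; r_t holds the
+# gradient-of-QL regrets (expert loss minus combination loss)
+boa_update <- function(state, r_t) {
+  K <- length(r_t)
+  state$E <- pmax(state$E, abs(r_t)) # range parameter
+  state$V <- state$V + r_t^2 # cumulated squared regret
+  eta <- pmin(
+    1 / (2 * pmax(state$E, 1e-12)), # small floor avoids 0/0 early on
+    sqrt(log(K) / pmax(state$V, 1e-12))
+  )
+  state$R <- state$R + 0.5 * (r_t * (1 + eta * r_t) +
+    2 * state$E * (eta * r_t > 0.5)) # regret with BOA correction
+  state$w <- eta * exp(-eta * state$R) * state$w0 /
+    mean(eta * exp(-eta * state$R))
+  state
+}
+
+state <- list(E = c(0, 0), V = c(0, 0), R = c(0, 0), w0 = c(0.5, 0.5))
+boa_update(state, r_t = c(0.3, -0.3))$w
+# ~0.38 / 0.62: the expert that beat the combination gains weight
+```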
+ +## Simulation Study + +:::: {.columns} + +::: {.column width="48%"} + +**New DGP:** + +\begin{align} + Y_t & \sim \mathcal{N}\left(\frac{\sin(0.005 \pi t )}{2},\,1\right) \\ + \widehat{X}_{t,1} & \sim \widehat{F}_{1} = \mathcal{N}(-1,\,1) \\ + \widehat{X}_{t,2} & \sim \widehat{F}_{2} = \mathcal{N}(3,\,4) \label{eq_dgp_sim2} +\end{align} + +`r fontawesome::fa("arrow-right", fill ="#000000")` Changing optimal weights + +`r fontawesome::fa("arrow-right", fill ="#000000")` Single run example depicted aside + +`r fontawesome::fa("arrow-right", fill ="#000000")` No forgetting leads to long-term constant weights + +::: + +::: {.column width="2%"} + +::: + +::: {.column width="48%"} + +**Weights of expert 2** + +```{r, echo = FALSE, fig.width=7, fig.height=5, fig.align='center', cache = FALSE} +load("assets/crps_learning/changing_weights.rds") +mod_labs <- c("Optimum", "Pointwise", "Smooth", "Constant") +names(mod_labs) <- c("TOptimum", "Pointwise", "Smooth", "Constant") +colseq <- c(grey(.99), "orange", "red", "purple", "blue", "darkblue", "black") +weights_preprocessed %>% + mutate(w = 1 - w) %>% + ggplot(aes(t, p, fill = w)) + + geom_raster(interpolate = TRUE) + + facet_grid(Mod ~ ., labeller = labeller(Mod = mod_labs)) + + theme_minimal() + + theme( + # plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"), + text = element_text(size = 15), + legend.key.height = unit(1, "inch") + ) + + scale_x_continuous(expand = c(0, 0)) + + xlab("Time t") + + scale_fill_gradientn( + limits = c(0, 1), + colours = colseq, + breaks = seq(0, 1, 0.2) + ) + + ylab("Weight w") +``` + +::: + +:::: + +## Simulation Results + +The simulation using the new DGP carried out for different algorithms (1000 runs): + +
+![](assets/crps_learning/algos_changing.gif)
+
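+
+For reference, a minimal sketch of this changing DGP and the two static expert quantile forecasts (horizon and grid are placeholders; I read the second parameter of $\mathcal{N}(3,\,4)$ as the variance):
+
+```{r, eval = FALSE}
+set.seed(1)
+T_len <- 5000
+p_grid <- 1:99 / 100
+
+# Observations with a slowly oscillating mean
+y <- rnorm(T_len, mean = sin(0.005 * pi * seq_len(T_len)) / 2, sd = 1)
+
+# Static expert quantiles as a T x P x K array
+experts <- array(dim = c(T_len, length(p_grid), 2))
+experts[, , 1] <- matrix(qnorm(p_grid, -1, 1), T_len, 99, byrow = TRUE)
+experts[, , 2] <- matrix(qnorm(p_grid, 3, 2), T_len, 99, byrow = TRUE)
+```
+
+An array like this is what the profoc package from the wrap-up consumes; per my reading of its documentation, something like `online(y = matrix(y), experts = experts, tau = p_grid)` runs the learner, but treat the exact signature as an assumption to verify.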
+ +## Possible Extensions + +:::: {.columns} + +::: {.column width="48%"} + +**Forgetting** + +- Only taking part of the old cumulative regret into account +- Exponential forgetting of past regret + +\begin{align*} + R_{t,k} & = R_{t-1,k}(1-\xi) + \ell(\widetilde{F}_{t},Y_i) - \ell(\widehat{F}_{t,k},Y_i) \label{eq_regret_forget} +\end{align*} + +**Fixed Shares** `r Citet(my_bib, "herbster1998tracking")` + + - Adding fixed shares to the weights + - Shrinkage towards a constant solution + +\begin{align*} + \widetilde{w}_{t,k} = \rho \frac{1}{K} + (1-\rho) w_{t,k} + \label{fixed_share_simple}. +\end{align*} + +::: + +::: {.column width="2%"} + +::: + +::: {.column width="48%"} + +**Non-Equidistant Knots** + +- Non-equidistant spline-basis could be used +- Potentially improves the tail-behavior +- Destroys shrinkage towards constant + +
+![](assets/crps_learning/uneven_grid.gif)
+
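+
+The forgetting and fixed-share updates above reduce to one-liners; a hedged R sketch (the values of $\xi$ and $\rho$ are placeholders):
+
+```{r, eval = FALSE}
+# Exponential forgetting: discount the old cumulative regret by (1 - xi)
+update_regret <- function(R_prev, regret_t, xi = 0.01) {
+  R_prev * (1 - xi) + regret_t
+}
+
+# Fixed share: shrink the weights towards the uniform combination
+fixed_share <- function(w, rho = 0.05) {
+  rho / length(w) + (1 - rho) * w
+}
+
+fixed_share(c(0.9, 0.1)) # 0.880 0.120 -- still sums to one
+```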
+ +::: + +:::: + +## Application Study: Overview + +:::: {.columns} + +::: {.column width="29%"} + +Data: + +- Forecasting European emission allowances (EUA) +- Daily month-ahead prices +- Jan 13 - Dec 20 (Phase III, 2092 Obs) + +Combination methods: + +- Naive, BOAG, EWAG, ML-PolyG, BMA + +Tuning paramter grids: + +- Smoothing Penalty: $\Lambda= \{0\}\cup \{2^x|x\in \{-4,-3.5,\ldots,12\}\}$ +- Learning Rates: $\mathcal{E}= \{2^x|x\in \{-1,-0.5,\ldots,9\}\}$ + +::: + +::: {.column width="2%"} + +::: + +::: {.column width="69%"} + +```{r, echo = FALSE, fig.width=7, fig.height=5, fig.align='center', cache = FALSE} +load("assets/crps_learning/overview_data.rds") + +data %>% + ggplot(aes(x = Date, y = value)) + + geom_line(size = 1, col = col_blue) + + theme_minimal() + + ylab("Value") + + facet_wrap(. ~ name, scales = "free", ncol = 1) + + theme( + text = element_text(size = 15), + strip.background = element_blank(), + strip.text.x = element_blank() + ) -> p1 + +data %>% + ggplot(aes(x = value)) + + geom_histogram(aes(y = ..density..), size = 1, fill = col_blue, bins = 50) + + ylab("Density") + + xlab("Value") + + theme_minimal() + + theme( + strip.background = element_rect(fill = col_lightgray, colour = col_lightgray), + text = element_text(size = 15) + ) + + facet_wrap(. ~ name, scales = "free", ncol = 1, strip.position = "right") -> p2 + +overview <- cowplot::plot_grid(plotlist = list(p1, p2), align = "hv", axis = "tblr", rel_widths = c(0.65, 0.35)) +overview +``` + +::: + +:::: + +## Application Study: Experts + +Simple exponential smoothing with additive errors (**ETS-ANN**): + +\begin{align*} +Y_{t} = l_{t-1} + \varepsilon_t \quad \text{with} \quad l_t = l_{t-1} + \alpha \varepsilon_t \quad \text{and} \quad \varepsilon_t \sim \mathcal{N}(0,\sigma^2) +\end{align*} + +Quantile regression (**QuantReg**): For each $p \in \mathcal{P}$ we assume: + +\begin{align*} +F^{-1}_{Y_t}(p) = \beta_{p,0} + \beta_{p,1} Y_{t-1} + \beta_{p,2} |Y_{t-1}-Y_{t-2}| +\end{align*} + +ARIMA(1,0,1)-GARCH(1,1) with Gaussian errors (**ARMA-GARCH**): + +\begin{align*} +Y_{t} = \mu + \phi(Y_{t-1}-\mu) + \theta \varepsilon_{t-1} + \varepsilon_t \quad \text{with} \quad \varepsilon_t = \sigma_t Z, \quad \sigma_t^2 = \omega + \alpha \varepsilon_{t-1}^2 + \beta \sigma_{t-1}^2 \quad \text{and} \quad Z_t \sim \mathcal{N}(0,1) +\end{align*} + +ARIMA(0,1,0)-I-EGARCH(1,1) with Gaussian errors (**I-EGARCH**): + +\begin{align*} +Y_{t} = \mu + Y_{t-1} + \varepsilon_t \quad \text{with} \quad \varepsilon_t = \sigma_t Z, \quad \log(\sigma_t^2) = \omega + \alpha Z_{t-1}+ \gamma (|Z_{t-1}|-\mathbb{E}|Z_{t-1}|) + \beta \log(\sigma_{t-1}^2) \quad \text{and} \quad Z_t \sim \mathcal{N}(0,1) +\end{align*} + +ARIMA(0,1,0)-GARCH(1,1) with student-t errors (**I-GARCHt**): + +\begin{align*} +Y_{t} = \mu + Y_{t-1} + \varepsilon_t \quad \text{with} \quad \varepsilon_t = \sigma_t Z, \quad \sigma_t^2 = \omega + \alpha \varepsilon_{t-1}^2 + \beta \sigma_{t-1}^2 \quad \text{and} \quad Z_t \sim t(0,1, \nu) +\end{align*} +## Results + +::: {.panel-tabset} + +## Significance + +```{r, echo = FALSE, fig.width=7, fig.height=5.5, fig.align='center', cache = FALSE, results='asis'} +load("assets/crps_learning/bernstein_application_study_estimations+learnings_rev1.RData") + +quantile_loss <- function(X, y, tau) { + t(t(y - X) * tau) * (y - X > 0) + t(t(X - y) * (1 - tau)) * (y - X < 0) +} +QL <- FCSTN * NA +for (k in 1:dim(QL)[1]) { + QL[k, , ] <- quantile_loss(FCSTN[k, , ], as.numeric(yoos), Qgrid) +} + +## TABLE AREA + +KK <- length(mnames) +TTinit <- 1 ## 
without first, as all comb. are uniform +RQL <- apply(QL[1:KK, -c(1:TTinit), ], c(1, 3), mean) +dimnames(RQL) <- list(mnames, Qgrid) +RQLm <- apply(RQL, c(1), mean, na.rm = TRUE) +# sort(RQLm - RQLm[K + 1]) +## +qq <- apply(QL[1:KK, -c(1:TTinit), ], c(1, 2), mean) +# t.test(qq[K + 1, ] - qq[K + 3, ]) +# t.test(qq[K + 1, ] - qq[K + 4, ]) + + +library(xtable) +Pall <- numeric(KK) +for (i in 1:KK) Pall[i] <- t.test(qq[K + 1, ] - qq[i, ], alternative = "greater")$p.val + +Mall <- (RQLm - RQLm[K + 1]) * 10000 +Mout <- matrix(Mall[-c(1:(K + 3))], 5, 6) +dimnames(Mout) <- list(moname, mtname) + +Pallout <- format(round(Pall, 3), nsmall = 3) +Pallout[Pallout == "0.000"] <- "<.001" +Pallout[Pallout == "1.000"] <- ">.999" + +MO <- K +IDX <- c(1:K) +OUT <- t(Mall[IDX]) +OUT.num <- OUT +class(OUT.num) <- "numeric" + +xxx <- OUT.num +xxxx <- OUT +table <- OUT +table_col <- OUT +i.p <- 1 +for (i.p in 1:MO) { + xmax <- -min(Mall) * 5 # max(Mall) + xmin <- min(Mall) + cred <- rev(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, .8, .5)) # , .5,0,0,0,1,1,1) ## red + cgreen <- rev(c(.5, .5, .55, .6, .65, .7, .75, .8, .85, .9, .95, 1, 1, .9)) # , .5,0,1,1,1,0,0) ## green + cblue <- rev(c(.55, .5, .5, .5, .5, .5, .5, .5, .5, .5, .5, .5, .5, .5)) # , .5,1,1,0,0,0,1) ## blue + crange <- c(xmin, xmax) ## range + ## colors in plot: + fred <- round(approxfun(seq(crange[1], crange[2], length = length(cred)), cred)(pmin(xxx[, i.p], xmax)), 3) + fgreen <- round(approxfun(seq(crange[1], crange[2], length = length(cgreen)), cgreen)(pmin(xxx[, i.p], xmax)), 3) + fblue <- round(approxfun(seq(crange[1], crange[2], length = length(cblue)), cblue)(pmin(xxx[, i.p], xmax)), 3) + tmp <- format(round(xxx[, i.p], 3), nsmall = 3) + xxxx[, i.p] <- paste("\\cellcolor[rgb]{", fred, ",", fgreen, ",", fblue, "}", tmp, " {\\footnotesize (", Pallout[IDX[i.p]], ")}", sep = "") + table[, i.p] <- paste0(tmp, " (", Pallout[i.p], ")") + table_col[, i.p] <- rgb(fred, fgreen, fblue, maxColorValue = 1) +} # i.p + +table_out <- kbl(table, align = rep("c", ncol(table))) + +for (cols in 1:ncol(table)) { + table_out <- table_out %>% + column_spec(cols, background = table_col[, cols]) +} +table_out %>% + kable_material() +``` + +```{r, echo = FALSE, fig.width=7, fig.height=5.5, fig.align='center', cache = FALSE, results='asis'} +MO <- 6 +OUT <- Mout +OUT.num <- OUT +class(OUT.num) <- "numeric" + +xxx <- OUT.num +xxxx <- OUT +i.p <- 1 +table2 <- OUT +table_col2 <- OUT +for (i.p in 1:MO) { + xmax <- -min(Mall) * 5 # max(Mall) + xmin <- min(Mall) + cred <- rev(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, .8, .5)) # , .5,0,0,0,1,1,1) ## red + cgreen <- rev(c(.5, .5, .55, .6, .65, .7, .75, .8, .85, .9, .95, 1, 1, .9)) # , .5,0,1,1,1,0,0) ## green + cblue <- rev(c(.55, .5, .5, .5, .5, .5, .5, .5, .5, .5, .5, .5, .5, .5)) # , .5,1,1,0,0,0,1) ## blue + crange <- c(xmin, xmax) ## range + ## colors in plot: + fred <- round(approxfun(seq(crange[1], crange[2], length = length(cred)), cred)(pmin(xxx[, i.p], xmax)), 3) + fgreen <- round(approxfun(seq(crange[1], crange[2], length = length(cgreen)), cgreen)(pmin(xxx[, i.p], xmax)), 3) + fblue <- round(approxfun(seq(crange[1], crange[2], length = length(cblue)), cblue)(pmin(xxx[, i.p], xmax)), 3) + tmp <- format(round(xxx[, i.p], 3), nsmall = 3) + xxxx[, i.p] <- paste("\\cellcolor[rgb]{", fred, ",", fgreen, ",", fblue, "}", tmp, " {\\footnotesize (", Pallout[K + 3 + 5 * (i.p - 1) + 1:5], ")}", sep = "") + table2[, i.p] <- paste0(tmp, " (", Pallout[K + 3 + 5 * (i.p - 1) + 1:5], ")") + table_col2[, i.p] <- rgb(fred, fgreen, fblue, 
maxColorValue = 1) +} # i.p + +table_out2 <- kableExtra::kbl(table2, align = rep("c", ncol(table2))) + +for (cols in 1:ncol(table2)) { + table_out2 <- table_out2 %>% + column_spec(1 + cols, + background = table_col2[, cols] + ) +} + +table_out2 %>% + kable_material() %>% + column_spec(1, bold = T) +``` + +## QL + +```{r, echo = FALSE, fig.width=13, fig.height=5.5, fig.align='center', cache = FALSE} + +##### Performance across probabilities +M <- length(mnames) +Msel <- c(1:K, K + 1, K + 1 + 2 + 1:4 * 5 - 2) ## experts + naive + smooth +modnames <- mnames[Msel] + +tCOL <- c( + "#E6CC00", "#CC6600", "#E61A1A", "#99004D", "#F233BF", + "#666666", "#0000CC", "#1A80E6", "#1AE680", "#00CC00" +) + + +t(RQL) %>% + as_tibble() %>% + select(Naive) %>% + mutate(Naive = 0) %>% + mutate(p = 1:99 / 100) %>% + pivot_longer(-p, values_to = "Loss differences") -> dummy + +t(RQL) %>% + as_tibble() %>% + select(mnames[Msel]) %>% + mutate(p = 1:99 / 100) %>% + pivot_longer(!p & !Naive) %>% + mutate(`Loss differences` = value - Naive) %>% + select(-value, -Naive) %>% + rbind(dummy) %>% + mutate( + p = as.numeric(p), + name = stringr::str_replace(name, "-P-smooth", ""), + name = factor(name, levels = stringr::str_replace(mnames[Msel], "-P-smooth", ""), ordered = T), + `Loss differences` = `Loss differences` * 1000 + ) %>% + ggplot(aes(x = p, y = `Loss differences`, colour = name)) + + geom_line(linewidth = 1) + + theme_minimal() + + theme( + text = element_text(size = text_size), + legend.position = "bottom" + ) + + xlab("Probability p") + + scale_color_manual(NULL, values = tCOL) + + guides(colour = guide_legend(nrow = 2, byrow = TRUE)) +``` + +## Cumulative Loss Difference + +```{r, echo = FALSE, fig.width=13, fig.height=5.5, fig.align='center', cache = FALSE} +DQL <- t(apply(apply(QL[1:KK, -c(1:TTinit), ], c(1, 2), mean), 1, cumsum)) + +rownames(DQL) <- mnames + +t(DQL) %>% + as_tibble() %>% + select(Naive) %>% + mutate( + `Difference of cumulative loss` = 0, + Date = ytime[-c(1:(TT + TTinit + 1))], + name = "Naive" + ) %>% + select(-Naive) -> dummy + + +data <- t(DQL) %>% + as_tibble() %>% + select(mnames[Msel]) %>% + mutate(Date = ytime[-c(1:(TT + TTinit + 1))]) %>% + pivot_longer(!Date & !Naive) %>% + mutate(`Difference of cumulative loss` = value - Naive) %>% + select(-value, -Naive) %>% + rbind(dummy) %>% + mutate( + name = stringr::str_replace(name, "-P-smooth", ""), + name = factor(name, levels = stringr::str_replace(mnames[Msel], "-P-smooth", "")) + ) + +data %>% + ggplot(aes(x = Date, y = `Difference of cumulative loss`, colour = name)) + + geom_line(size = 1) + + theme_minimal() + + theme( + text = element_text(size = text_size), + legend.position = "bottom" + ) + + scale_color_manual(NULL, values = tCOL) + + guides(colour = guide_legend(nrow = 2, byrow = TRUE)) +``` + +## Weights (BOAG P-Smooth) + +```{r, echo = FALSE, fig.width=13, fig.height=5.5, fig.align='center', cache = FALSE} +load("assets/crps_learning/weights_data.RData") +weights_data %>% + ggplot(aes(Date, p, fill = w)) + + geom_raster(interpolate = TRUE) + + facet_grid(Mod ~ .) 
+ + theme_minimal() + + theme( + plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "cm"), + text = element_text(size = text_size), + legend.key.height = unit(0.9, "inch") + ) + + ylab("p") + + scale_fill_gradientn( + limits = c(0, 1), + colours = colseq, + breaks = seq(0, 1, 0.2) + ) + + scale_x_date(expand = c(0, 0)) +``` + +## Weights (Last) + +```{r, echo = FALSE, fig.width=13, fig.height=5.5, fig.align='center', cache = FALSE} +load("assets/crps_learning/weights_example.RData") +weights %>% + ggplot(aes(x = p, y = weights, col = Model)) + + geom_line(size = 1.5) + + theme_minimal() + + theme( + plot.margin = unit(c(0.2, 0.3, 0.2, 0.2), "cm"), + text = element_text(size = text_size), + legend.position = "bottom", + legend.title = element_blank(), + panel.spacing = unit(1.5, "lines") + ) + + scale_color_manual(NULL, values = tCOL[1:K]) + + facet_grid(. ~ K) +``` + +:::: + +## Wrap-Up + +:::: {.columns} + +::: {.column width="48%"} + +Potential Downsides: +- Pointwise optimization can induce quantile crossing + - Can be solved by sorting the predictions + +Upsides: +- Pointwise learning outperforms the Naive solution significantly +- Online learning is much faster than batch methods +- Smoothing further improves the predictive performance +- Asymptotically not worse than the best convex combination + +::: + +::: {.column width="2%"} + +::: + +::: {.column width="48%"} + +Important: + +- The choice of the learning rate is crucial +- The loss function has to meet certain criteria + +The [`r fontawesome::fa("github")` profoc](https://profoc.berrisch.biz/) R Package: + +- Implements all algorithms discussed above +- Is written using RcppArmadillo `r fontawesome::fa("arrow-right", fill ="#000000")` its fast +- Accepts vectors for most parameters + - The best parameter combination is chosen online +- Implements + - Forgetting, Fixed Share + - Different loss functions + gradients + +::: + +:::: + +:::: {.notes} + +Execution Times: + +T = 5000 + +Opera: + +Ml-Poly > 157 ms +Boa > 212 ms + +Profoc: + +Ml-Poly > 17 +BOA > 16 ## Columns Template @@ -731,6 +1503,19 @@ foo :::: +## Paneltabset Template + +::: {.panel-tabset} + +## Baz + +Bar + +## Bam + +Foo + +:::: # References diff --git a/25_07_phd_defense/logos_combined.png b/25_07_phd_defense/logos_combined.png deleted file mode 100644 index a6b9b9b..0000000 Binary files a/25_07_phd_defense/logos_combined.png and /dev/null differ