diff --git a/.gitignore b/.gitignore index fa2357b..d70b31b 100644 --- a/.gitignore +++ b/.gitignore @@ -86,3 +86,4 @@ data/* # Ignore html files for now # TODO: Remove later *.html +25_07_phd_defense/index_cache/* diff --git a/25_07_phd_defense/index.qmd b/25_07_phd_defense/index.qmd index 2fdd145..97c29f0 100644 --- a/25_07_phd_defense/index.qmd +++ b/25_07_phd_defense/index.qmd @@ -22,23 +22,25 @@ execute: highlight-style: github --- +## Outline + -## Outline - ::: {.hidden} $$ \newcommand{\A}{{\mathbb A}} $$ ::: -
- ::: {style="font-size: 150%;"} -[{{< fa bars-staggered >}}]{style="color: #404040;"}   Introduction & Research Motivation +::: + +
+ +[{{< fa bars-staggered >}}]{style="color: #404040;"}   [Introduction & Research Motivation](#motivation) [{{< fa bars-staggered >}}]{style="color: #404040;"}   Overview of the Thesis @@ -50,10 +52,6 @@ $$ [{{< fa binoculars >}}]{style="color: #404040;"}   Contributions & Outlook -::: - -## PHD DeFence - ```{r, setup, include=FALSE} # Compile with: rmarkdown::render("crps_learning.Rmd") library(latex2exp) @@ -62,10 +60,10 @@ library(dplyr) library(tidyr) library(purrr) library(kableExtra) +library(RefManageR) knitr::opts_chunk$set( dev = "svglite" # Use svg figures ) -library(RefManageR) BibOptions( check.entries = TRUE, bib.style = "authoryear", @@ -84,31 +82,7 @@ col_constant <- "#dd9002" col_optimum <- "#666666" ``` -```{r xaringan-panelset, echo=FALSE} -xaringanExtra::use_panelset() -``` - -```{r xaringanExtra-freezeframe, echo=FALSE} -xaringanExtra::use_freezeframe(responsive = TRUE) -``` - -# Outline - -- [Motivation](#motivation) -- [The Framework of Prediction under Expert Advice](#pred_under_exp_advice) -- [The Continious Ranked Probability Scrore](#crps) -- [Optimality of (Pointwise) CRPS-Learning](#crps_optim) -- [A Simple Probabilistic Example](#simple_example) -- [The Proposed CRPS-Learning Algorithm](#proposed_algorithm) -- [Simulation Results](#simulation) -- [Possible Extensions](#extensions) -- [Application Study](#application) -- [Wrap-Up](#conclusion) -- [References](#references) - ---- - -# Motivation +# CRPS Learning ## Motivation @@ -141,7 +115,7 @@ The Idea: ## Time -```{r, echo = FALSE, fig.height=6} +```{r, echo = FALSE, fig.height=6, cache = TRUE} par(mfrow = c(3, 3), mar = c(2, 2, 2, 2)) set.seed(1) # Data @@ -207,7 +181,7 @@ arrows(13, 0.75, 15, 1, , lwd = 4, bty = "n") ## Distribution -```{r, echo = FALSE, fig.height=6} +```{r, echo = FALSE, fig.height=6, cache = TRUE} par(mfrow = c(3, 3), mar = c(2, 2, 2, 2)) set.seed(1) # Data @@ -277,8 +251,6 @@ plot(rowSums(X * w), lwd = 4, type = "l", xlab = "", ylab = "", xaxt = "n", yaxt :::: -# The Framework of Prediction under Expert Advice - ## The Framework of Prediction under Expert Advice ### The sequential framework @@ -323,32 +295,26 @@ Weights are updated sequentially according to the past performance of the $K$ ex That is, a loss function $\ell$ is needed. This is used to compute the **cumulative regret** $R_{t,k}$ -\begin{equation} - R_{t,k} = \widetilde{L}_{t} - \widehat{L}_{t,k} = \sum_{i = 1}^t \ell(\widetilde{X}_{i},Y_i) - \ell(\widehat{X}_{i,k},Y_i) - \label{eq_regret} -\end{equation} +$$ +R_{t,k} = \widetilde{L}_{t} - \widehat{L}_{t,k} = \sum_{i = 1}^t \ell(\widetilde{X}_{i},Y_i) - \ell(\widehat{X}_{i,k},Y_i) +$${#eq-regret} The cumulative regret: + - Indicates the predictive accuracy of the expert $k$ until time $t$. - Measures how much the forecaster *regrets* not having followed the expert's advice Popular loss functions for point forecasting `r Citet(my_bib, "gneiting2011making")`: -.pull-left[ -- $\ell_2$-loss $\ell_2(x, y) = | x -y|^2$ - - optimal for mean prediction -] -.pull-right[ -- $\ell_1$-loss $\ell_1(x, y) = | x -y|$ - - optimal for median predictions -] - :::: {.columns} ::: {.column width="48%"} -- $\ell_2$-loss $\ell_2(x, y) = | x -y|^2$ - - optimal for mean prediction +$\ell_2$ loss: + +$$\ell_2(x, y) = | x -y|^2$${#eq-elltwo} + +Strictly proper for *mean* prediction ::: @@ -358,8 +324,11 @@ Popular loss functions for point forecasting `r Citet(my_bib, "gneiting2011makin ::: {.column width="48%"} -- $\ell_1$-loss $\ell_1(x, y) = | x -y|$ - - optimal for median predictions +$\ell_1$ loss: + +$$\ell_1(x, y) = | x -y|$${#eq-ellone} + +Strictly proper for *median* predictions ::: @@ -370,9 +339,9 @@ Popular loss functions for point forecasting `r Citet(my_bib, "gneiting2011makin #### The naive combination -\begin{equation} - w_{t,k}^{\text{Naive}} = \frac{1}{K} -\end{equation} +$$ +w_{t,k}^{\text{Naive}} = \frac{1}{K} +$${#eq-wtk_naive} #### The exponentially weighted average forecaster (EWA) @@ -703,7 +672,7 @@ Simple Example: ## CDFs -```{r, echo = FALSE, fig.width=7, fig.height=6, fig.align='center', cache = FALSE} +```{r, echo = FALSE, fig.width=7, fig.height=6, fig.align='center', cache = TRUE} source("assets/01_common.R") load("assets/crps_learning/01_motivation_01.RData") ggplot(df, aes(x = x, y = y, xend = xend, yend = yend)) + @@ -751,7 +720,7 @@ ggplot(df, aes(x = x, y = y, xend = xend, yend = yend)) + ## Weights -```{r, echo = FALSE, fig.width=7, fig.height=6, fig.align='center', cache = FALSE} +```{r, echo = FALSE, fig.width=7, fig.height=6, fig.align='center', cache = TRUE} source("assets/01_common.R") load("assets/crps_learning/01_motivation_02.RData") ggplot() + @@ -832,9 +801,6 @@ We receive the constant solution for high values of $\lambda$ when setting $d=1$ :::: - -# The Proposed CRPS-Learning Algorithm - --- ## The Proposed CRPS-Learning Algorithm @@ -980,7 +946,7 @@ The same simulation carried out for different algorithms (1000 runs): **Weights of expert 2** -```{r, echo = FALSE, fig.width=7, fig.height=5, fig.align='center', cache = FALSE} +```{r, echo = FALSE, fig.width=7, fig.height=5, fig.align='center', cache = TRUE} load("assets/crps_learning/changing_weights.rds") mod_labs <- c("Optimum", "Pointwise", "Smooth", "Constant") names(mod_labs) <- c("TOptimum", "Pointwise", "Smooth", "Constant") @@ -1094,7 +1060,7 @@ Tuning paramter grids: ::: {.column width="69%"} -```{r, echo = FALSE, fig.width=7, fig.height=5, fig.align='center', cache = FALSE} +```{r, echo = FALSE, fig.width=7, fig.height=5, fig.align='center', cache = TRUE} load("assets/crps_learning/overview_data.rds") data %>% @@ -1168,7 +1134,7 @@ Y_{t} = \mu + Y_{t-1} + \varepsilon_t \quad \text{with} \quad \varepsilon_t = \ ## Significance -```{r, echo = FALSE, fig.width=7, fig.height=5.5, fig.align='center', cache = FALSE, results='asis'} +```{r, echo = FALSE, fig.width=7, fig.height=5.5, fig.align='center', cache = TRUE, results='asis'} load("assets/crps_learning/bernstein_application_study_estimations+learnings_rev1.RData") quantile_loss <- function(X, y, tau) { @@ -1243,7 +1209,7 @@ for (j in 1:ncol(table)) { table_out ``` -```{r, echo = FALSE, fig.width=7, fig.height=5.5, fig.align='center', cache = FALSE, results='asis'} +```{r, echo = FALSE, fig.width=7, fig.height=5.5, fig.align='center', cache = TRUE, results='asis'} MO <- 6 OUT <- Mout OUT.num <- OUT @@ -1287,7 +1253,7 @@ table_out2 %>% ## QL -```{r, echo = FALSE, fig.width=13, fig.height=5.5, fig.align='center', cache = FALSE} +```{r, echo = FALSE, fig.width=13, fig.height=5.5, fig.align='center', cache = TRUE} ##### Performance across probabilities M <- length(mnames) @@ -1335,7 +1301,7 @@ t(RQL) %>% ## Cumulative Loss Difference -```{r, echo = FALSE, fig.width=13, fig.height=5.5, fig.align='center', cache = FALSE} +```{r, echo = FALSE, fig.width=13, fig.height=5.5, fig.align='center', cache = TRUE} DQL <- t(apply(apply(QL[1:KK, -c(1:TTinit), ], c(1, 2), mean), 1, cumsum)) rownames(DQL) <- mnames @@ -1378,7 +1344,7 @@ data %>% ## Weights (BOAG P-Smooth) -```{r, echo = FALSE, fig.width=13, fig.height=5.5, fig.align='center', cache = FALSE} +```{r, echo = FALSE, fig.width=13, fig.height=5.5, fig.align='center', cache = TRUE} load("assets/crps_learning/weights_data.RData") weights_data %>% ggplot(aes(Date, p, fill = w)) + @@ -1401,7 +1367,7 @@ weights_data %>% ## Weights (Last) -```{r, echo = FALSE, fig.width=13, fig.height=5.5, fig.align='center', cache = FALSE} +```{r, echo = FALSE, fig.width=13, fig.height=5.5, fig.align='center', cache = TRUE} load("assets/crps_learning/weights_example.RData") weights %>% ggplot(aes(x = p, y = weights, col = Model)) + @@ -1726,7 +1692,7 @@ Computation is easy since we have an analytical solution. ::: {.column width="48%"} -```{r, fig.align="center", echo=FALSE, out.width = "1000px"} +```{r, fig.align="center", echo=FALSE, out.width = "1000px", cache = TRUE} knitr::include_graphics("assets/mcrps_learning/algorithm.svg") ``` @@ -1791,19 +1757,19 @@ Computation Time: ~30 Minutes ## Constant -```{r, fig.align="center", echo=FALSE, out.width = "400"} +```{r, fig.align="center", echo=FALSE, out.width = "400", cache = TRUE} knitr::include_graphics("assets/mcrps_learning/constant.svg") ``` ## Constant PR -```{r, fig.align="center", echo=FALSE, out.width = "400"} +```{r, fig.align="center", echo=FALSE, out.width = "400", cache = TRUE} knitr::include_graphics("assets/mcrps_learning/constant_pr.svg") ``` ## Constant MV -```{r, fig.align="center", echo=FALSE, out.width = "400"} +```{r, fig.align="center", echo=FALSE, out.width = "400", cache = TRUE} knitr::include_graphics("assets/mcrps_learning/constant_mv.svg") ``` @@ -1821,13 +1787,13 @@ knitr::include_graphics("assets/mcrps_learning/constant_mv.svg") ## Pointwise -```{r, fig.align="center", echo=FALSE, out.width = "400"} +```{r, fig.align="center", echo=FALSE, out.width = "400", cache = TRUE} knitr::include_graphics("assets/mcrps_learning/pointwise.svg") ``` ## Smooth -```{r, fig.align="center", echo=FALSE, out.width = "400"} +```{r, fig.align="center", echo=FALSE, out.width = "400", cache = TRUE} knitr::include_graphics("assets/mcrps_learning/smooth_best.svg") ``` @@ -1843,7 +1809,7 @@ knitr::include_graphics("assets/mcrps_learning/smooth_best.svg") ::: {.column width="55%"} -```{r} +```{r, cache = TRUE} load("assets/mcrps_learning/naive_table_df.rds") table_naive <- naive_table_df %>% @@ -1987,7 +1953,7 @@ Foo ## Results -```{r, warning=FALSE, fig.align="center", echo=FALSE, fig.width=12, fig.height=6} +```{r, warning=FALSE, fig.align="center", echo=FALSE, fig.width=12, fig.height=6, cache = TRUE} load("assets/mcrps_learning/pars_data.rds") pars_data %>% ggplot(aes(x = dates, y = value)) + @@ -2026,7 +1992,7 @@ pars_data %>% ## Results: Hour 16:00-17:00 -```{r, fig.align="center", echo=FALSE, fig.width=12, fig.height=6} +```{r, fig.align="center", echo=FALSE, fig.width=12, fig.height=6, cache = TRUE} load("assets/mcrps_learning/weights_h.rds") weights_h %>% ggplot(aes(date, q, fill = weight)) + @@ -2066,7 +2032,7 @@ weights_h %>% ## Results: Median -```{r, fig.align="center", echo=FALSE, fig.width=12, fig.height=6} +```{r, fig.align="center", echo=FALSE, fig.width=12, fig.height=6, cache = TRUE} load("assets/mcrps_learning/weights_q.rds") weights_q %>% mutate(hour = as.numeric(hour) - 1) %>% @@ -2157,7 +2123,7 @@ We use `Rcpp` modules to expose a class to R Basis specification `b_smooth_pr` is internally passed to `make_basis_mats()`: -```{r, echo = TRUE, eval = FALSE, cache = FALSE} +```{r, echo = TRUE, eval = FALSE, cache = TRUE} mod <- online( y = Y, experts = experts, @@ -2237,6 +2203,8 @@ Get these slides: # Modeling Volatility and Dependence of European Carbon and Energy Prices +TODO: Add Reference + --- ## Motivation @@ -2296,7 +2264,7 @@ How can the dynamics be characterized? ## Data -```{r, echo=FALSE, fig.width = 12, fig.height = 6, fig.align="center"} +```{r, echo=FALSE, fig.width = 12, fig.height = 6, fig.align="center", cache = TRUE} readr::read_csv("assets/voldep/2022_10_14_eur_ref_co2_adj_hvpi_ex_nrg.csv") %>% select(-EUR_USD, -hvpi_x_nrg) %>% pivot_longer(-Date) %>% @@ -2573,7 +2541,7 @@ Relative improvement in ES compared to $\text{RW}^{\sigma, \rho}$ Cellcolor: w.r.t test statistic of Diebold-Mariano test (testing wether the model outperformes the benchmark, greener = better). -```{r, echo=FALSE, results='asis'} +```{r, echo=FALSE, results='asis', cache = TRUE} load("assets/voldep/energy_df.Rdata") table_energy <- energy %>% kbl( @@ -2673,7 +2641,7 @@ table_energy %>% Improvement in CRPS of selected models relative to $\textrm{RW}^{\sigma, \rho}_{}$ in % (higher = better). Colored according to the test statistic of a DM-Test comparing to $\textrm{RW}^{\sigma, \rho}_{}$ (greener means lower test statistic i.e., better performance compared to $\textrm{RW}^{\sigma, \rho}_{}$). -```{r, echo=FALSE, results = 'asis'} +```{r, echo=FALSE, results = 'asis', cache = TRUE} load("assets/voldep/crps_df.Rdata") table_crps <- crps %>% @@ -2752,7 +2720,7 @@ Conclusion: the Improvements seen before must be attributed to other parts of th Improvement in RMSE score of selected models relative to $\textrm{RW}^{\sigma, \rho}_{}$ in % (higher = better). Colored according to the test statistic of a DM-Test comparing to $\textrm{RW}^{\sigma, \rho}_{}$ (greener means lower test statistic i.e., better performance compared to $\textrm{RW}^{\sigma, \rho}_{}$). -```{r, echo=FALSE, results = 'asis'} +```{r, echo=FALSE, results = 'asis', cache = TRUE} load("assets/voldep/rmsq_df.Rdata") table_rmsq <- rmsq %>% @@ -2802,7 +2770,7 @@ table_rmsq %>% ## Evolution of Linear Dependence $\Xi$ -```{r, echo=FALSE, fig.width = 12, fig.height = 6, fig.align="center"} +```{r, echo=FALSE, fig.width = 12, fig.height = 6, fig.align="center", cache = TRUE} load("assets/voldep/plot_rho_df.Rdata") ggplot() + geom_line( @@ -2887,7 +2855,7 @@ ggplot() + ## Predictive Quantiles (Russian Invasion) -```{r, echo=FALSE, fig.width = 12, fig.height = 6, fig.align="center"} +```{r, echo=FALSE, fig.width = 12, fig.height = 6, fig.align="center", cache = TRUE} load("assets/voldep/plot_quant_df.Rdata") plot_quant_data %>% ggplot(aes(x = date, y = value)) + @@ -2981,43 +2949,7 @@ Accounting for heteroscedasticity or stabilizing the variance via log transforma :::: -## Columns Template - -:::: {.columns} - -::: {.column width="48%"} - -Baz - -::: - -::: {.column width="2%"} - -::: - -::: {.column width="48%"} - -foo - -::: - -:::: - -## Paneltabset Template - -::: {.panel-tabset} - -## Baz - -Bar - -## Bam - -Foo - -:::: - -# References +## References ```{r refs1, echo=FALSE, results="asis"} PrintBibliography(my_bib, .opts = list(style = "text")) diff --git a/25_07_phd_defense/sydney.scss b/25_07_phd_defense/sydney.scss index c2b4078..a87e65a 100644 --- a/25_07_phd_defense/sydney.scss +++ b/25_07_phd_defense/sydney.scss @@ -1,7 +1,7 @@ // See https://quarto.org/docs/presentations/revealjs/themes.html#saas-variables $brand-red: #e64626; -$brand-blue: #0148A4; +$brand-blue: #fcfcfc; $brand-yellow: #FFB800; $brand-charcoal: #424242; $brand-gray: #F1F1F1;