From d91e8ea505ca68302b705f1f0e64d95763c5da86 Mon Sep 17 00:00:00 2001 From: Jonathan Berrisch Date: Sat, 17 May 2025 11:04:30 +0200 Subject: [PATCH] Update main slides, add first half of CRPS learning slides --- 25_07_phd_defense/assets/library.bib | 539 ++++++++++++ 25_07_phd_defense/index.qmd | 1222 +++++++++++++------------- 2 files changed, 1172 insertions(+), 589 deletions(-) create mode 100644 25_07_phd_defense/assets/library.bib diff --git a/25_07_phd_defense/assets/library.bib b/25_07_phd_defense/assets/library.bib new file mode 100644 index 0000000..98491d1 --- /dev/null +++ b/25_07_phd_defense/assets/library.bib @@ -0,0 +1,539 @@ +@article{aastveit2014nowcasting, + title = {Nowcasting GDP in real time: A density combination approach}, + author = {Aastveit, Knut Are and Gerdrup, Karsten R and Jore, Anne Sofie and Thorsrud, Leif Anders}, + journal = {Journal of Business \& Economic Statistics}, + volume = {32}, + number = {1}, + pages = {48--68}, + year = {2014}, + publisher = {Taylor \& Francis} +} +@incollection{aastveit2019evolution, + title = {The Evolution of Forecast Density Combinations in Economics}, + author = {Aastveit, Knut Are and Mitchell, James and Ravazzolo, Francesco and van Dijk, Herman K}, + booktitle = {Oxford Research Encyclopedia of Economics and Finance}, + year = {2019} +} +@article{atiya2020does, + title = {Why does forecast combination work so well?}, + author = {Atiya, Amir F}, + journal = {International Journal of Forecasting}, + volume = {36}, + number = {1}, + pages = {197--200}, + year = {2020}, + publisher = {Elsevier} +} +@article{atsalakis2016using, + title = {Using computational intelligence to forecast carbon prices}, + author = {Atsalakis, George S}, + journal = {Applied Soft Computing}, + volume = {43}, + pages = {107--116}, + year = {2016}, + publisher = {Elsevier} +} +@article{bai2020does, + title = {Does crude oil futures price really help to predict spot oil price? 
New evidence from density forecasting}, + author = {Bai, Lan and Li, Xiafei and Wei, Yu and Wei, Guiwu}, + journal = {International Journal of Finance \& Economics}, + year = {2020}, + publisher = {Wiley Online Library} +} +@article{benz2009modeling, + title = {Modeling the price dynamics of CO2 emission allowances}, + author = {Benz, Eva and Tr{\"u}ck, Stefan}, + journal = {Energy Economics}, + volume = {31}, + number = {1}, + pages = {4--15}, + year = {2009}, + publisher = {Elsevier} +} +@article{biau2011sequential, + title = {Sequential quantile prediction of time series}, + author = {Biau, G{\'e}rard and Patra, Beno{\^\i}t}, + journal = {IEEE Transactions on Information Theory}, + volume = {57}, + number = {3}, + pages = {1664--1674}, + year = {2011}, + publisher = {IEEE} +} +@inproceedings{bousquet2001tracking, + title = {Tracking a small set of experts by mixing past posteriors}, + author = {Bousquet, Olivier and Warmuth, Manfred K}, + booktitle = {International Conference on Computational Learning Theory}, + pages = {31--47}, + year = {2001}, + organization = {Springer} +} +@article{bregere2020online, + title = {Online hierarchical forecasting for power consumption data}, + author = {Br{\'e}g{\`e}re, Margaux and Huard, Malo}, + journal = {arXiv preprint arXiv:2003.00585}, + year = {2020} +} +@article{busetti2017quantile, + title = {Quantile aggregation of density forecasts}, + author = {Busetti, Fabio}, + journal = {Oxford Bulletin of Economics and Statistics}, + volume = {79}, + number = {4}, + pages = {495--512}, + year = {2017}, + publisher = {Wiley Online Library} +} +@book{cesa2006prediction, + title = {Prediction, learning, and games}, + author = {Cesa-Bianchi, Nicolo and Lugosi, G{\'a}bor}, + year = {2006}, + publisher = {Cambridge university press} +} +@article{cesa2012mirror, + title = {Mirror descent meets fixed share (and feels no regret)}, + author = {Cesa-Bianchi, Nicolo and Gaillard, Pierre and Lugosi, G{\'a}bor and Stoltz, Gilles}, + journal = 
{Advances in Neural Information Processing Systems}, + volume = {25}, + pages = {980--988}, + year = {2012} +} +@article{cheng2015forecasting, + title = {Forecasting with factor-augmented regression: A frequentist model averaging approach}, + author = {Cheng, Xu and Hansen, Bruce E}, + journal = {Journal of Econometrics}, + volume = {186}, + number = {2}, + pages = {280--293}, + year = {2015}, + publisher = {Elsevier} +} +@article{chernozhukov2010quantile, + title = {Quantile and probability curves without crossing}, + author = {Chernozhukov, Victor and Fern{\'a}ndez-Val, Iv{\'a}n and Galichon, Alfred}, + journal = {Econometrica}, + volume = {78}, + number = {3}, + pages = {1093--1125}, + year = {2010}, + publisher = {Wiley Online Library} +} +@article{devaine2013forecasting, + title = {Forecasting electricity consumption by aggregating specialized experts}, + author = {Devaine, Marie and Gaillard, Pierre and Goude, Yannig and Stoltz, Gilles}, + journal = {Machine Learning}, + volume = {90}, + number = {2}, + pages = {231--260}, + year = {2013}, + publisher = {Springer} +} +@article{dutta2018modeling, + title = {Modeling and forecasting the volatility of carbon emission market: The role of outliers, time-varying jumps and oil price risk}, + author = {Dutta, Anupam}, + journal = {Journal of Cleaner Production}, + volume = {172}, + pages = {2773--2781}, + year = {2018}, + publisher = {Elsevier} +} +@article{eddelbuettel2014rcpparmadillo, + title = {RcppArmadillo: Accelerating R with high-performance C++ linear algebra}, + author = {Eddelbuettel, Dirk and Sanderson, Conrad}, + journal = {Computational Statistics \& Data Analysis}, + volume = {71}, + pages = {1054--1063}, + year = {2014}, + publisher = {Elsevier} +} +@article{fragoso2018bayesian, + title = {Bayesian model averaging: A systematic review and conceptual classification}, + author = {Fragoso, Tiago M and Bertoli, Wesley and Louzada, Francisco}, + journal = {International Statistical Review}, + volume = 
{86}, + number = {1}, + pages = {1--28}, + year = {2018}, + publisher = {Wiley Online Library} +} +@inproceedings{gaillard2014second, + title = {A second-order bound with excess losses}, + author = {Gaillard, Pierre and Stoltz, Gilles and Van Erven, Tim}, + booktitle = {Conference on Learning Theory}, + pages = {176--196}, + year = {2014}, + organization = {PMLR} +} +@incollection{gaillard2015forecasting, + title = {Forecasting electricity consumption by aggregating experts; how to design a good set of experts}, + author = {Gaillard, Pierre and Goude, Yannig}, + booktitle = {Modeling and stochastic learning for forecasting in high dimensions}, + pages = {95--115}, + year = {2015}, + publisher = {Springer} +} +@inproceedings{gaillard2018efficient, + title = {Efficient online algorithms for fast-rate regret bounds under sparsity}, + author = {Gaillard, Pierre and Wintenberger, Olivier}, + booktitle = {Advances in Neural Information Processing Systems}, + pages = {7026--7036}, + year = {2018} +} +@article{garcia2020short, + title = {Short-term European Union Allowance price forecasting with artificial neural networks}, + author = {Garc{\'\i}a, Agust{\'\i}n and Jaramillo-Mor{\'a}n, Miguel A}, + journal = {Entrepreneurship and Sustainability Issues}, + volume = {8}, + number = {1}, + pages = {261}, + year = {2020} +} +@article{gneiting2007strictly, + title = {Strictly proper scoring rules, prediction, and estimation}, + author = {Gneiting, Tilmann and Raftery, Adrian E}, + journal = {Journal of the American statistical Association}, + volume = {102}, + number = {477}, + pages = {359--378}, + year = {2007}, + publisher = {Taylor \& Francis} +} +@article{gneiting2011comparing, + title = {Comparing density forecasts using threshold-and quantile-weighted scoring rules}, + author = {Gneiting, Tilmann and Ranjan, Roopesh}, + journal = {Journal of Business \& Economic Statistics}, + volume = {29}, + number = {3}, + pages = {411--422}, + year = {2011}, + publisher = {Taylor \& 
Francis} +} +@article{gneiting2011making, + title = {Making and evaluating point forecasts}, + author = {Gneiting, Tilmann}, + journal = {Journal of the American Statistical Association}, + volume = {106}, + number = {494}, + pages = {746--762}, + year = {2011}, + publisher = {Taylor \& Francis} +} +@article{gneiting2011quantiles, + title = {Quantiles as optimal point forecasts}, + author = {Gneiting, Tilmann}, + journal = {International Journal of forecasting}, + volume = {27}, + number = {2}, + pages = {197--207}, + year = {2011}, + publisher = {Elsevier} +} +@article{hansen2008least, + title = {Least-squares forecast averaging}, + author = {Hansen, Bruce E}, + journal = {Journal of Econometrics}, + volume = {146}, + number = {2}, + pages = {342--350}, + year = {2008}, + publisher = {Elsevier} +} +@article{hao2020modelling, + title = {Modelling of carbon price in two real carbon trading markets}, + author = {Hao, Yan and Tian, Chengshi and Wu, Chunying}, + journal = {Journal of Cleaner Production}, + volume = {244}, + pages = {118556}, + year = {2020}, + publisher = {Elsevier} +} +@article{he1997quantile, + title = {Quantile curves without crossing}, + author = {He, Xuming}, + journal = {The American Statistician}, + volume = {51}, + number = {2}, + pages = {186--192}, + year = {1997}, + publisher = {Taylor \& Francis} +} +@article{herbster1998tracking, + title = {Tracking the best expert}, + author = {Herbster, Mark and Warmuth, Manfred K}, + journal = {Machine learning}, + volume = {32}, + number = {2}, + pages = {151--178}, + year = {1998}, + publisher = {Springer} +} +@article{hsiao2014there, + title = {Is there an optimal forecast combination?}, + author = {Hsiao, Cheng and Wan, Shui Ki}, + journal = {Journal of Econometrics}, + volume = {178}, + pages = {294--309}, + year = {2014}, + publisher = {Elsevier} +} +@book{hyndman2018forecasting, + title = {Forecasting: principles and practice}, + author = {Hyndman, Rob J and Athanasopoulos, George}, + year = 
{2018}, + publisher = {OTexts} +} +@article{jore2010combining, + title = {Combining forecast densities from VARs with uncertain instabilities}, + author = {Jore, Anne Sofie and Mitchell, James and Vahey, Shaun P}, + journal = {Journal of Applied Econometrics}, + volume = {25}, + number = {4}, + pages = {621--634}, + year = {2010}, + publisher = {Wiley Online Library} +} +@inproceedings{kakade2008generalization, + title = {On the Generalization Ability of Online Strongly Convex Programming Algorithms.}, + author = {Kakade, Sham M and Tewari, Ambuj}, + booktitle = {NIPS}, + pages = {801--808}, + year = {2008} +} +@article{kapetanios2015generalised, + title = {Generalised density forecast combinations}, + author = {Kapetanios, G and Mitchell, James and Price, Simon and Fawcett, Nicholas}, + journal = {Journal of Econometrics}, + volume = {188}, + number = {1}, + pages = {150--165}, + year = {2015}, + publisher = {Elsevier} +} +@inproceedings{koolen2015second, + title = {Second-order quantile methods for experts and combinatorial games}, + author = {Koolen, Wouter M and Van Erven, Tim}, + booktitle = {Conference on Learning Theory}, + pages = {1155--1175}, + year = {2015} +} +@article{koop2013forecasting, + title = {Forecasting the European carbon market}, + author = {Koop, Gary and Tole, Lise}, + journal = {Journal of the Royal Statistical Society: Series A (Statistics in Society)}, + volume = {176}, + number = {3}, + pages = {723--741}, + year = {2013}, + publisher = {Wiley Online Library} +} +@article{korotin2019integral, + title = {Integral Mixability: a Tool for Efficient Online Aggregation of Functional and Probabilistic Forecasts}, + author = {Korotin, Alexander and V'yugin, Vladimir and Burnaev, Evgeny}, + journal = {arXiv preprint arXiv:1912.07048}, + year = {2019} +} +@inproceedings{korotin2020mixing, + title = {Mixing past predictions}, + author = {Korotin, Alexander and V'yugin, Vladimir and Burnaev, Evgeny}, + booktitle = {Conformal and Probabilistic 
Prediction and Applications}, + pages = {171--188}, + year = {2020}, + organization = {PMLR} +} +@article{lichtendahl2013better, + title = {Is it better to average probabilities or quantiles?}, + author = {Lichtendahl Jr, Kenneth C and Grushka-Cockayne, Yael and Winkler, Robert L}, + journal = {Management Science}, + volume = {59}, + number = {7}, + pages = {1594--1611}, + year = {2013}, + publisher = {INFORMS} +} +@article{lin2018multi, + title = {A multi-model combination approach for probabilistic wind power forecasting}, + author = {Lin, You and Yang, Ming and Wan, Can and Wang, Jianhui and Song, Yonghua}, + journal = {IEEE Transactions on Sustainable Energy}, + volume = {10}, + number = {1}, + pages = {226--237}, + year = {2018}, + publisher = {IEEE} +} +@article{littlestone1994weighted, + title = {The weighted majority algorithm}, + author = {Littlestone, Nick and Warmuth, Manfred K}, + journal = {Information and computation}, + volume = {108}, + number = {2}, + pages = {212--261}, + year = {1994}, + publisher = {Elsevier} +} +@article{lu2015jackknife, + title = {Jackknife model averaging for quantile regressions}, + author = {Lu, Xun and Su, Liangjun}, + journal = {Journal of Econometrics}, + volume = {188}, + number = {1}, + pages = {40--58}, + year = {2015}, + publisher = {Elsevier} +} +@article{maciejowska2020pca, + title = {PCA Forecast Averaging—Predicting Day-Ahead and Intraday Electricity Prices}, + author = {Maciejowska, Katarzyna and Uniejewski, Bartosz and Serafin, Tomasz}, + journal = {Energies}, + volume = {13}, + number = {14}, + pages = {3530}, + year = {2020}, + publisher = {Multidisciplinary Digital Publishing Institute} +} +@article{mhammedi2019lipschitz, + title = {Lipschitz adaptivity with multiple learning rates in online learning}, + author = {Mhammedi, Zakaria and Koolen, Wouter M and Van Erven, Tim}, + journal = {arXiv preprint arXiv:1902.10797}, + year = {2019} +} +@article{nowotarski2018recent, + title = {Recent advances in 
electricity price forecasting: A review of probabilistic forecasting}, + author = {Nowotarski, Jakub and Weron, Rafa{\l}}, + journal = {Renewable and Sustainable Energy Reviews}, + volume = {81}, + pages = {1548--1568}, + year = {2018}, + publisher = {Elsevier} +} +@article{opschoor2017combining, + title = {Combining density forecasts using focused scoring rules}, + author = {Opschoor, Anne and Van Dijk, Dick and van der Wel, Michel}, + journal = {Journal of Applied Econometrics}, + volume = {32}, + number = {7}, + pages = {1298--1313}, + year = {2017}, + publisher = {Wiley Online Library} +} +@article{petropoulos2020forecasting, + title = {Forecasting: theory and practice}, + author = {Petropoulos, Fotios and Apiletti, Daniele and Assimakopoulos, Vassilios and Babai, Mohamed Zied and Barrow, Devon K and Bergmeir, Christoph and Bessa, Ricardo J and Boylan, John E and Browell, Jethro and Carnevale, Claudio and others}, + journal = {arXiv preprint arXiv:2012.03854}, + year = {2020} +} +@article{raftery2005using, + title = {Using Bayesian model averaging to calibrate forecast ensembles}, + author = {Raftery, Adrian E and Gneiting, Tilmann and Balabdaoui, Fadoua and Polakowski, Michael}, + journal = {Monthly weather review}, + volume = {133}, + number = {5}, + pages = {1155--1174}, + year = {2005} +} +@article{segnon2017modeling, + title = {Modeling and forecasting the volatility of carbon dioxide emission allowance prices: A review and comparison of modern volatility models}, + author = {Segnon, Mawuli and Lux, Thomas and Gupta, Rangan}, + journal = {Renewable and Sustainable Energy Reviews}, + volume = {69}, + pages = {692--704}, + year = {2017}, + publisher = {Elsevier} +} +@article{thorey2017online, + title = {Online learning with the Continuous Ranked Probability Score for ensemble forecasting}, + author = {Thorey, Jean and Mallet, Vivien and Baudin, Paul}, + journal = {Quarterly Journal of the Royal Meteorological Society}, + volume = {143}, + number = {702}, + 
pages = {521--529}, + year = {2017}, + publisher = {Wiley Online Library} +} +@article{thorey2018ensemble, + title = {Ensemble forecast of photovoltaic power with online CRPS learning}, + author = {Thorey, Jean and Chaussin, Christophe and Mallet, Vivien}, + journal = {International Journal of Forecasting}, + volume = {34}, + number = {4}, + pages = {762--773}, + year = {2018}, + publisher = {Elsevier} +} +@article{tu2011markowitz, + title = {Markowitz meets Talmud: A combination of sophisticated and naive diversification strategies}, + author = {Tu, Jun and Zhou, Guofu}, + journal = {Journal of Financial Economics}, + volume = {99}, + number = {1}, + pages = {204--215}, + year = {2011}, + publisher = {Elsevier} +} +@article{v2020online, + title = {Online Aggregation of Probabilistic Forecasts Based on the Continuous Ranked Probability Score}, + author = {V’yugin, VV and Trunov, VG}, + journal = {Journal of Communications Technology and Electronics}, + volume = {65}, + number = {6}, + pages = {662--676}, + year = {2020}, + publisher = {Springer} +} +@article{van2018probabilistic, + title = {Probabilistic forecasting of solar power, electricity consumption and net load: Investigating the effect of seasons, aggregation and penetration on prediction intervals}, + author = {Van der Meer, DW and Munkhammar, Joakim and Wid{\'e}n, Joakim}, + journal = {Solar Energy}, + volume = {171}, + pages = {397--413}, + year = {2018}, + publisher = {Elsevier} +} +@article{vovk1990aggregating, + title = {Aggregating strategies}, + author = {Vovk, Volodimir G}, + journal = {Proc. 
of Computational Learning Theory, 1990}, + year = {1990} +} +@book{wahba1990spline, + title = {Spline models for observational data}, + author = {Wahba, Grace}, + year = {1990}, + publisher = {SIAM} +} +@book{wang2011smoothing, + title = {Smoothing splines: methods and applications}, + author = {Wang, Yuedong}, + year = {2011}, + publisher = {CRC Press} +} +@article{wang2019jackknife, + title = {Jackknife Model Averaging for Composite Quantile Regression}, + author = {Wang, Miaomiao and Zou, Guohua}, + journal = {arXiv preprint arXiv:1910.12209}, + year = {2019} +} +@article{wintenberger2017optimal, + title = {Optimal learning with Bernstein online aggregation}, + author = {Wintenberger, Olivier}, + journal = {Machine Learning}, + volume = {106}, + number = {1}, + pages = {119--141}, + year = {2017}, + publisher = {Springer} +} +@article{zamo2020sequential, + title = {Sequential Aggregation of Probabilistic Forecasts--Application to Wind Speed Ensemble Forecasts}, + author = {Zamo, Micha{\"e}l and Bel, Liliane and Mestre, Olivier}, + journal = {arXiv preprint arXiv:2005.03540}, + year = {2020} +} +@article{zhang2020load, + title = {Load probability density forecasting by transforming and combining quantile forecasts}, + author = {Zhang, Shu and Wang, Yi and Zhang, Yutian and Wang, Dan and Zhang, Ning}, + journal = {Applied Energy}, + volume = {277}, + pages = {115600}, + year = {2020}, + publisher = {Elsevier} +} \ No newline at end of file diff --git a/25_07_phd_defense/index.qmd b/25_07_phd_defense/index.qmd index c6eb21b..bab3782 100644 --- a/25_07_phd_defense/index.qmd +++ b/25_07_phd_defense/index.qmd @@ -47,650 +47,694 @@ $$ ::: -## EfeMOD +## PhD Defence -**Empirisch fundierte Elektrizitätsmarkt-Modellierung mit Open Data** +```{r, setup, include=FALSE} +# Compile with: rmarkdown::render("crps_learning.Rmd") +library(latex2exp) +library(ggplot2) +library(dplyr) +library(tidyr) +library(purrr) +library(kableExtra) +knitr::opts_chunk$set( + dev = "svglite" # 
Use svg figures +) +library(RefManageR) +BibOptions( + check.entries = TRUE, + bib.style = "authoryear", + cite.style = "authoryear", + style = "html", + hyperlink = TRUE, + dashed = FALSE +) +my_bib <- ReadBib("assets/library.bib", check = FALSE) +col_lightgray <- "#e7e7e7" +col_blue <- "#000088" +col_smooth_expost <- "#a7008b" +col_smooth <- "#187a00" +col_pointwise <- "#008790" +col_constant <- "#dd9002" +col_optimum <- "#666666" +``` + +```{r xaringan-panelset, echo=FALSE} +xaringanExtra::use_panelset() +``` + +```{r xaringanExtra-freezeframe, echo=FALSE} +xaringanExtra::use_freezeframe(responsive = TRUE) +``` + +# Outline + +- [Motivation](#motivation) +- [The Framework of Prediction under Expert Advice](#pred_under_exp_advice) +- [The Continious Ranked Probability Scrore](#crps) +- [Optimality of (Pointwise) CRPS-Learning](#crps_optim) +- [A Simple Probabilistic Example](#simple_example) +- [The Proposed CRPS-Learning Algorithm](#proposed_algorithm) +- [Simulation Results](#simulation) +- [Possible Extensions](#extensions) +- [Application Study](#application) +- [Wrap-Up](#conclusion) +- [References](#references) + +--- + +class: center, middle, sydney-blue + +# Motivation + +name: motivation + +## Motivation :::: {.columns} -::: {.column width="65%"} +::: {.column width="48%"} -[{{< fa users-gear >}}]{style="color: #404040;"} **Project Entities:** +The Idea: -Chair of Prof. Dr. Christoph Weber (Management Sciences and Energy Economics) +- Combine multiple forecasts instead of choosing one -Chair of Prof. Dr. Florian Ziel (Data Science in Energy and Environment) +- Combination weights may vary over **time**, over the **distribution** or **both** -[{{< fa bullseye >}}]{style="color: #404040;"}   **Project Goal:** - -Use publicly available data (particularly ENTSO-E Transparency Platform) to estimate parameters for energy system and energy market models. 
- -::: - -::: {.column width="5%"} - -::: - -::: {.column width="30%"} -![](figures/BMWK.webp) - -::: - -:::: - -## EfeMOD - -![](figures/power_plant_list.jpg) - -## Motivation and Objective - -**Identification of Power Plant Operation States Using Clustering** - -[{{< fa earth-europe >}}]{style="color: #404040;"} Gain Knowledge about the Power Plant Characteristics - -- Operation Points, -- Efficiency -- Capacity, etc. - -[{{< fa display >}}]{style="color: #404040;"} This Presentation: - -Identify Operation States: - -- Stable Operation -- Startup -- Minimum-Stable Operation, etc. - -Provide these characteristics to other researchers - -[{{< fa right-long >}}]{style="color: #404040;"} e.g. to estimate efficiency - -## Data - -[{{< fa database >}}]{style="color:#404040;"} Entsoe Data: - -- ActualGenerationOutputPerGenerationUnit_16.1.A -- UnavailabilityOfGenerationUnits_15.1.A_B - -[{{< fa fire-flame-simple >}}]{style="color:rgb(0, 200, 255);"} We focus on natural gas units: - -- 63 units in `DE_LU` bidding zone -- 299 units across all bidding zones - -[{{< fa calendar-days >}}]{style="color:#404040;"} We use recent data: - -- 2020-01-01 until "now" - -## Data - -![](figures/Block%20AGuD/0_data1.jpg) - -## Data - -![](figures/Block%20AGuD/0_data2.jpg) - -## Data - - -::: {.panel-tabset} - -## Lausward - -:::: {.columns} - -::: {.column width="42%"} - -**Heizkraftwerk Lausward ** - -Location: Düsseldorf - -Block Anton (*Block AGuD*) - -Combined cycle gas turbine (CCGT) - -Electrical output: 103 MW [{{< fa bolt >}}]{style="color: #ffc400;"} - -75 MW of district heating can be decoupled - -Efficiency: 54% - -Fuel Utilization Rate: 87% (with district heating) - -::: - -::: {.column width="3%"} - -::: - -::: {.column width="55%"} - -![](figures/Block%20AGuD/0_data3.jpg) - -::: - -:::: - -## Emsland - -:::: {.columns} - -::: {.column width="42%"} - -**Erdgaskraftwerk Emsland** - -Location: Lingen (Ems) - -*Block C* - -Combined cycle gas turbine (CCGT) - -Electrical 
output: 475 MW [{{< fa bolt >}}]{style="color: #ffc400;"} - -Efficiency: 46% - -Black start enabled. - -::: - -::: {.column width="3%"} - -::: - -::: {.column width="55%"} - -![](figures/Emsland%20C/0_data3.jpg) - -::: - -:::: - -::: - - - -## Empirical Approach - -:::: {.columns} - -::: {.column width="42%"} - -### Overview - -Empirical identification of states - -3-Step Approach: - -- Prior Partitioning - - We create preliminary clusters - - They will be used to initialize the main clustering -- Main Clustering - - Gaussian Model Based Clustering -- Label Assignment - - We assign meaningful labels to the final clusters - -::: - -::: {.column width="3%"} - -::: - -::: {.column width="55%"} - -![](figures/Block%20AGuD/0_data3.jpg) - -::: - -:::: - -::: - -## Empirical Approach - -:::: {.columns} - -::: {.column width="42%"} - -### Prior Partitioning - -[{{< fa arrow-up-right-dots >}}]{style="color: #202020FF;"} Divide the space in meaningful partitions: - -Define the Capacity: $\zeta = max(t0)$ - -Define a threshold: $\gamma = \frac{\zeta}{50}$ - -[{{< fa circle >}}]{style="color: #2D7D32FF;"} $\pm \gamma$ around the diagonal: Stable
-[{{< fa circle >}}]{style="color: #202020FF;"} $t0 < 1$ & $t1 < 1$: Zero
-[{{< fa circle >}}]{style="color: #FA8C00FF;"} $t0 < \gamma$ & $t1 > 1$: Startup
-[{{< fa circle >}}]{style="color: #D81A5FFF;"} $t0 > 1$ & $t1 < \gamma$: Shutdown
-[{{< fa circle >}}]{style="color: #FDD834FF;"} $t1 > t0$: Ramp-Up
-[{{< fa circle >}}]{style="color: #8D24AAFF;"} $t1 < t0$: Ramp-Down - -We project Stable observations onto the diagonal, Startup on $t1$ and Shutdown on $t0$ for the next step. - -::: - -::: {.column width="3%"} - -::: - -::: {.column width="55%"} - -![](figures/Block%20AGuD/1_pre-partition.jpg) - -::: - -:::: - -::: - -## Empirical Approach - -:::: {.columns} - -::: {.column width="42%"} - -### Prior Partitioning - -Model-Based Clustering of the Regions using `mclust::Mclust` in `R`. - -- Stable: 2-5 Clusters -- Ramp Up: 2-4 Clusters -- Ramp Down: 2-4 Clusters - -[{{< fa lightbulb >}}]{style="color:rgb(255, 166, 0);"} Obtain finite mixture distribution: - -$$\sum_{k=1}^{G}{\pi_k f_k (\mathbf{x}; \mathbf{\theta}_k)}$$ - -$f_k$ Density of k's component
-$\pi_k$ Mixture weights
-$\theta_k$ parameters of k's density component - - -::: - -::: {.column width="3%"} - -::: - -::: {.column width="55%"} - -![](figures/Block%20AGuD/1_pre-partition.jpg) - -::: - -:::: - -## Empirical Approach - -### Prior Partitioning -:::: {.columns} - -::: {.column width="49%"} - -$$f(\mathbf{x}; \mathbf{\Psi}) = \sum_{k=1}^{G}{\pi_k \phi (\mathbf{x}; \mathbf{\mu}_k; \mathbf{\Sigma}_k)}$$ - -$\phi(\cdot)$ Multivariate Gaussian density
- -Maximum Likelihood Estimation via Expectation Maximization (EM) algorithm - -Likelihood for Gaussian Mixture Models (GMMs): - -\begin{align} - \ell(\Psi) = \sum_{i=1}^n \log \left\{ \sum_{k=1}^G \pi_k \phi(x_i; \mu_k, \Sigma_k) \right\} -\end{align} - -[{{< fa retweet >}}]{style="color: #404040;"} We Re-Formulate this likelihood to a complete-data likelihood to utilize the EM algorithm +2 Popular options for combining distributions: +- Combining across quantiles (this paper) + - Horizontal aggregation, vincentization +- Combining across probabilities + - Vertical aggregation ::: ::: {.column width="2%"} - -::: -::: {.column width="49%"} - -\begin{align} - \ell_{\mathcal{C}}(\Psi) = \sum_{i=1}^n \sum_{k=1}^G z_{ik} \left\{ \log \pi_k + \log \phi(x_i; \mu_k, \Sigma_k) \right\} -\end{align} - -\begin{align} - z_{ik} = - \begin{cases} - 1 & \text{if } x_i \text{ belongs to component }k \\ - 0 & \text{otherwise.} - \end{cases} -\end{align} - -E-Step: - -\begin{align} - \hat{z}_{ik} = \frac{\hat{\pi}_k \phi(x_i; \hat{\mu}_k, \hat{\Sigma}_k)}{\sum_{g=1}^{G} \hat{\pi}_g \phi(x_i; \hat{\mu}_g, \hat{\Sigma}_g)}, -\end{align} - -M-Step: - -\begin{align} -\quad \hat{\mu}_k = \frac{\sum_{i=1}^{n} \hat{z}_{ik} x_i}{n_k}, \quad \text{where} \quad n_k = \sum_{i=1}^{n} \hat{z}_{ik}. -\end{align} - - -::: - -:::: - - - -::: {.notes} - -- log-likelihood in (2.2) is hard to maximize directly -- even numerically - -- As a consequence, mixture models are usually fitted by reformulating the mixture -problem as an incomplete-data problem within the EM framework. 
- -General EM Steps: - -- Init -- Estimate latent component memberships -- M-Step obtain the updated parameter estimates -- Check convergence criteria - -::: - -## Empirical Approach - -:::: {.columns} - -::: {.column width="42%"} - -### Prior Partitioning - -**Initialization** - -We initialize the EM algorithm (E-Step) using the partitions -obtained from model-based agglomerative hierarchical clustering (MBAHC) - -**Estimation** - -The Bayesian information criterion (BIC) is used for model selection - -**Prior Partitioning Results** - -Right graph shows prior clusters. - -::: - -::: {.column width="3%"} - -::: - -::: {.column width="55%"} - -::: {.panel-tabset} - -## Lausward - -![](figures/Block%20AGuD/2_partition.jpg) - -## Emsland - -![](figures/Emsland%20C/2_partition.jpg) - -::: - -::: - -:::: - -::: {.notes} - -recursively merging the two clusters that yield the maximum -likelihood of a probability model over all possible merges - -::: - -## Empirical Approach - -:::: {.columns} - -::: {.column width="42%"} - -### Main Clustering - -**MBAHC** - -Prior Clusters are used in MBAHC - -The results of the MBAHC are used to initialize the EM Algorithm in the main Gaussian Model Based Clustering - -**Main Clustering Results** - -Right graph shows *Maximum A Posteriori (MAP) Classification* - -Colour indicates cumulated log(density) of all components. 
- -::: - -::: {.column width="3%"} - -::: - -::: {.column width="55%"} - -::: {.panel-tabset} - -## Lausward - -![](figures/Block%20AGuD/3_cluster.jpg) - -## Emsland - -![](figures/Emsland%20C/3_cluster.jpg) - -::: - -::: - -:::: - -::: {.notes} - -recursively merging the two clusters that yield the maximum -likelihood of a probability model over all possible merges - -::: - - - -## Empirical Approach - -### Label Assignment - -:::: {.columns} - -::: {.column width="48%"} - -We assign labels to the clusters using their mean $\mu$ and correlation $\rho$ - -Multiple clusters may describe one Generation State (e.g., along the diagonal) - -::: - -::: {.column width="4%"} - ::: ::: {.column width="48%"} -```{r} -library(dplyr) -load("figures/Block AGuD/clusters.RDS") -clusters %>% - select(classification, mu_t0, mu_t1, cor) %>% - head() +::: {.panel-tabset} + +## Time + +```{r, echo = FALSE, fig.height=6} +par(mfrow = c(3, 3), mar = c(2, 2, 2, 2)) +set.seed(1) +# Data +X <- matrix(ncol = 3, nrow = 15) +X[, 1] <- seq(from = 8, to = 12, length.out = 15) + 0.25 * rnorm(15) +X[, 2] <- 10 + 0.25 * rnorm(15) +X[, 3] <- seq(from = 12, to = 8, length.out = 15) + 0.25 * rnorm(15) +# Weights +w <- matrix(ncol = 3, nrow = 15) +w[, 1] <- sin(0.1 * 1:15) +w[, 2] <- cos(0.1 * 1:15) +w[, 3] <- seq(from = -2, 0.25, length.out = 15)^2 +w <- (w / rowSums(w)) +# Vis +plot(X[, 1], + lwd = 4, + type = "l", + ylim = c(8, 12), + xlab = "", + ylab = "", + xaxt = "n", + yaxt = "n", + bty = "n", + col = "#2050f0" +) +plot(w[, 1], + lwd = 4, type = "l", + ylim = c(0, 1), + xlab = "", + ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "#2050f0" +) +text(6, 0.5, TeX("$w_1(t)$"), cex = 2, col = "#2050f0") +arrows(13, 0.25, 15, 0.0, , lwd = 4, bty = "n") +plot.new() +plot(X[, 2], + lwd = 4, + type = "l", ylim = c(8, 12), + xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "purple" +) +plot(w[, 2], + lwd = 4, type = "l", + ylim = c(0, 1), + xlab = "", + ylab = "", xaxt = "n", yaxt = 
"n", bty = "n", col = "purple" +) +text(6, 0.6, TeX("$w_2(t)$"), cex = 2, col = "purple") +arrows(13, 0.5, 15, 0.5, , lwd = 4, bty = "n") +plot(rowSums(X * w), lwd = 4, type = "l", xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "#298829") +plot(X[, 3], + lwd = 4, + type = "l", ylim = c(8, 12), + xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "#e423b4" +) +plot(w[, 3], + lwd = 4, type = "l", + ylim = c(0, 1), + xlab = "", + ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "#e423b4" +) +text(6, 0.25, TeX("$w_3(t)$"), cex = 2, col = "#e423b4") +arrows(13, 0.75, 15, 1, , lwd = 4, bty = "n") +``` + +## Distribution + +```{r, echo = FALSE, fig.height=6} +par(mfrow = c(3, 3), mar = c(2, 2, 2, 2)) +set.seed(1) +# Data +X <- matrix(ncol = 3, nrow = 31) + +X[, 1] <- dchisq(0:30, df = 10) +X[, 2] <- dnorm(0:30, mean = 15, sd = 5) +X[, 3] <- dexp(0:30, 0.2) +# Weights +w <- matrix(ncol = 3, nrow = 31) +w[, 1] <- sin(0.05 * 0:30) +w[, 2] <- cos(0.05 * 0:30) +w[, 3] <- seq(from = -2, 0.25, length.out = 31)^2 +w <- (w / rowSums(w)) +# Vis +plot(X[, 1], + lwd = 4, + type = "l", + xlab = "", + ylab = "", + xaxt = "n", + yaxt = "n", + bty = "n", + col = "#2050f0" +) +plot(X[, 2], + lwd = 4, + type = "l", + xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "purple" +) +plot(X[, 3], + lwd = 4, + type = "l", + xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "#e423b4" +) +plot(w[, 1], + lwd = 4, type = "l", + ylim = c(0, 1), + xlab = "", + ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "#2050f0" +) +text(12, 0.5, TeX("$w_1(x)$"), cex = 2, col = "#2050f0") +arrows(26, 0.25, 31, 0.0, , lwd = 4, bty = "n") +plot(w[, 2], + lwd = 4, type = "l", + ylim = c(0, 1), + xlab = "", + ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "purple" +) +text(15, 0.5, TeX("$w_2(x)$"), cex = 2, col = "purple") +arrows(15, 0.25, 15, 0, , lwd = 4, bty = "n") +plot(w[, 3], + lwd = 4, type = "l", + ylim = c(0, 1), + xlab = "", + ylab = "", 
xaxt = "n", yaxt = "n", bty = "n", col = "#e423b4" +) +text(20, 0.5, TeX("$w_3(x)$"), cex = 2, col = "#e423b4") +arrows(5, 0.25, 0, 0, , lwd = 4, bty = "n") +plot.new() +plot(rowSums(X * w), lwd = 4, type = "l", xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "#298829") ``` ::: -:::: - -\begin{align} -\text{State} = - \begin{cases} - \color{#202020FF}{\text{Zero}} & (\mu_{t0} < 1) \land (\mu_{t1} < 1), \\ - \text{MSO} & \left[ (\mu_{t0} > \zeta/10) \land (\mu_{t1} > \zeta / 10) \land (\right| \mu_{t0} - \mu_{t1} \left| > \zeta / 10) \right]\\ & \rightarrow \operatorname{argmin}(\mu_{t0} + \mu_{t1}), \\ - \text{Max Capacity} & \rightarrow \operatorname{argmax}(\mu_{t0} + \mu_{t1}), \\ - \text{Startup} & (\mu_{t1} \geq \zeta / 10) \land (\mu_{t0} < \gamma) \land (\rho < 0.3), \\ - \text{Shutdown} & (\mu_{t0} \geq \zeta / 10) \land (\mu_{t1} < \gamma) \land (\rho < 0.3), \\ - \text{Stable Operation} & \text{Remaining clusters with cor} > 0.8, \\ - \text{Ramp Up} & \text{Remaining clusters: } \mu_{t1} > \mu_{t0}, \\ - \text{Ramp Down} & \text{Remaining clusters: } \mu_{t1} < \mu_{t0}. - \end{cases} -\end{align} - -::: {.notes} - -recursively merging the two clusters that yield the maximum -likelihood of a probability model over all possible merges - ::: -## Empirical Approach +:::: + +# The Framework of Prediction under Expert Advice + +## The Framework of Prediction under Expert Advice + +### The sequential framework :::: {.columns} -::: {.column width="39%"} +::: {.column width="48%"} -### Label Assignment - -Right graphs show *assigned states* - -The points are coloured according to - -- MAP -- Probability (each pure colour reflects a probability of 1) - -Some points below /above the diagonal are assigned to Ramp Up / Ramp Down - -- Can be easily fixed for MAP -- Fixing probabilistic predictions not that easy +Each day, $t = 1, 2, ... 
T$ +- The **forecaster** receives predictions $\widehat{X}_{t,k}$ from $K$ **experts** +- The **forecaster** assigns weights $w_{t,k}$ to each **expert** +- The **forecaster** calculates her prediction: +\begin{equation} + \widetilde{X}_{t} = \sum_{k=1}^K w_{t,k} \widehat{X}_{t,k}. + \label{eq_forecast_def} +\end{equation} +- The realization for $t$ is observed ::: ::: {.column width="2%"} - -::: - -::: {.column width="59%"} - -::: {.panel-tabset} - -## LSW - -![](figures/Block%20AGuD/4_assignments.jpg) - -## LSW Pr - -![](figures/Block%20AGuD/4_assignments_prob.jpg) - -## LSW Pr - -![](figures/Block%20AGuD/4_probability.jpg) - -## EMS - -![](figures/Emsland%20C/4_assignments.jpg) - -## EMS Pr - -![](figures/Emsland%20C/4_assignments_prob.jpg) - -## EMS Pr - -![](figures/Emsland%20C/4_probability.jpg) ::: +::: {.column width="48%"} + +- The experts can be institutions, persons, or models +- The forecasts can be point-forecasts (i.e., mean or median) or full predictive distributions +- We do not need any assumptions concerning the underlying data +- `r Citet(my_bib, "cesa2006prediction")` + ::: :::: --- ## The Regret Weights are updated sequentially according to the past performance of the $K$ experts. That is, a loss function $\ell$ is needed. This is used to compute the **cumulative regret** $R_{t,k}$ \begin{equation} + R_{t,k} = \widetilde{L}_{t} - \widehat{L}_{t,k} = \sum_{i = 1}^t \ell(\widetilde{X}_{i},Y_i) - \ell(\widehat{X}_{i,k},Y_i) + \label{eq_regret} +\end{equation} + +The cumulative regret: +- Indicates the predictive accuracy of the expert $k$ until time $t$. 
+- Measures how much the forecaster *regrets* not having followed the expert's advice + +Popular loss functions for point forecasting `r Citet(my_bib, "gneiting2011making")`: +.pull-left[ +- $\ell_2$-loss $\ell_2(x, y) = | x -y|^2$ + - optimal for mean prediction +] +.pull-right[ +- $\ell_1$-loss $\ell_1(x, y) = | x -y|$ + - optimal for median predictions +] -## Empirical Approach :::: {.columns} -::: {.column width="42%"} +::: {.column width="48%"} -### Label Assignment - -*Fixing assignments* - -Relabeling Ramp Up and Ramp Down MAP predictions is trivial: - -\begin{align} -\text{State} = - \begin{cases} - \text{Ramp Up} & x_{t1} > x_{t0}, \\ - \text{Ramp Down} & x_{t1} < x_{t0}. - \end{cases} -\end{align} - -Fixing the probability array is more involved: - -Find observations $x_{t1} < x_{t0}$ that can not be "Ramp Up": - -Set probability of all Ramp Up clusters to $0$. - -Normalize the probabilities. +- $\ell_2$-loss $\ell_2(x, y) = | x -y|^2$ + - optimal for mean prediction ::: -::: {.column width="3%"} - -::: - -::: {.column width="55%"} - -::: {.panel-tabset} - - -## LSW Pr - -![](figures/Block%20AGuD/4_assignments_prob_fixed.jpg) - -## LSW Pr - -![](figures/Block%20AGuD/4_probability_fixed.jpg) - -## EMS Pr - -![](figures/Emsland%20C/4_assignments_prob_fixed.jpg) - -## EMS Pr - -![](figures/Emsland%20C/4_probability_fixed.jpg) +::: {.column width="2%"} ::: +::: {.column width="48%"} + +- $\ell_1$-loss $\ell_1(x, y) = | x -y|$ + - optimal for median predictions + ::: :::: -## Outlook -
-
+## Popular Aggregation Algorithms + +#### The naive combination + +\begin{equation} + w_{t,k}^{\text{Naive}} = \frac{1}{K} +\end{equation} + +#### The exponentially weighted average forecaster (EWA) + +\begin{align} + w_{t,k}^{\text{EWA}} & = \frac{e^{\eta R_{t,k}} }{\sum_{k = 1}^K e^{\eta R_{t,k}}} + = + \frac{e^{-\eta \ell(\widehat{X}_{t,k},Y_t)} w^{\text{EWA}}_{t-1,k} }{\sum_{k = 1}^K e^{-\eta \ell(\widehat{X}_{t,k},Y_t)} w^{\text{EWA}}_{t-1,k} } + \label{eq_ewa_general} +\end{align} + +#### The polynomial weighted aggregation (PWA) + +\begin{align} + w_{t,k}^{\text{PWA}} & = \frac{ 2(R_{t,k})^{q-1}_{+} }{ \|(R_t)_{+}\|^{q-2}_q} + \label{eq_pwa_general} +\end{align} + +with $q\geq 2$ and $x_{+}$ the (vector) of positive parts of $x$. + +## Optimality + +In stochastic settings, the cumulative Risk should be analyzed `r Citet(my_bib, "wintenberger2017optimal")`: + +\begin{align} + \underbrace{\widetilde{\mathcal{R}}_t = \sum_{i=1}^t \mathbb{E}[\ell(\widetilde{X}_{i},Y_i)|\mathcal{F}_{i-1}]}_{\text{Cumulative Risk of Forecaster}} \qquad\qquad\qquad \text{ and } \qquad\qquad\qquad + \underbrace{\widehat{\mathcal{R}}_{t,k} = \sum_{i=1}^t \mathbb{E}[\ell(\widehat{X}_{i,k},Y_i)|\mathcal{F}_{i-1}]}_{\text{Cumulative Risk of Experts}} + \label{eq_def_cumrisk} +\end{align} + +There are two problems that an algorithm should solve in iid settings: + +:::: {.columns} + +::: {.column width="48%"} + +### The selection problem +\begin{equation} + \frac{1}{t}\left(\widetilde{\mathcal{R}}_t - \widehat{\mathcal{R}}_{t,\min} \right) \stackrel{t\to \infty}{\rightarrow} a \quad \text{with} \quad a \leq 0. + \label{eq_opt_select} +\end{equation} +The forecaster is asymptotically not worse than the best expert $\widehat{\mathcal{R}}_{t,\min}$. 
 + +::: + +::: {.column width="2%"} + +::: + +::: {.column width="48%"} + +### The convex aggregation problem + +\begin{equation} + \frac{1}{t}\left(\widetilde{\mathcal{R}}_t - \widehat{\mathcal{R}}_{t,\pi} \right) \stackrel{t\to \infty}{\rightarrow} b \quad \text{with} \quad b \leq 0 . + \label{eq_opt_conv} +\end{equation} +The forecaster is asymptotically not worse than the best convex combination $\widehat{X}_{t,\pi}$ in hindsight (**oracle**). + +::: + +:::: + +## Optimality + +Satisfying the convexity property \eqref{eq_opt_conv} comes at the cost of slower possible convergence. + +According to `r Citet(my_bib, "wintenberger2017optimal")`, an algorithm has optimal rates with respect to selection \eqref{eq_optp_select} and convex aggregation \eqref{eq_optp_conv} if + +\begin{align} + \frac{1}{t}\left(\widetilde{\mathcal{R}}_t - \widehat{\mathcal{R}}_{t,\min} \right) & = + \mathcal{O}\left(\frac{\log(K)}{t}\right)\label{eq_optp_select} +\end{align} + +\begin{align} + \frac{1}{t}\left(\widetilde{\mathcal{R}}_t - \widehat{\mathcal{R}}_{t,\pi} \right) & = + \mathcal{O}\left(\sqrt{\frac{\log(K)}{t}}\right) + \label{eq_optp_conv} +\end{align} + +Algorithms can satisfy both \eqref{eq_optp_select} and \eqref{eq_optp_conv} depending on: + +- The loss function +- Regularity conditions on $Y_t$ and $\widehat{X}_{t,k}$ +- The weighting scheme + +## Optimality + +According to `r Citet(my_bib, "cesa2006prediction")` EWA \eqref{eq_ewa_general} satisfies the optimal selection convergence \eqref{eq_optp_select} in a deterministic setting if the: +- Loss $\ell$ is exp-concave +- Learning-rate $\eta$ is chosen correctly + +Those results can be converted to stochastic iid settings `r Citet(my_bib, "kakade2008generalization")` `r Citet(my_bib, "gaillard2014second")`. + +The optimal convex aggregation convergence \eqref{eq_optp_conv} can be satisfied by applying the kernel-trick. 
Thereby, the loss is linearized: +\begin{align} +\ell^{\nabla}(x,y) = \ell'(\widetilde{X},y) x +\end{align} +$\ell'$ is the subgradient of $\ell$ in its first coordinate evaluated at forecast combination $\widetilde{X}$. + +Combining probabilistic forecasts calls for a probabilistic loss function + +:::: {.notes} + +We apply Bernstein Online Aggregation (BOA). It lets us weaken the exp-concavity condition while almost keeping the optimalities \ref{eq_optp_select} and \ref{eq_optp_conv}. + +:::: + +## The Continuous Ranked Probability Score + +:::: {.columns} + +::: {.column width="48%"} + +**An appropriate choice:** + +\begin{align*} + \text{CRPS}(F, y) & = \int_{\mathbb{R}} {(F(x) - \mathbb{1}\{ x > y \})}^2 dx + \label{eq_crps} +\end{align*} + +It's strictly proper `r Citet(my_bib, "gneiting2007strictly")`. + +Using the CRPS, we can calculate time-adaptive weight $w_{t,k}$. However, what if the experts' performance is not uniform over all parts of the distribution? + +The idea: utilize this relation: + +\begin{align*} + \text{CRPS}(F, y) = 2 \int_0^{1} \text{QL}_p(F^{-1}(p), y) \, d p. + \label{eq_crps_qs} +\end{align*} + +::: + +::: {.column width="2%"} + +::: + +::: {.column width="48%"} + +to combine quantiles of the probabilistic forecasts individually using the quantile-loss (QL): +\begin{align*} + \text{QL}_p(q, y) & = (\mathbb{1}\{y < q\} -p)(q - y) +\end{align*} + +
 + +**But is it optimal?** + +CRPS is exp-concave `r fontawesome::fa("check", fill ="#00b02f")` + +`r fontawesome::fa("arrow-right", fill ="#000000")` EWA \eqref{eq_ewa_general} with CRPS satisfies \eqref{eq_optp_select} and \eqref{eq_optp_conv} + +QL is convex, but not exp-concave `r fontawesome::fa("exclamation", fill ="#ffa600")` + +`r fontawesome::fa("arrow-right", fill ="#000000")` Bernstein Online Aggregation (BOA) lets us weaken the exp-concavity condition while almost keeping optimal convergence + +::: + +:::: + +## CRPS-Learning Optimality + +For convex losses, BOAG satisfies that there exists a $C>0$ such that for $x>0$ it holds that +\begin{equation} + P\left( \frac{1}{t}\left(\widetilde{\mathcal{R}}_t - \widehat{\mathcal{R}}_{t,\pi} \right) \leq C \log(\log(t)) \left(\sqrt{\frac{\log(K)}{t}} + \frac{\log(K)+x}{t}\right) \right) \geq + 1-e^{-x} + \label{eq_boa_opt_conv} +\end{equation} +`r fontawesome::fa("arrow-right", fill ="#000000")` Almost optimal w.r.t *convex aggregation* \eqref{eq_optp_conv} `r Citet(my_bib, "wintenberger2017optimal")` . + +The same algorithm satisfies that there exists a $C>0$ such that for $x>0$ it holds that +\begin{equation} + P\left( \frac{1}{t}\left(\widetilde{\mathcal{R}}_t - \widehat{\mathcal{R}}_{t,\min} \right) \leq + C\left(\frac{\log(K)+\log(\log(Gt))+ x}{\alpha t}\right)^{\frac{1}{2-\beta}} \right) \geq + 1-e^{-x} + \label{eq_boa_opt_select} +\end{equation} + +if $Y_t$ is bounded, the considered loss $\ell$ is convex $G$-Lipschitz and weak exp-concave in its first coordinate. + +This is for losses that satisfy **A1** and **A2**. 
+ +## CRPS-Learning Optimality + +:::: {.columns} + +::: {.column width="48%"} + +**A1** + +For some $G>0$ it holds +for all $x_1,x_2\in \mathbb{R}$ and $t>0$ that + +$$ | \ell(x_1, Y_t)-\ell(x_2, Y_t) | \leq G |x_1-x_2|$$ + +**A2** For some $\alpha>0$, $\beta\in[0,1]$ it holds +for all $x_1,x_2 \in \mathbb{R}$ and $t>0$ that + +\begin{align*} + \mathbb{E}[ + & \ell(x_1, Y_t)-\ell(x_2, Y_t) | \mathcal{F}_{t-1}] \leq \\ + & \mathbb{E}[ \ell'(x_1, Y_t)(x_1 - x_2) |\mathcal{F}_{t-1}] \\ + & + + \mathbb{E}\left[ \left. \left( \alpha(\ell'(x_1, Y_t)(x_1 - x_2))^{2}\right)^{1/\beta} \right|\mathcal{F}_{t-1}\right] +\end{align*} + +`r fontawesome::fa("arrow-right", fill ="#000000")` Almost optimal w.r.t *selection* \eqref{eq_optp_select} `r Citet(my_bib, "gaillard2018efficient")`. + +::: + +::: {.column width="2%"} + +::: + +::: {.column width="48%"} + +**Lemma 1** + +\begin{align} + 2\overline{\widehat{\mathcal{R}}}^{\text{QL}}_{t,\min} + & \leq \widehat{\mathcal{R}}^{\text{CRPS}}_{t,\min} + \label{eq_risk_ql_crps_expert} \\ + 2\overline{\widehat{\mathcal{R}}}^{\text{QL}}_{t,\pi} + & \leq \widehat{\mathcal{R}}^{\text{CRPS}}_{t,\pi} . + \label{eq_risk_ql_crps_convex} +\end{align} + +Pointwise can outperform constant procedures + +QL is convex but not exp-concave: + +`r fontawesome::fa("arrow-right")` Almost optimal convergence w.r.t. *convex aggregation* \eqref{eq_boa_opt_conv} `r fontawesome::fa("check", fill ="#00b02f")`
 + +For almost optimal convergence w.r.t. *selection* \eqref{eq_boa_opt_select} we need to check **A1** and **A2**: + +QL is Lipschitz continuous: + +`r fontawesome::fa("arrow-right")` **A1** holds `r fontawesome::fa("check", fill ="#ffa600")`
 + +::: + +:::: + + +## CRPS-Learning Optimality + +:::: {.columns} + +::: {.column width="48%"} + +Conditional quantile risk: $\mathcal{Q}_p(x) = \mathbb{E}[ \text{QL}_p(x, Y_t) | \mathcal{F}_{t-1}]$. + +`r fontawesome::fa("arrow-right")` convexity properties of $\mathcal{Q}_p$ depend on the +conditional distribution $Y_t|\mathcal{F}_{t-1}$. + +**Proposition 1** + +Let $Y$ be a univariate random variable with (Radon-Nikodym) $\nu$-density $f$, then for the second subderivative of the quantile risk +$\mathcal{Q}_p(x) = \mathbb{E}[ \text{QL}_p(x, Y) ]$ +of $Y$ it holds for all $p\in(0,1)$ that +$\mathcal{Q}_p'' = f.$ +Additionally, if $f$ is a continuous Lebesgue-density with $f\geq\gamma>0$ for some constant $\gamma>0$ on its support $\text{spt}(f)$ then +$\mathcal{Q}_p$ is $\gamma$-strongly convex. + +Strong convexity with $\beta=1$ implies **A2** `r fontawesome::fa("check", fill ="#ffa600")` `r Citet(my_bib, "gaillard2018efficient")` + +::: + +::: {.column width="2%"} + +::: + +::: {.column width="48%"} + +`r fontawesome::fa("arrow-right")` **A1** and **A2** give us almost optimal convergence w.r.t. selection \eqref{eq_boa_opt_select} `r fontawesome::fa("check", fill ="#00b02f")`
+ +**Theorem 1** + +The gradient based fully adaptive Bernstein online aggregation (BOAG) applied pointwise for all $p\in(0,1)$ on $\text{QL}$ satisfies +\eqref{eq_boa_opt_conv} with minimal CRPS given by + +$$\widehat{\mathcal{R}}_{t,\pi} = 2\overline{\widehat{\mathcal{R}}}^{\text{QL}}_{t,\pi}.$$ + +If $Y_t|\mathcal{F}_{t-1}$ is bounded +and has a pdf $f_t$ satifying $f_t>\gamma >0$ on its +support $\text{spt}(f_t)$ then \ref{eq_boa_opt_select} holds with $\beta=1$ and + +$$\widehat{\mathcal{R}}_{t,\min} = 2\overline{\widehat{\mathcal{R}}}^{\text{QL}}_{t,\min}$$. + +::: + +:::: + + +## A Probabilistic Example + + + +:::: {.columns} + +::: {.column width="48%"} + +Simple Example: + + +\begin{align} + Y_t & \sim \mathcal{N}(0,\,1) \\ + \widehat{X}_{t,1} & \sim \widehat{F}_{1} = \mathcal{N}(-1,\,1) \\ + \widehat{X}_{t,2} & \sim \widehat{F}_{2} = \mathcal{N}(3,\,4) + \label{eq:dgp_sim1} +\end{align} + +- True weights vary over $p$ +- Figures show the ECDF and calculated weights using $T=25$ realizations +- Pointwise solution creates rough estimates +- Pointwise is better than constant +- Smooth solution is better than pointwise + +::: + +::: {.column width="2%"} + +::: + +::: {.column width="48%"} + +foo + +::: + +:::: + + + + + + +## Columns Template + +:::: {.columns} + +::: {.column width="48%"} + +Baz + +::: + +::: {.column width="2%"} + +::: + +::: {.column width="48%"} + +foo + +::: + +:::: + + +# References + +```{r refs1, echo=FALSE, results="asis"} +PrintBibliography(my_bib, .opts = list(style = "text")) +``` -- The approach works in general -- Conceptually simple -- Label assignment needs some more work -- Probabilistic statements may need adjustments for Ramp-Up Ramp-Down predictions -- Some kind of validation would be desirable -- Results will be used party on another research project in the EFEMOD project