From d91e8ea505ca68302b705f1f0e64d95763c5da86 Mon Sep 17 00:00:00 2001 From: Jonathan Berrisch Date: Sat, 17 May 2025 11:04:30 +0200 Subject: [PATCH] Update main slides, add first half of CRPS learning slides --- 25_07_phd_defense/assets/library.bib | 539 ++++++++++++ 25_07_phd_defense/index.qmd | 1222 +++++++++++++------------- 2 files changed, 1172 insertions(+), 589 deletions(-) create mode 100644 25_07_phd_defense/assets/library.bib diff --git a/25_07_phd_defense/assets/library.bib b/25_07_phd_defense/assets/library.bib new file mode 100644 index 0000000..98491d1 --- /dev/null +++ b/25_07_phd_defense/assets/library.bib @@ -0,0 +1,539 @@ +@article{aastveit2014nowcasting, + title = {Nowcasting GDP in real time: A density combination approach}, + author = {Aastveit, Knut Are and Gerdrup, Karsten R and Jore, Anne Sofie and Thorsrud, Leif Anders}, + journal = {Journal of Business \& Economic Statistics}, + volume = {32}, + number = {1}, + pages = {48--68}, + year = {2014}, + publisher = {Taylor \& Francis} +} +@incollection{aastveit2019evolution, + title = {The Evolution of Forecast Density Combinations in Economics}, + author = {Aastveit, Knut Are and Mitchell, James and Ravazzolo, Francesco and van Dijk, Herman K}, + booktitle = {Oxford Research Encyclopedia of Economics and Finance}, + year = {2019} +} +@article{atiya2020does, + title = {Why does forecast combination work so well?}, + author = {Atiya, Amir F}, + journal = {International Journal of Forecasting}, + volume = {36}, + number = {1}, + pages = {197--200}, + year = {2020}, + publisher = {Elsevier} +} +@article{atsalakis2016using, + title = {Using computational intelligence to forecast carbon prices}, + author = {Atsalakis, George S}, + journal = {Applied Soft Computing}, + volume = {43}, + pages = {107--116}, + year = {2016}, + publisher = {Elsevier} +} +@article{bai2020does, + title = {Does crude oil futures price really help to predict spot oil price? 
New evidence from density forecasting}, + author = {Bai, Lan and Li, Xiafei and Wei, Yu and Wei, Guiwu}, + journal = {International Journal of Finance \& Economics}, + year = {2020}, + publisher = {Wiley Online Library} +} +@article{benz2009modeling, + title = {Modeling the price dynamics of CO2 emission allowances}, + author = {Benz, Eva and Tr{\"u}ck, Stefan}, + journal = {Energy Economics}, + volume = {31}, + number = {1}, + pages = {4--15}, + year = {2009}, + publisher = {Elsevier} +} +@article{biau2011sequential, + title = {Sequential quantile prediction of time series}, + author = {Biau, G{\'e}rard and Patra, Beno{\^\i}t}, + journal = {IEEE Transactions on Information Theory}, + volume = {57}, + number = {3}, + pages = {1664--1674}, + year = {2011}, + publisher = {IEEE} +} +@inproceedings{bousquet2001tracking, + title = {Tracking a small set of experts by mixing past posteriors}, + author = {Bousquet, Olivier and Warmuth, Manfred K}, + booktitle = {International Conference on Computational Learning Theory}, + pages = {31--47}, + year = {2001}, + organization = {Springer} +} +@article{bregere2020online, + title = {Online hierarchical forecasting for power consumption data}, + author = {Br{\'e}g{\`e}re, Margaux and Huard, Malo}, + journal = {arXiv preprint arXiv:2003.00585}, + year = {2020} +} +@article{busetti2017quantile, + title = {Quantile aggregation of density forecasts}, + author = {Busetti, Fabio}, + journal = {Oxford Bulletin of Economics and Statistics}, + volume = {79}, + number = {4}, + pages = {495--512}, + year = {2017}, + publisher = {Wiley Online Library} +} +@book{cesa2006prediction, + title = {Prediction, learning, and games}, + author = {Cesa-Bianchi, Nicolo and Lugosi, G{\'a}bor}, + year = {2006}, + publisher = {Cambridge university press} +} +@article{cesa2012mirror, + title = {Mirror descent meets fixed share (and feels no regret)}, + author = {Cesa-Bianchi, Nicolo and Gaillard, Pierre and Lugosi, G{\'a}bor and Stoltz, Gilles}, + journal = 
{Advances in Neural Information Processing Systems}, + volume = {25}, + pages = {980--988}, + year = {2012} +} +@article{cheng2015forecasting, + title = {Forecasting with factor-augmented regression: A frequentist model averaging approach}, + author = {Cheng, Xu and Hansen, Bruce E}, + journal = {Journal of Econometrics}, + volume = {186}, + number = {2}, + pages = {280--293}, + year = {2015}, + publisher = {Elsevier} +} +@article{chernozhukov2010quantile, + title = {Quantile and probability curves without crossing}, + author = {Chernozhukov, Victor and Fern{\'a}ndez-Val, Iv{\'a}n and Galichon, Alfred}, + journal = {Econometrica}, + volume = {78}, + number = {3}, + pages = {1093--1125}, + year = {2010}, + publisher = {Wiley Online Library} +} +@article{devaine2013forecasting, + title = {Forecasting electricity consumption by aggregating specialized experts}, + author = {Devaine, Marie and Gaillard, Pierre and Goude, Yannig and Stoltz, Gilles}, + journal = {Machine Learning}, + volume = {90}, + number = {2}, + pages = {231--260}, + year = {2013}, + publisher = {Springer} +} +@article{dutta2018modeling, + title = {Modeling and forecasting the volatility of carbon emission market: The role of outliers, time-varying jumps and oil price risk}, + author = {Dutta, Anupam}, + journal = {Journal of Cleaner Production}, + volume = {172}, + pages = {2773--2781}, + year = {2018}, + publisher = {Elsevier} +} +@article{eddelbuettel2014rcpparmadillo, + title = {RcppArmadillo: Accelerating R with high-performance C++ linear algebra}, + author = {Eddelbuettel, Dirk and Sanderson, Conrad}, + journal = {Computational Statistics \& Data Analysis}, + volume = {71}, + pages = {1054--1063}, + year = {2014}, + publisher = {Elsevier} +} +@article{fragoso2018bayesian, + title = {Bayesian model averaging: A systematic review and conceptual classification}, + author = {Fragoso, Tiago M and Bertoli, Wesley and Louzada, Francisco}, + journal = {International Statistical Review}, + volume = 
{86}, + number = {1}, + pages = {1--28}, + year = {2018}, + publisher = {Wiley Online Library} +} +@inproceedings{gaillard2014second, + title = {A second-order bound with excess losses}, + author = {Gaillard, Pierre and Stoltz, Gilles and Van Erven, Tim}, + booktitle = {Conference on Learning Theory}, + pages = {176--196}, + year = {2014}, + organization = {PMLR} +} +@incollection{gaillard2015forecasting, + title = {Forecasting electricity consumption by aggregating experts; how to design a good set of experts}, + author = {Gaillard, Pierre and Goude, Yannig}, + booktitle = {Modeling and stochastic learning for forecasting in high dimensions}, + pages = {95--115}, + year = {2015}, + publisher = {Springer} +} +@inproceedings{gaillard2018efficient, + title = {Efficient online algorithms for fast-rate regret bounds under sparsity}, + author = {Gaillard, Pierre and Wintenberger, Olivier}, + booktitle = {Advances in Neural Information Processing Systems}, + pages = {7026--7036}, + year = {2018} +} +@article{garcia2020short, + title = {Short-term European Union Allowance price forecasting with artificial neural networks}, + author = {Garc{\'\i}a, Agust{\'\i}n and Jaramillo-Mor{\'a}n, Miguel A}, + journal = {Entrepreneurship and Sustainability Issues}, + volume = {8}, + number = {1}, + pages = {261}, + year = {2020} +} +@article{gneiting2007strictly, + title = {Strictly proper scoring rules, prediction, and estimation}, + author = {Gneiting, Tilmann and Raftery, Adrian E}, + journal = {Journal of the American statistical Association}, + volume = {102}, + number = {477}, + pages = {359--378}, + year = {2007}, + publisher = {Taylor \& Francis} +} +@article{gneiting2011comparing, + title = {Comparing density forecasts using threshold-and quantile-weighted scoring rules}, + author = {Gneiting, Tilmann and Ranjan, Roopesh}, + journal = {Journal of Business \& Economic Statistics}, + volume = {29}, + number = {3}, + pages = {411--422}, + year = {2011}, + publisher = {Taylor \& 
Francis} +} +@article{gneiting2011making, + title = {Making and evaluating point forecasts}, + author = {Gneiting, Tilmann}, + journal = {Journal of the American Statistical Association}, + volume = {106}, + number = {494}, + pages = {746--762}, + year = {2011}, + publisher = {Taylor \& Francis} +} +@article{gneiting2011quantiles, + title = {Quantiles as optimal point forecasts}, + author = {Gneiting, Tilmann}, + journal = {International Journal of forecasting}, + volume = {27}, + number = {2}, + pages = {197--207}, + year = {2011}, + publisher = {Elsevier} +} +@article{hansen2008least, + title = {Least-squares forecast averaging}, + author = {Hansen, Bruce E}, + journal = {Journal of Econometrics}, + volume = {146}, + number = {2}, + pages = {342--350}, + year = {2008}, + publisher = {Elsevier} +} +@article{hao2020modelling, + title = {Modelling of carbon price in two real carbon trading markets}, + author = {Hao, Yan and Tian, Chengshi and Wu, Chunying}, + journal = {Journal of Cleaner Production}, + volume = {244}, + pages = {118556}, + year = {2020}, + publisher = {Elsevier} +} +@article{he1997quantile, + title = {Quantile curves without crossing}, + author = {He, Xuming}, + journal = {The American Statistician}, + volume = {51}, + number = {2}, + pages = {186--192}, + year = {1997}, + publisher = {Taylor \& Francis} +} +@article{herbster1998tracking, + title = {Tracking the best expert}, + author = {Herbster, Mark and Warmuth, Manfred K}, + journal = {Machine learning}, + volume = {32}, + number = {2}, + pages = {151--178}, + year = {1998}, + publisher = {Springer} +} +@article{hsiao2014there, + title = {Is there an optimal forecast combination?}, + author = {Hsiao, Cheng and Wan, Shui Ki}, + journal = {Journal of Econometrics}, + volume = {178}, + pages = {294--309}, + year = {2014}, + publisher = {Elsevier} +} +@book{hyndman2018forecasting, + title = {Forecasting: principles and practice}, + author = {Hyndman, Rob J and Athanasopoulos, George}, + year = 
{2018}, + publisher = {OTexts} +} +@article{jore2010combining, + title = {Combining forecast densities from VARs with uncertain instabilities}, + author = {Jore, Anne Sofie and Mitchell, James and Vahey, Shaun P}, + journal = {Journal of Applied Econometrics}, + volume = {25}, + number = {4}, + pages = {621--634}, + year = {2010}, + publisher = {Wiley Online Library} +} +@inproceedings{kakade2008generalization, + title = {On the Generalization Ability of Online Strongly Convex Programming Algorithms.}, + author = {Kakade, Sham M and Tewari, Ambuj}, + booktitle = {NIPS}, + pages = {801--808}, + year = {2008} +} +@article{kapetanios2015generalised, + title = {Generalised density forecast combinations}, + author = {Kapetanios, G and Mitchell, James and Price, Simon and Fawcett, Nicholas}, + journal = {Journal of Econometrics}, + volume = {188}, + number = {1}, + pages = {150--165}, + year = {2015}, + publisher = {Elsevier} +} +@inproceedings{koolen2015second, + title = {Second-order quantile methods for experts and combinatorial games}, + author = {Koolen, Wouter M and Van Erven, Tim}, + booktitle = {Conference on Learning Theory}, + pages = {1155--1175}, + year = {2015} +} +@article{koop2013forecasting, + title = {Forecasting the European carbon market}, + author = {Koop, Gary and Tole, Lise}, + journal = {Journal of the Royal Statistical Society: Series A (Statistics in Society)}, + volume = {176}, + number = {3}, + pages = {723--741}, + year = {2013}, + publisher = {Wiley Online Library} +} +@article{korotin2019integral, + title = {Integral Mixability: a Tool for Efficient Online Aggregation of Functional and Probabilistic Forecasts}, + author = {Korotin, Alexander and V'yugin, Vladimir and Burnaev, Evgeny}, + journal = {arXiv preprint arXiv:1912.07048}, + year = {2019} +} +@inproceedings{korotin2020mixing, + title = {Mixing past predictions}, + author = {Korotin, Alexander and V'yugin, Vladimir and Burnaev, Evgeny}, + booktitle = {Conformal and Probabilistic 
Prediction and Applications}, + pages = {171--188}, + year = {2020}, + organization = {PMLR} +} +@article{lichtendahl2013better, + title = {Is it better to average probabilities or quantiles?}, + author = {Lichtendahl Jr, Kenneth C and Grushka-Cockayne, Yael and Winkler, Robert L}, + journal = {Management Science}, + volume = {59}, + number = {7}, + pages = {1594--1611}, + year = {2013}, + publisher = {INFORMS} +} +@article{lin2018multi, + title = {A multi-model combination approach for probabilistic wind power forecasting}, + author = {Lin, You and Yang, Ming and Wan, Can and Wang, Jianhui and Song, Yonghua}, + journal = {IEEE Transactions on Sustainable Energy}, + volume = {10}, + number = {1}, + pages = {226--237}, + year = {2018}, + publisher = {IEEE} +} +@article{littlestone1994weighted, + title = {The weighted majority algorithm}, + author = {Littlestone, Nick and Warmuth, Manfred K}, + journal = {Information and computation}, + volume = {108}, + number = {2}, + pages = {212--261}, + year = {1994}, + publisher = {Elsevier} +} +@article{lu2015jackknife, + title = {Jackknife model averaging for quantile regressions}, + author = {Lu, Xun and Su, Liangjun}, + journal = {Journal of Econometrics}, + volume = {188}, + number = {1}, + pages = {40--58}, + year = {2015}, + publisher = {Elsevier} +} +@article{maciejowska2020pca, + title = {PCA Forecast Averaging—Predicting Day-Ahead and Intraday Electricity Prices}, + author = {Maciejowska, Katarzyna and Uniejewski, Bartosz and Serafin, Tomasz}, + journal = {Energies}, + volume = {13}, + number = {14}, + pages = {3530}, + year = {2020}, + publisher = {Multidisciplinary Digital Publishing Institute} +} +@article{mhammedi2019lipschitz, + title = {Lipschitz adaptivity with multiple learning rates in online learning}, + author = {Mhammedi, Zakaria and Koolen, Wouter M and Van Erven, Tim}, + journal = {arXiv preprint arXiv:1902.10797}, + year = {2019} +} +@article{nowotarski2018recent, + title = {Recent advances in 
electricity price forecasting: A review of probabilistic forecasting}, + author = {Nowotarski, Jakub and Weron, Rafa{\l}}, + journal = {Renewable and Sustainable Energy Reviews}, + volume = {81}, + pages = {1548--1568}, + year = {2018}, + publisher = {Elsevier} +} +@article{opschoor2017combining, + title = {Combining density forecasts using focused scoring rules}, + author = {Opschoor, Anne and Van Dijk, Dick and van der Wel, Michel}, + journal = {Journal of Applied Econometrics}, + volume = {32}, + number = {7}, + pages = {1298--1313}, + year = {2017}, + publisher = {Wiley Online Library} +} +@article{petropoulos2020forecasting, + title = {Forecasting: theory and practice}, + author = {Petropoulos, Fotios and Apiletti, Daniele and Assimakopoulos, Vassilios and Babai, Mohamed Zied and Barrow, Devon K and Bergmeir, Christoph and Bessa, Ricardo J and Boylan, John E and Browell, Jethro and Carnevale, Claudio and others}, + journal = {arXiv preprint arXiv:2012.03854}, + year = {2020} +} +@article{raftery2005using, + title = {Using Bayesian model averaging to calibrate forecast ensembles}, + author = {Raftery, Adrian E and Gneiting, Tilmann and Balabdaoui, Fadoua and Polakowski, Michael}, + journal = {Monthly weather review}, + volume = {133}, + number = {5}, + pages = {1155--1174}, + year = {2005} +} +@article{segnon2017modeling, + title = {Modeling and forecasting the volatility of carbon dioxide emission allowance prices: A review and comparison of modern volatility models}, + author = {Segnon, Mawuli and Lux, Thomas and Gupta, Rangan}, + journal = {Renewable and Sustainable Energy Reviews}, + volume = {69}, + pages = {692--704}, + year = {2017}, + publisher = {Elsevier} +} +@article{thorey2017online, + title = {Online learning with the Continuous Ranked Probability Score for ensemble forecasting}, + author = {Thorey, Jean and Mallet, Vivien and Baudin, Paul}, + journal = {Quarterly Journal of the Royal Meteorological Society}, + volume = {143}, + number = {702}, + 
pages = {521--529}, + year = {2017}, + publisher = {Wiley Online Library} +} +@article{thorey2018ensemble, + title = {Ensemble forecast of photovoltaic power with online CRPS learning}, + author = {Thorey, Jean and Chaussin, Christophe and Mallet, Vivien}, + journal = {International Journal of Forecasting}, + volume = {34}, + number = {4}, + pages = {762--773}, + year = {2018}, + publisher = {Elsevier} +} +@article{tu2011markowitz, + title = {Markowitz meets Talmud: A combination of sophisticated and naive diversification strategies}, + author = {Tu, Jun and Zhou, Guofu}, + journal = {Journal of Financial Economics}, + volume = {99}, + number = {1}, + pages = {204--215}, + year = {2011}, + publisher = {Elsevier} +} +@article{v2020online, + title = {Online Aggregation of Probabilistic Forecasts Based on the Continuous Ranked Probability Score}, + author = {V’yugin, VV and Trunov, VG}, + journal = {Journal of Communications Technology and Electronics}, + volume = {65}, + number = {6}, + pages = {662--676}, + year = {2020}, + publisher = {Springer} +} +@article{van2018probabilistic, + title = {Probabilistic forecasting of solar power, electricity consumption and net load: Investigating the effect of seasons, aggregation and penetration on prediction intervals}, + author = {Van der Meer, DW and Munkhammar, Joakim and Wid{\'e}n, Joakim}, + journal = {Solar Energy}, + volume = {171}, + pages = {397--413}, + year = {2018}, + publisher = {Elsevier} +} +@article{vovk1990aggregating, + title = {Aggregating strategies}, + author = {Vovk, Volodimir G}, + journal = {Proc. 
of Computational Learning Theory, 1990}, + year = {1990} +} +@book{wahba1990spline, + title = {Spline models for observational data}, + author = {Wahba, Grace}, + year = {1990}, + publisher = {SIAM} +} +@book{wang2011smoothing, + title = {Smoothing splines: methods and applications}, + author = {Wang, Yuedong}, + year = {2011}, + publisher = {CRC Press} +} +@article{wang2019jackknife, + title = {Jackknife Model Averaging for Composite Quantile Regression}, + author = {Wang, Miaomiao and Zou, Guohua}, + journal = {arXiv preprint arXiv:1910.12209}, + year = {2019} +} +@article{wintenberger2017optimal, + title = {Optimal learning with Bernstein online aggregation}, + author = {Wintenberger, Olivier}, + journal = {Machine Learning}, + volume = {106}, + number = {1}, + pages = {119--141}, + year = {2017}, + publisher = {Springer} +} +@article{zamo2020sequential, + title = {Sequential Aggregation of Probabilistic Forecasts--Application to Wind Speed Ensemble Forecasts}, + author = {Zamo, Micha{\"e}l and Bel, Liliane and Mestre, Olivier}, + journal = {arXiv preprint arXiv:2005.03540}, + year = {2020} +} +@article{zhang2020load, + title = {Load probability density forecasting by transforming and combining quantile forecasts}, + author = {Zhang, Shu and Wang, Yi and Zhang, Yutian and Wang, Dan and Zhang, Ning}, + journal = {Applied Energy}, + volume = {277}, + pages = {115600}, + year = {2020}, + publisher = {Elsevier} +} \ No newline at end of file diff --git a/25_07_phd_defense/index.qmd b/25_07_phd_defense/index.qmd index c6eb21b..bab3782 100644 --- a/25_07_phd_defense/index.qmd +++ b/25_07_phd_defense/index.qmd @@ -47,650 +47,694 @@ $$ ::: -## EfeMOD +## PhD Defence -**Empirisch fundierte Elektrizitätsmarkt-Modellierung mit Open Data** +```{r, setup, include=FALSE} +# Compile with: rmarkdown::render("crps_learning.Rmd") +library(latex2exp) +library(ggplot2) +library(dplyr) +library(tidyr) +library(purrr) +library(kableExtra) +knitr::opts_chunk$set( + dev = "svglite" # 
Use svg figures +) +library(RefManageR) +BibOptions( + check.entries = TRUE, + bib.style = "authoryear", + cite.style = "authoryear", + style = "html", + hyperlink = TRUE, + dashed = FALSE +) +my_bib <- ReadBib("assets/library.bib", check = FALSE) +col_lightgray <- "#e7e7e7" +col_blue <- "#000088" +col_smooth_expost <- "#a7008b" +col_smooth <- "#187a00" +col_pointwise <- "#008790" +col_constant <- "#dd9002" +col_optimum <- "#666666" +``` + +```{r xaringan-panelset, echo=FALSE} +xaringanExtra::use_panelset() +``` + +```{r xaringanExtra-freezeframe, echo=FALSE} +xaringanExtra::use_freezeframe(responsive = TRUE) +``` + +# Outline + +- [Motivation](#motivation) +- [The Framework of Prediction under Expert Advice](#pred_under_exp_advice) +- [The Continious Ranked Probability Scrore](#crps) +- [Optimality of (Pointwise) CRPS-Learning](#crps_optim) +- [A Simple Probabilistic Example](#simple_example) +- [The Proposed CRPS-Learning Algorithm](#proposed_algorithm) +- [Simulation Results](#simulation) +- [Possible Extensions](#extensions) +- [Application Study](#application) +- [Wrap-Up](#conclusion) +- [References](#references) + +--- + +class: center, middle, sydney-blue + +# Motivation + +name: motivation + +## Motivation :::: {.columns} -::: {.column width="65%"} +::: {.column width="48%"} -[{{< fa users-gear >}}]{style="color: #404040;"} **Project Entities:** +The Idea: -Chair of Prof. Dr. Christoph Weber (Management Sciences and Energy Economics) +- Combine multiple forecasts instead of choosing one -Chair of Prof. Dr. Florian Ziel (Data Science in Energy and Environment) +- Combination weights may vary over **time**, over the **distribution** or **both** -[{{< fa bullseye >}}]{style="color: #404040;"}   **Project Goal:** - -Use publicly available data (particularly ENTSO-E Transparency Platform) to estimate parameters for energy system and energy market models. 
- -::: - -::: {.column width="5%"} - -::: - -::: {.column width="30%"} -![](figures/BMWK.webp) - -::: - -:::: - -## EfeMOD - -![](figures/power_plant_list.jpg) - -## Motivation and Objective - -**Identification of Power Plant Operation States Using Clustering** - -[{{< fa earth-europe >}}]{style="color: #404040;"} Gain Knowledge about the Power Plant Characteristics - -- Operation Points, -- Efficiency -- Capacity, etc. - -[{{< fa display >}}]{style="color: #404040;"} This Presentation: - -Identify Operation States: - -- Stable Operation -- Startup -- Minimum-Stable Operation, etc. - -Provide these characteristics to other researchers - -[{{< fa right-long >}}]{style="color: #404040;"} e.g. to estimate efficiency - -## Data - -[{{< fa database >}}]{style="color:#404040;"} Entsoe Data: - -- ActualGenerationOutputPerGenerationUnit_16.1.A -- UnavailabilityOfGenerationUnits_15.1.A_B - -[{{< fa fire-flame-simple >}}]{style="color:rgb(0, 200, 255);"} We focus on natural gas units: - -- 63 units in `DE_LU` bidding zone -- 299 units across all bidding zones - -[{{< fa calendar-days >}}]{style="color:#404040;"} We use recent data: - -- 2020-01-01 until "now" - -## Data - -![](figures/Block%20AGuD/0_data1.jpg) - -## Data - -![](figures/Block%20AGuD/0_data2.jpg) - -## Data - - -::: {.panel-tabset} - -## Lausward - -:::: {.columns} - -::: {.column width="42%"} - -**Heizkraftwerk Lausward ** - -Location: Düsseldorf - -Block Anton (*Block AGuD*) - -Combined cycle gas turbine (CCGT) - -Electrical output: 103 MW [{{< fa bolt >}}]{style="color: #ffc400;"} - -75 MW of district heating can be decoupled - -Efficiency: 54% - -Fuel Utilization Rate: 87% (with district heating) - -::: - -::: {.column width="3%"} - -::: - -::: {.column width="55%"} - -![](figures/Block%20AGuD/0_data3.jpg) - -::: - -:::: - -## Emsland - -:::: {.columns} - -::: {.column width="42%"} - -**Erdgaskraftwerk Emsland** - -Location: Lingen (Ems) - -*Block C* - -Combined cycle gas turbine (CCGT) - -Electrical 
output: 475 MW [{{< fa bolt >}}]{style="color: #ffc400;"} - -Efficiency: 46% - -Black start enabled. - -::: - -::: {.column width="3%"} - -::: - -::: {.column width="55%"} - -![](figures/Emsland%20C/0_data3.jpg) - -::: - -:::: - -::: - - - -## Empirical Approach - -:::: {.columns} - -::: {.column width="42%"} - -### Overview - -Empirical identification of states - -3-Step Approach: - -- Prior Partitioning - - We create preliminary clusters - - They will be used to initialize the main clustering -- Main Clustering - - Gaussian Model Based Clustering -- Label Assignment - - We assign meaningful labels to the final clusters - -::: - -::: {.column width="3%"} - -::: - -::: {.column width="55%"} - -![](figures/Block%20AGuD/0_data3.jpg) - -::: - -:::: - -::: - -## Empirical Approach - -:::: {.columns} - -::: {.column width="42%"} - -### Prior Partitioning - -[{{< fa arrow-up-right-dots >}}]{style="color: #202020FF;"} Divide the space in meaningful partitions: - -Define the Capacity: $\zeta = max(t0)$ - -Define a threshold: $\gamma = \frac{\zeta}{50}$ - -[{{< fa circle >}}]{style="color: #2D7D32FF;"} $\pm \gamma$ around the diagonal: Stable
-[{{< fa circle >}}]{style="color: #202020FF;"} $t0 < 1$ & $t1 < 1$: Zero
-[{{< fa circle >}}]{style="color: #FA8C00FF;"} $t0 < \gamma$ & $t1 > 1$: Startup
-[{{< fa circle >}}]{style="color: #D81A5FFF;"} $t0 > 1$ & $t1 < \gamma$: Shutdown
-[{{< fa circle >}}]{style="color: #FDD834FF;"} $t1 > t0$: Ramp-Up
-[{{< fa circle >}}]{style="color: #8D24AAFF;"} $t1 < t0$: Ramp-Down - -We project Stable observations onto the diagonal, Startup on $t1$ and Shutdown on $t0$ for the next step. - -::: - -::: {.column width="3%"} - -::: - -::: {.column width="55%"} - -![](figures/Block%20AGuD/1_pre-partition.jpg) - -::: - -:::: - -::: - -## Empirical Approach - -:::: {.columns} - -::: {.column width="42%"} - -### Prior Partitioning - -Model-Based Clustering of the Regions using `mclust::Mclust` in `R`. - -- Stable: 2-5 Clusters -- Ramp Up: 2-4 Clusters -- Ramp Down: 2-4 Clusters - -[{{< fa lightbulb >}}]{style="color:rgb(255, 166, 0);"} Obtain finite mixture distribution: - -$$\sum_{k=1}^{G}{\pi_k f_k (\mathbf{x}; \mathbf{\theta}_k)}$$ - -$f_k$ Density of k's component
-$\pi_k$ Mixture weights
-$\theta_k$ parameters of k's density component - - -::: - -::: {.column width="3%"} - -::: - -::: {.column width="55%"} - -![](figures/Block%20AGuD/1_pre-partition.jpg) - -::: - -:::: - -## Empirical Approach - -### Prior Partitioning -:::: {.columns} - -::: {.column width="49%"} - -$$f(\mathbf{x}; \mathbf{\Psi}) = \sum_{k=1}^{G}{\pi_k \phi (\mathbf{x}; \mathbf{\mu}_k; \mathbf{\Sigma}_k)}$$ - -$\phi(\cdot)$ Multivariate Gaussian density
- -Maximum Likelihood Estimation via Expectation Maximization (EM) algorithm - -Likelihood for Gaussian Mixture Models (GMMs): - -\begin{align} - \ell(\Psi) = \sum_{i=1}^n \log \left\{ \sum_{k=1}^G \pi_k \phi(x_i; \mu_k, \Sigma_k) \right\} -\end{align} - -[{{< fa retweet >}}]{style="color: #404040;"} We Re-Formulate this likelihood to a complete-data likelihood to utilize the EM algorithm +2 Popular options for combining distributions: +- Combining across quantiles (this paper) + - Horizontal aggregation, vincentization +- Combining across probabilities + - Vertical aggregation ::: ::: {.column width="2%"} - -::: -::: {.column width="49%"} - -\begin{align} - \ell_{\mathcal{C}}(\Psi) = \sum_{i=1}^n \sum_{k=1}^G z_{ik} \left\{ \log \pi_k + \log \phi(x_i; \mu_k, \Sigma_k) \right\} -\end{align} - -\begin{align} - z_{ik} = - \begin{cases} - 1 & \text{if } x_i \text{ belongs to component }k \\ - 0 & \text{otherwise.} - \end{cases} -\end{align} - -E-Step: - -\begin{align} - \hat{z}_{ik} = \frac{\hat{\pi}_k \phi(x_i; \hat{\mu}_k, \hat{\Sigma}_k)}{\sum_{g=1}^{G} \hat{\pi}_g \phi(x_i; \hat{\mu}_g, \hat{\Sigma}_g)}, -\end{align} - -M-Step: - -\begin{align} -\quad \hat{\mu}_k = \frac{\sum_{i=1}^{n} \hat{z}_{ik} x_i}{n_k}, \quad \text{where} \quad n_k = \sum_{i=1}^{n} \hat{z}_{ik}. -\end{align} - - -::: - -:::: - - - -::: {.notes} - -- log-likelihood in (2.2) is hard to maximize directly -- even numerically - -- As a consequence, mixture models are usually fitted by reformulating the mixture -problem as an incomplete-data problem within the EM framework. 
- -General EM Steps: - -- Init -- Estimate latent component memberships -- M-Step obtain the updated parameter estimates -- Check convergence criteria - -::: - -## Empirical Approach - -:::: {.columns} - -::: {.column width="42%"} - -### Prior Partitioning - -**Initialization** - -We initialize the EM algorithm (E-Step) using the partitions -obtained from model-based agglomerative hierarchical clustering (MBAHC) - -**Estimation** - -The Bayesian information criterion (BIC) is used for model selection - -**Prior Partitioning Results** - -Right graph shows prior clusters. - -::: - -::: {.column width="3%"} - -::: - -::: {.column width="55%"} - -::: {.panel-tabset} - -## Lausward - -![](figures/Block%20AGuD/2_partition.jpg) - -## Emsland - -![](figures/Emsland%20C/2_partition.jpg) - -::: - -::: - -:::: - -::: {.notes} - -recursively merging the two clusters that yield the maximum -likelihood of a probability model over all possible merges - -::: - -## Empirical Approach - -:::: {.columns} - -::: {.column width="42%"} - -### Main Clustering - -**MBAHC** - -Prior Clusters are used in MBAHC - -The results of the MBAHC are used to initialize the EM Algorithm in the main Gaussian Model Based Clustering - -**Main Clustering Results** - -Right graph shows *Maximum A Posteriori (MAP) Classification* - -Colour indicates cumulated log(density) of all components. 
- -::: - -::: {.column width="3%"} - -::: - -::: {.column width="55%"} - -::: {.panel-tabset} - -## Lausward - -![](figures/Block%20AGuD/3_cluster.jpg) - -## Emsland - -![](figures/Emsland%20C/3_cluster.jpg) - -::: - -::: - -:::: - -::: {.notes} - -recursively merging the two clusters that yield the maximum -likelihood of a probability model over all possible merges - -::: - - - -## Empirical Approach - -### Label Assignment - -:::: {.columns} - -::: {.column width="48%"} - -We assign labels to the clusters using their mean $\mu$ and correlation $\rho$ - -Multiple clusters may describe one Generation State (e.g., along the diagonal) - -::: - -::: {.column width="4%"} - ::: ::: {.column width="48%"} -```{r} -library(dplyr) -load("figures/Block AGuD/clusters.RDS") -clusters %>% - select(classification, mu_t0, mu_t1, cor) %>% - head() +::: {.panel-tabset} + +## Time + +```{r, echo = FALSE, fig.height=6} +par(mfrow = c(3, 3), mar = c(2, 2, 2, 2)) +set.seed(1) +# Data +X <- matrix(ncol = 3, nrow = 15) +X[, 1] <- seq(from = 8, to = 12, length.out = 15) + 0.25 * rnorm(15) +X[, 2] <- 10 + 0.25 * rnorm(15) +X[, 3] <- seq(from = 12, to = 8, length.out = 15) + 0.25 * rnorm(15) +# Weights +w <- matrix(ncol = 3, nrow = 15) +w[, 1] <- sin(0.1 * 1:15) +w[, 2] <- cos(0.1 * 1:15) +w[, 3] <- seq(from = -2, 0.25, length.out = 15)^2 +w <- (w / rowSums(w)) +# Vis +plot(X[, 1], + lwd = 4, + type = "l", + ylim = c(8, 12), + xlab = "", + ylab = "", + xaxt = "n", + yaxt = "n", + bty = "n", + col = "#2050f0" +) +plot(w[, 1], + lwd = 4, type = "l", + ylim = c(0, 1), + xlab = "", + ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "#2050f0" +) +text(6, 0.5, TeX("$w_1(t)$"), cex = 2, col = "#2050f0") +arrows(13, 0.25, 15, 0.0, , lwd = 4, bty = "n") +plot.new() +plot(X[, 2], + lwd = 4, + type = "l", ylim = c(8, 12), + xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "purple" +) +plot(w[, 2], + lwd = 4, type = "l", + ylim = c(0, 1), + xlab = "", + ylab = "", xaxt = "n", yaxt = 
"n", bty = "n", col = "purple" +) +text(6, 0.6, TeX("$w_2(t)$"), cex = 2, col = "purple") +arrows(13, 0.5, 15, 0.5, , lwd = 4, bty = "n") +plot(rowSums(X * w), lwd = 4, type = "l", xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "#298829") +plot(X[, 3], + lwd = 4, + type = "l", ylim = c(8, 12), + xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "#e423b4" +) +plot(w[, 3], + lwd = 4, type = "l", + ylim = c(0, 1), + xlab = "", + ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "#e423b4" +) +text(6, 0.25, TeX("$w_3(t)$"), cex = 2, col = "#e423b4") +arrows(13, 0.75, 15, 1, , lwd = 4, bty = "n") +``` + +## Distribution + +```{r, echo = FALSE, fig.height=6} +par(mfrow = c(3, 3), mar = c(2, 2, 2, 2)) +set.seed(1) +# Data +X <- matrix(ncol = 3, nrow = 31) + +X[, 1] <- dchisq(0:30, df = 10) +X[, 2] <- dnorm(0:30, mean = 15, sd = 5) +X[, 3] <- dexp(0:30, 0.2) +# Weights +w <- matrix(ncol = 3, nrow = 31) +w[, 1] <- sin(0.05 * 0:30) +w[, 2] <- cos(0.05 * 0:30) +w[, 3] <- seq(from = -2, 0.25, length.out = 31)^2 +w <- (w / rowSums(w)) +# Vis +plot(X[, 1], + lwd = 4, + type = "l", + xlab = "", + ylab = "", + xaxt = "n", + yaxt = "n", + bty = "n", + col = "#2050f0" +) +plot(X[, 2], + lwd = 4, + type = "l", + xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "purple" +) +plot(X[, 3], + lwd = 4, + type = "l", + xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "#e423b4" +) +plot(w[, 1], + lwd = 4, type = "l", + ylim = c(0, 1), + xlab = "", + ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "#2050f0" +) +text(12, 0.5, TeX("$w_1(x)$"), cex = 2, col = "#2050f0") +arrows(26, 0.25, 31, 0.0, , lwd = 4, bty = "n") +plot(w[, 2], + lwd = 4, type = "l", + ylim = c(0, 1), + xlab = "", + ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "purple" +) +text(15, 0.5, TeX("$w_2(x)$"), cex = 2, col = "purple") +arrows(15, 0.25, 15, 0, , lwd = 4, bty = "n") +plot(w[, 3], + lwd = 4, type = "l", + ylim = c(0, 1), + xlab = "", + ylab = "", 
xaxt = "n", yaxt = "n", bty = "n", col = "#e423b4" +) +text(20, 0.5, TeX("$w_3(x)$"), cex = 2, col = "#e423b4") +arrows(5, 0.25, 0, 0, , lwd = 4, bty = "n") +plot.new() +plot(rowSums(X * w), lwd = 4, type = "l", xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n", col = "#298829") ``` ::: -:::: - -\begin{align} -\text{State} = - \begin{cases} - \color{#202020FF}{\text{Zero}} & (\mu_{t0} < 1) \land (\mu_{t1} < 1), \\ - \text{MSO} & \left[ (\mu_{t0} > \zeta/10) \land (\mu_{t1} > \zeta / 10) \land (\right| \mu_{t0} - \mu_{t1} \left| > \zeta / 10) \right]\\ & \rightarrow \operatorname{argmin}(\mu_{t0} + \mu_{t1}), \\ - \text{Max Capacity} & \rightarrow \operatorname{argmax}(\mu_{t0} + \mu_{t1}), \\ - \text{Startup} & (\mu_{t1} \geq \zeta / 10) \land (\mu_{t0} < \gamma) \land (\rho < 0.3), \\ - \text{Shutdown} & (\mu_{t0} \geq \zeta / 10) \land (\mu_{t1} < \gamma) \land (\rho < 0.3), \\ - \text{Stable Operation} & \text{Remaining clusters with cor} > 0.8, \\ - \text{Ramp Up} & \text{Remaining clusters: } \mu_{t1} > \mu_{t0}, \\ - \text{Ramp Down} & \text{Remaining clusters: } \mu_{t1} < \mu_{t0}. - \end{cases} -\end{align} - -::: {.notes} - -recursively merging the two clusters that yield the maximum -likelihood of a probability model over all possible merges - ::: -## Empirical Approach +:::: + +# The Framework of Prediction under Expert Advice + +## The Framework of Prediction under Expert Advice + +### The sequential framework :::: {.columns} -::: {.column width="39%"} +::: {.column width="48%"} -### Label Assignment - -Right graphs show *assigned states* - -The points are coloured according to - -- MAP -- Probability (each pure colour reflects a probability of 1) - -Some points below /above the diagonal are assigned to Ramp Up / Ramp Down - -- Can be easily fixed for MAP -- Fixing probabilistic predictions not that easy +Each day, $t = 1, 2, ... 
T$ +- The **forecaster** receives predictions $\widehat{X}_{t,k}$ from $K$ **experts** +- The **forecaster** assigns weights $w_{t,k}$ to each **expert** +- The **forecaster** calculates her prediction: +\begin{equation} + \widetilde{X}_{t} = \sum_{k=1}^K w_{t,k} \widehat{X}_{t,k}. + \label{eq_forecast_def} +\end{equation} +- The realization for $t$ is observed ::: ::: {.column width="2%"} - -::: - -::: {.column width="59%"} - -::: {.panel-tabset} - -## LSW - -![](figures/Block%20AGuD/4_assignments.jpg) - -## LSW Pr - -![](figures/Block%20AGuD/4_assignments_prob.jpg) - -## LSW Pr - -![](figures/Block%20AGuD/4_probability.jpg) - -## EMS - -![](figures/Emsland%20C/4_assignments.jpg) - -## EMS Pr - -![](figures/Emsland%20C/4_assignments_prob.jpg) - -## EMS Pr - -![](figures/Emsland%20C/4_probability.jpg) ::: +::: {.column width="48%"} + +- The experts can be institutions, persons, or models +- The forecasts can be point-forecasts (i.e., mean or median) or full predictive distributions +- We do not need any assumptions concerning the underlying data +- `r Citet(my_bib, "cesa2006prediction")` + ::: :::: --- ## The Regret Weights are updated sequentially according to the past performance of the $K$ experts. That is, a loss function $\ell$ is needed. This is used to compute the **cumulative regret** $R_{t,k}$ \begin{equation} + R_{t,k} = \widetilde{L}_{t} - \widehat{L}_{t,k} = \sum_{i = 1}^t \ell(\widetilde{X}_{i},Y_i) - \ell(\widehat{X}_{i,k},Y_i) + \label{eq_regret} +\end{equation} + +The cumulative regret: +- Indicates the predictive accuracy of the expert $k$ until time $t$. 
+- Measures how much the forecaster *regrets* not having followed the expert's advice + +Popular loss functions for point forecasting `r Citet(my_bib, "gneiting2011making")`: +.pull-left[ +- $\ell_2$-loss $\ell_2(x, y) = | x -y|^2$ + - optimal for mean prediction +] +.pull-right[ +- $\ell_1$-loss $\ell_1(x, y) = | x -y|$ + - optimal for median predictions +] -## Empirical Approach :::: {.columns} -::: {.column width="42%"} +::: {.column width="48%"} -### Label Assignment - -*Fixing assignments* - -Relabeling Ramp Up and Ramp Down MAP predictions is trivial: - -\begin{align} -\text{State} = - \begin{cases} - \text{Ramp Up} & x_{t1} > x_{t0}, \\ - \text{Ramp Down} & x_{t1} < x_{t0}. - \end{cases} -\end{align} - -Fixing the probability array is more involved: - -Find observations $x_{t1} < x_{t0}$ that can not be "Ramp Up": - -Set probability of all Ramp Up clusters to $0$. - -Normalize the probabilities. +- $\ell_2$-loss $\ell_2(x, y) = | x -y|^2$ + - optimal for mean prediction ::: -::: {.column width="3%"} - -::: - -::: {.column width="55%"} - -::: {.panel-tabset} - - -## LSW Pr - -![](figures/Block%20AGuD/4_assignments_prob_fixed.jpg) - -## LSW Pr - -![](figures/Block%20AGuD/4_probability_fixed.jpg) - -## EMS Pr - -![](figures/Emsland%20C/4_assignments_prob_fixed.jpg) - -## EMS Pr - -![](figures/Emsland%20C/4_probability_fixed.jpg) +::: {.column width="2%"} ::: +::: {.column width="48%"} + +- $\ell_1$-loss $\ell_1(x, y) = | x -y|$ + - optimal for median predictions + ::: :::: -## Outlook -
-
+## Popular Aggregation Algorithms + +#### The naive combination + +\begin{equation} + w_{t,k}^{\text{Naive}} = \frac{1}{K} +\end{equation} + +#### The exponentially weighted average forecaster (EWA) + +\begin{align} + w_{t,k}^{\text{EWA}} & = \frac{e^{\eta R_{t,k}} }{\sum_{k = 1}^K e^{\eta R_{t,k}}} + = + \frac{e^{-\eta \ell(\widehat{X}_{t,k},Y_t)} w^{\text{EWA}}_{t-1,k} }{\sum_{k = 1}^K e^{-\eta \ell(\widehat{X}_{t,k},Y_t)} w^{\text{EWA}}_{t-1,k} } + \label{eq_ewa_general} +\end{align} + +#### The polynomial weighted aggregation (PWA) + +\begin{align} + w_{t,k}^{\text{PWA}} & = \frac{ 2(R_{t,k})^{q-1}_{+} }{ \|(R_t)_{+}\|^{q-2}_q} + \label{eq_pwa_general} +\end{align} + +with $q\geq 2$ and $x_{+}$ the (vector) of positive parts of $x$. + +## Optimality + +In stochastic settings, the cumulative Risk should be analyzed `r Citet(my_bib, "wintenberger2017optimal")`: + +\begin{align} + \underbrace{\widetilde{\mathcal{R}}_t = \sum_{i=1}^t \mathbb{E}[\ell(\widetilde{X}_{i},Y_i)|\mathcal{F}_{i-1}]}_{\text{Cumulative Risk of Forecaster}} \qquad\qquad\qquad \text{ and } \qquad\qquad\qquad + \underbrace{\widehat{\mathcal{R}}_{t,k} = \sum_{i=1}^t \mathbb{E}[\ell(\widehat{X}_{i,k},Y_i)|\mathcal{F}_{i-1}]}_{\text{Cumulative Risk of Experts}} + \label{eq_def_cumrisk} +\end{align} + +There are two problems that an algorithm should solve in iid settings: + +:::: {.columns} + +::: {.column width="48%"} + +### The selection problem +\begin{equation} + \frac{1}{t}\left(\widetilde{\mathcal{R}}_t - \widehat{\mathcal{R}}_{t,\min} \right) \stackrel{t\to \infty}{\rightarrow} a \quad \text{with} \quad a \leq 0. + \label{eq_opt_select} +\end{equation} +The forecaster is asymptotically not worse than the best expert $\widehat{\mathcal{R}}_{t,\min}$. 
 + +::: + +::: {.column width="2%"} + +::: + +::: {.column width="48%"} + +### The convex aggregation problem + +\begin{equation} + \frac{1}{t}\left(\widetilde{\mathcal{R}}_t - \widehat{\mathcal{R}}_{t,\pi} \right) \stackrel{t\to \infty}{\rightarrow} b \quad \text{with} \quad b \leq 0 . + \label{eq_opt_conv} +\end{equation} +The forecaster is asymptotically not worse than the best convex combination $\widehat{X}_{t,\pi}$ in hindsight (**oracle**). + +::: + +:::: + +## Optimality + +Satisfying the convexity property \eqref{eq_opt_conv} comes at the cost of slower possible convergence. + +According to `r Citet(my_bib, "wintenberger2017optimal")`, an algorithm has optimal rates with respect to selection \eqref{eq_optp_select} and convex aggregation \eqref{eq_optp_conv} if + +\begin{align} + \frac{1}{t}\left(\widetilde{\mathcal{R}}_t - \widehat{\mathcal{R}}_{t,\min} \right) & = + \mathcal{O}\left(\frac{\log(K)}{t}\right)\label{eq_optp_select} +\end{align} + +\begin{align} + \frac{1}{t}\left(\widetilde{\mathcal{R}}_t - \widehat{\mathcal{R}}_{t,\pi} \right) & = + \mathcal{O}\left(\sqrt{\frac{\log(K)}{t}}\right) + \label{eq_optp_conv} +\end{align} + +Algorithms can satisfy both \eqref{eq_optp_select} and \eqref{eq_optp_conv} depending on: + +- The loss function +- Regularity conditions on $Y_t$ and $\widehat{X}_{t,k}$ +- The weighting scheme + +## Optimality + +According to `r Citet(my_bib, "cesa2006prediction")` EWA \eqref{eq_ewa_general} satisfies the optimal selection convergence \eqref{eq_optp_select} in a deterministic setting if the: +- Loss $\ell$ is exp-concave +- Learning-rate $\eta$ is chosen correctly + +Those results can be converted to stochastic iid settings `r Citet(my_bib, "kakade2008generalization")` `r Citet(my_bib, "gaillard2014second")`. + +The optimal convex aggregation convergence \eqref{eq_optp_conv} can be satisfied by applying the kernel-trick. 
Thereby, the loss is linearized: +\begin{align} +\ell^{\nabla}(x,y) = \ell'(\widetilde{X},y) x +\end{align} +$\ell'$ is the subgradient of $\ell$ in its first coordinate evaluated at forecast combination $\widetilde{X}$. + +Combining probabilistic forecasts calls for a probabilistic loss function + +:::: {.notes} + +We apply Bernstein Online Aggregation (BOA). It lets us weaken the exp-concavity condition while almost keeping the optimalities \ref{eq_optp_select} and \ref{eq_optp_conv}. + +:::: + +## The Continuous Ranked Probability Score + +:::: {.columns} + +::: {.column width="48%"} + +**An appropriate choice:** + +\begin{align*} + \text{CRPS}(F, y) & = \int_{\mathbb{R}} {(F(x) - \mathbb{1}\{ x > y \})}^2 dx + \label{eq_crps} +\end{align*} + +It's strictly proper `r Citet(my_bib, "gneiting2007strictly")`. + +Using the CRPS, we can calculate time-adaptive weight $w_{t,k}$. However, what if the experts' performance is not uniform over all parts of the distribution? + +The idea: utilize this relation: + +\begin{align*} + \text{CRPS}(F, y) = 2 \int_0^{1} \text{QL}_p(F^{-1}(p), y) \, d p. + \label{eq_crps_qs} +\end{align*} + +::: + +::: {.column width="2%"} + +::: + +::: {.column width="48%"} + +to combine quantiles of the probabilistic forecasts individually using the quantile-loss (QL): +\begin{align*} + \text{QL}_p(q, y) & = (\mathbb{1}\{y < q\} -p)(q - y) +\end{align*} + +
 + +**But is it optimal?** + +CRPS is exp-concave `r fontawesome::fa("check", fill ="#00b02f")` + +`r fontawesome::fa("arrow-right", fill ="#000000")` EWA \eqref{eq_ewa_general} with CRPS satisfies \eqref{eq_optp_select} and \eqref{eq_optp_conv} + +QL is convex, but not exp-concave `r fontawesome::fa("exclamation", fill ="#ffa600")` + +`r fontawesome::fa("arrow-right", fill ="#000000")` Bernstein Online Aggregation (BOA) lets us weaken the exp-concavity condition while almost keeping optimal convergence + +::: + +:::: + +## CRPS-Learning Optimality + +For convex losses, BOAG satisfies that there exists a $C>0$ such that for $x>0$ it holds that +\begin{equation} + P\left( \frac{1}{t}\left(\widetilde{\mathcal{R}}_t - \widehat{\mathcal{R}}_{t,\pi} \right) \leq C \log(\log(t)) \left(\sqrt{\frac{\log(K)}{t}} + \frac{\log(K)+x}{t}\right) \right) \geq + 1-e^{-x} + \label{eq_boa_opt_conv} +\end{equation} +`r fontawesome::fa("arrow-right", fill ="#000000")` Almost optimal w.r.t *convex aggregation* \eqref{eq_optp_conv} `r Citet(my_bib, "wintenberger2017optimal")` . + +The same algorithm satisfies that there exists a $C>0$ such that for $x>0$ it holds that +\begin{equation} + P\left( \frac{1}{t}\left(\widetilde{\mathcal{R}}_t - \widehat{\mathcal{R}}_{t,\min} \right) \leq + C\left(\frac{\log(K)+\log(\log(Gt))+ x}{\alpha t}\right)^{\frac{1}{2-\beta}} \right) \geq + 1-e^{-x} + \label{eq_boa_opt_select} +\end{equation} + +if $Y_t$ is bounded, the considered loss $\ell$ is convex $G$-Lipschitz and weak exp-concave in its first coordinate. + +This is for losses that satisfy **A1** and **A2**. 
+ +## CRPS-Learning Optimality + +:::: {.columns} + +::: {.column width="48%"} + +**A1** + +For some $G>0$ it holds +for all $x_1,x_2\in \mathbb{R}$ and $t>0$ that + +$$ | \ell(x_1, Y_t)-\ell(x_2, Y_t) | \leq G |x_1-x_2|$$ + +**A2** For some $\alpha>0$, $\beta\in[0,1]$ it holds +for all $x_1,x_2 \in \mathbb{R}$ and $t>0$ that + +\begin{align*} + \mathbb{E}[ + & \ell(x_1, Y_t)-\ell(x_2, Y_t) | \mathcal{F}_{t-1}] \leq \\ + & \mathbb{E}[ \ell'(x_1, Y_t)(x_1 - x_2) |\mathcal{F}_{t-1}] \\ + & + + \mathbb{E}\left[ \left. \left( \alpha(\ell'(x_1, Y_t)(x_1 - x_2))^{2}\right)^{1/\beta} \right|\mathcal{F}_{t-1}\right] +\end{align*} + +`r fontawesome::fa("arrow-right", fill ="#000000")` Almost optimal w.r.t *selection* \eqref{eq_optp_select} `r Citet(my_bib, "gaillard2018efficient")`. + +::: + +::: {.column width="2%"} + +::: + +::: {.column width="48%"} + +**Lemma 1** + +\begin{align} + 2\overline{\widehat{\mathcal{R}}}^{\text{QL}}_{t,\min} + & \leq \widehat{\mathcal{R}}^{\text{CRPS}}_{t,\min} + \label{eq_risk_ql_crps_expert} \\ + 2\overline{\widehat{\mathcal{R}}}^{\text{QL}}_{t,\pi} + & \leq \widehat{\mathcal{R}}^{\text{CRPS}}_{t,\pi} . + \label{eq_risk_ql_crps_convex} +\end{align} + +Pointwise can outperform constant procedures + +QL is convex but not exp-concave: + +`r fontawesome::fa("arrow-right")` Almost optimal convergence w.r.t. *convex aggregation* \eqref{eq_boa_opt_conv} `r fontawesome::fa("check", fill ="#00b02f")`
 + +For almost optimal convergence w.r.t. *selection* \eqref{eq_boa_opt_select} we need to check **A1** and **A2**: + +QL is Lipschitz continuous: + +`r fontawesome::fa("arrow-right")` **A1** holds `r fontawesome::fa("check", fill ="#ffa600")`
 + +::: + +:::: + + +## CRPS-Learning Optimality + +:::: {.columns} + +::: {.column width="48%"} + +Conditional quantile risk: $\mathcal{Q}_p(x) = \mathbb{E}[ \text{QL}_p(x, Y_t) | \mathcal{F}_{t-1}]$. + +`r fontawesome::fa("arrow-right")` convexity properties of $\mathcal{Q}_p$ depend on the +conditional distribution $Y_t|\mathcal{F}_{t-1}$. + +**Proposition 1** + +Let $Y$ be a univariate random variable with (Radon-Nikodym) $\nu$-density $f$, then for the second subderivative of the quantile risk +$\mathcal{Q}_p(x) = \mathbb{E}[ \text{QL}_p(x, Y) ]$ +of $Y$ it holds for all $p\in(0,1)$ that +$\mathcal{Q}_p'' = f.$ +Additionally, if $f$ is a continuous Lebesgue-density with $f\geq\gamma>0$ for some constant $\gamma>0$ on its support $\text{spt}(f)$ then +$\mathcal{Q}_p$ is $\gamma$-strongly convex. + +Strong convexity with $\beta=1$ implies **A2** `r fontawesome::fa("check", fill ="#ffa600")` `r Citet(my_bib, "gaillard2018efficient")` + +::: + +::: {.column width="2%"} + +::: + +::: {.column width="48%"} + +`r fontawesome::fa("arrow-right")` **A1** and **A2** give us almost optimal convergence w.r.t. selection \eqref{eq_boa_opt_select} `r fontawesome::fa("check", fill ="#00b02f")`
+ +**Theorem 1** + +The gradient based fully adaptive Bernstein online aggregation (BOAG) applied pointwise for all $p\in(0,1)$ on $\text{QL}$ satisfies +\eqref{eq_boa_opt_conv} with minimal CRPS given by + +$$\widehat{\mathcal{R}}_{t,\pi} = 2\overline{\widehat{\mathcal{R}}}^{\text{QL}}_{t,\pi}.$$ + +If $Y_t|\mathcal{F}_{t-1}$ is bounded +and has a pdf $f_t$ satifying $f_t>\gamma >0$ on its +support $\text{spt}(f_t)$ then \ref{eq_boa_opt_select} holds with $\beta=1$ and + +$$\widehat{\mathcal{R}}_{t,\min} = 2\overline{\widehat{\mathcal{R}}}^{\text{QL}}_{t,\min}$$. + +::: + +:::: + + +## A Probabilistic Example + + + +:::: {.columns} + +::: {.column width="48%"} + +Simple Example: + + +\begin{align} + Y_t & \sim \mathcal{N}(0,\,1) \\ + \widehat{X}_{t,1} & \sim \widehat{F}_{1} = \mathcal{N}(-1,\,1) \\ + \widehat{X}_{t,2} & \sim \widehat{F}_{2} = \mathcal{N}(3,\,4) + \label{eq:dgp_sim1} +\end{align} + +- True weights vary over $p$ +- Figures show the ECDF and calculated weights using $T=25$ realizations +- Pointwise solution creates rough estimates +- Pointwise is better than constant +- Smooth solution is better than pointwise + +::: + +::: {.column width="2%"} + +::: + +::: {.column width="48%"} + +foo + +::: + +:::: + + + + + + +## Columns Template + +:::: {.columns} + +::: {.column width="48%"} + +Baz + +::: + +::: {.column width="2%"} + +::: + +::: {.column width="48%"} + +foo + +::: + +:::: + + +# References + +```{r refs1, echo=FALSE, results="asis"} +PrintBibliography(my_bib, .opts = list(style = "text")) +``` -- The approach works in general -- Conceptually simple -- Label assignment needs some more work -- Probabilistic statements may need adjustments for Ramp-Up Ramp-Down predictions -- Some kind of validation would be desirable -- Results will be used party on another research project in the EFEMOD project