Add css colors, improve crps slides
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
window.MathJax = {
|
||||
tex: {
|
||||
tags: 'ams'
|
||||
}
|
||||
},
|
||||
displayAlign: "left",
|
||||
displayIndent: "0em",
|
||||
};
|
||||
|
||||
|
||||
203
custom.scss
203
custom.scss
@@ -1,3 +1,206 @@
|
||||
:root {
|
||||
--col_lightgray: #e7e7e7;
|
||||
--col_blue: #000088;
|
||||
--col_smooth_expost: #a7008b;
|
||||
--col_constant: #dd9002;
|
||||
--col_optimum: #666666;
|
||||
--col_smooth: #187a00;
|
||||
--col_pointwise: #008790;
|
||||
--col_green: #61B94C;
|
||||
--col_orange: #ffa600;
|
||||
--col_yellow: #FCE135;
|
||||
--col_amber_1: #FFF8E0FF;
|
||||
--col_amber_2: #FFEBB2FF;
|
||||
--col_amber_3: #FFDF81FF;
|
||||
--col_amber_4: #FFD44EFF;
|
||||
--col_amber_5: #FFCA27FF;
|
||||
--col_amber_6: #FFC006FF;
|
||||
--col_amber_7: #FFB200FF;
|
||||
--col_amber_8: #FF9F00FF;
|
||||
--col_amber_9: #FF8E00FF;
|
||||
--col_amber_10: #FF6E00FF;
|
||||
--col_blue_1: #E3F2FDFF;
|
||||
--col_blue_2: #BADEFAFF;
|
||||
--col_blue_3: #90CAF8FF;
|
||||
--col_blue_4: #64B4F6FF;
|
||||
--col_blue_5: #41A5F4FF;
|
||||
--col_blue_6: #2096F2FF;
|
||||
--col_blue_7: #1E87E5FF;
|
||||
--col_blue_8: #1976D2FF;
|
||||
--col_blue_9: #1465BFFF;
|
||||
--col_blue_10: #0C46A0FF;
|
||||
--col_blue-grey_1: #EBEEF1FF;
|
||||
--col_blue-grey_2: #CED8DCFF;
|
||||
--col_blue-grey_3: #B0BEC5FF;
|
||||
--col_blue-grey_4: #90A4ADFF;
|
||||
--col_blue-grey_5: #78909BFF;
|
||||
--col_blue-grey_6: #5F7D8BFF;
|
||||
--col_blue-grey_7: #536D79FF;
|
||||
--col_blue-grey_8: #455964FF;
|
||||
--col_blue-grey_9: #37464EFF;
|
||||
--col_blue-grey_10: #263238FF;
|
||||
--col_brown_1: #EEEBE9FF;
|
||||
--col_brown_2: #D7CCC7FF;
|
||||
--col_brown_3: #BBAAA4FF;
|
||||
--col_brown_4: #A0877FFF;
|
||||
--col_brown_5: #8C6D63FF;
|
||||
--col_brown_6: #795447FF;
|
||||
--col_brown_7: #6C4C40FF;
|
||||
--col_brown_8: #5D3F37FF;
|
||||
--col_brown_9: #4D332DFF;
|
||||
--col_brown_10: #3E2622FF;
|
||||
--col_cyan_1: #DFF7F9FF;
|
||||
--col_cyan_2: #B2EBF2FF;
|
||||
--col_cyan_3: #7FDEEAFF;
|
||||
--col_cyan_4: #4CD0E0FF;
|
||||
--col_cyan_5: #26C5D9FF;
|
||||
--col_cyan_6: #00BBD3FF;
|
||||
--col_cyan_7: #00ACC0FF;
|
||||
--col_cyan_8: #0097A6FF;
|
||||
--col_cyan_9: #00838EFF;
|
||||
--col_cyan_10: #005F64FF;
|
||||
--col_deep-orange_1: #FAE9E6FF;
|
||||
--col_deep-orange_2: #FFCCBBFF;
|
||||
--col_deep-orange_3: #FFAB91FF;
|
||||
--col_deep-orange_4: #FF8A65FF;
|
||||
--col_deep-orange_5: #FF7043FF;
|
||||
--col_deep-orange_6: #FF5721FF;
|
||||
--col_deep-orange_7: #F3511EFF;
|
||||
--col_deep-orange_8: #E54A19FF;
|
||||
--col_deep-orange_9: #D84314FF;
|
||||
--col_deep-orange_10: #BF350CFF;
|
||||
--col_deep-purple_1: #ECE6F6FF;
|
||||
--col_deep-purple_2: #D1C4E9FF;
|
||||
--col_deep-purple_3: #B29DDAFF;
|
||||
--col_deep-purple_4: #9474CCFF;
|
||||
--col_deep-purple_5: #7E57C1FF;
|
||||
--col_deep-purple_6: #6639B7FF;
|
||||
--col_deep-purple_7: #5E34B1FF;
|
||||
--col_deep-purple_8: #512CA7FF;
|
||||
--col_deep-purple_9: #45269FFF;
|
||||
--col_deep-purple_10: #311A92FF;
|
||||
--col_green_1: #E7F4E9FF;
|
||||
--col_green_2: #C7E5C9FF;
|
||||
--col_green_3: #A5D6A6FF;
|
||||
--col_green_4: #80C684FF;
|
||||
--col_green_5: #66BA6AFF;
|
||||
--col_green_6: #4CAE50FF;
|
||||
--col_green_7: #439F46FF;
|
||||
--col_green_8: #388D3BFF;
|
||||
--col_green_9: #2D7D32FF;
|
||||
--col_green_10: #1A5E1FFF;
|
||||
--col_grey_1: #F9F9F9FF;
|
||||
--col_grey_2: #F4F4F4FF;
|
||||
--col_grey_3: #EDEDEDFF;
|
||||
--col_grey_4: #DFDFDFFF;
|
||||
--col_grey_5: #BDBDBDFF;
|
||||
--col_grey_6: #9E9E9EFF;
|
||||
--col_grey_7: #747474FF;
|
||||
--col_grey_8: #606060FF;
|
||||
--col_grey_9: #414141FF;
|
||||
--col_grey_10: #202020FF;
|
||||
--col_indigo_1: #E7EAF6FF;
|
||||
--col_indigo_2: #C5CAE9FF;
|
||||
--col_indigo_3: #9FA7D9FF;
|
||||
--col_indigo_4: #7985CBFF;
|
||||
--col_indigo_5: #5B6BBFFF;
|
||||
--col_indigo_6: #3F51B4FF;
|
||||
--col_indigo_7: #3948ABFF;
|
||||
--col_indigo_8: #303F9FFF;
|
||||
--col_indigo_9: #273492FF;
|
||||
--col_indigo_10: #19227EFF;
|
||||
--col_light-blue_1: #E0F4FEFF;
|
||||
--col_light-blue_2: #B2E5FCFF;
|
||||
--col_light-blue_3: #80D3F9FF;
|
||||
--col_light-blue_4: #4EC3F7FF;
|
||||
--col_light-blue_5: #28B6F6FF;
|
||||
--col_light-blue_6: #02A9F3FF;
|
||||
--col_light-blue_7: #029AE5FF;
|
||||
--col_light-blue_8: #0187D1FF;
|
||||
--col_light-blue_9: #0177BDFF;
|
||||
--col_light-blue_10: #00579AFF;
|
||||
--col_light-green_1: #F1F8E9FF;
|
||||
--col_light-green_2: #DCECC7FF;
|
||||
--col_light-green_3: #C5E0A5FF;
|
||||
--col_light-green_4: #ADD480FF;
|
||||
--col_light-green_5: #9BCC65FF;
|
||||
--col_light-green_6: #8BC34AFF;
|
||||
--col_light-green_7: #7BB241FF;
|
||||
--col_light-green_8: #679F38FF;
|
||||
--col_light-green_9: #548B2EFF;
|
||||
--col_light-green_10: #33681EFF;
|
||||
--col_lime_1: #F8FAE6FF;
|
||||
--col_lime_2: #F0F3C3FF;
|
||||
--col_lime_3: #E5ED9BFF;
|
||||
--col_lime_4: #DCE674FF;
|
||||
--col_lime_5: #D3E057FF;
|
||||
--col_lime_6: #CCDC39FF;
|
||||
--col_lime_7: #BFCA33FF;
|
||||
--col_lime_8: #AEB32BFF;
|
||||
--col_lime_9: #9E9D24FF;
|
||||
--col_lime_10: #817717FF;
|
||||
--col_orange_1: #FFF2DFFF;
|
||||
--col_orange_2: #FFDFB2FF;
|
||||
--col_orange_3: #FFCC7FFF;
|
||||
--col_orange_4: #FFB74CFF;
|
||||
--col_orange_5: #FFA626FF;
|
||||
--col_orange_6: #FF9800FF;
|
||||
--col_orange_7: #FA8C00FF;
|
||||
--col_orange_8: #F47B00FF;
|
||||
--col_orange_9: #EE6C00FF;
|
||||
--col_orange_10: #E55100FF;
|
||||
--col_pink_1: #FCE4EBFF;
|
||||
--col_pink_2: #F8BAD0FF;
|
||||
--col_pink_3: #F38EB1FF;
|
||||
--col_pink_4: #F06192FF;
|
||||
--col_pink_5: #EB3F79FF;
|
||||
--col_pink_6: #E91E63FF;
|
||||
--col_pink_7: #D81A5FFF;
|
||||
--col_pink_8: #C1185AFF;
|
||||
--col_pink_9: #AC1357FF;
|
||||
--col_pink_10: #870D4EFF;
|
||||
--col_purple_1: #F2E5F4FF;
|
||||
--col_purple_2: #E0BEE6FF;
|
||||
--col_purple_3: #CD92D8FF;
|
||||
--col_purple_4: #B967C7FF;
|
||||
--col_purple_5: #AB46BBFF;
|
||||
--col_purple_6: #9B26B0FF;
|
||||
--col_purple_7: #8D24AAFF;
|
||||
--col_purple_8: #7A1FA1FF;
|
||||
--col_purple_9: #6A1A99FF;
|
||||
--col_purple_10: #4A138CFF;
|
||||
--col_red_1: #FFEBEDFF;
|
||||
--col_red_2: #FFCCD2FF;
|
||||
--col_red_3: #EE9999FF;
|
||||
--col_red_4: #E57272FF;
|
||||
--col_red_5: #EE5250FF;
|
||||
--col_red_6: #F34335FF;
|
||||
--col_red_7: #E53934FF;
|
||||
--col_red_8: #D22E2EFF;
|
||||
--col_red_9: #C52727FF;
|
||||
--col_red_10: #B71B1BFF;
|
||||
--col_teal_1: #DFF2F1FF;
|
||||
--col_teal_2: #B2DFDAFF;
|
||||
--col_teal_3: #7FCBC4FF;
|
||||
--col_teal_4: #4CB6ACFF;
|
||||
--col_teal_5: #26A599FF;
|
||||
--col_teal_6: #009687FF;
|
||||
--col_teal_7: #00887AFF;
|
||||
--col_teal_8: #00796BFF;
|
||||
--col_teal_9: #00685BFF;
|
||||
--col_teal_10: #004C3FFF;
|
||||
--col_yellow_1: #FFFDE6FF;
|
||||
--col_yellow_2: #FFF8C4FF;
|
||||
--col_yellow_3: #FFF49DFF;
|
||||
--col_yellow_4: #FFF176FF;
|
||||
--col_yellow_5: #FFED58FF;
|
||||
--col_yellow_6: #FFEB3AFF;
|
||||
--col_yellow_7: #FDD834FF;
|
||||
--col_yellow_8: #FABF2CFF;
|
||||
--col_yellow_9: #F8A725FF;
|
||||
--col_yellow_10: #F47F17FF;
|
||||
}
|
||||
|
||||
/*-- scss:defaults --*/
|
||||
|
||||
// $body-bg: #ffffff;
|
||||
|
||||
296
index.qmd
296
index.qmd
@@ -86,10 +86,13 @@ my_bib <- ReadBib("assets/library.bib", check = FALSE)
|
||||
col_lightgray <- "#e7e7e7"
|
||||
col_blue <- "#000088"
|
||||
col_smooth_expost <- "#a7008b"
|
||||
col_smooth <- "#187a00"
|
||||
col_pointwise <- "#008790"
|
||||
col_constant <- "#dd9002"
|
||||
col_optimum <- "#666666"
|
||||
col_smooth <- "#187a00"
|
||||
col_pointwise <- "#008790"
|
||||
col_green <- "#61B94C"
|
||||
col_orange <- "#ffa600"
|
||||
col_yellow <- "#FCE135"
|
||||
```
|
||||
|
||||
# CRPS Learning
|
||||
@@ -308,9 +311,9 @@ Weights are updated sequentially according to the past performance of the $K$ ex
|
||||
|
||||
That is, a loss function $\ell$ is needed. This is used to compute the **cumulative regret** $R_{t,k}$
|
||||
|
||||
$$
|
||||
R_{t,k} = \widetilde{L}_{t} - \widehat{L}_{t,k} = \sum_{i = 1}^t \ell(\widetilde{X}_{i},Y_i) - \ell(\widehat{X}_{i,k},Y_i)
|
||||
$${#eq-regret}
|
||||
\begin{equation}
|
||||
R_{t,k} = \widetilde{L}_{t} - \widehat{L}_{t,k} = \sum_{i = 1}^t \ell(\widetilde{X}_{i},Y_i) - \ell(\widehat{X}_{i,k},Y_i)\label{eq:regret}
|
||||
\end{equation}
|
||||
|
||||
The cumulative regret:
|
||||
|
||||
@@ -325,13 +328,15 @@ Popular loss functions for point forecasting @gneiting2011making:
|
||||
|
||||
$\ell_2$ loss:
|
||||
|
||||
$$\ell_2(x, y) = | x -y|^2$${#eq-elltwo}
|
||||
\begin{equation}
|
||||
\ell_2(x, y) = | x -y|^2 \label{eq:elltwo}
|
||||
\end{equation}
|
||||
|
||||
Strictly proper for *mean* prediction
|
||||
|
||||
:::
|
||||
|
||||
::: {.column width="2%"}
|
||||
::: {.column width="4%"}
|
||||
|
||||
:::
|
||||
|
||||
@@ -339,7 +344,9 @@ Strictly proper for *mean* prediction
|
||||
|
||||
$\ell_1$ loss:
|
||||
|
||||
$$\ell_1(x, y) = | x -y|$${#eq-ellone}
|
||||
\begin{equation}
|
||||
\ell_1(x, y) = | x -y| \label{eq:ellone}
|
||||
\end{equation}
|
||||
|
||||
Strictly proper for *median* predictions
|
||||
|
||||
@@ -400,17 +407,9 @@ In stochastic settings, the cumulative Risk should be analyzed `r Citet(my_bib,
|
||||
|
||||
::::
|
||||
|
||||
## Optimality
|
||||
## Optimal Convergence
|
||||
|
||||
In stochastic settings, the cumulative Risk should be analyzed @wintenberger2017optimal:
|
||||
|
||||
\begin{align}
|
||||
\underbrace{\widetilde{\mathcal{R}}_t = \sum_{i=1}^t \mathbb{E}[\ell(\widetilde{X}_{i},Y_i)|\mathcal{F}_{i-1}]}_{\text{Cumulative Risk of Forecaster}} \qquad\qquad\qquad \text{ and } \qquad\qquad\qquad
|
||||
\underbrace{\widehat{\mathcal{R}}_{t,k} = \sum_{i=1}^t \mathbb{E}[\ell(\widehat{X}_{i,k},Y_i)|\mathcal{F}_{i-1}]}_{\text{Cumulative Risk of Experts}}
|
||||
\label{eq_def_cumrisk}
|
||||
\end{align}
|
||||
|
||||
There are two problems that an algorithm should solve in iid settings:
|
||||
<br/>
|
||||
|
||||
:::: {.columns}
|
||||
|
||||
@@ -423,14 +422,6 @@ There are two problems that an algorithm should solve in iid settings:
|
||||
\end{equation}
|
||||
The forecaster is asymptotically not worse than the best expert $\widehat{\mathcal{R}}_{t,\min}$.
|
||||
|
||||
:::
|
||||
|
||||
::: {.column width="2%"}
|
||||
|
||||
:::
|
||||
|
||||
::: {.column width="48%"}
|
||||
|
||||
### The convex aggregation problem
|
||||
|
||||
\begin{equation}
|
||||
@@ -441,13 +432,14 @@ The forecaster is asymptotically not worse than the best convex combination $\wi
|
||||
|
||||
:::
|
||||
|
||||
::::
|
||||
::: {.column width="2%"}
|
||||
|
||||
## Optimality
|
||||
:::
|
||||
|
||||
Satisfying the convexity property \eqref{eq_opt_conv} comes at the cost of slower possible convergence.
|
||||
::: {.column width="48%"}
|
||||
|
||||
Optimal rates with respect to selection \eqref{eq_opt_select} and convex aggregation \eqref{eq_opt_conv} `r Citet(my_bib, "wintenberger2017optimal")`:
|
||||
|
||||
According to @wintenberger2017optimal, an algorithm has optimal rates with respect to selection \eqref{eq_opt_select} and convex aggregation \eqref{eq_opt_conv} if
|
||||
|
||||
\begin{align}
|
||||
\frac{1}{t}\left(\widetilde{\mathcal{R}}_t - \widehat{\mathcal{R}}_{t,\min} \right) & =
|
||||
@@ -466,104 +458,102 @@ Algorithms can statisfy both \eqref{eq_optp_select} and \eqref{eq_optp_conv} dep
|
||||
- Regularity conditions on $Y_t$ and $\widehat{X}_{t,k}$
|
||||
- The weighting scheme
|
||||
|
||||
## Optimality
|
||||
|
||||
According to @cesa2006prediction EWA \eqref{eq_ewa_general} satisfies the optimal selection convergence \eqref{eq_optp_select} in a deterministic setting if the:
|
||||
- Loss $\ell$ is exp-concave
|
||||
- Learning-rate $\eta$ is chosen correctly
|
||||
|
||||
Those results can be converted to stochastic iid settings @kakade2008generalization, @gaillard2014second.
|
||||
|
||||
The optimal convex aggregation convergence \eqref{eq_optp_conv} can be satisfied by applying the kernel-trick. Thereby, the loss is linearized:
|
||||
\begin{align}
|
||||
\ell^{\nabla}(x,y) = \ell'(\widetilde{X},y) x
|
||||
\end{align}
|
||||
$\ell'$ is the subgradient of $\ell$ in its first coordinate evaluated at forecast combination $\widetilde{X}$.
|
||||
|
||||
Combining probabilistic forecasts calls for a probabilistic loss function
|
||||
|
||||
:::: {.notes}
|
||||
|
||||
We apply Bernstein Online Aggregation (BOA). It lets us weaken the exp-concavity condition while almost keeping the optimalities \ref{eq_optp_select} and \ref{eq_optp_conv}.
|
||||
:::
|
||||
|
||||
::::
|
||||
|
||||
## The Continuous Ranked Probability Score
|
||||
##
|
||||
|
||||
:::: {.columns}
|
||||
|
||||
::: {.column width="48%"}
|
||||
|
||||
**An appropriate choice:**
|
||||
### Optimal Convergence
|
||||
|
||||
\begin{align*}
|
||||
\text{CRPS}(F, y) & = \int_{\mathbb{R}} {(F(x) - \mathbb{1}\{ x > y \})}^2 dx
|
||||
\label{eq_crps}
|
||||
\end{align*}
|
||||
<br/>
|
||||
|
||||
It's strictly proper @gneiting2007strictly.
|
||||
EWA satisfies optimal selection convergence \eqref{eq_optp_select} in a deterministic setting if:
|
||||
|
||||
Using the CRPS, we can calculate time-adaptive weight $w_{t,k}$. However, what if the experts' performance is not uniform over all parts of the distribution?
|
||||
- Loss $\ell$ is exp-concave
|
||||
- Learning-rate $\eta$ is chosen correctly
|
||||
|
||||
The idea: utilize this relation:
|
||||
Those results can be converted to stochastic iid settings @kakade2008generalization, @gaillard2014second.
|
||||
|
||||
\begin{align*}
|
||||
\text{CRPS}(F, y) = 2 \int_0^{1} \text{QL}_p(F^{-1}(p), y) \, d p.
|
||||
\label{eq_crps_qs}
|
||||
\end{align*}
|
||||
Optimal convex aggregation convergence \eqref{eq_optp_conv} can be satisfied by applying the kernel-trick:
|
||||
|
||||
\begin{align}
|
||||
\ell^{\nabla}(x,y) = \ell'(\widetilde{X},y) x
|
||||
\end{align}
|
||||
|
||||
$\ell'$ is the subgradient of $\ell$ at forecast combination $\widetilde{X}$.
|
||||
|
||||
:::
|
||||
|
||||
::: {.column width="2%"}
|
||||
::: {.column width="4%"}
|
||||
|
||||
:::
|
||||
|
||||
::: {.column width="48%"}
|
||||
|
||||
to combine quantiles of the probabilistic forecasts individually using the quantile-loss (QL):
|
||||
\begin{align*}
|
||||
\text{QL}_p(q, y) & = (\mathbb{1}\{y < q\} -p)(q - y)
|
||||
\end{align*}
|
||||
### Probabilistic Setting
|
||||
|
||||
</br>
|
||||
<br/>
|
||||
|
||||
**But is it optimal?**
|
||||
**An appropriate choice:**
|
||||
|
||||
CRPS is exp-concave `r fontawesome::fa("check", fill ="#00b02f")`
|
||||
\begin{equation*}
|
||||
\text{CRPS}(F, y) = \int_{\mathbb{R}} {(F(x) - \mathbb{1}\{ x > y \})}^2 dx \label{eq:crps}
|
||||
\end{equation*}
|
||||
|
||||
`r fontawesome::fa("arrow-right", fill ="#000000")` EWA \eqref{eq_ewa_general} with CRPS satisfies \eqref{eq_optp_select} and \eqref{eq_optp_conv}
|
||||
It's strictly proper @gneiting2007strictly.
|
||||
|
||||
QL is convex, but not exp-concave `r fontawesome::fa("exclamation", fill ="#ffa600")`
|
||||
Using the CRPS, we can calculate time-adaptive weights $w_{t,k}$. However, what if the experts' performance varies in parts of the distribution?
|
||||
|
||||
`r fontawesome::fa("arrow-right", fill ="#000000")` Bernstein Online Aggregation (BOA) lets us weaken the exp-concavity condition while almost keeping optimal convergence
|
||||
`r fontawesome::fa("lightbulb", fill = col_yellow)` Utilize this relation:
|
||||
|
||||
\begin{equation*}
|
||||
\text{CRPS}(F, y) = 2 \int_0^{1} \text{QL}_p(F^{-1}(p), y) dp.\label{eq_crps_qs}
|
||||
\end{equation*}
|
||||
|
||||
... to combine quantiles of the probabilistic forecasts individually using the quantile-loss QL.
|
||||
|
||||
:::
|
||||
|
||||
::::
|
||||
|
||||
## CRPS-Learning Optimality
|
||||
## CRPS Learning Optimality
|
||||
|
||||
::: {.panel-tabset}
|
||||
|
||||
## Almost Optimal Convergence
|
||||
|
||||
|
||||
`r fontawesome::fa("exclamation", fill = col_orange)` QL is convex, but not exp-concave `r fontawesome::fa("arrow-right", fill ="#000000")` Bernstein Online Aggregation (BOA) lets us weaken the exp-concavity condition. It satisfies that there exists a $C>0$ such that for $x>0$ it holds that
|
||||
|
||||
For convex losses, BOAG satisfies that there exists a $C>0$ such that for $x>0$ it holds that
|
||||
\begin{equation}
|
||||
P\left( \frac{1}{t}\left(\widetilde{\mathcal{R}}_t - \widehat{\mathcal{R}}_{t,\pi} \right) \leq C \log(\log(t)) \left(\sqrt{\frac{\log(K)}{t}} + \frac{\log(K)+x}{t}\right) \right) \geq
|
||||
1-e^{x}
|
||||
1-e^{-x}
|
||||
\label{eq_boa_opt_conv}
|
||||
\end{equation}
|
||||
|
||||
`r fontawesome::fa("arrow-right", fill ="#000000")` Almost optimal w.r.t *convex aggregation* \eqref{eq_optp_conv} @wintenberger2017optimal.
|
||||
|
||||
The same algorithm satisfies that there exists a $C>0$ such that for $x>0$ it holds that
|
||||
\begin{equation}
|
||||
P\left( \frac{1}{t}\left(\widetilde{\mathcal{R}}_t - \widehat{\mathcal{R}}_{t,\min} \right) \leq
|
||||
C\left(\frac{\log(K)+\log(\log(Gt))+ x}{\alpha t}\right)^{\frac{1}{2-\beta}} \right) \geq
|
||||
1-e^{x}
|
||||
1-2e^{-x}
|
||||
\label{eq_boa_opt_select}
|
||||
\end{equation}
|
||||
|
||||
if $Y_t$ is bounded, the considered loss $\ell$ is convex $G$-Lipschitz and weak exp-concave in its first coordinate.
|
||||
if $Y_t$ is bounded, the considered loss $\ell$ is convex $G$-Lipschitz and weak exp-concave in its first coordinate.
|
||||
|
||||
This is for losses that satisfy **A1** and **A2**.
|
||||
`r fontawesome::fa("arrow-right", fill ="#000000")` Almost optimal w.r.t *selection* \eqref{eq_optp_select} @gaillard2018efficient.
|
||||
|
||||
`r fontawesome::fa("arrow-right", fill ="#000000")` We show that this holds for QL under feasible conditions.
|
||||
|
||||
## Conditions + Lemma
|
||||
|
||||
## CRPS-Learning Optimality
|
||||
|
||||
:::: {.columns}
|
||||
|
||||
@@ -624,8 +614,7 @@ QL is Lipschitz continuous:
|
||||
|
||||
::::
|
||||
|
||||
|
||||
## CRPS-Learning Optimality
|
||||
## Proposition + Theorem
|
||||
|
||||
:::: {.columns}
|
||||
|
||||
@@ -674,6 +663,13 @@ $$\widehat{\mathcal{R}}_{t,\min} = 2\overline{\widehat{\mathcal{R}}}^{\text{QL}}
|
||||
|
||||
::::
|
||||
|
||||
::::
|
||||
|
||||
:::: {.notes}
|
||||
|
||||
We apply Bernstein Online Aggregation (BOA). It lets us weaken the exp-concavity condition while almost keeping the optimalities \ref{eq_optp_select} and \ref{eq_optp_conv}.
|
||||
|
||||
::::
|
||||
|
||||
## A Probabilistic Example
|
||||
|
||||
@@ -797,13 +793,17 @@ ggplot() +
|
||||
|
||||
:::
|
||||
|
||||
## The Smoothing Procedure
|
||||
## The Smoothing Procedures
|
||||
|
||||
::: {.panel-tabset}
|
||||
|
||||
## Penalized Smoothing
|
||||
|
||||
:::: {.columns}
|
||||
|
||||
::: {.column width="48%"}
|
||||
|
||||
We are using penalized cubic b-splines:
|
||||
Penalized cubic B-Splines for smoothing weights:
|
||||
|
||||
Let $\varphi=(\varphi_1,\ldots, \varphi_L)$ be bounded basis functions on $(0,1)$. Then we approximate $w_{t,k}$ by
|
||||
|
||||
@@ -811,7 +811,7 @@ Let $\varphi=(\varphi_1,\ldots, \varphi_L)$ be bounded basis functions on $(0,1)
|
||||
w_{t,k}^{\text{smooth}} = \sum_{l=1}^L \beta_l \varphi_l = \beta'\varphi
|
||||
\end{align}
|
||||
|
||||
with parameter vector $\beta$. The latter is estimated penalized $L_2$-smoothing which minimizes
|
||||
with parameter vector $\beta$. The latter is estimated by penalized $L_2$-smoothing, which minimizes
|
||||
|
||||
\begin{equation}
|
||||
\| w_{t,k} - \beta' \varphi \|^2_2 + \lambda \| \mathcal{D}^{d} (\beta' \varphi) \|^2_2
|
||||
@@ -820,7 +820,7 @@ with parameter vector $\beta$. The latter is estimated penalized $L_2$-smoothing
|
||||
|
||||
with differential operator $\mathcal{D}$
|
||||
|
||||
Smoothing can be applied ex-post or inside of the algorithm ( `r fontawesome::fa("arrow-right", fill ="#000000")` [Simulation](#simulation)).
|
||||
Computation is easy, since we have an analytical solution
|
||||
|
||||
:::
|
||||
|
||||
@@ -840,14 +840,119 @@ We receive the constant solution for high values of $\lambda$ when setting $d=1$
|
||||
|
||||
::::
|
||||
|
||||
## Basis Smoothing
|
||||
|
||||
:::: {.columns}
|
||||
|
||||
::: {.column width="48%"}
|
||||
|
||||
Represent weights as linear combinations of bounded basis functions:
|
||||
|
||||
\begin{equation}
|
||||
w_{t,k} = \sum_{l=1}^L \beta_{t,k,l} \varphi_l = \boldsymbol \beta_{t,k}' \boldsymbol \varphi
|
||||
\end{equation}
|
||||
|
||||
B-Splines are a popular choice of local basis functions
|
||||
|
||||
$\boldsymbol \beta_{t,k}$ is calculated using a reduced regret matrix:
|
||||
|
||||
\begin{equation}
|
||||
\underbrace{\boldsymbol r_{t}}_{\text{LxK}} = \frac{L}{P} \underbrace{\boldsymbol B'}_{\text{LxP}} \underbrace{\left({\boldsymbol{QL}}_{\mathcal{P}}^{\nabla}(\widetilde{\boldsymbol X}_{t},Y_t)- {\boldsymbol{QL}}_{\mathcal{P}}^{\nabla}(\widehat{\boldsymbol X}_{t},Y_t)\right)}_{\text{PxK}}
|
||||
\end{equation}
|
||||
|
||||
`r fontawesome::fa("arrow-right", fill ="#000000")` $\boldsymbol r_{t}$ is transformed from PxK to LxK
|
||||
|
||||
If $L = P$ it holds that $\boldsymbol \varphi = \boldsymbol{I}$
|
||||
For $L = 1$ we receive constant weights
|
||||
|
||||
:::
|
||||
|
||||
::: {.column width="2%"}
|
||||
|
||||
:::
|
||||
|
||||
::: {.column width="48%"}
|
||||
|
||||
Weights converge to the constant solution if $L\rightarrow 1$
|
||||
|
||||
<center>
|
||||
<img src="/assets/crps_learning/weights_kstep.gif">
|
||||
</center>
|
||||
|
||||
:::
|
||||
|
||||
::::
|
||||
|
||||
::::
|
||||
|
||||
---
|
||||
|
||||
## The Proposed CRPS-Learning Algorithm
|
||||
|
||||
```{r, fig.align="left", echo=FALSE, out.width = "1000px", cache = TRUE}
|
||||
knitr::include_graphics("assets/crps_learning/algorithm_1.svg")
|
||||
```
|
||||
<br/>
|
||||
|
||||
::: {style="font-size: 85%;"}
|
||||
|
||||
:::: {.columns}
|
||||
|
||||
::: {.column width="43%"}
|
||||
|
||||
### Initialization:
|
||||
|
||||
Array of expert predictions: $\widehat{X}_{t,p,k}$
|
||||
|
||||
Vector of Prediction targets: $Y_t$
|
||||
|
||||
Starting Weights: $\boldsymbol w_0=(w_{0,1},\ldots, w_{0,K})$
|
||||
|
||||
Penalization parameter: $\lambda\geq 0$
|
||||
|
||||
B-spline and penalty matrices $\boldsymbol B$ and $\boldsymbol D$ on $\mathcal{P}= (p_1,\ldots,p_M)$
|
||||
|
||||
Hat matrix: $$\boldsymbol{\mathcal{H}} = \boldsymbol B(\boldsymbol B'\boldsymbol B+ \lambda (\alpha \boldsymbol D_1'\boldsymbol D_1 + (1-\alpha) \boldsymbol D_2'\boldsymbol D_2))^{-1} \boldsymbol B'$$
|
||||
|
||||
Cumulative Regret: $R_{0,k} = 0$
|
||||
|
||||
Range parameter: $E_{0,k}=0$
|
||||
|
||||
Starting pseudo-weights: $\boldsymbol \beta_0 = \boldsymbol B^{\text{pinv}}\boldsymbol w_0(\boldsymbol{\mathcal{P}})$
|
||||
|
||||
|
||||
:::
|
||||
|
||||
::: {.column width="2%"}
|
||||
|
||||
:::
|
||||
|
||||
::: {.column width="55%"}
|
||||
|
||||
### Core:
|
||||
|
||||
for( t in 1:T ) {
|
||||
|
||||
$\widetilde{\boldsymbol X}_{t} = \text{Sort}\left( \boldsymbol w_{t-1}'(\boldsymbol P) \widehat{\boldsymbol X}_{t} \right)$ <b style="color: var(--col_grey_7);"># Prediction</b>
|
||||
|
||||
$\boldsymbol r_{t} = \frac{L}{M} \boldsymbol B' \left({\boldsymbol{QL}}_{\boldsymbol{\mathcal P}}^{\nabla}(\widetilde{\boldsymbol X}_{t},Y_t)- {\boldsymbol{QL}}_{\boldsymbol{\mathcal P}}^{\nabla}(\widehat{\boldsymbol X}_{t},Y_t)\right)$
|
||||
|
||||
$\boldsymbol E_{t} = \max(\boldsymbol E_{t-1}, \boldsymbol r_{t}^+ + \boldsymbol r_{t}^-)$
|
||||
|
||||
$\boldsymbol V_{t} = \boldsymbol V_{t-1} + \boldsymbol r_{t}^{ \odot 2}$
|
||||
|
||||
$\boldsymbol \eta_{t} =\min\left( \left(-\log(\boldsymbol \beta_{0}) \odot \boldsymbol V_{t}^{\odot -1} \right)^{\odot\frac{1}{2}} , \frac{1}{2}\boldsymbol E_{t}^{\odot-1}\right)$
|
||||
|
||||
$\boldsymbol R_{t} = \boldsymbol R_{t-1}+ \boldsymbol r_{t} \odot \left( \boldsymbol 1 - \boldsymbol \eta_{t} \odot \boldsymbol r_{t} \right)/2 + \boldsymbol E_{t} \odot \mathbb{1}\{-2\boldsymbol \eta_{t}\odot \boldsymbol r_{t} > 1\}$
|
||||
|
||||
$\boldsymbol \beta_{t} = K \boldsymbol \beta_{0} \odot \boldsymbol {SoftMax}\left( - \boldsymbol \eta_{t} \odot \boldsymbol R_{t} + \log( \boldsymbol \eta_{t}) \right)$
|
||||
|
||||
$\boldsymbol w_{t}(\boldsymbol P) = \underbrace{\boldsymbol B(\boldsymbol B'\boldsymbol B+ \lambda (\alpha \boldsymbol D_1'\boldsymbol D_1 + (1-\alpha) \boldsymbol D_2'\boldsymbol D_2))^{-1} \boldsymbol B'}_{\boldsymbol{\mathcal{H}}} \boldsymbol B \boldsymbol \beta_{t}$
|
||||
|
||||
}
|
||||
|
||||
:::
|
||||
|
||||
::::
|
||||
|
||||
:::
|
||||
|
||||
## Simulation Study
|
||||
|
||||
@@ -1437,19 +1542,6 @@ BOA > 16 -->
|
||||
|
||||
## Outline
|
||||
|
||||
```{r, include=FALSE}
|
||||
col_lightgray <- "#e7e7e7"
|
||||
col_blue <- "#000088"
|
||||
col_smooth_expost <- "#a7008b"
|
||||
col_smooth <- "#187a00"
|
||||
col_pointwise <- "#008790"
|
||||
col_constant <- "#dd9002"
|
||||
col_optimum <- "#666666"
|
||||
col_green <- "#61B94C"
|
||||
col_orange <- "#ffa600"
|
||||
col_yellow <- "#FCE135"
|
||||
```
|
||||
|
||||
<br/>
|
||||
|
||||
**Multivariate CRPS Learning**
|
||||
|
||||
Reference in New Issue
Block a user