Init presentation boilerplate

This commit is contained in:
2025-05-11 16:10:17 +02:00
parent 6396879ed1
commit 9938b10aa8
18 changed files with 9132 additions and 0 deletions

4
.gitignore vendored
View File

@@ -82,3 +82,7 @@ data/*
# Whitelist selected files # Whitelist selected files
!data/example_witelisted.csv !data/example_witelisted.csv
# Ignore html files for now
# TODO: Remove later
*.html

View File

@@ -0,0 +1,7 @@
title: Font Awesome support
author: Carlos Scheidegger
version: 1.2.0
quarto-required: ">=1.2.269"
contributes:
shortcodes:
- fontawesome.lua

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,30 @@
/*
 * Size modifier classes whose names follow the LaTeX font-size commands
 * (tiny ... Huge). Each class sets a font size in `em`, i.e. relative to the
 * font size of the surrounding text; `.fa-normalsize` leaves it unchanged.
 * Note that class names are case-sensitive: `.fa-large`, `.fa-Large` and
 * `.fa-LARGE` are three different sizes.
 */
.fa-tiny {
font-size: 0.5em;
}
.fa-scriptsize {
font-size: 0.7em;
}
.fa-footnotesize {
font-size: 0.8em;
}
.fa-small {
font-size: 0.9em;
}
.fa-normalsize {
font-size: 1em;
}
.fa-large {
font-size: 1.2em;
}
.fa-Large {
font-size: 1.5em;
}
.fa-LARGE {
font-size: 1.75em;
}
.fa-huge {
font-size: 2em;
}
.fa-Huge {
font-size: 2.5em;
}

View File

@@ -0,0 +1,84 @@
--- Request the LaTeX package that provides the icon commands.
-- Asks Quarto to load `fontawesome5` in the generated LaTeX document.
local function ensureLatexDeps()
  local latexPackage = "fontawesome5"
  quarto.doc.use_latex_package(latexPackage)
end
--- Register the HTML dependency that ships the icon stylesheets.
-- Adds the bundled Font Awesome stylesheet and the LaTeX-style size classes
-- to the document through Quarto's HTML dependency mechanism.
local function ensureHtmlDeps()
  local dependency = {
    name = 'fontawesome6',
    version = '1.2.0',
    stylesheets = {
      'assets/css/all.min.css',
      'assets/css/latex-fontsize.css',
    },
  }
  quarto.doc.add_html_dependency(dependency)
end
--- Tell whether a value is missing or the empty string.
-- @param s value to inspect
-- @return `true` when `s` is `nil` or `''`, otherwise `false`
local function isEmpty(s)
  if s == nil then
    return true
  end
  return s == ''
end
--- Check a size name against the supported LaTeX font-size names.
-- The comparison is case-sensitive ("large", "Large" and "LARGE" are
-- distinct entries).
-- @param size candidate size name
-- @return `size` itself when it is a supported name, otherwise `""`
local function isValidSize(size)
  local supported = {
    tiny = true,
    scriptsize = true,
    footnotesize = true,
    small = true,
    normalsize = true,
    large = true,
    Large = true,
    LARGE = true,
    huge = true,
    Huge = true,
  }
  if supported[size] then
    return size
  end
  return ""
end
--- Escape text for use inside a double-quoted HTML attribute value.
-- Replaces `&`, `<`, `>` and `"` with their character entities so that
-- free-form text cannot end the attribute early or inject markup.
-- @param text string to escape
-- @return the escaped string
local function escapeHtmlAttribute(text)
  local entities = {
    ["&"] = "&amp;",
    ["<"] = "&lt;",
    [">"] = "&gt;",
    ['"'] = "&quot;",
  }
  -- The outer parentheses drop gsub's second result (the replacement count).
  return (text:gsub('[&<>"]', entities))
end

return {
  --- `fa` shortcode: render a Font Awesome icon.
  -- Positional arguments: either `icon`, or `group icon` (the group defaults
  -- to "solid" when only one argument is given).
  -- Keyword arguments: `title` (optional tooltip text), `label` (accessible
  -- label, defaults to the icon name) and `size`.
  -- Returns a raw HTML `<i>` element for "html:js" formats, a raw TeX
  -- `\faIcon{...}` for "pdf" formats, and `pandoc.Null()` otherwise.
  ["fa"] = function(args, kwargs)
    local group = "solid"
    local icon = pandoc.utils.stringify(args[1])
    if #args > 1 then
      -- Two positional arguments: the first is the group, the second the icon.
      group = icon
      icon = pandoc.utils.stringify(args[2])
    end

    -- `title` and `label` are free text, so they are escaped before being
    -- placed inside attribute quotes.
    local title = pandoc.utils.stringify(kwargs["title"])
    if not isEmpty(title) then
      title = " title=\"" .. escapeHtmlAttribute(title) .. "\""
    end

    local label = pandoc.utils.stringify(kwargs["label"])
    if isEmpty(label) then
      label = " aria-label=\"" .. escapeHtmlAttribute(icon) .. "\""
    else
      label = " aria-label=\"" .. escapeHtmlAttribute(label) .. "\""
    end

    local size = pandoc.utils.stringify(kwargs["size"])

    -- detect html (excluding epub which won't handle fa)
    if quarto.doc.is_format("html:js") then
      ensureHtmlDeps()
      -- Any non-empty size is passed through as an extra `fa-<size>` class;
      -- it is deliberately not restricted to the LaTeX size names here.
      if not isEmpty(size) then
        size = " fa-" .. size
      end
      return pandoc.RawInline(
        'html',
        "<i class=\"fa-" .. group .. " fa-" .. icon .. size .. "\"" .. title .. label .. "></i>"
      )
    -- detect pdf / beamer / latex / etc
    elseif quarto.doc.is_format("pdf") then
      ensureLatexDeps()
      -- Only the known LaTeX size names are emitted as a size command;
      -- anything else falls back to the unscaled icon.
      if isEmpty(isValidSize(size)) then
        return pandoc.RawInline('tex', "\\faIcon{" .. icon .. "}")
      else
        return pandoc.RawInline('tex', "{\\" .. size .. "\\faIcon{" .. icon .. "}}")
      end
    else
      return pandoc.Null()
    end
  end
}

View File

@@ -0,0 +1,261 @@
/*-- scss:defaults --*/
// Custom colours and variables
// $jet is the dark text colour; $accent and $accent2 are the two highlight
// colours used by the rules further down.
$jet: #131516;
$accent: #107895;
$accent2: #9a2515;
// $accent2: #e64173;
$right-arrow: "\2192"; // Unicode character for right arrow
// fonts
/*
Note: This theme uses the Roboto font family, which it imports from Google
Fonts to ensure consistent weighting in addition to availability. While
you can use a local installation of Roboto, this is generally not
recommended since the weighting will likely be wrong (probably too
light). OTOH, importing from Google Fonts can cause some issues in
certain secure environments due to the external CDN (see:
https://github.com/grantmcdermott/quarto-revealjs-clean/issues/7). If
that's the case for you, simply comment out the `@import url(...)` line
below and it will fall back to the default sans-serif font on your system
(e.g., Helvetica on a Mac). Circling back to the earlier point about
preserving consistent font weights, you may also wish to remove "Roboto"
from the choice set if the family is installed locally.
*/
@import url(https://fonts.googleapis.com/css?family=Roboto:200,200i,300,300i,350,350i,400,400i);
// Variables marked `!default` are only assigned if they have not been
// defined already; the ones without it are always assigned here.
$font-family-sans-serif: "Roboto", sans-serif !default;
$presentation-heading-font: "Roboto", sans-serif !default;
$presentation-heading-color: $jet !default;
$presentation-heading-font-weight: lighter;
//$presentation-heading-line-height: 2;
//$presentation-block-margin: 28px;
$presentation-font-size-root: 32px;
// colors
//$body-bg: #f0f1eb !default;
$body-color: $jet !default;
$link-color: $accent !default;
$selection-bg: #26351c !default;
/*-- scss:rules --*/
// Links: fixed line height of 1.5em.
.reveal a {
line-height: 1.5em;
}
// Paragraphs: lighter font weight and extra space above each paragraph.
.reveal p {
// font-weight: 300;
font-weight: lighter;
margin-top: 1.25em;
}
// title and headings
// Title slide: left-aligned, with light-weight title and subtitle.
#title-slide {
text-align: left;
// Main title in the body colour.
.title {
color: $body-color;
font-size: 1.4em;
// font-weight: 350;
font-weight: lighter;
}
// Subtitle in the accent colour, italic, without extra top margin.
.subtitle {
color: $accent;
font-style: italic;
margin-top: 0em;
font-weight: lighter;
}
// Institute, affiliation and e-mail lines are set in italics.
.institute,
.quarto-title-affiliation,
.quarto-title-author-email {
font-style: italic;
// font-size: 80%;
// color: #7F7F7F;
}
// Author names use the body colour.
.author,
.quarto-title-author-name {
color: $body-color;
}
// Author list: flex row aligned to the left; each author entry spans the
// full width and has no horizontal padding.
.quarto-title-authors {
display: flex;
justify-content: left;
.quarto-title-author {
padding-left: 0em;
padding-right: 0em;
width: 100%;
}
}
}
// Level-2 headings: light weight, 1.4em.
.reveal h2 {
// font-weight: 350;
font-weight: lighter;
font-size: 1.4em;
}
// Level-3 headings: accent colour, italic, light weight, slightly smaller
// than the surrounding text (0.95em).
.reveal h3 {
color: $accent;
font-style: italic;
// font-weight: 350;
font-weight: lighter;
font-size: 0.95em;
}
// Level-4 headings: second accent colour, normal weight, extra top margin.
.reveal h4 {
color: $accent2;
// font-weight: 350;
font-weight: normal;
margin-top: 1.25em;
}
// alerts etc.
// .alert: text in the second accent colour.
.alert {
color: $accent2;
}
// .fg: text colour taken from the `--col` custom property, falling back to
// $jet when `--col` is not set.
.fg {
color: var(--col, $jet);
}
// .bg: inline block with rounded corners whose background colour is taken
// from the `--col` custom property, falling back to white.
.bg {
background-color: var(--col, #fff);
padding: 0.1em;
border-radius: 5px;
display: inline-block;
}
// lists
// Unordered lists
// Light weight text; list markers use the accent colour mixed with white
// (70% accent).
.reveal ul {
// font-weight: 300;
font-weight: lighter;
padding-left: 16px;
li::marker {
color: mix($accent, white, 70%);
}
}
// Nested unordered lists: the default marker is removed and replaced by the
// $right-arrow character, drawn via `li:before` in a lighter accent mix
// (60% accent). The negative left margin pulls the arrow back into the
// space to the left of the item text.
.reveal ul ul {
list-style: none;
li:before {
content: $right-arrow;
color: mix($accent, white, 60%);
display: inline-block;
width: 1em;
margin-left: -1em;
margin-right: 0.5em;
}
}
// Ordered lists
// Light weight text; item numbers use the plain accent colour.
.reveal ol {
// font-weight: 300;
font-weight: lighter;
padding-left: 16px;
li::marker {
color: $accent;
}
}
// Move "hamburger" menu button to top right
// The container is fixed to the top-right corner and ignores pointer events
// itself (`pointer-events: none`).
.reveal .slide-menu-button {
position: fixed;
top: 6px;
right: 0;
display: flex;
justify-content: flex-end;
align-items: flex-start;
pointer-events: none;
}
// Re-enable pointer events on the container's direct children so they stay
// clickable.
.reveal .slide-menu-button > * {
pointer-events: auto;
}
// Same for chalkboard buttons (with an offset)
// Offset: 12px from the top and 24px from the right edge.
.reveal .slide-chalkboard-buttons {
position: fixed;
top: 12px;
right: 24px;
display: flex;
justify-content: flex-end;
align-items: flex-start;
pointer-events: none;
}
// Re-enable pointer events on the direct children, as above.
.reveal .slide-chalkboard-buttons > * {
pointer-events: auto;
}
// Beamer-style button link environment
// Inline-block pill in the accent colour with white text; the white text
// colour is forced with `!important`. All property changes are animated
// over 0.2s.
.button {
display: inline-block;
padding: 6px 12px;
margin-bottom: 0;
font-size: 14px;
font-weight: 400;
line-height: 1.42857143;
text-align: center;
white-space: nowrap;
vertical-align: middle;
cursor: pointer;
background-color: $accent;
border: 1px solid $accent;
color: #fff !important;
text-decoration: none;
border-radius: 4px;
transition: all 0.2s ease-in-out;
}
// Hover state.
// NOTE(review): the hover colour is a hard-coded blue rather than a value
// derived from $accent — confirm this is intended.
.button:hover {
background-color: #0056b3;
border-color: #0056b3;
}
// Empty pseudo-element in front of the button text with a 5px right margin.
// NOTE(review): `content` is an empty string here — possibly a placeholder
// for an icon glyph; confirm.
.button::before {
content: "";
margin-right: 5px;
}
// Special catch for etable environment to ensure these table images
// don't overflow the slide.
// See: https://lrberge.github.io/fixest/articles/etable_new_features.html
// The container is a flex box that centres its content both horizontally
// and vertically and leaves 3em of the available height free.
.etable {
width: 100%;
height: calc(100% - 3em); /* Adjust 3em based on the height of your header, if necessary */
display: flex;
align-items: center;
justify-content: center;
}
// Images keep their aspect ratio (`object-fit: contain`, auto width/height)
// and are capped at the size of the container.
.etable img {
max-width: 100%;
max-height: 100%;
width: auto;
height: auto;
object-fit: contain;
}

View File

@@ -0,0 +1,11 @@
/*-- scss:defaults --*/
// Colour variables for this theme. None of them use `!default`, so they are
// always assigned here.
$body-bg: #ffffff;
$body-color: #7a6f69;
$link-color: #005088;
$selection-color: #00b0dc;
// Headings reuse the selection colour defined above.
$presentation-heading-color: $selection-color;
$tabset-border-color: #bbb3b0;
// No custom rules are defined below this marker.
/*-- scss:rules --*/

696
25_07_phd_defense/index.qmd Normal file
View File

@@ -0,0 +1,696 @@
---
title: "Data Science Methods for Forecasting in Energy and Economics"
date: 2025-07-10
author:
- name: Jonathan Berrisch
affiliations:
- ref: hemf
affiliations:
- id: hemf
name: University of Duisburg-Essen, House of Energy Markets and Finance
format:
revealjs:
embed-resources: true
footer: ""
logo: logos_combined.png
theme: [default, clean.scss]
smaller: true
fig-format: svg
execute:
daemon: false
highlight-style: github
---
## Outline
::: {.hidden}
$$
\newcommand{\A}{{\mathbb A}}
$$
:::
<br>
::: {style="font-size: 150%;"}
[{{< fa bars-staggered >}}]{style="color: #404040;"} &ensp; Introduction & Research Motivation
[{{< fa bars-staggered >}}]{style="color: #404040;"} &ensp; Overview of the Thesis
[{{< fa table >}}]{style="color: #404040;"} &ensp; Online Learning
[{{< fa circle-nodes >}}]{style="color: #404040;"} &ensp; Probabilistic Forecasting of European Carbon and Energy Prices
[{{< fa lightbulb >}}]{style="color: #404040;"} &ensp; Limitations
[{{< fa binoculars >}}]{style="color: #404040;"} &ensp; Contributions & Outlook
:::
## EfeMOD
**Empirisch fundierte Elektrizitätsmarkt-Modellierung mit Open Data**
:::: {.columns}
::: {.column width="65%"}
[{{< fa users-gear >}}]{style="color: #404040;"} **Project Entities:**
Chair of Prof. Dr. Christoph Weber (Management Sciences and Energy Economics)
Chair of Prof. Dr. Florian Ziel (Data Science in Energy and Environment)
[{{< fa bullseye >}}]{style="color: #404040;"} &ensp; **Project Goal:**
Use publicly available data (particularly ENTSO-E Transparency Platform) to estimate parameters for energy system and energy market models.
:::
::: {.column width="5%"}
<!-- empty column to create gap -->
:::
::: {.column width="30%"}
![](figures/BMWK.webp)
:::
::::
## EfeMOD
![](figures/power_plant_list.jpg)
## Motivation and Objective
**Identification of Power Plant Operation States Using Clustering**
[{{< fa earth-europe >}}]{style="color: #404040;"} Gain Knowledge about the Power Plant Characteristics
- Operation Points,
- Efficiency
- Capacity, etc.
[{{< fa display >}}]{style="color: #404040;"} This Presentation:
Identify Operation States:
- Stable Operation
- Startup
- Minimum-Stable Operation, etc.
Provide these characteristics to other researchers
[{{< fa right-long >}}]{style="color: #404040;"} e.g. to estimate efficiency
## Data
[{{< fa database >}}]{style="color:#404040;"} ENTSO-E Data:
- ActualGenerationOutputPerGenerationUnit_16.1.A
- UnavailabilityOfGenerationUnits_15.1.A_B
[{{< fa fire-flame-simple >}}]{style="color:rgb(0, 200, 255);"} We focus on natural gas units:
- 63 units in `DE_LU` bidding zone
- 299 units across all bidding zones
[{{< fa calendar-days >}}]{style="color:#404040;"} We use recent data:
- 2020-01-01 until "now"
## Data
![](figures/Block%20AGuD/0_data1.jpg)
## Data
![](figures/Block%20AGuD/0_data2.jpg)
## Data
::: {.panel-tabset}
## Lausward
:::: {.columns}
::: {.column width="42%"}
**Heizkraftwerk Lausward**
Location: Düsseldorf
Block Anton (*Block AGuD*)
Combined cycle gas turbine (CCGT)
Electrical output: 103 MW [{{< fa bolt >}}]{style="color: #ffc400;"}
75 MW of district heating can be decoupled
Efficiency: 54%
Fuel Utilization Rate: 87% (with district heating)
:::
::: {.column width="3%"}
<!-- empty column to create gap -->
:::
::: {.column width="55%"}
![](figures/Block%20AGuD/0_data3.jpg)
:::
::::
## Emsland
:::: {.columns}
::: {.column width="42%"}
**Erdgaskraftwerk Emsland**
Location: Lingen (Ems)
*Block C*
Combined cycle gas turbine (CCGT)
Electrical output: 475 MW [{{< fa bolt >}}]{style="color: #ffc400;"}
Efficiency: 46%
Black start enabled.
:::
::: {.column width="3%"}
<!-- empty column to create gap -->
:::
::: {.column width="55%"}
![](figures/Emsland%20C/0_data3.jpg)
:::
::::
:::
## Empirical Approach
:::: {.columns}
::: {.column width="42%"}
### Overview
Empirical identification of states
3-Step Approach:
- Prior Partitioning
- We create preliminary clusters
- They will be used to initialize the main clustering
- Main Clustering
- Gaussian Model Based Clustering
- Label Assignment
- We assign meaningful labels to the final clusters
:::
::: {.column width="3%"}
<!-- empty column to create gap -->
:::
::: {.column width="55%"}
![](figures/Block%20AGuD/0_data3.jpg)
:::
::::
## Empirical Approach
:::: {.columns}
::: {.column width="42%"}
### Prior Partitioning
[{{< fa arrow-up-right-dots >}}]{style="color: #202020FF;"} Divide the space in meaningful partitions:
Define the Capacity: $\zeta = \max(t0)$
Define a threshold: $\gamma = \frac{\zeta}{50}$
[{{< fa circle >}}]{style="color: #2D7D32FF;"} $\pm \gamma$ around the diagonal: Stable <br>
[{{< fa circle >}}]{style="color: #202020FF;"} $t0 < 1$ & $t1 < 1$: Zero <br>
[{{< fa circle >}}]{style="color: #FA8C00FF;"} $t0 < \gamma$ & $t1 > 1$: Startup <br>
[{{< fa circle >}}]{style="color: #D81A5FFF;"} $t0 > 1$ & $t1 < \gamma$: Shutdown <br>
[{{< fa circle >}}]{style="color: #FDD834FF;"} $t1 > t0$: Ramp-Up <br>
[{{< fa circle >}}]{style="color: #8D24AAFF;"} $t1 < t0$: Ramp-Down
We project <b style="color: #2D7D32FF;">Stable</b> observations onto the diagonal, <font style = "opacity: 0.4;"> <b style="color: #FA8C00FF;">Startup</b> on $t1$ and <b style="color: #D81A5FFF;">Shutdown</b> on $t0$ for the next step. </font>
:::
::: {.column width="3%"}
<!-- empty column to create gap -->
:::
::: {.column width="55%"}
![](figures/Block%20AGuD/1_pre-partition.jpg)
:::
::::
## Empirical Approach
:::: {.columns}
::: {.column width="42%"}
### Prior Partitioning
Model-Based Clustering of the Regions using `mclust::Mclust` in `R`.
- <b style="color: #2D7D32FF;">Stable</b>: 2-5 Clusters
- <b style="color: #FDD834FF;">Ramp Up</b>: 2-4 Clusters
- <b style="color: #8D24AAFF;">Ramp Down</b>: 2-4 Clusters
[{{< fa lightbulb >}}]{style="color:rgb(255, 166, 0);"} Obtain finite mixture distribution:
$$\sum_{k=1}^{G}{\pi_k f_k (\mathbf{x}; \mathbf{\theta}_k)}$$
$f_k$ Density of the $k$-th component<br>
$\pi_k$ Mixture weights<br>
$\theta_k$ Parameters of the $k$-th component density
:::
::: {.column width="3%"}
<!-- empty column to create gap -->
:::
::: {.column width="55%"}
![](figures/Block%20AGuD/1_pre-partition.jpg)
:::
::::
## Empirical Approach
### Prior Partitioning
:::: {.columns}
::: {.column width="49%"}
$$f(\mathbf{x}; \mathbf{\Psi}) = \sum_{k=1}^{G}{\pi_k \phi (\mathbf{x}; \mathbf{\mu}_k; \mathbf{\Sigma}_k)}$$
$\phi(\cdot)$ Multivariate Gaussian density<br>
Maximum Likelihood Estimation via Expectation Maximization (EM) algorithm
Likelihood for Gaussian Mixture Models (GMMs):
\begin{align}
\ell(\Psi) = \sum_{i=1}^n \log \left\{ \sum_{k=1}^G \pi_k \phi(x_i; \mu_k, \Sigma_k) \right\}
\end{align}
[{{< fa retweet >}}]{style="color: #404040;"} We reformulate this likelihood as a complete-data likelihood to utilize the EM algorithm
:::
::: {.column width="2%"}
<!-- empty column to create gap -->
:::
::: {.column width="49%"}
\begin{align}
\ell_{\mathcal{C}}(\Psi) = \sum_{i=1}^n \sum_{k=1}^G z_{ik} \left\{ \log \pi_k + \log \phi(x_i; \mu_k, \Sigma_k) \right\}
\end{align}
\begin{align}
z_{ik} =
\begin{cases}
1 & \text{if } x_i \text{ belongs to component }k \\
0 & \text{otherwise.}
\end{cases}
\end{align}
E-Step:
\begin{align}
\hat{z}_{ik} = \frac{\hat{\pi}_k \phi(x_i; \hat{\mu}_k, \hat{\Sigma}_k)}{\sum_{g=1}^{G} \hat{\pi}_g \phi(x_i; \hat{\mu}_g, \hat{\Sigma}_g)},
\end{align}
M-Step:
\begin{align}
\quad \hat{\mu}_k = \frac{\sum_{i=1}^{n} \hat{z}_{ik} x_i}{n_k}, \quad \text{where} \quad n_k = \sum_{i=1}^{n} \hat{z}_{ik}.
\end{align}
:::
::::
::: {.notes}
- log-likelihood in (2.2) is hard to maximize directly
- even numerically
- As a consequence, mixture models are usually fitted by reformulating the mixture
problem as an incomplete-data problem within the EM framework.
General EM Steps:
- Init
- Estimate latent component memberships
- M-Step obtain the updated parameter estimates
- Check convergence criteria
:::
## Empirical Approach
:::: {.columns}
::: {.column width="42%"}
### Prior Partitioning
**Initialization**
We initialize the EM algorithm (E-Step) using the partitions
obtained from model-based agglomerative hierarchical clustering (MBAHC)
**Estimation**
The Bayesian information criterion (BIC) is used for model selection
**Prior Partitioning Results**
Right graph shows prior clusters.
:::
::: {.column width="3%"}
<!-- empty column to create gap -->
:::
::: {.column width="55%"}
::: {.panel-tabset}
## Lausward
![](figures/Block%20AGuD/2_partition.jpg)
## Emsland
![](figures/Emsland%20C/2_partition.jpg)
:::
:::
::::
::: {.notes}
recursively merging the two clusters that yield the maximum
likelihood of a probability model over all possible merges
:::
## Empirical Approach
:::: {.columns}
::: {.column width="42%"}
### Main Clustering
**MBAHC**
Prior Clusters are used in MBAHC
The results of the MBAHC are used to initialize the EM Algorithm in the main Gaussian Model Based Clustering
**Main Clustering Results**
Right graph shows *Maximum A Posteriori (MAP) Classification*
Colour indicates cumulated log(density) of all components.
:::
::: {.column width="3%"}
<!-- empty column to create gap -->
:::
::: {.column width="55%"}
::: {.panel-tabset}
## Lausward
![](figures/Block%20AGuD/3_cluster.jpg)
## Emsland
![](figures/Emsland%20C/3_cluster.jpg)
:::
:::
::::
::: {.notes}
recursively merging the two clusters that yield the maximum
likelihood of a probability model over all possible merges
:::
## Empirical Approach
### Label Assignment
:::: {.columns}
::: {.column width="48%"}
We assign labels to the clusters using their mean $\mu$ and correlation $\rho$
Multiple clusters may describe one Generation State (e.g., along the diagonal)
:::
::: {.column width="4%"}
<!-- empty column to create gap -->
:::
::: {.column width="48%"}
```{r}
# Show the first rows of the cluster summary: the classification label,
# the two cluster means and the correlation.
library(dplyr)
# load() restores the saved objects into the session; the code below relies
# on it providing an object named `clusters`.
load("figures/Block AGuD/clusters.RDS")
head(select(clusters, classification, mu_t0, mu_t1, cor))
```
:::
::::
\begin{align}
\text{State} =
\begin{cases}
\color{#202020FF}{\text{Zero}} & (\mu_{t0} < 1) \land (\mu_{t1} < 1), \\
\text{MSO} & \left[ (\mu_{t0} > \zeta/10) \land (\mu_{t1} > \zeta / 10) \land (\left| \mu_{t0} - \mu_{t1} \right| > \zeta / 10) \right]\\ & \rightarrow \operatorname{argmin}(\mu_{t0} + \mu_{t1}), \\
\text{Max Capacity} & \rightarrow \operatorname{argmax}(\mu_{t0} + \mu_{t1}), \\
\text{Startup} & (\mu_{t1} \geq \zeta / 10) \land (\mu_{t0} < \gamma) \land (\rho < 0.3), \\
\text{Shutdown} & (\mu_{t0} \geq \zeta / 10) \land (\mu_{t1} < \gamma) \land (\rho < 0.3), \\
\text{Stable Operation} & \text{Remaining clusters with cor} > 0.8, \\
\text{Ramp Up} & \text{Remaining clusters: } \mu_{t1} > \mu_{t0}, \\
\text{Ramp Down} & \text{Remaining clusters: } \mu_{t1} < \mu_{t0}.
\end{cases}
\end{align}
::: {.notes}
recursively merging the two clusters that yield the maximum
likelihood of a probability model over all possible merges
:::
## Empirical Approach
:::: {.columns}
::: {.column width="39%"}
### Label Assignment
Right graphs show *assigned states*
The points are coloured according to
- MAP
- Probability (each pure colour reflects a probability of 1)
Some points below / above the diagonal are assigned to Ramp Up / Ramp Down
- Can be easily fixed for MAP
- Fixing probabilistic predictions not that easy
:::
::: {.column width="2%"}
<!-- empty column to create gap -->
:::
::: {.column width="59%"}
::: {.panel-tabset}
## LSW
![](figures/Block%20AGuD/4_assignments.jpg)
## LSW Pr
![](figures/Block%20AGuD/4_assignments_prob.jpg)
## LSW Pr
![](figures/Block%20AGuD/4_probability.jpg)
## EMS
![](figures/Emsland%20C/4_assignments.jpg)
## EMS Pr
![](figures/Emsland%20C/4_assignments_prob.jpg)
## EMS Pr
![](figures/Emsland%20C/4_probability.jpg)
:::
:::
::::
::: {.notes}
recursively merging the two clusters that yield the maximum
likelihood of a probability model over all possible merges
:::
## Empirical Approach
:::: {.columns}
::: {.column width="42%"}
### Label Assignment
*Fixing assignments*
Relabeling Ramp Up and Ramp Down MAP predictions is trivial:
\begin{align}
\text{State} =
\begin{cases}
\text{Ramp Up} & x_{t1} > x_{t0}, \\
\text{Ramp Down} & x_{t1} < x_{t0}.
\end{cases}
\end{align}
Fixing the probability array is more involved:
Find observations $x_{t1} < x_{t0}$ that cannot be "Ramp Up":
Set probability of all Ramp Up clusters to $0$.
Normalize the probabilities.
:::
::: {.column width="3%"}
<!-- empty column to create gap -->
:::
::: {.column width="55%"}
::: {.panel-tabset}
## LSW Pr
![](figures/Block%20AGuD/4_assignments_prob_fixed.jpg)
## LSW Pr
![](figures/Block%20AGuD/4_probability_fixed.jpg)
## EMS Pr
![](figures/Emsland%20C/4_assignments_prob_fixed.jpg)
## EMS Pr
![](figures/Emsland%20C/4_probability_fixed.jpg)
:::
:::
::::
## Outlook
<br>
<br>
- The approach works in general
- Conceptually simple
- Label assignment needs some more work
- Probabilistic statements may need adjustments for Ramp-Up Ramp-Down predictions
- Some kind of validation would be desirable
- Results will be used partly in another research project within the EfeMOD project

Binary file not shown.