add MSc thesis slides
BIN  Images/oov.png  (new file; binary file not shown; 59 KiB)
124  presentation.tex
@@ -9,6 +9,7 @@
\hypersetup{pdfpagemode=UseNone} % don't show bookmarks on initial view

% font
\usepackage{bm}
\usepackage{fontspec}
\setsansfont{TeX Gyre Heros}
\setbeamerfont{note page}{family*=pplx,size=\footnotesize} % Palatino for notes
@@ -17,6 +18,11 @@
% On a Mac, unzip it, double-click the .otf files, and install using "FontBook"
% http://www.gust.org.pl/projects/e-foundry/tex-gyre/heros/qhv2.004otf.zip

% restore a standard LaTeX-like math font
\usepackage{amsmath}
\usepackage{unicode-math}
\setmathfont{Latin Modern Math}

%\newfontfamily\emojifont{Noto Emoji}
\newcommand{\inlineemoji}[2][1.2em]{%
\raisebox{-0.2em}{\includegraphics[height=#1]{#2}}%
@@ -39,7 +45,8 @@
\definecolor{foreground}{RGB}{34,34,34}
\definecolor{background}{RGB}{255,255,255}
\definecolor{title}{RGB}{0,82,155}
\definecolor{gray}{RGB}{110,110,110}
%\definecolor{gray}{RGB}{110,110,110}
\definecolor{gray}{RGB}{15,15,15}
\definecolor{subtitle}{RGB}{0,121,107}
\definecolor{hilight}{RGB}{0,121,107}
\definecolor{vhilight}{RGB}{180,0,102}
@@ -58,6 +65,13 @@
\setbeamerfont{itemize/enumerate subbody}{size=\footnotesize}
\setbeamerfont{itemize/enumerate subitem}{size=\footnotesize}

% configure itemize spacing

% alas, it breaks itemize styling :(
%\usepackage{enumitem}
% second-level itemize (sub-itemize)
%\setlist[itemize,2]{itemsep=0.7em}

% page number
\setbeamertemplate{footline}{%
\raisebox{5pt}{\makebox[\paperwidth]{\hfill\makebox[20pt]{\color{gray}
@@ -71,15 +85,16 @@
\newcommand{\ei}{\end{itemize}}
\newcommand{\ig}{\includegraphics}
\newcommand{\subt}[1]{{\footnotesize \color{subtitle} {#1}}}
\newcommand{\subtnc}[1]{{\footnotesize #1}}

% title info
\title{AI-Enhanced High-Accuracy Robotics for Industrial Applications}
\subtitle{}
\subtitle{Application for PhD-Position (m/f/d) in Industrial Robotics}
\author{\href{https://abanbytes.eu}{David Madl}}
%\institute{\href{https://www.biostat.wisc.edu}{Biostatistics \& Medical Informatics} \\[2pt] \href{http://www.wisc.edu}{University of Wisconsin{\textendash}Madison}}
\date{%\href{http://kbroman.org}{\tt \scriptsize kbroman.org}
%\\[-4pt]
\href{https://github.com/kbroman}{\tt \scriptsize github.com/cidermole}
\href{https://github.com/cidermole}{\tt \scriptsize github.com/cidermole}
}
@@ -114,4 +129,107 @@
\end{frame}


\begin{frame}{MSc Thesis}
\subt{Handling out-of-vocabulary words in a domain adaptation setting in SMT}

%\vspace{12pt}

\begin{itemize}
\item{Phrase-based Statistical Machine Translation\vspace{0.5em}}
\begin{itemize}
\addtolength{\itemsep}{0.7em}
\item{Warren Weaver (1947):\\[0.5em]
\textit{``This [article in Russian] is really written in English, but it has been coded in some strange symbols. I will now proceed to decode.''}}
\item{Bayes' theorem \& independence assumptions:\\[0.5em]
$P(\text{en}|\text{ru}) = \frac{P(\text{ru}|\text{en}) P(\text{en})}{P(\text{ru})}$\\[0.5em]
$P(\text{ru}|\text{en}) = \prod_{i=1}^{M} P(\text{phrase\_ru}_{i}|\text{en})$ \hspace{0.5em} translation model\\[0.5em]
$P(\text{en}) = \prod_{k=1}^{L} P(w_{k}|w_{k-n+1}...w_{k-1})$ \hspace{0.5em} language model\\[0.5em]
$P(\text{ru})$ \hspace{0.5em} dropped normalization factor (constant for a given input)
}
\end{itemize}
\end{itemize}

{\tiny see e.g.\ (Koehn et al.\ 2003, ``Statistical phrase-based translation'')}

\note{
The rules for the translation model are more complex than shown here, because of the possibility of phrase splits at different word boundaries.

The probabilities on the \textbf{right-hand side} are estimated from a training corpus.
The language model is an \textbf{n-gram model}, estimated as the transition probabilities of a Markov chain whose state is the preceding $n-1$ words.
The translation model obtains phrases from a previous optimization called \textbf{Word Alignment}.
}

% {\color{hilight} b}

\end{frame}
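To make the language-model factor above concrete, here is a minimal sketch of maximum-likelihood n-gram estimation from counts. The toy corpus, the trigram order, and all names are illustrative assumptions, not taken from the thesis:

# Minimal sketch: maximum-likelihood trigram language model (n = 3).
# Toy corpus and names are illustrative assumptions, not from the thesis.
from collections import Counter

def train_ngram_lm(sentences, n=3):
    """Count n-grams and their (n-1)-gram histories."""
    ngrams, histories = Counter(), Counter()
    for words in sentences:
        padded = ["<s>"] * (n - 1) + words + ["</s>"]
        for i in range(len(padded) - n + 1):
            history, word = tuple(padded[i:i + n - 1]), padded[i + n - 1]
            ngrams[history + (word,)] += 1
            histories[history] += 1
    return ngrams, histories

def prob(ngrams, histories, history, word):
    """MLE estimate P(word | history); zero for unseen n-grams."""
    h = tuple(history)
    return ngrams[h + (word,)] / histories[h] if histories[h] else 0.0

corpus = [["this", "is", "a", "test"], ["this", "is", "another", "test"]]
ngrams, histories = train_ngram_lm(corpus)
print(prob(ngrams, histories, ["this", "is"], "a"))  # 0.5

Real systems additionally smooth these estimates (e.g. Kneser-Ney), since unseen n-grams would otherwise receive probability zero.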

\begin{frame}{MSc Thesis - Domain Adaptation 1}
\subtnc{Handling out-of-vocabulary words in a domain adaptation setting in SMT}

\vspace{12pt}

\bi
\item{{\color{hilight} test} and {\color{vhilight} train} datasets}\\[0.5em]
\item{{\color{hilight} medical} and {\color{vhilight} political} domains}\\[0.5em]
\item{{\color{hilight} 5 M} and {\color{vhilight} 50 M} word tokens}\\[0.5em]
\item{Domain adaptation:\\[0.5em]
${\color{hilight} P(\text{en}|\text{ru})} = \frac{\color{vhilight} P(\text{ru}|\text{en}) P(\text{en})}{P(\text{ru})}$\\[0.5em]
${\color{vhilight} P(\text{ru}|\text{en}) = \prod_{i=1}^{M} P(\text{phrase\_ru}_{i}|\text{en})}$ \hspace{0.5em} translation model\\[0.5em]
${\color{vhilight} P(\text{en}) = \prod_{k=1}^{L} P(w_{k}|w_{k-n+1}...w_{k-1})}$ \hspace{0.5em} language model\\[0.5em]
}
\ei

\note{
In domain adaptation, we have a distributional mismatch between training and test data.
Simply appending target-domain text to a large training dataset is not optimal,
because the statistics of the original domain dominate.
}

\end{frame}
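A hypothetical toy calculation of the note's point, with counts invented for illustration (not thesis data): under plain concatenation, the ten-times-larger out-of-domain corpus swamps an in-domain translation preference.

# Toy illustration (hypothetical counts): why concatenation lets the larger
# out-of-domain corpus dominate a phrase's translation probability.
# Suppose a Russian word translates as "medication" in medical text
# but as "narcotic" in political text.
in_domain  = {"medication": 90, "narcotic": 10}    # from the 5 M-token corpus
out_domain = {"medication": 100, "narcotic": 900}  # from the 50 M-token corpus

def mle(counts):
    total = sum(counts.values())
    return {w: c / total for w, c in counts.items()}

concat = {w: in_domain[w] + out_domain[w] for w in in_domain}
print(mle(in_domain))  # {'medication': 0.9, 'narcotic': 0.1}  <- desired in-domain
print(mle(concat))     # {'medication': 0.173, 'narcotic': 0.827}  <- dominated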

\begin{frame}{MSc Thesis - Domain Adaptation 2}
\subtnc{Handling out-of-vocabulary words in a domain adaptation setting in SMT}

\vspace{12pt}

\bi
\item{{\color{hilight} medical} and {\color{vhilight} political} domains}\\[0.5em]
\item{Mixture model:\\[0.5em]
$P(\text{ru}|\text{en}) = \alpha_1 {\color{hilight} P_1(\text{ru}|\text{en})} + \alpha_2 {\color{vhilight} P_2(\text{ru}|\text{en})} $\\[0.5em]
$P(\text{en}) = \alpha_1 {\color{hilight} P_1(\text{en})} + \alpha_2 {\color{vhilight} P_2(\text{en})} $\\[0.5em]
}
\item{Optimize quality measure:\\[0.5em]
$\operatorname*{argmax}_{\symbf{\alpha}} \text{BLEU}(\symbf{\alpha})$, \hspace{0.5em} $\sum_i \alpha_i = 1$}
\ei

\note{
We can do better by estimating two models, one on each domain.
Then we optimize translation quality over the mixture parameters.
}

\end{frame}
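A minimal sketch of the tuning loop implied by the argmax above. The decode() and bleu() arguments are placeholders for a real SMT decoder and BLEU scorer, and the grid search is just one simple optimizer choice, not necessarily the thesis's method:

# Sketch: tune the mixture weight on a dev set. alpha_2 = 1 - alpha_1, so the
# constraint sum_i alpha_i = 1 holds by construction. decode() and bleu() are
# hypothetical placeholders.
import numpy as np

def tune_mixture(dev_source, dev_reference, decode, bleu, steps=21):
    best_alpha, best_score = None, -1.0
    for alpha1 in np.linspace(0.0, 1.0, steps):
        weights = (alpha1, 1.0 - alpha1)          # interpolation weights for P_1, P_2
        hypotheses = decode(dev_source, weights)  # translate dev set under these weights
        score = bleu(hypotheses, dev_reference)
        if score > best_score:
            best_alpha, best_score = weights, score
    return best_alpha, best_score

In practice, SMT systems tune such weights with minimum error rate training (MERT, Och 2003) rather than a plain grid sweep.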

\begin{frame}{MSc Thesis - Word Alignment oracle}
\subt{Handling out-of-vocabulary words in a domain adaptation setting in SMT}

\begin{center}
\ig[width=0.6\textwidth]{Images/oov.png}
\end{center}

{\tiny source: Figure 6.1.3b, MSc Thesis}

\note{
My assigned thesis topic was to investigate words that could not be translated.
The oracle experiments are the most insightful. This one shows, for different training set sizes:
\begin{itemize}
\item in green, the \textbf{theoretical limit} from the training data,
\item in red, the errors if \textbf{Word Alignment} had full statistics,
\item in blue, the actual errors.
\end{itemize}
}

\end{frame}
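A sketch of how the green curve's "theoretical limit" could be measured, assuming (the slide does not confirm this) that it counts test tokens absent from each training subset's vocabulary; names are illustrative:

# Sketch: OOV rate of a fixed test set against growing training-set sizes.
# Assumes the "theoretical limit" curve measures vocabulary coverage of the
# training data; not taken from the thesis.
def oov_rate(train_tokens, test_tokens):
    vocab = set(train_tokens)
    oov = sum(1 for w in test_tokens if w not in vocab)
    return oov / len(test_tokens)

def coverage_curve(train_tokens, test_tokens, sizes):
    """OOV rate when only the first `size` training tokens are available."""
    return [(size, oov_rate(train_tokens[:size], test_tokens)) for size in sizes]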

\end{document}