\documentclass[12pt,t]{beamer}
\usepackage{graphicx}
\setbeameroption{hide notes}
\setbeamertemplate{note page}[plain]
% get rid of junk
\usetheme{default}
\beamertemplatenavigationsymbolsempty
\hypersetup{pdfpagemode=UseNone} % don't show bookmarks on initial view
% font
\usepackage{bm}
\usepackage{fontspec}
\setsansfont{TeX Gyre Heros}
\setbeamerfont{note page}{family*=pplx,size=\footnotesize} % Palatino for notes
% "TeX Gyre Heros can be used as a replacement for Helvetica"
% In Unix, unzip the following into ~/.fonts
% In Mac, unzip it, double-click the .otf files, and install using "FontBook"
% http://www.gust.org.pl/projects/e-foundry/tex-gyre/heros/qhv2.004otf.zip
% restore a standard LaTeX-like math font
\usepackage{amsmath}
\usepackage{unicode-math}
\setmathfont{Latin Modern Math}
%\newfontfamily\emojifont{Noto Emoji}
% inline an emoji PNG at roughly text size, lowered to sit on the text baseline
\newcommand{\inlineemoji}[2][1.2em]{%
\raisebox{-0.2em}{\includegraphics[height=#1]{#2}}%
}
%% named colors
%\definecolor{offwhite}{RGB}{249,242,215}
%\definecolor{foreground}{RGB}{255,255,255}
%\definecolor{background}{RGB}{24,24,24}
%\definecolor{title}{RGB}{107,174,214}
%\definecolor{gray}{RGB}{155,155,155}
%\definecolor{subtitle}{RGB}{102,255,204}
%\definecolor{hilight}{RGB}{102,255,204}
%\definecolor{vhilight}{RGB}{255,111,207}
%\definecolor{lolight}{RGB}{155,155,155}
%%\definecolor{green}{RGB}{125,250,125}
% named colors for white-background slides
\definecolor{offwhite}{RGB}{255,255,255}
\definecolor{foreground}{RGB}{34,34,34}
\definecolor{background}{RGB}{255,255,255}
\definecolor{title}{RGB}{0,82,155}
%\definecolor{gray}{RGB}{110,110,110}
\definecolor{gray}{RGB}{15,15,15}
\definecolor{subtitle}{RGB}{0,121,107}
\definecolor{hilight}{RGB}{0,121,107}
\definecolor{vhilight}{RGB}{180,0,102}
\definecolor{lolight}{RGB}{130,130,130}
%\definecolor{green}{RGB}{0,140,70}
% use those colors
\setbeamercolor{titlelike}{fg=title}
\setbeamercolor{subtitle}{fg=subtitle}
\setbeamercolor{institute}{fg=gray}
\setbeamercolor{normal text}{fg=foreground,bg=background}
\setbeamercolor{item}{fg=foreground} % color of bullets
\setbeamercolor{subitem}{fg=gray}
\setbeamercolor{itemize/enumerate subbody}{fg=gray}
\setbeamertemplate{itemize subitem}{{\textendash}}
\setbeamerfont{itemize/enumerate subbody}{size=\footnotesize}
\setbeamerfont{itemize/enumerate subitem}{size=\footnotesize}
% configure itemize spacing
% alas, it breaks itemize styling :(
%\usepackage{enumitem}
% second-level itemize (sub-itemize)
%\setlist[itemize,2]{itemsep=0.7em}
% page number
\setbeamertemplate{footline}{%
\raisebox{5pt}{\makebox[\paperwidth]{\hfill\makebox[20pt]{\color{gray}
\scriptsize\insertframenumber}}}\hspace*{5pt}}
% add a bit of space at the top of the notes page
\addtobeamertemplate{note page}{\setlength{\parskip}{12pt}}
% a few macros
\newcommand{\bi}{\begin{itemize}}
\newcommand{\ei}{\end{itemize}}
\newcommand{\ig}{\includegraphics}
\newcommand{\subt}[1]{{\footnotesize \color{subtitle} {#1}}}
\newcommand{\subtnc}[1]{{\footnotesize #1}}
% title info
\title{AI-Enhanced High-Accuracy Robotics for Industrial Applications}
\subtitle{Application for PhD Position (m/f/d) in Industrial Robotics}
\author{\href{https://abanbytes.eu}{David Madl}}
%\institute{\href{https://www.biostat.wisc.edu}{Biostatistics \& Medical Informatics} \\[2pt] \href{http://www.wisc.edu}{University of Wisconsin{\textendash}Madison}}
\date{%\href{http://kbroman.org}{\tt \scriptsize kbroman.org}
%\\[-4pt]
\href{https://github.com/cidermole}{\tt \scriptsize github.com/cidermole}
}
\begin{document}
% title slide
{
\setbeamertemplate{footline}{} % no page number here
\frame{
\titlepage
\note{
} } }
\begin{frame}{Me - personal interests}
\vspace{24pt}
\bi
%\item{\emojifont🏐 Volleyball}
%\item{\emojifont🧗 Bouldering}
%\item{\emojifont💃 Tango Argentino}
%\item{\emojifont♘ Chess}
%\item{\emojifont🌎 South America}
\item{\inlineemoji{Images/volley.png} Volleyball}
\item{\inlineemoji{Images/boulder.png} Bouldering}
\item{\inlineemoji{Images/tango.png} Tango Argentino}
\item{\inlineemoji{Images/chess.png} Chess}
\item{\inlineemoji{Images/sa.png} South America}
\ei
\end{frame}
\begin{frame}{MSc Thesis}
\subt{Handling out-of-vocabulary words in a domain adaptation setting in SMT}
%\vspace{12pt}
\begin{itemize}
\item{Phrase-based Statistical Machine Translation\vspace{0.5em}}
\begin{itemize}
\addtolength{\itemsep}{0.7em}
\item{Warren Weaver (1947):\\[0.5em]
\textit{``This [article in Russian] is really written in English, but it has been coded in some strange symbols. I will now proceed to decode.''}}
\item{Bayes' theorem \& independence assumptions:\\[0.5em]
$P(\text{en}|\text{ru}) = \frac{P(\text{ru}|\text{en}) P(\text{en})}{P(\text{ru})}$\\[0.5em]
$P(\text{ru}|\text{en}) = \prod_{i=1}^{M} P(\text{phrase\_ru}_{i}|\text{en})$ \hspace{0.5em} translation model\\[0.5em]
$P(\text{en}) = \prod_{k=1}^{L} P(w_{k}|w_{k-n}\dots w_{k-1})$ \hspace{0.5em} language model\\[0.5em]
$P(\text{ru})$ \hspace{0.5em} dropped normalization factor
}
\end{itemize}
\end{itemize}
{\tiny see e.g.\ Koehn et al.\ (2003), ``Statistical phrase-based translation''}
\note{
The rules for the translation model are more complex than shown here, because phrases can be split at different word boundaries.
The probabilities on the \textbf{right-hand side} are estimated from a training corpus.
The language model is an \textbf{n-gram model}, i.e.\ a Markov chain over fixed-length word histories.
The translation model obtains phrases from a preceding optimization called \textbf{Word Alignment}.
}
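% A hedged worked step (my addition, not in the original deck): why the
% denominator P(ru) can be dropped during decoding. Added as an extra speaker
% note so the visible slide is unchanged.
\note{
Decoding searches for the most probable English sentence; $P(\text{ru})$ is constant with respect to the English candidate, so it drops out of the maximization:
\[
\hat{\text{en}}
= \operatorname*{arg\,max}_{\text{en}} P(\text{en}|\text{ru})
= \operatorname*{arg\,max}_{\text{en}} \frac{P(\text{ru}|\text{en})\, P(\text{en})}{P(\text{ru})}
= \operatorname*{arg\,max}_{\text{en}} P(\text{ru}|\text{en})\, P(\text{en})
\]
}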
% {\color{hilight} b}
\end{frame}
\begin{frame}{MSc Thesis - Domain Adaptation 1}
\subtnc{Handling out-of-vocabulary words in a domain adaptation setting in SMT}
\vspace{12pt}
\bi
\item{{\color{hilight} test} and {\color{vhilight} train} datasets}\\[0.5em]
\item{{\color{hilight} medical} and {\color{vhilight} political} domains}\\[0.5em]
\item{{\color{hilight} 5 M} and {\color{vhilight} 50 M} word tokens}\\[0.5em]
\item{Domain adaptation:\\[0.5em]
${\color{hilight} P(\text{en}|\text{ru})} = \frac{\color{vhilight} P(\text{ru}|\text{en}) P(\text{en})}{P(\text{ru})}$\\[0.5em]
${\color{vhilight} P(\text{ru}|\text{en}) = \prod_{i=1}^{M} P(\text{phrase\_ru}_{i}|\text{en})}$ \hspace{0.5em} translation model\\[0.5em]
${\color{vhilight} P(\text{en}) = \prod_{k=1}^{L} P(w_{k}|w_{k-n}\dots w_{k-1})}$ \hspace{0.5em} language model\\[0.5em]
}
\ei
\note{
In domain adaptation, we have a distributional mismatch between training and test data.
Simply appending target domain text to a large training dataset is not optimal.
This is because the statistics of the original domain dominate.
}
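% A hedged arithmetic note (my addition), using the token counts from this
% slide to make "the statistics of the original domain dominate" concrete.
\note{
Concretely: concatenating the 5~M-token in-domain (medical) corpus with the 50~M-token out-of-domain (political) corpus weights out-of-domain counts roughly 10:1, so the estimated probabilities are dominated by out-of-domain statistics.
}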
\end{frame}
\begin{frame}{MSc Thesis - Domain Adaptation 2}
\subtnc{Handling out-of-vocabulary words in a domain adaptation setting in SMT}
\vspace{12pt}
\bi
\item{{\color{hilight} medical} and {\color{vhilight} political} domains}\\[0.5em]
\item{Mixture model:\\[0.5em]
$P(\text{ru}|\text{en}) = \alpha_1 {\color{hilight} P_1(\text{ru}|\text{en})} + \alpha_2 {\color{vhilight} P_2(\text{ru}|\text{en})} $\\[0.5em]
$P(\text{en}) = \alpha_1 {\color{hilight} P_1(\text{en})} + \alpha_2 {\color{vhilight} P_2(\text{en})} $\\[0.5em]
}
\item{Optimize quality measure:\\[0.5em]
$\operatorname*{arg\,max}_{\symbf{\alpha}} \text{BLEU}(\symbf{\alpha})$, $\sum_i \alpha_i = 1$}
\ei
\note{
We can do better by estimating two models, one on each domain.
Then we optimize the resulting translation quality based on the mixture parameters.
}
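% A hedged worked detail (my addition): with two domains, the simplex
% constraint makes the BLEU optimization a one-dimensional search.
\note{
With two components, the constraint $\sum_i \alpha_i = 1$ gives $\alpha_2 = 1 - \alpha_1$, so the optimization reduces to a one-dimensional search:
\[
\operatorname*{arg\,max}_{\alpha_1 \in [0,1]} \text{BLEU}(\alpha_1,\, 1 - \alpha_1)
\]
}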
\end{frame}
\begin{frame}{MSc Thesis - Word Alignment oracle}
\subt{Handling out-of-vocabulary words in a domain adaptation setting in SMT}
\begin{center}
\ig[width=0.6\textwidth]{Images/oov.png}
\end{center}
{\tiny source: Figure 6.1.3b, MSc Thesis}
\note{
The thesis topic I was assigned was to investigate words which could not be translated.
The oracle experiments are the most insightful. This one shows, for different training set sizes:
\begin{itemize}
\item in green, the \textbf{theoretical limit} from the training data,
\item in red, if \textbf{Word Alignment} had full statistics,
\item in blue, the actual errors.
\end{itemize}
}
\end{frame}
\begin{frame}{System Modeling 1}
\begin{center}
\[
\frac{d}{dt}\!\left(\frac{\partial L}{\partial \dot q_i}\right)
-
\frac{\partial L}{\partial q_i}
+
\frac{\partial D}{\partial \dot q_i}
=
\sum_{a=1}^{m}\lambda_a \frac{\partial f_a(q,t)}{\partial q_i},
\qquad i=1,\dots,n
\]
\end{center}
\note{
At the request of my colleague Hartl-Nesic, a quick overview of what I currently know how to do:
\begin{itemize}
\item Set up the linear differential equations from the potential and kinetic energy of the system.
\end{itemize}
}
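% A hedged worked example (my addition, not from the original deck): the
% Euler-Lagrange equation above applied to a simple pendulum (mass m, rod
% length r, angle beta from the vertical), with no dissipation (D = 0) and no
% constraints (lambda_a = 0).
\note{
Example: $L = T - V = \tfrac{1}{2} m r^2 \dot{\beta}^2 + m g r \cos\beta$, hence
\[
\frac{d}{dt}\!\left(\frac{\partial L}{\partial \dot{\beta}}\right)
- \frac{\partial L}{\partial \beta}
= m r^2 \ddot{\beta} + m g r \sin\beta = 0
\quad\Rightarrow\quad
\ddot{\beta} = -\frac{g}{r}\sin\beta
\]
}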
\end{frame}
\begin{frame}{System Modeling 2}
\begin{center}
\begin{figure}
\ig[width=0.6\textwidth]{Images/pendel_balken.jpg}
\caption{Pendulum with a bending beam}
\end{figure}
\end{center}
\bi
\item{Forces, $\sum_i \vec{F}_i = \frac{d\vec{p}}{dt} = m \frac{d^2\vec{x}}{dt^2}$}
\ei
\begin{center}
\[
\ddot{\beta}
=
\left(\frac{k}{m} \gamma \sin^2(\beta)\right)
+
\left(-\frac{g}{r} \cos(\beta) - \frac{k}{m} \gamma \cos^2(\beta)\right)
\]
% TODO: gamma ...
\end{center}
\note{
Unfortunately, the linear differential equations are exhausted very quickly.
One can linearize the model with the Jacobian matrix.
For identification, however, one needs ``interesting'' coordinates in the state space.
}
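% A hedged sketch (my addition): the Jacobian linearization mentioned above,
% applied to the simple-pendulum equation around the lower equilibrium.
\note{
Sketch: with state $\symbf{x} = (\beta, \dot{\beta})^T$ and $\dot{\symbf{x}} = \symbf{f}(\symbf{x})$, linearize with $A = \partial \symbf{f}/\partial \symbf{x}$ at the equilibrium. For $\ddot{\beta} = -\frac{g}{r}\sin\beta$ around $\beta = 0$:
\[
A = \begin{pmatrix} 0 & 1 \\ -\frac{g}{r} & 0 \end{pmatrix},
\qquad
\dot{\symbf{x}} \approx A \symbf{x}
\]
}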
\end{frame}
\end{document}