summaryrefslogtreecommitdiff
path: root/sampling_alg_lac2020/LAC-20.tex
blob: 285323e74825d5769a75f2d5bcecd368b3066d4b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
% Template LaTeX file for LAC-20 papers
%
% To generate the correct references using BibTeX, run
%     latex, bibtex, latex, latex
% modified...
% - from DAFx-00 to DAFx-02 by Florian Keiler, 2002-07-08
% - from DAFx-02 to DAFx-03 by Gianpaolo Evangelista
% - from DAFx-05 to DAFx-06 by Vincent Verfaille, 2006-02-05
% - from DAFx-06 to DAFx-07 by Vincent Verfaille, 2007-01-05
%                          and Sylvain Marchand, 2007-01-31
% - from DAFx-07 to DAFx-08 by Henri Penttinen, 2007-12-12
%                          and Jyri Pakarinen 2008-01-28
% - from DAFx-08 to DAFx-09 by Giorgio Prandi, Fabio Antonacci 2008-10-03
% - from DAFx-09 to DAFx-10 by Hannes Pomberger 2010-02-01
% - from DAFx-10 to DAFx-12 by Jez Wells 2011
% - from DAFx-12 to DAFx-14 by Sascha Disch 2013
% - from DAFx-15 to DAFx-16 by Pavel Rajmic 2015
% - from DAFx-16 to IFC-18 by Romain Michon 2018
% - from IFC-18 to LAC-19 by Romain Michon 2019
% - from LAC-19 to LAC-20 by Jean-Michaël Celerier 2020
%
% Template with hyper-references (links) active after conversion to pdf
% (with the distiller) or if compiled with pdflatex.
%
% 20060205: added package 'hypcap' to correct hyperlinks to figures and tables
%                      use of \papertitle and \paperauthorA, etc for same title in PDF and Metadata
%
% 1) Please compile using lualatex, latex or pdflatex.
% 2) If using pdflatex, you need your figures in a file format other than eps! e.g. png or jpg is working
% 3) Please use "papertitle" and "pdfauthor" definitions below

%------------------------------------------------------------------------------------------
%  !  !  !  !  !  !  !  !  !  !  !  ! user defined variables  !  !  !  !  !  !  !  !  !  !  !  !  !  !
% Please use these commands to define title and author(s) of the paper:
% Title of the paper; reused for \title and for the PDF metadata (pdftitle).
\newcommand*{\papertitle}{On Sampling Algorithms for Drums}
% Author names A-D feed the pdfauthor metadata field. They are currently
% left empty; the named variants are kept commented out.
% The starred form keeps these macros non-\long, exactly like a plain \def.
% \newcommand*{\paperauthorA}{André Nusser}
% \newcommand*{\paperauthorB}{Bent Bisballe Nyeng}
\newcommand*{\paperauthorA}{}
\newcommand*{\paperauthorB}{}
\newcommand*{\paperauthorC}{}
\newcommand*{\paperauthorD}{}

% Authors' affiliations have to be set below

%------------------------------------------------------------------------------------------
% Document class plus the conference style file.
\documentclass[twoside,a4paper]{article}
\usepackage{LAC-20}
% Math support; amsthm supplies \theoremstyle and \newtheorem used further down.
\usepackage{amsmath,amssymb,amsfonts,amsthm}
\usepackage{euscript}
% Engine-detection conditionals (\ifpdf, \ifluatex, \ifxetex) consumed by the
% hyperref/graphicx setup later in the preamble.
\usepackage{ifpdf}
\usepackage{ifluatex}
\usepackage{ifxetex}

% Source-code listings: monospaced text on a light grey background,
% tab width 4, captions at the bottom, over-long lines are wrapped.
\usepackage{color}
\usepackage{listings}
\definecolor{mygrey}{rgb}{0.96,0.96,0.96}
\lstset{
  tabsize=4,
  basicstyle=\ttfamily,
  backgroundcolor=\color{mygrey},
  captionpos=b,
  breaklines=true
}

\usepackage[english]{babel}
\usepackage{caption}
% NOTE(review): color was already requested above, so listing it again here is redundant.
\usepackage{subfig, color}
% Start page numbering at 1.
\setcounter{page}{1}
% NOTE(review): \ninept is not defined in this file; presumably it comes from LAC-20.sty -- confirm.
\ninept

\usepackage{times}
% pdf-tex settings: detect the engine (LuaLaTeX, XeLaTeX, pdfLaTeX or
% LaTeX+dvips) and load hyperref/graphicx with matching options.
% All branches set the same hyperref options; the PDF title/author metadata
% is taken from \papertitle and \paperauthorA..D defined at the top.
\ifluatex
  \usepackage[
    pdftitle={\papertitle},
    pdfauthor={\paperauthorA, \paperauthorB, \paperauthorC, \paperauthorD},
    colorlinks=false, % links are drawn as colored boxes instead of colored text
    bookmarksnumbered, % use section numbers with bookmarks
    pdfstartview=XYZ % start with zoom=100% instead of full screen; especially useful if working with a big screen :-)
  ]{hyperref}

  % Recent LuaTeX exposes the compression level as a \pdfvariable rather than
  % as \pdfcompresslevel, so map the pdfTeX name onto it before assigning.
  \edef\pdfcompresslevel{\pdfvariable compresslevel}
  \pdfcompresslevel=9
  \usepackage{graphicx}

  \usepackage[figure,table]{hypcap}
  \usepackage{fontspec}
\else
  \ifxetex
    \usepackage[
      pdftitle={\papertitle},
      pdfauthor={\paperauthorA, \paperauthorB, \paperauthorC, \paperauthorD},
      colorlinks=false, % links are drawn as colored boxes instead of colored text
      bookmarksnumbered, % use section numbers with bookmarks
      pdfstartview=XYZ % start with zoom=100% instead of full screen; especially useful if working with a big screen :-)
    ]{hyperref}

    % No \pdfcompresslevel assignment here: it is a pdfTeX primitive that
    % XeTeX does not define, so assigning to it stops the run with
    % "Undefined control sequence". The PDF is written by the xdvipdfmx
    % back end, which applies its own stream compression.
    \usepackage{graphicx}

    \usepackage[figure,table]{hypcap}
    \usepackage{fontspec}
  \else
    \usepackage[utf8]{inputenc}
    \usepackage[T1]{fontenc}
    \ifpdf % compiling with pdflatex
      \usepackage[pdftex,
        pdftitle={\papertitle},
        pdfauthor={\paperauthorA, \paperauthorB, \paperauthorC, \paperauthorD},
        colorlinks=false, % links are drawn as colored boxes instead of colored text
        bookmarksnumbered, % use section numbers with bookmarks
        pdfstartview=XYZ % start with zoom=100% instead of full screen; especially useful if working with a big screen :-)
      ]{hyperref}
      \pdfcompresslevel=9
      \usepackage[pdftex]{graphicx}
      \usepackage[figure,table]{hypcap}
      % pdflatex cannot include eps figures; search these formats instead
      \DeclareGraphicsExtensions{.png,.jpg,.pdf}
    \else % compiling with latex
      \usepackage[dvips]{epsfig,graphicx}
      \usepackage[dvips,
        colorlinks=false, % no color links
        bookmarksnumbered, % use section numbers with bookmarks
        pdfstartview=XYZ % start with zoom=100% instead of full screen
      ]{hyperref}
      % hyperrefs are active in the pdf file after conversion
      \usepackage[figure,table]{hypcap}
      \DeclareGraphicsExtensions{.eps}
    \fi
  \fi
\fi

% ====================================
% OWN PACKAGES

% For utf8 encoding
% NOTE(review): this is loaded unconditionally, i.e. also when compiling with
% LuaLaTeX/XeLaTeX where fontspec is used, and a second time for pdfLaTeX
% (the engine-specific branch above already loads it) -- confirm it is still wanted here.
\usepackage[utf8]{inputenc}

% Use for automatic line breaks in tabular (column type X)
\usepackage{tabularx}
% For abbreviations using \newcommand (provides \xspace)
\usepackage{xspace}
% For the >{...} in tabular
\usepackage{array}
% For t o d o notes. Make the background white to make them not that distracting.
% (Currently disabled; the \todo macros defined further down are used instead.)
% \usepackage[textwidth=3.5cm,color=white]{todonotes}
% For better cite commands; numeric citation style
\usepackage[numbers]{natbib}
% math stuff
% NOTE(review): amsmath, amsthm and amssymb are already requested near the top of the preamble.
% mathtools provides \DeclarePairedDelimiter, which the \abs hack below relies on.
\usepackage{amsmath,amsthm,amssymb}
\usepackage{mathtools}
\usepackage{nicefrac}
\usepackage{csquotes}
% Uncomment the next line to draw frames around all the layout boxes for checking where they are violated.
%\usepackage{showframe}

% nice theorem and proof environments. taken from Ema's template
% Every environment below is declared without a shared-counter argument,
% so each one is numbered by its own independent counter
% (Theorem 1, Lemma 1, Definition 1, ...).

% Environments typeset in amsthm's "plain" style.
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}
\newtheorem{proposition}{Proposition}
\newtheorem{lemma}{Lemma}
\newtheorem{corollary}{Corollary}
\newtheorem{claim}{Claim}
\newtheorem{fact}{Fact}

% Environments typeset in amsthm's "definition" style.
\theoremstyle{definition}
\newtheorem{definition}{Definition}
\newtheorem{example}{Example}

% Environments typeset in amsthm's "remark" style.
\theoremstyle{remark}
\newtheorem{remark}{Remark}
\newtheorem{observation}{Observation}
\newtheorem{conjecture}{Conjecture}

% Handy abbreviations: \whp, \ie and \wrt expand to the abbreviation text.
% \xspace adds the following space automatically (and omits it before
% punctuation), so the macros can be written as "\ie the ..." in running text.
% Each abbreviation ends in a period; without further care TeX would treat
% that period as the end of a sentence and stretch the following space.
% \@ resets the space factor so a normal interword space is used instead.
\newcommand{\whp}{w.h.p.\@\xspace}
\newcommand{\ie}{i.e.\@\xspace}
\newcommand{\wrt}{w.r.t.\@\xspace}

% \abs and \norm hacks: paired delimiters with swapped star semantics.
% \DeclarePairedDelimiter (mathtools) creates a command whose starred form
% auto-sizes the delimiters with \left/\right and whose unstarred form does
% not. The wrappers below flip this, so that
%   \abs{x}  -> auto-sized delimiters (calls \oldabs*)
%   \abs*{x} -> normal-sized delimiters (calls \oldabs)
% and likewise for \norm.
\DeclarePairedDelimiter\abs{\lvert}{\rvert}
\makeatletter
% \norm must exist before it is saved as \oldnorm; otherwise \oldnorm is
% undefined and every use of \norm stops with "Undefined control sequence".
% Declare it only if nothing else has defined it already.
\@ifundefined{norm}{\DeclarePairedDelimiter\norm{\lVert}{\rVert}}{}
\let\oldabs\abs
\def\abs{\@ifstar{\oldabs}{\oldabs*}}
\let\oldnorm\norm
\def\norm{\@ifstar{\oldnorm}{\oldnorm*}}
\makeatother

% TODO notes: a bold label followed by the note text, all set in one color.
% \coloredtodo{<color>}{<label>}{<text>} is the shared building block.
\newcommand{\coloredtodo}[3]{\textcolor{#1}{\textbf{#2} #3}}
% General note (red) and one variant per author (green / blue).
\newcommand{\todo}[1]{\coloredtodo{red}{TODO:}{#1}}
\newcommand{\todoandre}[1]{\coloredtodo{green}{TODO (André):}{#1}}
\newcommand{\todobent}[1]{\coloredtodo{blue}{TODO (Bent):}{#1}}

% ugly hack: replace the sectioning command \paragraph by an inline run-in
% heading -- the title in bold, followed by a period and a space.
% The redefined command takes exactly one mandatory argument.
% NOTE(review): the forms \paragraph*{...} and \paragraph[short]{...} are
% presumably no longer handled as sectioning variants -- confirm nothing relies on them.
\renewcommand{\paragraph}[1]{\textbf{#1}. }

% ====================================

% The printed title reuses \papertitle, the same macro that feeds the
% pdftitle metadata, so both always agree.
\title{\papertitle}

%-------------SINGLE-AUTHOR HEADER STARTS (uncomment below if your paper has a single author)-----------------------
% \affiliation{
% \paperauthorA \,\sthanks{This work was supported by the XYZ Foundation}}
% {\href{https://scrime.u-bordeaux.fr}{SCRIME} \\ Université de Bordeaux, France \\
% {\tt \href{mailto:ping@linuxaudio.org}{ping@linuxaudio.org}}
% }
%-----------------------------------SINGLE-AUTHOR HEADER ENDS------------------------------------------------------

%---------------TWO-AUTHOR HEADER STARTS (uncomment below if your paper has two authors)-----------------------
% \twoaffiliations{
% \paperauthorA % \, \sthanks{This work was supported by the XYZ Foundation}
% }
% {\href{https://drumgizmo.org}{DrumGizmo} \\ Saarbr\"ucken, Germany \\
% {\tt \href{mailto:andre.nusser@gmail.com}{andre.nusser@gmail.com}}
% }
% {\paperauthorB % \,\sthanks{This guy is a very good fellow}
% }
% {\href{https://drumgizmo.org}{DrumGizmo} \\ Aarhus, Denmark \\
% {\tt \href{mailto:deva@aasimon.org}{deva@aasimon.org}}
% }
%-------------------------------------TWO-AUTHOR HEADER ENDS------------------------------------------------------

%---------------THREE-AUTHOR HEADER STARTS (uncomment below if your paper has three authors)-----------------------
% \threeaffiliations{
% \paperauthorA \,\sthanks{This work was supported by the XYZ Foundation}}
% {\href{https://scrime.u-bordeaux.fr}{SCRIME} \\ Université de Bordeaux, France \\
% {\tt \href{mailto:ping@linuxaudio.org}{ping@linuxaudio.org}}
% }
% {\paperauthorB \,\sthanks{This guy is a very good fellow}}
% {\href{https://ccrma.stanford.edu}{CCRMA} \\ Stanford University, USA \\
% {\tt \href{mailto:lac@ccrma.stanford.edu}{lac@ccrma.stanford.edu}}
% }
% {\paperauthorC \,\sthanks{Illustrious contributor}}
% {\href{http://www.musikwissenschaft.uni-mainz.de/Musikinformatik/}{Johannes Gutenberg University (JGU)} \\  Mainz, Germany\\
% {\tt \href{mailto:lac@uni-mainz.de}{lac@uni-mainz.de}}
% }
%-------------------------------------THREE-AUTHOR HEADER ENDS------------------------------------------------------

%----------------FOUR-AUTHOR HEADER STARTS (uncomment below if your paper has four authors)-----------------------
% \fouraffiliations{
% \paperauthorA \,\sthanks{This work was supported by the XYZ Foundation}}
% {\href{https://scrime.u-bordeaux.fr}{SCRIME} \\ Université de Bordeaux, France \\
% {\tt \href{mailto:ping@linuxaudio.org}{ping@linuxaudio.org}}
% }
% {\paperauthorB \,\sthanks{This guy is a very good fellow}}
% {\href{https://ccrma.stanford.edu}{CCRMA} \\ Stanford University, USA \\
% {\tt \href{mailto:lac@ccrma.stanford.edu}{lac@ccrma.stanford.edu}}
% }
% {\paperauthorC \,\sthanks{Illustrious contributor}}
% {\href{http://www.musikwissenschaft.uni-mainz.de/Musikinformatik/}{Johannes Gutenberg University (JGU)} \\  Mainz, Germany\\
% {\tt \href{mailto:lac@uni-mainz.de}{lac@uni-mainz.de}}
% }
% {\paperauthorD \,\sthanks{Thanks to the predecessors for the templates}}
% {\href{https://c-base.org/}{C-Base} \\ Berlin, Germany \\
% {\tt \href{mailto:lac@c-base.com}{lac@c-base.com}}
% }
%-------------------------------------FOUR-AUTHOR HEADER ENDS------------------------------------------------------

\begin{document}

\maketitle

\begin{abstract}
\noindent Sampling drum kits well is a difficult and challenging task. In particular, building a drum kit sample bank with different velocity layers requires producing samples of very similar loudness, as changing the gain of a sample after recording makes it sound less natural. An approach that avoids this issue is to not categorize the samples into fixed groups but to simply calculate their loudness and then dynamically choose a sample when one corresponding to, e.g., a specific MIDI value is requested. We present a first investigation of algorithms performing this selection and discuss their advantages and disadvantages. We implemented the seemingly best candidate in DrumGizmo -- a FLOSS drum plugin -- and conduct experiments on how our suggested algorithms perform on sampled drum kits.
\end{abstract}

\section{Introduction}
\todoandre{Talk about the general problem of sample selection.}
\todoandre{Limit scope to drums.}
\todoandre{Talk about round robin.}
\todoandre{Mention drawbacks.}
\todoandre{Introduce high-level ideas of our work.}
\todoandre{Make difference between humanization and sample selection clear.}

\subsection{Related Work}
\todo{I don't really know what to write, except about round robin. Is there any other common method or any academic literature? Are there other methods in open source programs?}
\todobent{Discuss DrumGizmo's old sampling algorithm briefly.}

\subsection{Our Contribution}
\todoandre{The main points are: Identify important aspects that sampling algorithms in this setting have to fulfill; Suggest a new algorithm based on those requirements; Implement and conduct experiments on this implementation.}

\section{Preliminaries}
\todobent{Talk about how the drum kit samples are usually created; very briefly.}
\todobent{Talk about loudness computation of samples.}
\todo{Mathematical basics (if there are any important ones).}
\todo{Formalize the setting, i.e.\ what is the input/output of our algorithm?}
\todoandre{Make terminology and notation clear and check for consistency in the document.}

\subsection{Notation and Terminology}
We use the following notation throughout this article. An \emph{instrument} is considered to be one of the drums of the drum kit that we sampled. A \emph{sample} (denoted by $s, s', \dots$) is a recording of one hit on a specific instrument. The \emph{power} of a sample (denoted by $p, p', \dots$) is its perceived loudness and can be expressed by any common loudness measure of an audio clip. With the term \emph{velocity} (denoted by $v, v', \dots$), we refer to the attack velocity of a MIDI note, which thus lies between 0 and 127. We consider time in a discretized way, and thus a \emph{time point} is an integer value intuitively referring to the number of time steps passed since the beginning of time. For a sample $s$, we denote by $t_s$ the time point at which the sample was last played.

\section{Requirements}

% \todoandre{Intuitively discuss the requirements of a good sampling algorithm.}
We now discuss which requirements a good sampling algorithm intuitively has to fulfill. Such an algorithm faces a trade-off between two main objectives: choosing a sample that is close to the requested power value, while not choosing the same sample again too soon after it was last used. Note that if we just wanted to be as close as possible to the requested power value, then we would always choose the closest sample. However, if we then play a sequence of hits on the same instrument at the same power level, we play the same sample every time and thereby obtain a robotic sound. Thus, we also want to consider other samples whose power is not too far from the requested value.

% \todoandre{List the requirements one by one and discuss them. Try to formalize them in some way.}
More concretely, we aim to fulfill the following requirements with our proposed algorithm.
\begin{description}
	\item[Close Sample:] The chosen sample should be reasonably close to the requested power value, such that the listener perceives it as being played at the same velocity.
	\item[Avoid Same Samples:] When we have multiple samples to choose from, we should always take one that was last played far enough in the past to avoid a robotic sound.
	\item[Randomization:] Furthermore, to avoid patterns (e.g., as in round robin, where exactly every $n$th hit sounds the same when we have $n$ samples in our velocity group), we want some randomization.
	\item[Locality:] If two samples have a very similar power value, they should also be treated similarly by the algorithm. In other words, locally, samples should have almost the same probability of being chosen.
\end{description}

We now formalize the requirements stated above. Let $p, p'$ be two power levels. We define their dissimilarity to simply be their distance $\abs{p - p'}$. Thus, if $p$ is the input power value and $p'$ is the power value of the chosen sample, we want to minimize the above term. Let $s$ be a sample and $t_s$ the time point at which it was last played. When we are queried for a sample at time $t'$, then for $s$ to be a good choice, we want $\abs{t_s - t'}$ to be reasonably large. Again, we just use the distance between the current time step and the last time step at which a sample was used. Randomization is difficult to formalize in a simple way in this context; thus, we simply require that for the same history, different outcomes of choosing a sample should be possible. We also state the last requirement in a more intuitive than formal way. Assume a sample is requested for the power value $p$ and two samples $s, s'$ have very similar power values. Then, if we exchange $t_s$ and $t_{s'}$, the probability of choosing $s$ over $s'$ should be roughly the same as if we do not exchange them.

\section{Algorithm}
\todoandre{We have a multi-criteria optimization!}
\todoandre{Talk about the general idea of a potential function.}
\todoandre{Introduce the potential function of the algorithm.}
\todoandre{Talk about the single terms of the potential function.}
\todoandre{Maybe add some pseudo-code to make things easier to understand?}

\section{Implementation}
\todobent{Give a short introduction to DrumGizmo, including a link to the git repository.}
\todo{Talk about the timeline, i.e., when were the releases and what is still unreleased?}
\todoandre{Talk about how the sampling algorithm was implemented}
\todoandre{Add some of the source code to the paper?}
\todoandre{Give less important implementation details, e.g., like adaptive search starting from the most promising value}

\section{Experiments}
\todoandre{Talk about the setup.}
\todoandre{Talk about what the experiments should show: two close samples are chosen similarly often; playing the same MIDI note plays a reasonably varied sample set; average distance of one sample}
\todoandre{Experiments are: playing fast sweeps (with multiple hits per velocity); playing a single note over and over again at the same velocity; sound examples that people can listen to online?}
\todoandre{Do beautiful tables and plots here}
\todoandre{Summarize experiments}

\section{Conclusion and Future Work}
\todoandre{Recapitulate what was done in this paper. Highlight some of the difficulties and surprises.}
\todoandre{List future work: transforming the loudness space; refine the objective function; adapt algorithm to other instruments/settings; study to see what sounds good to people and do they actually hear the difference?}

\section{Acknowledgements}
\todo{Thank people for testing?}

%\newpage
\nocite{*}
\bibliographystyle{IEEEbib}
\bibliography{LAC-20} % requires file LAC-20.bib

\end{document}