summaryrefslogtreecommitdiff
path: root/sampling_alg_lac2020/LAC-20.tex
blob: 77976b88672585c83bde1bc55659fe53e1c3ccd6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
% Template LaTeX file for LAC-20 papers
%
% To generate the correct references using BibTeX, run
%     latex, bibtex, latex, latex
% modified...
% - from DAFx-00 to DAFx-02 by Florian Keiler, 2002-07-08
% - from DAFx-02 to DAFx-03 by Gianpaolo Evangelista
% - from DAFx-05 to DAFx-06 by Vincent Verfaille, 2006-02-05
% - from DAFx-06 to DAFx-07 by Vincent Verfaille, 2007-01-05
%                          and Sylvain Marchand, 2007-01-31
% - from DAFx-07 to DAFx-08 by Henri Penttinen, 2007-12-12
%                          and Jyri Pakarinen 2008-01-28
% - from DAFx-08 to DAFx-09 by Giorgio Prandi, Fabio Antonacci 2008-10-03
% - from DAFx-09 to DAFx-10 by Hannes Pomberger 2010-02-01
% - from DAFx-10 to DAFx-12 by Jez Wells 2011
% - from DAFx-12 to DAFx-14 by Sascha Disch 2013
% - from DAFx-15 to DAFx-16 by Pavel Rajmic 2015
% - from DAFx-16 to IFC-18 by Romain Michon 2018
% - from IFC-18 to LAC-19 by Romain Michon 2019
% - from LAC-19 to LAC-20 by Jean-Michaël Celerier 2020
%
% Template with hyper-references (links) active after conversion to pdf
% (with the distiller) or if compiled with pdflatex.
%
% 20060205: added package 'hypcap' to correct hyperlinks to figures and tables
%                      use of \papertitle and \paperauthorA, etc for same title in PDF and Metadata
%
% 1) Please compile using lualatex, latex or pdflatex.
% 2) If using pdflatex, you need your figures in a file format other than eps! e.g. png or jpg is working
% 3) Please use "papertitle" and "pdfauthor" definitions below

%------------------------------------------------------------------------------------------
%  !  !  !  !  !  !  !  !  !  !  !  ! user defined variables  !  !  !  !  !  !  !  !  !  !  !  !  !  !
% Please use these commands to define title and author(s) of the paper:
\def\papertitle{On Sampling Algorithms for Drums}
% \def\paperauthorA{André Nusser}
% \def\paperauthorB{Bent Bisballe Nyeng}
\def\paperauthorA{}
\def\paperauthorB{}
\def\paperauthorC{}
\def\paperauthorD{}

% Authors' affiliations have to be set below

%------------------------------------------------------------------------------------------
\documentclass[twoside,a4paper]{article}
\usepackage{LAC-20}
\usepackage{amsmath,amssymb,amsfonts,amsthm}
\usepackage{euscript}
\usepackage{ifpdf}
\usepackage{ifluatex}
\usepackage{ifxetex}

\usepackage{color}
\usepackage{listings}
\definecolor{mygrey}{rgb}{0.96,0.96,0.96}
\lstset{
  tabsize=4,
  basicstyle=\ttfamily,
  backgroundcolor=\color{mygrey},
  captionpos=b,
  breaklines=true
}

\usepackage[english]{babel}
\usepackage{caption}
\usepackage{subfig, color}
\setcounter{page}{1}
\ninept

\usepackage{times}
% pdf-tex settings: detect automatically if run by latex or pdflatex
\ifluatex
  \usepackage[
    pdftitle={\papertitle},
    pdfauthor={\paperauthorA, \paperauthorB, \paperauthorC, \paperauthorD},
    colorlinks=false, % links are activated as colror boxes instead of color text
    bookmarksnumbered, % use section numbers with bookmarks
    pdfstartview=XYZ % start with zoom=100% instead of full screen; especially useful if working with a big screen :-)
  ]{hyperref}
  
  \edef\pdfcompresslevel{\pdfvariable compresslevel}
  \pdfcompresslevel=9
  \usepackage{graphicx}
  
  \usepackage[figure,table]{hypcap}
  \usepackage{fontspec}
\else
  \ifxetex
    \usepackage[
      pdftitle={\papertitle},
      pdfauthor={\paperauthorA, \paperauthorB, \paperauthorC, \paperauthorD},
      colorlinks=false, % links are activated as colror boxes instead of color text
      bookmarksnumbered, % use section numbers with bookmarks
      pdfstartview=XYZ % start with zoom=100% instead of full screen; especially useful if working with a big screen :-)
    ]{hyperref}
    
    \pdfcompresslevel=9
    \usepackage{graphicx}
    
    \usepackage[figure,table]{hypcap}
    \usepackage{fontspec}
  \else
    \usepackage[utf8]{inputenc}
    \usepackage[T1]{fontenc}
    \ifpdf % compiling with pdflatex
      \usepackage[pdftex,
        pdftitle={\papertitle},
        pdfauthor={\paperauthorA, \paperauthorB, \paperauthorC, \paperauthorD},
        colorlinks=false, % links are activated as colror boxes instead of color text
        bookmarksnumbered, % use section numbers with bookmarks
        pdfstartview=XYZ % start with zoom=100% instead of full screen; especially useful if working with a big screen :-)
      ]{hyperref}
      \pdfcompresslevel=9
      \usepackage[pdftex]{graphicx}
      \usepackage[figure,table]{hypcap}
      \DeclareGraphicsExtensions{.png,.jpg,.pdf}
    \else % compiling with latex
      \usepackage[dvips]{epsfig,graphicx}
      \usepackage[dvips,
        colorlinks=false, % no color links
        bookmarksnumbered, % use section numbers with bookmarks
        pdfstartview=XYZ % start with zoom=100% instead of full screen
      ]{hyperref}
      % hyperrefs are active in the pdf file after conversion
      \usepackage[figure,table]{hypcap}
      \DeclareGraphicsExtensions{.eps}
    \fi
  \fi
\fi

% ====================================
% OWN PACKAGES

% For utf8 encoding
\usepackage[utf8]{inputenc}

% Use for automatic line breaks in tabular (column type X)
\usepackage{tabularx}
% For abbreviations using \newcommand
\usepackage{xspace}
% For the >{...} in tabular
\usepackage{array}
% For t o d o notes. Make the background white to make them not that distracting.
% \usepackage[textwidth=3.5cm,color=white]{todonotes}
% For better cite commands
\usepackage[numbers]{natbib}
% math stuff
\usepackage{amsmath,amsthm,amssymb}
\usepackage{mathtools}
\usepackage{nicefrac}
\usepackage{csquotes}
% Comment in the next line to draw frames around all the layout boxes for checking where they are violated.
%\usepackage{showframe}
% Algorithm typesetting: `algorithm` provides the float wrapper,
% `algpseudocode` the \State/\For/\If pseudo-code body commands.
\usepackage{algorithm}
\usepackage{algpseudocode}
% Relabel the algorithmic Require/Ensure headers as Input/Output.
\renewcommand{\algorithmicrequire}{\textbf{Input:}}
\renewcommand{\algorithmicensure}{\textbf{Output:}}

% nice theorem and proof environments. taken from Ema's template
% Numbered amsthm environments, grouped by heading style. Each counter
% is independent (no shared numbering across environment types).
\theoremstyle{plain}% italic body: theorem-like statements
\newtheorem{theorem}{Theorem}
\newtheorem{proposition}{Proposition}
\newtheorem{lemma}{Lemma}
\newtheorem{corollary}{Corollary}
\newtheorem{claim}{Claim}
\newtheorem{fact}{Fact}

\theoremstyle{definition}% upright body: definitions and examples
\newtheorem{definition}{Definition}
\newtheorem{example}{Example}

\theoremstyle{remark}% italic heading, upright body: side remarks
\newtheorem{remark}{Remark}
\newtheorem{observation}{Observation}
\newtheorem{conjecture}{Conjecture}

% Handy abbreviations
% Text shorthands; \xspace (from the xspace package loaded above) inserts
% a trailing space unless the macro is followed by punctuation.
\newcommand{\whp}[0]{w.h.p.\xspace}% "with high probability"
\newcommand{\ie}[0]{i.e.\xspace}
\newcommand{\wrt}[0]{w.r.t.\xspace}

% \abs and \norm hacks
% Paired delimiters with inverted star semantics: the *un*starred forms
% \abs{x} / \norm{x} auto-size their delimiters (they forward to the
% mathtools starred variants), while \abs*{x} / \norm*{x} give the
% fixed-size delimiters. This inverts mathtools' default convention.
\DeclarePairedDelimiter\abs{\lvert}{\rvert}
% Bug fix: \norm was never declared, so the \let\oldnorm\norm below
% aliased an undefined macro and any use of \norm in the body errored.
\DeclarePairedDelimiter\norm{\lVert}{\rVert}
\makeatletter
\let\oldabs\abs
\def\abs{\@ifstar{\oldabs}{\oldabs*}}
\let\oldnorm\norm
\def\norm{\@ifstar{\oldnorm}{\oldnorm*}}
\makeatother

% TODO notes
% Colored inline TODO markers, one color per author so open items are
% easy to attribute while drafting. Intended to be removed (or redefined
% to no-ops) for the camera-ready version.
\newcommand{\todo}[1]{\textcolor{red}{\textbf{TODO:} #1}}
\newcommand{\todoandre}[1]{\textcolor{green}{\textbf{TODO (André):} #1}}
\newcommand{\todobent}[1]{\textcolor{blue}{\textbf{TODO (Bent):} #1}}

% ugly hack
% Replaces the \paragraph sectioning command with a plain inline bold
% label. NOTE(review): this drops \paragraph's run-in spacing and any
% counter/bookmark behavior -- presumably deliberate for this layout,
% but confirm it matches the LAC-20 style before final submission.
\renewcommand{\paragraph}[1]{\textbf{#1} }

% ====================================

\title{\papertitle}

%-------------SINGLE-AUTHOR HEADER STARTS (uncomment below if your paper has a single author)-----------------------
% \affiliation{
% \paperauthorA \,\sthanks{This work was supported by the XYZ Foundation}}
% {\href{https://scrime.u-bordeaux.fr}{SCRIME} \\ Université de Bordeaux, France \\
% {\tt \href{mailto:ping@linuxaudio.org}{ping@linuxaudio.org}}
% }
%-----------------------------------SINGLE-AUTHOR HEADER ENDS------------------------------------------------------

%---------------TWO-AUTHOR HEADER STARTS (uncomment below if your paper has two authors)-----------------------
% \twoaffiliations{
% \paperauthorA % \, \sthanks{This work was supported by the XYZ Foundation}
% }
% {\href{https://drumgizmo.org}{DrumGizmo} \\ Saarbr\"ucken, Germany \\
% {\tt \href{mailto:andre.nusser@gmail.com}{andre.nusser@gmail.com}}
% }
% {\paperauthorB % \,\sthanks{This guy is a very good fellow}
% }
% {\href{https://drumgizmo.org}{DrumGizmo} \\ Aarhus, Denmark \\
% {\tt \href{mailto:deva@aasimon.org}{deva@aasimon.org}}
% }
%-------------------------------------TWO-AUTHOR HEADER ENDS------------------------------------------------------

%---------------THREE-AUTHOR HEADER STARTS (uncomment below if your paper has three authors)-----------------------
% \threeaffiliations{
% \paperauthorA \,\sthanks{This work was supported by the XYZ Foundation}}
% {\href{https://scrime.u-bordeaux.fr}{SCRIME} \\ Université de Bordeaux, France \\
% {\tt \href{mailto:ping@linuxaudio.org}{ping@linuxaudio.org}}
% }
% {\paperauthorB \,\sthanks{This guy is a very good fellow}}
% {\href{https://ccrma.stanford.edu}{CCRMA} \\ Stanford University, USA \\
% {\tt \href{mailto:lac@ccrma.stanford.edu}{lac@ccrma.stanford.edu}}
% }
% {\paperauthorC \,\sthanks{Illustrious contributor}}
% {\href{http://www.musikwissenschaft.uni-mainz.de/Musikinformatik/}{Johannes Gutenberg University (JGU)} \\  Mainz, Germany\\
% {\tt \href{mailto:lac@uni-mainz.de}{lac@uni-mainz.de}}
% }
%-------------------------------------THREE-AUTHOR HEADER ENDS------------------------------------------------------

%----------------FOUR-AUTHOR HEADER STARTS (uncomment below if your paper has four authors)-----------------------
% \fouraffiliations{
% \paperauthorA \,\sthanks{This work was supported by the XYZ Foundation}}
% {\href{https://scrime.u-bordeaux.fr}{SCRIME} \\ Université de Bordeaux, France \\
% {\tt \href{mailto:ping@linuxaudio.org}{ping@linuxaudio.org}}
% }
% {\paperauthorB \,\sthanks{This guy is a very good fellow}}
% {\href{https://ccrma.stanford.edu}{CCRMA} \\ Stanford University, USA \\
% {\tt \href{mailto:lac@ccrma.stanford.edu}{lac@ccrma.stanford.edu}}
% }
% {\paperauthorC \,\sthanks{Illustrious contributor}}
% {\href{http://www.musikwissenschaft.uni-mainz.de/Musikinformatik/}{Johannes Gutenberg University (JGU)} \\  Mainz, Germany\\
% {\tt \href{mailto:lac@uni-mainz.de}{lac@uni-mainz.de}}
% }
% {\paperauthorD \,\sthanks{Thanks to the predecessors for the templates}}
% {\href{https://c-base.org/}{C-Base} \\ Berlin, Germany \\
% {\tt \href{mailto:lac@c-base.com}{lac@c-base.com}}
% }
%-------------------------------------FOUR-AUTHOR HEADER ENDS------------------------------------------------------

\begin{document}

\maketitle

\begin{abstract}
\noindent Sampling drum kits well is a difficult and challenging task. Especially, building a drum kit sample bank with different velocity layers requires producing samples of very similar loudness, as changing the gain of a sample after recording makes it sound less natural. An approach that avoids this issue is to not categorize the samples in fixed groups but to simply calculate their loudness and then dynamically choose a sample when a sample corresponding to e.g.\ a specific MIDI value is requested. We present a first investigation of algorithms performing this selection and discuss their advantages and disadvantages. We implemented the seemingly best candidate in DrumGizmo -- a FLOSS drum plugin -- and we conduct experiments on how our suggested algorithms perform on the sampled drum kits.
\end{abstract}

\section{Introduction} \label{sec:introduction}
\todoandre{Talk about the general problem of sample selection.}
\todoandre{Limit scope to drums.}
\todoandre{Talk about round robin.}
\todoandre{Mention drawbacks.}
\todoandre{Introduce high-level ideas of our work.}
\todoandre{Make difference between humanization and sample selection clear.}
\todo{Motivation: Make sure we have the following covered: Randomized samples,
 Prevent repetitions, Sample coverage (use all samples in the set).}

\subsection{Related Work}
\todo{I don't really know what to write, except about round robin. Is there any other common method or any academic literature? Are there other methods in open source programs?}
%\todobent{Discuss DGs old sampling algorithm briefly.}
The engine gets a value $l \in [0,1]$ which must then be used by the
engine for deciding how the output should be produced.
Some engines use this value as a gain factor but in the case of
DrumGizmo it is used for sample selection only.
The early versions used a sample selection algorithm based on velocity
groups, akin to the one used by sfz \todo{add bib reference}, in which
each group spans a specified velocity range and the sample selection is
made by uniformly randomly selecting one of the samples contained in
the group corresponding to the input velocity:
{\tiny\begin{verbatim}
                   ________________                _________________
                  /                \              / uniformly random \
-- input note --> | group selector | -- group --> | sample selector  | -- sample -->
     [0; 1]       \________________/              \__________________/
\end{verbatim}}

This algorithm did not give good results in small samplesets so later
an improved algorithm was introduced which was instead based on normal
distributed random numbers and with power values for each sample in
the set.

A prerequisite for this new algorithm is that the power of each sample is
stored along with the sample data of each sample.

The power values of a drum kit are floating point numbers without any
restrictions but assumed to be positive. Then the input value
$l$ is mapped using the canonical bijections between $[0,1]$ and
$[p_{\min}, p_{\max}]$ and afterwards shifted\todo{by which
amount?}. We call this new value $p$.

Now the real sample selection algorithm starts. We select a value $p'$
drawn at random from the normal distribution $\mathcal{N}(\mu, \sigma^2)$,
where the mean, $\mu$, is set to the mapped input value $p$
and the standard deviation, $\sigma$, is a parameter controlled by the user
expressed in fractions of the size and span of the sampleset.
Now we simply find the sample $s$ with the power $q$ which is closest
to $p'$ -- ties are broken such that the first minimal value is chosen
(which is problematic as explained below). In case $s$ is equal to the
last sample that we played we repeat this process, otherwise we return
$s$. If we did not find another sample than the last played after $4$
iterations, we just return the last played sample.

{\tiny\begin{verbatim}
           stddev
                 \ ________                _________________
                  /        \              /  nearest power  \
-- input note --> | random | -- power --> | sample selector | -- sample -->
     [0; 1]       \________/              \_________________/
                 /
             mean
\end{verbatim}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\subsection{Our Contribution}
To the best of our knowledge, this is the first academic article that deals with the issue of selecting samples from a set of samples with \enquote{continuous power values}. To this end, we first identify important aspects that sampling algorithms in this setting have to fulfill. After we formulate those requirements and formalize them to some degree, we present our algorithm based on those requirements, which is based on computation of a multi-criteria objective function. Consequently, we give an overview over an implementation of this approach and then conduct experiments to evaluate the actual quality. As reference implementation, we use the old method by DrumGizmo -- an open source drum machine.

\section{Preliminaries}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%\todobent{Talk about how the drum kit samples are usually created; very briefly.}
Drum samples are cut from recordings of drums with multiple
microphones.
Each hit on a drum must be distinguishable from the others and can
therefore not overlap in time.
Due to the multiple microphones used for the recording, each sample
spans multiple channels. Due to the speed of sound in air and the
distance from each drum to each of the microphones used, the
initial sample position in each of the channels will not be at the
same place -- the channel which is the closest to the sound source of
a particular instrument is the \textit{main channel} of that instrument:
{\tiny\begin{verbatim}
Main channel
|    .   .-.                                                    .
|    .  /   \         .-.                                       .
|    . /     \       /   \       .-.     .-.     _   _          .
+----./-------\-----/-----\-----/---\---/---\---/-\-/-\/\/------.---
|    .         \   /       \   /     '-'     '-'                .
|    .          '-'         '-'                                 .
Secondary channel                                               .
|    .     .-.                                                  .
|    .    /   \         .-.                                     .
|    .   /     \       /   \       .-.     .-.     _   _        .
+----.--/-------\-----/-----\-----/---\---/---\---/-\-/-\/\/----.---
|    .           \   /       \   /     '-'     '-'              .
|    .            '-'         '-'                               .
Secondary channel                                               .
|    .         .-.                                              .
|    .        /   \         .-.                                 .
|    .       /     \       /   \       .-.     .-.     _   _    .
+----.------/-------\-----/-----\-----/---\---/---\---/-\-/-\/\/.---
|    .               \   /       \   /     '-'     '-'          .
|    .                '-'         '-'                           .

Sample offset                                               Sample end
\end{verbatim}}

%\todobent{Talk about loudness computation of samples.}
A sample has a \textit{stroke power} which is the physical power used
by the drummer when making that particular hit.
Since this is not something that can be easily measured each sample
power is instead calculated as the power of the signal in an initial
attack period of the audio of the main channel:
\[
\operatorname{power}(s, n) = \sum\limits_{i=0}^n s[i]^2,
\]
where $n$ is defined on a per-instrument basis and will vary from
instrument to instrument and $s[i]$ is the $i$th sample of the main
channel.

Since the power values are simply calculated sums of squares they can be used
for comparing one sample to another and ultimately be used for mapping
a MIDI velocity to a matching sample.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% \todo{Mathematical basics (if there are any important ones).}

% \todo{Formalize the setting, i.e.\ what is the input/output of our algorithm?}
\subsection{Setting}
We now describe the setting in which we want to choose the samples. We are given:
\begin{itemize}
	\item a drum kit consisting of a set of instruments $I$
	\item for each instrument $i \in I$, we are given an input sample set $S_i$
	\item each sample $s \in S_i$ is already labeled with a power value $p_s \in \mathbb{R}^+$
\end{itemize}
After reading the drum kit, requests of the form $(i, p) \in I \times \mathbb{R}^+$ arrive. We want to answer these requests by choosing the best sample from $S_i$ for the power value $p$.

% \todoandre{Make terminology and notation clear and check for consistency in the document.}
\subsection{Notation and Terminology}
We use the following notation throughout this article. An \emph{instrument} is considered to be one of the drums of the drum kit that we sampled. A \emph{sample} (denoted by $s, s', \dots$) is a recording of one hit on a specific instrument. Given a sample $s$, the \emph{power} of it (denoted by $p_s, p_s', \dots$) is the perceived loudness and can be expressed by any common loudness measure of an audio clip. If a power value is requested and does not correspond to a sample, we denote it by $p, p', \dots$. With the term \emph{velocity} (denoted by $v, v', \dots$), we refer to the attack velocity of a MIDI note and it is thus between 0 and 127. We consider time in a discretized way and thus a \emph{time point} is an integer value intuitively referring to the number of time steps passed since the beginning of time. For a sample $s$, we refer with $t_s$ to the time point at which the sample was played last.

\section{Requirements} \label{sec:requirements}

% \todoandre{Intuitively discuss the requirements of a good sampling algorithm.}
We now discuss which requirements a good sampling algorithm intuitively has to fulfill. Such an algorithm has a tradeoff between two main objectives: choosing a sample which is close to the requested power value, while not choosing the same sample too close to the previous time it was used. Note that if we just want to be as close as possible to the requested power value, then we would always just choose the closest sample. However, if we now play a sequence of the same instrument at the same power level, then we play the same sample and thereby obtain a robotic sound. Thus, we want to find other samples that are not too far.

% \todoandre{List the requirements one by one and discuss them. Try to formalize them in some way.}
More concretely, we aim to fulfill the following requirements with our proposed algorithm.
\begin{description}
	\item[Close Sample:] The chosen sample should be reasonably close to the requested power value, such that the listener perceives it as being played at the same velocity.
	\item[Avoid Same Samples:] When we have multiple samples to choose from we should always take one that was last played far enough in the past to avoid a robotic sound.
	\item[Randomization:] Furthermore, to avoid patterns (like e.g. in round robin, where exactly every $n$th hit sounds the same when we have $n$ samples in our velocity group), we want some randomization.
	\item[Locality:] If two samples have a very similar power value, they should also be treated similarly by the algorithm. In other words, locally, samples should have almost the same probability of being chosen.
\end{description}

We now formalize the requirements stated above. Let $p, p'$ be two power levels. We define their dissimilarity to simply be their distance $\abs{p - p'}$. Thus, if $p$ is the input power value and $p'$ is the power value of the chosen sample, we want to minimize the above term. Let $s$ be a sample and $t_s$ the time point it was played last. When we are now queried for a sample at time $t$, then for $s$ to be a good sample, we want $t - t_s$ to be reasonably high. Again, we just use the distance between the current time step and the last time step a sample was used. Randomization is difficult to formalize in a simple way in this context, thus, we simply require that for the same history, different outcomes of choosing a sample should be possible. The last requirement we also state in a more intuitive than formal way. Assume we are requested a sample for the power value $p$ and the two samples $s, s'$ have a very similar power value. Then, if we exchange $t_s$ and $t_{s'}$, the probability of choosing $s$ over $s'$ should be roughly the same as if we do not exchange them.

\section{Algorithm}
Before we start describing the approach that we decided for, let us first discuss other approaches that might seem promising at this point of the discussion. \todoandre{Also quickly discuss other alternative models that are following naturally from above observations; but wait until Bent wrote the related work part to make sure to be consistent with that.}

% \todoandre{We have a multi-criteria optimization!}
The requirements mentioned in Section \ref{sec:requirements} consist of several different objectives. Thus, we are dealing with a multi-objective optimization, where we have to somehow combine the different objectives into one. As we are dealing with an interactive setting where the quality of the solution of the optimization is determined by the user, it seems natural to make the algorithm parametrized and expose the parameters to the user. Using these parameters, the user can influence how the different objectives are weighted and thereby influence the final result, just having a very rough intuitive and non-technical understanding of the process.

% \todoandre{Talk about the general idea of a potential function.}
Following from the above description, we choose a single objective function that we optimize. This objective function consists of the terms described in Section \ref{sec:requirements} as well as the parameters that are factors in front of the requirements terms.  We formulate our objective function such that smaller values are better and thus we end up with a minimization problem. Consequently, we just have to evaluate a single term on each sample and then pick the one which has the best value.

% \todoandre{Introduce the potential function of the algorithm.}
We now give the objective function. Let $p$ be the power that is requested. As before, for any sample $s$, let $t_s$ be the time at which $s$ was played last (the unit does not matter as it is parametrized by $\beta$ anyway), and let $r(s,t)$ be a random number generator producing numbers in the range $[0,1]$ uniformly and independently at random. Let $\alpha, \beta, \gamma > 0$ be the parameters that are later exposed to the user of the sample algorithm. Also, recall that $p_{\min}, p_{\max}$ is the minimal and maximal power value, respectively, and that $S$ is the sample rate, i.e., the number of time steps per second. At the current time $t$, we now want to find the sample $s$ minimizing the objective function
\[
	f(s, t) \coloneqq \alpha \cdot \left( \frac{p-p_s}{p_{\max} - p_{\min}}\right)^2 + \beta \cdot \left( 1 + \frac{t - t_s}{S}\right)^{-1} + \gamma \cdot r(s,t).
\]
Note that we have to ensure $t_s \neq t$ to avoid division by zero.

% \todoandre{Talk about the single terms of the potential function.}
Let us now consider the objective function in more detail. The objective function consists of three summands and we will have a closer look at them in order.

The first summand, namely
\[
	\alpha \cdot \left( \frac{p-p_s}{p_{\max} - p_{\min}}\right)^2,
\]
is for expressing the distance of the sample's power value $p_s$ to the desired power value $p$. Instead of using the absolute value $\abs{p-p_s}$ as discussed in Section \ref{sec:requirements}, we first normalize the value to be in the range $[0,1]$ and then square it. By squaring, we put a significantly stronger penalty than the absolute value on samples whose power value is very far from the requested power value. Note that if we request a power value that lies in the middle of the power values of our sample set, then this term will maximally be around $\frac{1}{4}\alpha$. However, if $p = p_{\max}$ or $p = p_{\min}$, then we might obtain a value of $\alpha$. While this might seem unreasonable at first glance, we want to highlight that this indeed matches the desired behavior, because the penalty should be only dependent on the distance to the sample and not the possible worst case value it can attain. In other words, a bad choice should not be made to look better, if there are much worse choices possible.

The second summand, namely
\[
	\beta \cdot \left( 1 + \frac{t - t_s}{S}\right)^{-1},
\]
expresses how much time passed since we last played the sample for which we are currently evaluating the objective function. However, we want large values if little time passes and therefore we raise the whole term to the power of $-1$. To avoid extreme values, we add 1 to the normalized distance of the current time and the last time the sample was played. Note that if we would not have a \enquote{+1}, then for $t - t_s$ being very small, the values of this term would be huge and thus dominate the whole objective function. We normalize by the sample rate, as we want the time distance to be in seconds and not in samples.

The third summand, namely
\[
	\gamma \cdot r(s,t),
\]
just adds some noise to the process to make it non-deterministic and thus avoid patterns in the selection as mentioned in Section \ref{sec:requirements}.

% \todoandre{Maybe add some pseudo-code to make things easier to understand?}
We already explained the core part of the sample selection algorithm. The remainder is now straight-forward. We simply evaluate the objective function for each sample and then pick the one with the smallest value. For completeness, Algorithm \ref{alg:sampling} shows the pseudo code for the sample selection algorithm.
\begin{algorithm}
\begin{algorithmic}
	\Require Requested power $p$, Instrument $i$, current time step $t$, parameters $\alpha, \beta, \gamma$, and array $\mathit{last}$ with the time points a sample has been played last
	\Ensure Sample $s$
	\State $s \gets \text{undefined}$
	\State $f_{\min} \gets \infty$
	\For{$s' \in S_i$}
		\State $v \gets \alpha \cdot \left( \frac{p-p_{s'}}{p_{\max} - p_{\min}}\right)^2 + \beta \cdot \left( 1 + \frac{t - \mathit{last}[s']}{S}\right)^{-1} + \gamma \cdot r(s',t)$
		\If{$v < f_{\min}$}
			\State $f_{\min} \gets v$
			\State $s \gets s'$
		\EndIf
	\EndFor
	\State $\mathit{last}[s] \gets t$
	\State \Return $s$
\end{algorithmic}
\caption{This is the high-level code of the sampling function.}
\label{alg:sampling}
\end{algorithm}

Note that the worst-case complexity of evaluating the objective function is linear in the number of samples for the instrument that we are considering. However, in practice we can avoid evaluation for most samples by simply starting with the \enquote{most promising} sample and recursively evaluate the neighbors until the future possible evaluations cannot beat the currently best value.

\section{Emulating Other Sample Selection Algorithms}
One of the main advantages of the described sampling algorithm is that it can emulate the most common sample choice algorithms.
Sometimes this can be done with just adjusting the parameters $\alpha, \beta, \gamma$, and sometimes we have to prepare the power values of the drum kit accordingly.
In the following we describe which algorithms can be emulated and how we have to set the parameters and power values for that.

First, note that all extreme choices of the parameters -- meaning that we set one parameter of $\alpha, \beta, \gamma$ to a positive value and all others to zero -- emulate different selection algorithms.

\paragraph{Choose Closest.} If we set $\alpha > 0$ and $\beta = \gamma = 0$, then the objective function reduces to the first summand and thus we just always choose the sample $s$ that minimizes $\abs{p - p_s}$, i.e., the closest sample.

\paragraph{Choose Oldest.} Similarly, if $\beta > 0$ but $\alpha = \gamma = 0$, then the objective function reduces to the second summand and thus is minimized by the sample $s$ that maximizes $t-t_s$, i.e., the oldest sample.

\paragraph{Random Selection.} If now $\gamma > 0$ and $\alpha = \beta = 0$, then the objective function reduces to the third summand and we thus always select a sample uniformly at random.

\paragraph{Round Robin.} The previously mentioned emulations were straightforward; however, the arguably most commonly used sample selection algorithm in practice is round robin. As already discussed in Section \ref{sec:introduction}, round robin assumes the samples to already be grouped. In our case this means that samples $s_1, \dots,s_k$ that belong to the same velocity group should all have the same power value, i.e., $p_{s_1} = \cdots = p_{s_k}$. Now if there is a query with power value $p$, we want to always choose the closest group of samples, thus $\alpha$ should be huge. After restricting to the samples of a specific group, we now always want to play the oldest sample, thus we simply want $\beta > 0$. If we additionally want to randomize round robin in a way that we sometimes choose the second or third oldest sample, then we want to set $\gamma$ to a small value greater than zero. \todoandre{improve last few sentences.}

\section{Implementation} \label{sec:implementation}
%\todobent{Give a short introduction to DrumGizmo, including a link to the git repository.}
DrumGizmo is an open-source, cross-platform drum sample engine and
audio plugin aiming to give the best possible sound from the least
effort when doing drum programming.
The source code is available through git at:\\
\verb|git://git.drumgizmo.org/drumgizmo.git|\\
and the source code can be browsed online here:\\
\verb|http://cgit.drumgizmo.org/drumgizmo.git/|

\todo{Talk about the timeline, i.e., when were the releases and what is still unreleased?}

% \todoandre{Talk about how the sampling algorithm was implemented. What do we need to store?}
The sampling algorithm itself did not require any particular implementation efforts. Most of the time was spent on the theoretical part of it. To give a better overview of the technicalities, we briefly list the information that needs to be stored. In a preprocessing phase, we compute $p_{\min}$ and $p_{\max}$ for each instrument and set all values of the $\mathit{last}$ arrays to 0. The power values of the samples are given by the drum kit in DrumGizmo. The parameters $\alpha, \beta, \gamma$ have default values that were determined experimentally. Each of them can be changed by the user, either interactively in the GUI or via the command line interface.

% \todoandre{Add some of the source code to the paper?}
As DrumGizmo is free software, the exact details of the implementation can be checked by everyone.

% \todoandre{Give less important implementation details, e.g., like adaptive search starting from the most promising value}
For instruments with reasonably small sample sets, simply iterating over all samples for the specific instrument as shown in Algorithm \ref{alg:sampling} is sufficiently performant. However, imagine an instrument with an extremely large sample set. As DrumGizmo drum kits can be created by everyone, there is no restriction and we cannot assume a small sample size. To avoid performance issues arising from such a scenario, we employ a non-naive search by starting with the \enquote{most promising} sample and then inspecting its neighbors until the currently best sample is known to dominate the remaining samples. More concretely, we do the following: we start with the sample $s$ that has the closest power value $p_s$ to the requested power value $p$, i.e., we find $s$ that minimizes $\abs{p - p_s}$. The key observation why a local search often suffices is that we can lower bound the second and third summand of the objective function by 0. Thus, for a given sample $s$ and a time point $t$, we have
\begin{equation}\label{eq:lower_bound}
	f(s,t) \geq \alpha \cdot \left( \frac{p-p_s}{p_{\max} - p_{\min}}\right)^2.
\end{equation}
Assume now that, for some $x > 0$, we evaluated all samples $s$ with $p-x \leq p_s \leq p+x$. Then we know, by Equation \ref{eq:lower_bound}, that for all samples $s'$ with power values outside the range $[p-x, p+x]$ it holds that
\[
	f(s',t) > \alpha \cdot \left( \frac{x}{p_{\max} - p_{\min}}\right)^2.
\]
Note that by traversing the samples in order of their distance to $p$ (which is possible by having the samples stored in an array sorted increasingly by power value), we can stop the search as soon as we have searched the range $[p-x, p+x]$ for which
\[
	f_{\min} < \alpha \cdot \left( \frac{x}{p_{\max} - p_{\min}}\right)^2.
\]

\section{Experiments}
\todoandre{Talk about the setup.}
To conduct the experiments, we use the implementation of the new sampling algorithm in DrumGizmo.
As a base-line for comparison, we use the previous sample selection algorithm of DrumGizmo.
We want to evaluate how the sample selection algorithm performs in practice, therefore we use the drum kits of DrumGizmo. In particular, we use the power value distribution of the samples of those kits.
We do experiments on three different drum kits.
As normally the snare drum is the instrument with the highest number of samples, we choose this drum for our experiments.
See Table \ref{tab:drumkit_data} for some information about the samples of the drum kits and see Figure \ref{fig:power_level_distribution} for a visualization of the power level distribution of the kits.

\begin{table}
\caption{Drum kit data.}
\label{tab:drumkit_data}
\centering
\begin{tabular}{|l||rr|}
\hline
name & number of snare samples & \\
\hline
DRS kit & 234 & \\
other kit & 234 & \\
and other kit & 234 & \\
\hline
\end{tabular}
\end{table}

\begin{figure}
	\includegraphics[width=.8\linewidth]{figures/power_level_distribution.pdf}
	\caption{This shows the power value distribution of the different drum kits.}
	\label{fig:power_level_distribution}
\end{figure}

% \todoandre{Talk about what the experiments should show: two close samples are chosen similarly often; playing the same MIDI note plays a reasonably varied sample set; average distance of one sample}
We want to test the following hypotheses with our experiments:
\begin{enumerate}
	\item Two samples with similar power values are chosen similarly often.
	\item Playing the same MIDI note over and over again plays a reasonably varied set of samples.
	\item Average distance of one sample \todo{what did I want to say with that?}
\end{enumerate}
% \todoandre{Experiments are: playing fast sweeps (with multiple hits per velocity); playing a single note over and over again at the same velocity; sound examples that people can listen to online?}
To test the above hypotheses, we conduct the following experiments:
\begin{enumerate}
	\item Playing fast sweeps from MIDI velocity 0 to MIDI velocity 127, 8 times
	\item Playing a single velocity fast for 1015 times
	% \item Audio recordings of the processes
\end{enumerate}

\todoandre{Do beautiful tables and plots here}
To be able to compare the results of those experiments, we have to somehow visualize the selections. We choose to plot histograms. More concretely, for a run of the experiment, we plot the number of times a sample was selected over its index in the array of ordered samples. For the first experiment, i.e., the sweeps, you can see the histograms in Figure \ref{fig:sweep}. \todo{describe and explain plot}

\begin{figure*}
	\includegraphics[width=.47\linewidth]{figures/sweep_old.pdf}
	\hfill
	\includegraphics[width=.47\linewidth]{figures/sweep_new.pdf}
	\caption{The histogram of the old algorithm (left) and the new algorithm (right) of playing fast sweeps from MIDI velocity 0 to MIDI velocity 127, 8 times.}
	\label{fig:sweep}
\end{figure*}

We conducted the second experiment for two different MIDI velocities: MIDI velocity 80 (Figure \ref{fig:80}) and MIDI velocity 112 (Figure \ref{fig:112}). \todo{describe and explain plot}

\begin{figure*}
	\includegraphics[width=.47\linewidth]{figures/80_old.pdf}
	\hfill
	\includegraphics[width=.47\linewidth]{figures/80_new.pdf}
	\caption{The histogram of the old algorithm (left) and the new algorithm (right) of playing MIDI velocity 80 fast for 1015 times.}
	\label{fig:80}
\end{figure*}

\begin{figure*}
	\includegraphics[width=.47\linewidth]{figures/112_old.pdf}
	\hfill
	\includegraphics[width=.47\linewidth]{figures/112_new.pdf}
	\caption{The histogram of the old algorithm (left) and the new algorithm (right) of playing MIDI velocity 112 fast for 1015 times.}
	\label{fig:112}
\end{figure*}

% \todoandre{Also do an experiment regarding the adaptive search}
To also get an idea of the performance of the new sampling algorithm, we want to see how many power values of samples are evaluated per query.
Without the smart search optimization described at the end of Section \ref{sec:implementation}, this number would always be the number of samples. However, we expect that the smart search optimization significantly reduces the number of evaluations. To test this hypothesis, we take the above experiment and look at the number of evaluations. You can see the histogram in Figure \ref{fig:evaluations_histogram}. \todo{fix}

\begin{figure}
	% \includegraphics[width=.8\linewidth]{figures/evaluations_histogram.pdf}
	\caption{This plot shows the histogram of the number of evaluations of power values for the queries of experiment bla.\todo{insert correct information}}
	\label{fig:evaluations_histogram}
\end{figure}

\todoandre{Summarize experiments}

\section{Conclusion and Future Work}
\todoandre{Recapitulate what was done in this paper. Highlight some of the difficulties and surprises.}
\todoandre{List future work: transforming the loudness space; refine the objective function; adapt algorithm to other instruments/settings; study to see what sounds good to people and do they actually hear the difference?}
\todoandre{Change diversity parameter. Add a bound on the deviation from the requested power value to definitely avoid extrem outliers.}

% \section{Acknowledgements}
% \todo{Thank people for testing?}
% \todo{Thank people for proof-reading?}

%\newpage
\nocite{*}
\bibliographystyle{IEEEbib}
\bibliography{LAC-20} % requires file lac-20.bib

\end{document}