ref: 4ff85f47015d72f28f9eeef85a4c8c7222300891
parent: 5bafb67083c07394616c04e67dab86a9ca40d698
author: qwx <[email protected]>
date: Sun Jan 29 00:41:47 EST 2023
add plan9-friendly native version via npe; works on pcm audio only; uses pcmconv(1) for resampling
--- /dev/null
+++ b/mkfile
@@ -1,0 +1,17 @@
+</$objtype/mkfile
+TARG=stretch
+BIN=/$objtype/bin/audio
+# TARG, BIN, OFILES and HFILES are presumably consumed by the mkone rules included further down -- confirm there
+OFILES=\
+ this.$O\
+ stretch.$O\
+
+HFILES=\
+ stretch.h\
+
+default:V: all
+
+</sys/src/cmd/mkone
+# assigned after the includes: extends the CFLAGS value in effect there with the npe include path and platform defines
+CFLAGS=$CFLAGS -p -I/sys/include/npe -D__plan9__ -D__${objtype}__\
+ -D_POSIX_SOURCE
--- /dev/null
+++ b/this.c
@@ -1,0 +1,234 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include "stretch.h"
+
+enum{	/* the one PCM layout this program handles: 16-bit samples, channels interleaved */
+	Nchan = 2,	/* channels per sample frame */
+	Nrate = 44100,	/* sample rate, Hz */
+	Sampsz = 2 * Nchan,	/* bytes per sample frame */
+};
+
+void
+usage(void)
+{	/* print the option summary (kept in step with ARGBEGIN in threadmain) on fd 2, then exit with status "usage" */
+	fprint(2, "usage: %s [OPTIONS] [FILE]\n"
+		"-r F stretch ratio, [0.25,4.0] default 1.0\n"
+		"-g F gap/silence stretch ratio (if different), [0.25,4.0]\n"
+		"-u N upper freq period limit [40,44100[ default 333 Hz\n"
+		"-l N lower freq period limit, [20,[ default 55 Hz\n"
+		"-w N audio buffer/window length, [1,100] default 25 ms\n"
+		"-t N gap/silence threshold, [-70,-10] default -40 dB re FS\n"
+		"-c cycle through all ratios, starting higher\n"
+		"-C cycle through all ratios, starting lower\n"
+		"-d force dual instance even for shallow ratios\n"
+		"-s scale rate to preserve duration (not pitch)\n"
+		"-f fast pitch detection (default >= 32 kHz)\n"
+		"-n normal pitch detection (default < 32 kHz)\n"
+		"-v report settings and statistics on standard error\n", argv0);
+	exits("usage");
+}
+
+double
+rms_level_dB (int16_t *audio, int samples, int channels)
+{
+	/* mean-square level of `samples` frames in dB; 0 dB is a mean square of 32768*32767/2 */
+	double acc = 0.0, mono;
+	int k;
+
+	if (channels != 1)
+		for (k = 0; k < samples; k++) {	/* anything but mono is read as interleaved pairs and averaged */
+			mono = (audio [2 * k] + audio [2 * k + 1]) / 2.0;
+			acc += mono * mono;
+		}
+	else
+		for (k = 0; k < samples; k++)
+			acc += (double) audio [k] * audio [k];
+	return 10.0 * log10 (acc / samples / (32768.0 * 32767.0 * 0.5));
+}
+
+/*
+ * Time-stretch 16-bit stereo PCM at Nrate Hz, read from FILE (default: fd 0) one
+ * window at a time, and write the result to fd 1.  With -s the output is instead
+ * piped to pcmconv(1), labelled as Nrate*ratio Hz, for conversion back to 44100 Hz.
+ */
+void
+threadmain(int argc, char **argv)
+{
+	int n, m, fd, ofd, pfd[2], cycle, dual, fast, normal, doscale, verbose;
+	int lf, uf, wsz, flags, maxnsamp, silence, nibuf, min_period, max_period;
+	int non_silence_frames, silence_frames, used_silence_frames, consecutive_silence_frames;
+	int max_generated_stretch, max_generated_flush, samples_to_stretch;
+	char *r;
+	s16int *prebuf, *ibuf, *obuf;
+	u32int insamp, outsamp;
+	float max_ratio;
+	double level, ratio, gap, smin;
+	StretchHandle S;
+
+	fd = 0;
+	ofd = 1;
+	cycle = 0; /* cycle ratio, direction, 1: fw, 2: back */
+	dual = 0; /* force dual instance */
+	fast = 0; /* force fast pitch detection */
+	normal = 0; /* force normal pitch detection */
+	doscale = 0; /* scale rate to preserve duration */
+	lf = 55; /* freq lower bound */
+	uf = 333; /* freq upper bound */
+	gap = 0.0; /* gap/silence stretch ratio */
+	ratio = 1.0; /* stretch ratio */
+	wsz = 25; /* window size */
+	smin = -40; /* gap/silence threshold */
+	verbose = 0;
+	ARGBEGIN{
+	case 'C': cycle = 2; break;
+	case 'c': cycle = 1; break;
+	case 'd': dual = 1; break;
+	case 'f': fast = 1; break;
+	case 'g': gap = strtod(EARGF(usage()), nil); if(gap < 0.25 || gap > 4.0) usage(); break;
+	case 'l': lf = strtol(EARGF(usage()), nil, 10); if(lf < 20) usage(); break;
+	case 'n': normal = 1; break;
+	case 'r': ratio = strtod(EARGF(usage()), nil); if(ratio < 0.25 || ratio > 4.0) usage(); break;
+	case 's': doscale = 1; break;
+	case 't': smin = strtod(EARGF(usage()), nil); if(smin < -70 || smin > -10) usage(); break;
+	case 'u': uf = strtol(EARGF(usage()), nil, 10); if(uf < 40 || uf >= Nrate) usage(); break; /* keeps min_period >= 1 */
+	case 'v': verbose = 1; break;
+	case 'w': wsz = strtol(EARGF(usage()), nil, 10); if(wsz < 1 || wsz > 100) usage(); break;
+	case 'h':
+	default: usage(); break;
+	}ARGEND
+	if(*argv != nil && (fd = open(*argv, OREAD)) < 0)
+		sysfatal("open %s: %r", *argv);
+
+	insamp = outsamp = 0;
+	min_period = Nrate / uf;
+	max_period = Nrate / lf;
+	flags = 0;
+	silence = gap != 0.0 && gap != ratio && !cycle;
+	nibuf = Nrate * (wsz / 1000.0);
+	max_ratio = ratio;
+	if(dual || ratio < 0.5 || ratio > 2.0 ||
+	   (silence && (gap < 0.5 || gap > 2.0)))
+		flags |= STRETCH_DUAL_FLAG;
+	if((fast || Nrate >= 32000) && !normal)
+		flags |= STRETCH_FAST_FLAG;
+	if(verbose){
+		fprint(2, "file sample rate is %d Hz (%s), buffer size is %d samples\n",
+			Nrate, Nchan == 2 ? "stereo" : "mono", nibuf);
+		fprint(2, "stretch period range = %d to %d, %d channels, %s, %s\n",
+			min_period, max_period, Nchan, (flags & STRETCH_FAST_FLAG) ? "fast mode" : "normal mode",
+			(flags & STRETCH_DUAL_FLAG) ? "dual instance" : "single instance");
+	}
+	if((S = stretch_init(min_period, max_period, Nchan, flags)) == NULL)
+		sysfatal("initialization failed");
+	if(cycle)
+		max_ratio = (flags & STRETCH_DUAL_FLAG) ? 4.0 : 2.0;
+	else if(silence && gap > max_ratio)
+		max_ratio = gap;
+	maxnsamp = stretch_output_capacity(S, nibuf, max_ratio);
+	obuf = prebuf = nil;
+	if((ibuf = malloc(nibuf * Sampsz)) == nil
+	|| (obuf = malloc(maxnsamp * Sampsz)) == nil
+	|| silence && (prebuf = malloc(nibuf * Sampsz)) == nil)
+		sysfatal("malloc: %r");
+	non_silence_frames = silence_frames = used_silence_frames = 0;
+	max_generated_stretch = max_generated_flush = samples_to_stretch = 0;
+	consecutive_silence_frames = 1;
+
+	if(doscale){
+		if(pipe(pfd) < 0)
+			sysfatal("pipe: %r");
+		switch(rfork(RFPROC|RFFDG)){
+		case -1:
+			sysfatal("rfork: %r");
+		case 0:
+			close(0);
+			close(pfd[1]);
+			dup(pfd[0], 0);
+			if((r = smprint("s16c2r%d", (u32int)(Nrate * ratio + 0.5))) == nil)
+				sysfatal("smprint: %r");
+			execl("/bin/audio/pcmconv", "pcmconv", "-i", r, "-o", "s16c2r44100", nil);
+			sysfatal("execl: %r");
+		default:
+			close(1);
+			close(pfd[0]);
+			ofd = pfd[1];	/* NOTE(review): the pcmconv child is never waited for before exits() -- confirm that is intended */
+		}
+	}
+	for(;;){
+		/* readn, not read: a short read from a pipe could end mid-frame and misalign the channels */
+		n = readn(fd, silence ? prebuf : ibuf, Sampsz * nibuf);
+		if(n < 0)	/* test before dividing: -1 / Sampsz truncates to 0 and would pass for EOF */
+			sysfatal("read: %r");
+		n /= Sampsz;
+		if(!silence && n == 0)
+			break;
+		insamp += n;
+		if(!silence)
+			samples_to_stretch = n;
+		else if(n != 0){	/* in silence mode the new window is only classified here; the previous one is stretched below */
+			level = rms_level_dB(prebuf, n, Nchan);
+			if(level > smin){
+				consecutive_silence_frames = 0;
+				non_silence_frames++;
+			}else{
+				consecutive_silence_frames++;
+				silence_frames++;
+			}
+		}
+		if(cycle){
+			if(flags & STRETCH_DUAL_FLAG)
+				ratio = (sin((double) outsamp / Nrate / 2.0) *(cycle & 1 ? 1.875 : -1.875)) + 2.125;
+			else
+				ratio = (sin((double) outsamp / Nrate) * (cycle & 1 ? 0.75 : -0.75)) + 1.25;
+		}
+		if(samples_to_stretch){
+			if(consecutive_silence_frames >= 3){
+				m = stretch_samples(S, ibuf, samples_to_stretch, obuf, gap);
+				used_silence_frames++;
+			}else
+				m = stretch_samples(S, ibuf, samples_to_stretch, obuf, ratio);
+			if(m){
+				if(m > maxnsamp)	/* obuf holds maxnsamp frames; bail out before emitting anything past it */
+					sysfatal("sample generation overflow");
+				if(m > max_generated_stretch)
+					max_generated_stretch = m;
+				if(write(ofd, obuf, Sampsz * m) != Sampsz * m)
+					sysfatal("write: %r");
+				outsamp += m;
+			}
+		}
+		if(silence){
+			if(n == 0)
+				break;
+			memcpy(ibuf, prebuf, n * Sampsz);	/* the window just classified becomes the next one to stretch */
+			samples_to_stretch = n;
+		}
+	}
+	while((n = stretch_flush(S, obuf)) != 0){
+		if(n > maxnsamp)
+			sysfatal("flush overflow");
+		if(n > max_generated_flush)
+			max_generated_flush = n;
+		if(write(ofd, obuf, Sampsz * n) != Sampsz * n)
+			sysfatal("write: %r");
+		outsamp += n;	/* n is already in frames (Sampsz * n bytes were written), same unit as m above */
+	}
+	if(insamp && verbose){
+		fprint(2, "done, %ud samples --> %ud samples (ratio = %.3f)\n",
+			insamp, outsamp, (double)outsamp / insamp);
+		if(doscale)
+			fprint(2, "sample rate changed from %d Hz to %ud Hz\n",
+				Nrate, (u32int)(Nrate * ratio + 0.5));
+		fprint(2, "max expected samples = %d, actually seen = %d stretch, %d flush\n",
+			maxnsamp, max_generated_stretch, max_generated_flush);
+		if(silence_frames || non_silence_frames) {
+			int total_frames = silence_frames + non_silence_frames;
+			fprint(2, "%d silence frames detected (%.2f%%), %d actually used (%.2f%%)\n",
+				silence_frames, silence_frames * 100.0 / total_frames,
+				used_silence_frames, used_silence_frames * 100.0 / total_frames);
+		}
+	}
+	stretch_deinit(S);
+	exits(nil);
+}