shithub: audio-stretch

Download patch

ref: d9d2b5e927bd607d98b2b76d575f3ff1cd33531a
parent: 86ce8b4238e0c88562f4f32d65b84e373e6c530a
author: David Bryant <[email protected]>
date: Tue Nov 26 14:05:52 EST 2019

dos2unix stretch.[ch]

--- a/stretch.c
+++ b/stretch.c
@@ -1,401 +1,401 @@
-////////////////////////////////////////////////////////////////////////////
-//                        **** AUDIO-STRETCH ****                         //
-//                      Time Domain Harmonic Scaler                       //
-//                    Copyright (c) 2019 David Bryant                     //
-//                          All Rights Reserved.                          //
-//      Distributed under the BSD Software License (see license.txt)      //
-////////////////////////////////////////////////////////////////////////////
-
-// stretch.c
-
-// Time Domain Harmonic Compression and Expansion
-//
-// This library performs time domain harmonic scaling with pitch detection
-// to stretch the timing of a 16-bit PCM signal (either mono or stereo) from
-// 1/2 to 2 times its original length. This is done without altering any of
-// the tonal characteristics.
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <math.h>
-
-#include "stretch.h"
-
-#define MIN_PERIOD  24          /* minimum allowable pitch period */
-#define MAX_PERIOD  1024        /* maximum allowable pitch period */
-
-struct stretch_cnxt {
-    int num_chans, inbuff_samples, shortest, longest, tail, head, verbatim, fast_mode;
-    short *inbuff, *calcbuff;
-    unsigned int *results;
-};
-
-static void merge_blocks (short *output, short *input1, short *input2, int samples);
-static int find_period_fast (struct stretch_cnxt *cnxt, short *samples);
-static int find_period (struct stretch_cnxt *cnxt, short *samples);
-
-/*
- * Initialize a context of the time stretching code. The shortest and longest periods
- * are specified here. The longest period determines the lowest fundamental frequency
- * that can be handled correctly. Note that higher frequencies can be handled than the
- * shortest period would suggest because multiple periods can be combined, and the
- * worst-case performance will suffer if too short a period is selected.
- */
-
-StretchHandle stretch_init (int shortest_period, int longest_period, int num_channels, int fast_mode)
-{
-    struct stretch_cnxt *cnxt;
-
-    if (fast_mode) {
-        longest_period = (longest_period + 1) & ~1;
-        shortest_period &= ~1;
-    }
-
-    if (longest_period <= shortest_period || shortest_period < MIN_PERIOD || longest_period > MAX_PERIOD) {
-        printf ("stretch_init(): invalid periods!\n");
-        return NULL;
-    }
-
-    cnxt = (struct stretch_cnxt *) calloc (1, sizeof (struct stretch_cnxt));
-
-    if (cnxt) {
-        cnxt->inbuff_samples = longest_period * num_channels * 8;
-        cnxt->inbuff = calloc (cnxt->inbuff_samples, sizeof (*cnxt->inbuff));
-        cnxt->calcbuff = calloc (longest_period * num_channels, sizeof (*cnxt->calcbuff));
-        cnxt->results = calloc (longest_period, sizeof (*cnxt->results));
-    }
-
-    if (!cnxt || !cnxt->inbuff || !cnxt->calcbuff || !cnxt->results) {
-        fprintf (stderr, "stretch_init(): out of memory!\n");
-        return NULL;
-    }
-
-    cnxt->head = cnxt->tail = cnxt->longest = longest_period * num_channels;
-    cnxt->shortest = shortest_period * num_channels;
-    cnxt->num_chans = num_channels;
-    cnxt->fast_mode = fast_mode;
-
-    return (StretchHandle) cnxt;
-}
-
-/*
- * Process the specified samples with the given ratio (which is clipped to the
- * range 0.5 to 2.0). Note that the number of samples refers to total samples for
- * both channels in stereo and can be as large as desired (samples are buffered
- * here). The exact number of samples output is not possible to determine in
- * advance, but it will be close to the number of input samples times the ratio
- * plus or minus a couple of the longest periods.
- */
-
-int stretch_samples (StretchHandle handle, short *samples, int num_samples, short *output, float ratio)
-{
-    struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
-    int out_samples = 0;
-
-    num_samples *= cnxt->num_chans;
-
-    if (ratio < 0.5)
-        ratio = 0.5;
-    else if (ratio > 2.0)
-        ratio = 2.0;
-
-    /* while we have samples to do... */
-
-    do {
-        /* if there are more samples and room for them, copy in */
-
-        if (num_samples && cnxt->head < cnxt->inbuff_samples) {
-            int samples_to_copy = num_samples;
-
-            if (samples_to_copy > cnxt->inbuff_samples - cnxt->head)
-                samples_to_copy = cnxt->inbuff_samples - cnxt->head; 
-
-            memcpy (cnxt->inbuff + cnxt->head, samples, samples_to_copy * sizeof (cnxt->inbuff [0]));
-            num_samples -= samples_to_copy;
-            samples += samples_to_copy;
-            cnxt->head += samples_to_copy;
-        }
-
-        /* while there are enough samples to process, do so */
-
-        while (1) {
-            if (cnxt->verbatim && cnxt->head > cnxt->tail) {
-                int samples_to_copy = cnxt->verbatim;
-                if (samples_to_copy > cnxt->head - cnxt->tail)
-                    samples_to_copy = cnxt->head - cnxt->tail;
-
-                memcpy (output + out_samples, cnxt->inbuff + cnxt->tail, samples_to_copy * sizeof (cnxt->inbuff [0]));
-                out_samples += samples_to_copy;
-                cnxt->verbatim -= samples_to_copy;
-                cnxt->tail += samples_to_copy;
-            }
-            else if (cnxt->tail >= cnxt->longest && cnxt->head - cnxt->tail >= cnxt->longest * 2) {
-                if (ratio == 1.0) {
-                    memcpy (output + out_samples, cnxt->inbuff + cnxt->tail,
-                        cnxt->longest * 2 * sizeof (cnxt->inbuff [0]));
-
-                    out_samples += cnxt->longest * 2;
-                    cnxt->tail += cnxt->longest * 2;
-                }
-                else {
-                    int period = cnxt->fast_mode ? find_period_fast (cnxt, cnxt->inbuff + cnxt->tail) : 
-                        find_period (cnxt, cnxt->inbuff + cnxt->tail), incnt, outcnt;
-
-                    if (ratio < 1.0) {
-                        merge_blocks (output + out_samples, cnxt->inbuff + cnxt->tail,
-                            cnxt->inbuff + cnxt->tail + period, period); 
-
-                        out_samples += period;
-                        cnxt->tail += period * 2;
-                        incnt = (outcnt = period) * 2;
-                    }
-                    else {
-                        merge_blocks (output + out_samples, cnxt->inbuff + cnxt->tail,
-                            cnxt->inbuff + cnxt->tail - period, period * 2);
-
-                        out_samples += period * 2;
-                        cnxt->tail += period;
-                        outcnt = (incnt = period) * 2;
-                    }
-
-                    cnxt->verbatim = (int) floor (((ratio * incnt) - outcnt) / (1.0 - ratio) / cnxt->num_chans + 0.5) * cnxt->num_chans;
-
-                    if (cnxt->verbatim < 0)     // this should not happen...
-                        cnxt->verbatim = 0;
-                }
-            }
-            else
-                break;
-        }
-
-        /* if we're almost done with buffer, copy the rest back to beginning */
-
-        if (cnxt->head == cnxt->inbuff_samples) {
-            int samples_to_move = cnxt->inbuff_samples - cnxt->tail + cnxt->longest;
-
-            memmove (cnxt->inbuff, cnxt->inbuff + cnxt->tail - cnxt->longest,
-                samples_to_move * sizeof (cnxt->inbuff [0]));
-
-            cnxt->head -= cnxt->tail - cnxt->longest;
-            cnxt->tail = cnxt->longest;
-        }
-
-    } while (num_samples);
-
-    return out_samples / cnxt->num_chans;
-}  
-
-/* flush any leftover samples out at normal speed */
-
-int stretch_flush (StretchHandle handle, short *output)
-{
-    struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
-    int samples_to_copy = (cnxt->head - cnxt->tail) / cnxt->num_chans;
-
-    memcpy (output, cnxt->inbuff + cnxt->tail, samples_to_copy * cnxt->num_chans * sizeof (*output));
-    cnxt->tail = cnxt->head;
-
-    return samples_to_copy;
-}
-
-/* free handle */
-
-void stretch_deinit (StretchHandle handle)
-{
-    struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
-
-    free (cnxt->calcbuff);
-    free (cnxt->results);
-    free (cnxt->inbuff);
-    free (cnxt);
-}
-
-/*
- * The pitch detection is done by finding the period that produces the
- * maximum value for the following correlation formula applied to two
- * consecutive blocks of the given period length:
- *
- *         sum of the absolute values of each sample in both blocks
- *   ---------------------------------------------------------------------
- *   sum of the absolute differences of each corresponding pair of samples
- *
- * This formula was chosen for two reasons.  First, it produces output values
- * that can directly compared regardless of the pitch period.  Second, the
- * numerator can be accumulated for successive periods, and only the
- * denominator need be completely recalculated.
- */
-
-static int find_period (struct stretch_cnxt *cnxt, short *samples)
-{
-    unsigned int sum, diff, factor, best_factor = 0;
-    short *calcbuff = samples;
-    int period, best_period;
-    int i, j;
-
-    period = best_period = cnxt->shortest / cnxt->num_chans;
-
-    if (cnxt->num_chans == 2) {
-        calcbuff = cnxt->calcbuff;
-
-        for (i = j = 0; i < cnxt->longest * 2; i += 2)
-            calcbuff [j++] = (samples [i] + samples [i+1]) >> 1;
-    }
-
-    /* accumulate sum for shortest period size */
-
-    for (sum = i = 0; i < period; ++i)
-        sum += abs (calcbuff [i]) + abs (calcbuff [i+period]);
-
-    /* this loop actually cycles through all period lengths */
-
-    while (1) {
-        short *comp = calcbuff + period * 2;
-        short *ref = calcbuff + period;
-
-        /* compute sum of absolute differences */
-
-        diff = 0;
-
-        while (ref != calcbuff)
-            diff += abs (*--ref - *--comp);
-
-        /*
-         * Here we calculate and store the resulting correlation
-         * factor.  Note that we must watch for a difference of
-         * zero, meaning a perfect match.  Also, for increased
-         * precision using integer math, we scale the sum.  Care
-         * must be taken here to avoid possibility of overflow.
-         */
-
-        factor = diff ? (sum * 128) / diff : UINT32_MAX;
-
-        if (factor >= best_factor) {
-            best_factor = factor;
-            best_period = period;
-        }
-
-        /* see if we're done */
-
-        if (period * cnxt->num_chans == cnxt->longest)
-            break;
-
-        /* update accumulating sum and current period */
-
-        sum += abs (calcbuff [period * 2]);
-        sum += abs (calcbuff [period++ * 2 + 1]);
-    }
-
-    return best_period * cnxt->num_chans;
-}
-
-/*
- * This pitch detection function is similar to find_period() above, except that it
- * is optimized for speed. The audio data corresponding to two maximum periods is
- * averaged 2:1 into the calculation buffer, and then the calulations are done
- * for every other period length. Because the time is essentially proportional to
- * both the number of samples and the number of period lengths to try, this scheme
- * can reduce the time by a factor approaching 4x. The correlation results on either
- * side of the peak are compared to calculate a more accurate center of the period.
- */
-
-static int find_period_fast (struct stretch_cnxt *cnxt, short *samples)
-{
-    unsigned int sum, diff, best_factor = 0;
-    int period, best_period;
-    int i, j;
-
-    best_period = period = cnxt->shortest / (cnxt->num_chans * 2);
-
-    /* first step is compressing data 2:1 into calcbuff */
-
-    if (cnxt->num_chans == 2)
-        for (i = j = 0; i < cnxt->longest * 2; i += 4)
-            cnxt->calcbuff [j++] = (samples [i] + samples [i+1] + samples [i+2] + samples [i+3]) >> 2;
-    else
-        for (i = j = 0; i < cnxt->longest * 2; i += 2)
-            cnxt->calcbuff [j++] = (samples [i] + samples [i+1]) >> 1;
-
-    /* accumulate sum for shortest period */
-
-    for (sum = i = 0; i < period; ++i)
-        sum += abs (cnxt->calcbuff [i]) + abs (cnxt->calcbuff [i+period]);
-
-    /* this loop actually cycles through all period lengths */
-
-    while (1) {
-        short *comp = cnxt->calcbuff + period * 2;
-        short *ref = cnxt->calcbuff + period;
-
-        /* compute sum of absolute differences */
-
-        diff = 0;
-
-        while (ref != cnxt->calcbuff)
-            diff += abs (*--ref - *--comp);
-
-        /*
-         * Here we calculate and store the resulting correlation
-         * factor.  Note that we must watch for a difference of
-         * zero, meaning a perfect match.  Also, for increased
-         * precision using integer math, we scale the sum.  Care
-         * must be taken here to avoid possibility of overflow.
-         */
-
-        cnxt->results [period] = diff ? (sum * 128) / diff : UINT32_MAX;
-
-        if (cnxt->results [period] >= best_factor) {    /* check if best yet */
-            best_factor = cnxt->results [period];
-            best_period = period;
-        }
-
-        /* see if we're done */
-
-        if (period * cnxt->num_chans * 2 == cnxt->longest)
-            break;
-
-        /* update accumulating sum and current period */
-
-        sum += abs (cnxt->calcbuff [period * 2]);
-        sum += abs (cnxt->calcbuff [period++ * 2 + 1]);
-    }
-
-    if (best_period * cnxt->num_chans * 2 != cnxt->shortest && best_period * cnxt->num_chans * 2 != cnxt->longest) {
-        int high_side_diff = cnxt->results [best_period] - cnxt->results [best_period+1];
-        int low_side_diff = cnxt->results [best_period] - cnxt->results [best_period-1];
-
-        if (low_side_diff > high_side_diff * 2)
-            best_period = best_period * 2 + 1;
-        else if (high_side_diff > low_side_diff * 2)
-            best_period = best_period * 2 - 1;
-        else
-            best_period *= 2;
-    }
-    else
-        best_period *= 2;           /* shortest or longest use as is */
-
-    return best_period * cnxt->num_chans;
-}
-
-/*
- * To combine the two periods into one, each corresponding pair of samples
- * are averaged with a linearly sliding scale.  At the beginning of the period
- * the first sample dominates, and at the end the second sample dominates.  In
- * this way the resulting block blends with the previous and next blocks.
- *
- * The signed values are offset to unsigned for the calculation and then offset
- * back to signed.  This is done to avoid the compression around zero that occurs
- * with calculations of this type on C implementations that round division toward
- * zero.
- */
-
-static void merge_blocks (short *output, short *input1, short *input2, int samples)
-{
-    int i;
-
-    for (i = 0; i < samples; ++i)
-        output [i] = (((input1 [i] + 32768) * (samples - i) +
-            (input2 [i] + 32768) * i) / samples) - 32768;
-}
-
+////////////////////////////////////////////////////////////////////////////
+//                        **** AUDIO-STRETCH ****                         //
+//                      Time Domain Harmonic Scaler                       //
+//                    Copyright (c) 2019 David Bryant                     //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// stretch.c
+
+// Time Domain Harmonic Compression and Expansion
+//
+// This library performs time domain harmonic scaling with pitch detection
+// to stretch the timing of a 16-bit PCM signal (either mono or stereo) from
+// 1/2 to 2 times its original length. This is done without altering any of
+// the tonal characteristics.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+
+#include "stretch.h"
+
+#define MIN_PERIOD  24          /* minimum allowable pitch period */
+#define MAX_PERIOD  1024        /* maximum allowable pitch period */
+
+struct stretch_cnxt {
+    int num_chans, inbuff_samples, shortest, longest, tail, head, verbatim, fast_mode;
+    short *inbuff, *calcbuff;
+    unsigned int *results;
+};
+
+static void merge_blocks (short *output, short *input1, short *input2, int samples);
+static int find_period_fast (struct stretch_cnxt *cnxt, short *samples);
+static int find_period (struct stretch_cnxt *cnxt, short *samples);
+
+/*
+ * Initialize a context of the time stretching code. The shortest and longest periods
+ * are specified here. The longest period determines the lowest fundamental frequency
+ * that can be handled correctly. Note that higher frequencies can be handled than the
+ * shortest period would suggest because multiple periods can be combined, and the
+ * worst-case performance will suffer if too short a period is selected.
+ */
+
+StretchHandle stretch_init (int shortest_period, int longest_period, int num_channels, int fast_mode)
+{
+    struct stretch_cnxt *cnxt;
+
+    if (fast_mode) {
+        longest_period = (longest_period + 1) & ~1;
+        shortest_period &= ~1;
+    }
+
+    if (longest_period <= shortest_period || shortest_period < MIN_PERIOD || longest_period > MAX_PERIOD) {
+        printf ("stretch_init(): invalid periods!\n");
+        return NULL;
+    }
+
+    cnxt = (struct stretch_cnxt *) calloc (1, sizeof (struct stretch_cnxt));
+
+    if (cnxt) {
+        cnxt->inbuff_samples = longest_period * num_channels * 8;
+        cnxt->inbuff = calloc (cnxt->inbuff_samples, sizeof (*cnxt->inbuff));
+        cnxt->calcbuff = calloc (longest_period * num_channels, sizeof (*cnxt->calcbuff));
+        cnxt->results = calloc (longest_period, sizeof (*cnxt->results));
+    }
+
+    if (!cnxt || !cnxt->inbuff || !cnxt->calcbuff || !cnxt->results) {
+        fprintf (stderr, "stretch_init(): out of memory!\n");
+        return NULL;
+    }
+
+    cnxt->head = cnxt->tail = cnxt->longest = longest_period * num_channels;
+    cnxt->shortest = shortest_period * num_channels;
+    cnxt->num_chans = num_channels;
+    cnxt->fast_mode = fast_mode;
+
+    return (StretchHandle) cnxt;
+}
+
+/*
+ * Process the specified samples with the given ratio (which is clipped to the
+ * range 0.5 to 2.0). Note that the number of samples refers to total samples for
+ * both channels in stereo and can be as large as desired (samples are buffered
+ * here). The exact number of samples output is not possible to determine in
+ * advance, but it will be close to the number of input samples times the ratio
+ * plus or minus a couple of the longest periods.
+ */
+
+int stretch_samples (StretchHandle handle, short *samples, int num_samples, short *output, float ratio)
+{
+    struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
+    int out_samples = 0;
+
+    num_samples *= cnxt->num_chans;
+
+    if (ratio < 0.5)
+        ratio = 0.5;
+    else if (ratio > 2.0)
+        ratio = 2.0;
+
+    /* while we have samples to do... */
+
+    do {
+        /* if there are more samples and room for them, copy in */
+
+        if (num_samples && cnxt->head < cnxt->inbuff_samples) {
+            int samples_to_copy = num_samples;
+
+            if (samples_to_copy > cnxt->inbuff_samples - cnxt->head)
+                samples_to_copy = cnxt->inbuff_samples - cnxt->head; 
+
+            memcpy (cnxt->inbuff + cnxt->head, samples, samples_to_copy * sizeof (cnxt->inbuff [0]));
+            num_samples -= samples_to_copy;
+            samples += samples_to_copy;
+            cnxt->head += samples_to_copy;
+        }
+
+        /* while there are enough samples to process, do so */
+
+        while (1) {
+            if (cnxt->verbatim && cnxt->head > cnxt->tail) {
+                int samples_to_copy = cnxt->verbatim;
+                if (samples_to_copy > cnxt->head - cnxt->tail)
+                    samples_to_copy = cnxt->head - cnxt->tail;
+
+                memcpy (output + out_samples, cnxt->inbuff + cnxt->tail, samples_to_copy * sizeof (cnxt->inbuff [0]));
+                out_samples += samples_to_copy;
+                cnxt->verbatim -= samples_to_copy;
+                cnxt->tail += samples_to_copy;
+            }
+            else if (cnxt->tail >= cnxt->longest && cnxt->head - cnxt->tail >= cnxt->longest * 2) {
+                if (ratio == 1.0) {
+                    memcpy (output + out_samples, cnxt->inbuff + cnxt->tail,
+                        cnxt->longest * 2 * sizeof (cnxt->inbuff [0]));
+
+                    out_samples += cnxt->longest * 2;
+                    cnxt->tail += cnxt->longest * 2;
+                }
+                else {
+                    int period = cnxt->fast_mode ? find_period_fast (cnxt, cnxt->inbuff + cnxt->tail) : 
+                        find_period (cnxt, cnxt->inbuff + cnxt->tail), incnt, outcnt;
+
+                    if (ratio < 1.0) {
+                        merge_blocks (output + out_samples, cnxt->inbuff + cnxt->tail,
+                            cnxt->inbuff + cnxt->tail + period, period); 
+
+                        out_samples += period;
+                        cnxt->tail += period * 2;
+                        incnt = (outcnt = period) * 2;
+                    }
+                    else {
+                        merge_blocks (output + out_samples, cnxt->inbuff + cnxt->tail,
+                            cnxt->inbuff + cnxt->tail - period, period * 2);
+
+                        out_samples += period * 2;
+                        cnxt->tail += period;
+                        outcnt = (incnt = period) * 2;
+                    }
+
+                    cnxt->verbatim = (int) floor (((ratio * incnt) - outcnt) / (1.0 - ratio) / cnxt->num_chans + 0.5) * cnxt->num_chans;
+
+                    if (cnxt->verbatim < 0)     // this should not happen...
+                        cnxt->verbatim = 0;
+                }
+            }
+            else
+                break;
+        }
+
+        /* if we're almost done with buffer, copy the rest back to beginning */
+
+        if (cnxt->head == cnxt->inbuff_samples) {
+            int samples_to_move = cnxt->inbuff_samples - cnxt->tail + cnxt->longest;
+
+            memmove (cnxt->inbuff, cnxt->inbuff + cnxt->tail - cnxt->longest,
+                samples_to_move * sizeof (cnxt->inbuff [0]));
+
+            cnxt->head -= cnxt->tail - cnxt->longest;
+            cnxt->tail = cnxt->longest;
+        }
+
+    } while (num_samples);
+
+    return out_samples / cnxt->num_chans;
+}  
+
+/* flush any leftover samples out at normal speed */
+
+int stretch_flush (StretchHandle handle, short *output)
+{
+    struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
+    int samples_to_copy = (cnxt->head - cnxt->tail) / cnxt->num_chans;
+
+    memcpy (output, cnxt->inbuff + cnxt->tail, samples_to_copy * cnxt->num_chans * sizeof (*output));
+    cnxt->tail = cnxt->head;
+
+    return samples_to_copy;
+}
+
+/* free handle */
+
+void stretch_deinit (StretchHandle handle)
+{
+    struct stretch_cnxt *cnxt = (struct stretch_cnxt *) handle;
+
+    free (cnxt->calcbuff);
+    free (cnxt->results);
+    free (cnxt->inbuff);
+    free (cnxt);
+}
+
+/*
+ * The pitch detection is done by finding the period that produces the
+ * maximum value for the following correlation formula applied to two
+ * consecutive blocks of the given period length:
+ *
+ *         sum of the absolute values of each sample in both blocks
+ *   ---------------------------------------------------------------------
+ *   sum of the absolute differences of each corresponding pair of samples
+ *
+ * This formula was chosen for two reasons.  First, it produces output values
+ * that can directly compared regardless of the pitch period.  Second, the
+ * numerator can be accumulated for successive periods, and only the
+ * denominator need be completely recalculated.
+ */
+
+static int find_period (struct stretch_cnxt *cnxt, short *samples)
+{
+    unsigned int sum, diff, factor, best_factor = 0;
+    short *calcbuff = samples;
+    int period, best_period;
+    int i, j;
+
+    period = best_period = cnxt->shortest / cnxt->num_chans;
+
+    if (cnxt->num_chans == 2) {
+        calcbuff = cnxt->calcbuff;
+
+        for (i = j = 0; i < cnxt->longest * 2; i += 2)
+            calcbuff [j++] = (samples [i] + samples [i+1]) >> 1;
+    }
+
+    /* accumulate sum for shortest period size */
+
+    for (sum = i = 0; i < period; ++i)
+        sum += abs (calcbuff [i]) + abs (calcbuff [i+period]);
+
+    /* this loop actually cycles through all period lengths */
+
+    while (1) {
+        short *comp = calcbuff + period * 2;
+        short *ref = calcbuff + period;
+
+        /* compute sum of absolute differences */
+
+        diff = 0;
+
+        while (ref != calcbuff)
+            diff += abs (*--ref - *--comp);
+
+        /*
+         * Here we calculate and store the resulting correlation
+         * factor.  Note that we must watch for a difference of
+         * zero, meaning a perfect match.  Also, for increased
+         * precision using integer math, we scale the sum.  Care
+         * must be taken here to avoid possibility of overflow.
+         */
+
+        factor = diff ? (sum * 128) / diff : UINT32_MAX;
+
+        if (factor >= best_factor) {
+            best_factor = factor;
+            best_period = period;
+        }
+
+        /* see if we're done */
+
+        if (period * cnxt->num_chans == cnxt->longest)
+            break;
+
+        /* update accumulating sum and current period */
+
+        sum += abs (calcbuff [period * 2]);
+        sum += abs (calcbuff [period++ * 2 + 1]);
+    }
+
+    return best_period * cnxt->num_chans;
+}
+
+/*
+ * This pitch detection function is similar to find_period() above, except that it
+ * is optimized for speed. The audio data corresponding to two maximum periods is
+ * averaged 2:1 into the calculation buffer, and then the calulations are done
+ * for every other period length. Because the time is essentially proportional to
+ * both the number of samples and the number of period lengths to try, this scheme
+ * can reduce the time by a factor approaching 4x. The correlation results on either
+ * side of the peak are compared to calculate a more accurate center of the period.
+ */
+
+static int find_period_fast (struct stretch_cnxt *cnxt, short *samples)
+{
+    unsigned int sum, diff, best_factor = 0;
+    int period, best_period;
+    int i, j;
+
+    best_period = period = cnxt->shortest / (cnxt->num_chans * 2);
+
+    /* first step is compressing data 2:1 into calcbuff */
+
+    if (cnxt->num_chans == 2)
+        for (i = j = 0; i < cnxt->longest * 2; i += 4)
+            cnxt->calcbuff [j++] = (samples [i] + samples [i+1] + samples [i+2] + samples [i+3]) >> 2;
+    else
+        for (i = j = 0; i < cnxt->longest * 2; i += 2)
+            cnxt->calcbuff [j++] = (samples [i] + samples [i+1]) >> 1;
+
+    /* accumulate sum for shortest period */
+
+    for (sum = i = 0; i < period; ++i)
+        sum += abs (cnxt->calcbuff [i]) + abs (cnxt->calcbuff [i+period]);
+
+    /* this loop actually cycles through all period lengths */
+
+    while (1) {
+        short *comp = cnxt->calcbuff + period * 2;
+        short *ref = cnxt->calcbuff + period;
+
+        /* compute sum of absolute differences */
+
+        diff = 0;
+
+        while (ref != cnxt->calcbuff)
+            diff += abs (*--ref - *--comp);
+
+        /*
+         * Here we calculate and store the resulting correlation
+         * factor.  Note that we must watch for a difference of
+         * zero, meaning a perfect match.  Also, for increased
+         * precision using integer math, we scale the sum.  Care
+         * must be taken here to avoid possibility of overflow.
+         */
+
+        cnxt->results [period] = diff ? (sum * 128) / diff : UINT32_MAX;
+
+        if (cnxt->results [period] >= best_factor) {    /* check if best yet */
+            best_factor = cnxt->results [period];
+            best_period = period;
+        }
+
+        /* see if we're done */
+
+        if (period * cnxt->num_chans * 2 == cnxt->longest)
+            break;
+
+        /* update accumulating sum and current period */
+
+        sum += abs (cnxt->calcbuff [period * 2]);
+        sum += abs (cnxt->calcbuff [period++ * 2 + 1]);
+    }
+
+    if (best_period * cnxt->num_chans * 2 != cnxt->shortest && best_period * cnxt->num_chans * 2 != cnxt->longest) {
+        int high_side_diff = cnxt->results [best_period] - cnxt->results [best_period+1];
+        int low_side_diff = cnxt->results [best_period] - cnxt->results [best_period-1];
+
+        if (low_side_diff > high_side_diff * 2)
+            best_period = best_period * 2 + 1;
+        else if (high_side_diff > low_side_diff * 2)
+            best_period = best_period * 2 - 1;
+        else
+            best_period *= 2;
+    }
+    else
+        best_period *= 2;           /* shortest or longest use as is */
+
+    return best_period * cnxt->num_chans;
+}
+
+/*
+ * To combine the two periods into one, each corresponding pair of samples
+ * are averaged with a linearly sliding scale.  At the beginning of the period
+ * the first sample dominates, and at the end the second sample dominates.  In
+ * this way the resulting block blends with the previous and next blocks.
+ *
+ * The signed values are offset to unsigned for the calculation and then offset
+ * back to signed.  This is done to avoid the compression around zero that occurs
+ * with calculations of this type on C implementations that round division toward
+ * zero.
+ */
+
+static void merge_blocks (short *output, short *input1, short *input2, int samples)
+{
+    int i;
+
+    for (i = 0; i < samples; ++i)
+        output [i] = (((input1 [i] + 32768) * (samples - i) +
+            (input2 [i] + 32768) * i) / samples) - 32768;
+}
+
--- a/stretch.h
+++ b/stretch.h
@@ -1,37 +1,37 @@
-////////////////////////////////////////////////////////////////////////////
-//                        **** AUDIO-STRETCH ****                         //
-//                      Time Domain Harmonic Scaler                       //
-//                    Copyright (c) 2019 David Bryant                     //
-//                          All Rights Reserved.                          //
-//      Distributed under the BSD Software License (see license.txt)      //
-////////////////////////////////////////////////////////////////////////////
-
-// stretch.h
-
-// Time Domain Harmonic Compression and Expansion
-//
-// This library performs time domain harmonic scaling with pitch detection
-// to stretch the timing of a 16-bit PCM signal (either mono or stereo) from
-// 1/2 to 2 times its original length. This is done without altering any of
-// its tonal characteristics.
-
-#ifndef STRETCH_H
-#define STRETCH_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef void *StretchHandle;
-
-StretchHandle stretch_init (int shortest_period, int longest_period, int num_chans, int fast_mode);
-int stretch_samples (StretchHandle handle, short *samples, int num_samples, short *output, float ratio);
-int stretch_flush (StretchHandle handle, short *output);
-void stretch_deinit (StretchHandle handle);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
-
+////////////////////////////////////////////////////////////////////////////
+//                        **** AUDIO-STRETCH ****                         //
+//                      Time Domain Harmonic Scaler                       //
+//                    Copyright (c) 2019 David Bryant                     //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// stretch.h
+
+// Time Domain Harmonic Compression and Expansion
+//
+// This library performs time domain harmonic scaling with pitch detection
+// to stretch the timing of a 16-bit PCM signal (either mono or stereo) from
+// 1/2 to 2 times its original length. This is done without altering any of
+// its tonal characteristics.
+
+#ifndef STRETCH_H
+#define STRETCH_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void *StretchHandle;
+
+StretchHandle stretch_init (int shortest_period, int longest_period, int num_chans, int fast_mode);
+int stretch_samples (StretchHandle handle, short *samples, int num_samples, short *output, float ratio);
+int stretch_flush (StretchHandle handle, short *output);
+void stretch_deinit (StretchHandle handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+