shithub: dav1d

--- a/include/dav1d/dav1d.h

+++ b/include/dav1d/dav1d.h

@@ -45,6 +45,7 @@

     int n_frame_threads;

     int n_tile_threads;

     Dav1dPicAllocator allocator;

+    int apply_grain;

 } Dav1dSettings;

/**

--- /dev/null

+++ b/src/film_grain.h

@@ -1,0 +1,39 @@

+/*

+ * Copyright © 2018, VideoLAN and dav1d authors

+ * Copyright © 2018, Two Orioles, LLC

+ * All rights reserved.

+ *

+ * Redistribution and use in source and binary forms, with or without

+ * modification, are permitted provided that the following conditions are met:

+ *

+ * 1. Redistributions of source code must retain the above copyright notice, this

+ *    list of conditions and the following disclaimer.

+ *

+ * 2. Redistributions in binary form must reproduce the above copyright notice,

+ *    this list of conditions and the following disclaimer in the documentation

+ *    and/or other materials provided with the distribution.

+ *

+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND

+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED

+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE

+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR

+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES

+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND

+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ */

+#ifndef __DAV1D_SRC_FILM_GRAIN_H__

+#define __DAV1D_SRC_FILM_GRAIN_H__

+#include "dav1d/dav1d.h"

+/*
+ * Film grain synthesis entry points, one per supported bit depth.
+ *
+ * Pixels are read from `in` and the result is written to `out`; the film grain
+ * parameters are taken from out->p.film_grain. Both pictures must use the same
+ * strides (this is asserted by the implementation). Note that the
+ * implementation zeroes the padding of `in` next to the visible area while
+ * processing.
+ */
+void dav1d_apply_grain_8bpc(Dav1dPicture *const out,
+                            const Dav1dPicture *const in);
+void dav1d_apply_grain_10bpc(Dav1dPicture *const out,
+                             const Dav1dPicture *const in);

+#endif /* __DAV1D_SRC_FILM_GRAIN_H__ */

--- /dev/null

+++ b/src/film_grain_tmpl.c

@@ -1,0 +1,530 @@

+/*

+ * Copyright © 2018, Niklas Haas

+ * Copyright © 2018, VideoLAN and dav1d authors

+ * Copyright © 2018, Two Orioles, LLC

+ * All rights reserved.

+ *

+ * Redistribution and use in source and binary forms, with or without

+ * modification, are permitted provided that the following conditions are met:

+ *

+ * 1. Redistributions of source code must retain the above copyright notice, this

+ *    list of conditions and the following disclaimer.

+ *

+ * 2. Redistributions in binary form must reproduce the above copyright notice,

+ *    this list of conditions and the following disclaimer in the documentation

+ *    and/or other materials provided with the distribution.

+ *

+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND

+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED

+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE

+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR

+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES

+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND

+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ */

+#include <assert.h>

+#include <stdint.h>

+#include "common.h"

+#include "common/intops.h"

+#include "common/bitdepth.h"

+#include "tables.h"

+#include "film_grain.h"

+// Storage type of a single grain sample; it has to hold the range
+// [GRAIN_MIN, GRAIN_MAX], which grows with the bit depth.
+#if BITDEPTH == 8
+typedef int8_t entry;
+#else
+typedef int16_t entry;
+#endif

+enum {
+    GRAIN_WIDTH  = 82,            // columns of a grain template buffer
+    GRAIN_HEIGHT = 73,            // rows of a grain template buffer
+    SUB_GRAIN_WIDTH = 44,         // columns generated for horizontally subsampled chroma
+    SUB_GRAIN_HEIGHT = 38,        // rows generated for vertically subsampled chroma
+    SUB_GRAIN_OFFSET = 6,         // NOTE(review): not referenced anywhere in this file
+    BLOCK_SIZE = 32,              // grain is applied in BLOCK_SIZE x BLOCK_SIZE luma blocks
+    SCALING_SIZE = 1 << BITDEPTH, // scaling LUT entries, one per possible pixel value
+};

+// Advances the 16-bit shift-register generator stored in *state by one step
+// and returns the `bits` most significant bits of the updated state.
+static inline int get_random_number(const int bits, unsigned *state) {
+    const int prev = *state;
+    // Feedback bit: XOR of bits 0, 1, 3 and 12 of the previous state
+    const unsigned feedback = (prev ^ (prev >> 1) ^ (prev >> 3) ^ (prev >> 12)) & 1;
+    const unsigned next = (prev >> 1) | (feedback << 15);
+    *state = next;
+    return (next >> (16 - bits)) & ((1 << bits) - 1);
+}

+// Divides x by 2^shift, rounding to nearest (adds half the divisor before
+// shifting). A shift of 0 returns x unchanged.
+static inline int round2(const int x, const int shift) {
+    const int half = (1 << shift) >> 1;
+    return (x + half) >> shift;
+}

+// Range that grain samples are clipped to; it scales with the bit depth
+// ([-128, 127] for 8 bits).
+enum {
+    GRAIN_CENTER = 128 << (BITDEPTH - 8),
+    GRAIN_MIN = -GRAIN_CENTER,
+    GRAIN_MAX = (256 << (BITDEPTH - 8)) - 1 - GRAIN_CENTER,
+};

+// Fills `buf` with the luma grain template described by in->p.film_grain:
+// pseudo-random samples from the gaussian table first, followed by an in-place
+// auto-regressive filter over everything but a 3 sample border (top/left/right).
+static void generate_grain_y(const Dav1dPicture *const in,
+                             entry buf[GRAIN_HEIGHT][GRAIN_WIDTH])
+{
+    const Dav1dFilmGrainData *const fg = &in->p.film_grain;
+    const int shift = 12 - BITDEPTH + fg->grain_scale_shift;
+    unsigned rng = fg->seed;
+    // Step 1: white noise, one generator step per sample in raster order
+    for (int row = 0; row < GRAIN_HEIGHT; row++) {
+        entry *const line = buf[row];
+        for (int col = 0; col < GRAIN_WIDTH; col++) {
+            const int idx = get_random_number(11, &rng);
+            line[col] = round2(dav1d_gaussian_sequence[idx], shift);
+        }
+    }
+    // Step 2: auto-regressive filter; already filtered samples feed later ones
+    const int pad = 3;
+    const int lag = fg->ar_coeff_lag;
+    for (int row = pad; row < GRAIN_HEIGHT; row++) {
+        for (int col = pad; col < GRAIN_WIDTH - pad; col++) {
+            int acc = 0;
+            int k = 0; // index of the next coefficient, consumed in raster order
+            for (int dy = -lag; dy <= 0; dy++) {
+                // On the current row only samples left of the centre take part
+                const int last_dx = dy ? lag : -1;
+                for (int dx = -lag; dx <= last_dx; dx++)
+                    acc += fg->ar_coeffs_y[k++] * buf[row + dy][col + dx];
+            }
+            const int filtered = buf[row][col] + round2(acc, fg->ar_coeff_shift);
+            buf[row][col] = iclip(filtered, GRAIN_MIN, GRAIN_MAX);
+        }
+    }
+}

+// Fills `buf` with the grain template of chroma plane `uv` (0 or 1). Only the
+// top-left SUB_GRAIN_* part is generated along subsampled directions. The
+// auto-regressive filter additionally mixes in the co-located luma grain from
+// `buf_y` when luma scaling points are present.
+static void generate_grain_uv(const Dav1dPicture *const in, int uv,
+                              entry buf[GRAIN_HEIGHT][GRAIN_WIDTH],
+                              entry buf_y[GRAIN_HEIGHT][GRAIN_WIDTH])
+{
+    const Dav1dFilmGrainData *const fg = &in->p.film_grain;
+    const int ss_hor = in->p.layout != DAV1D_PIXEL_LAYOUT_I444;
+    const int ss_ver = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int width  = ss_hor ? SUB_GRAIN_WIDTH  : GRAIN_WIDTH;
+    const int height = ss_ver ? SUB_GRAIN_HEIGHT : GRAIN_HEIGHT;
+    const int shift = 12 - BITDEPTH + fg->grain_scale_shift;
+    // Each chroma plane perturbs the frame seed with its own constant
+    unsigned rng = fg->seed ^ (uv ? 0x49d8 : 0xb524);
+    for (int row = 0; row < height; row++) {
+        for (int col = 0; col < width; col++) {
+            const int idx = get_random_number(11, &rng);
+            buf[row][col] = round2(dav1d_gaussian_sequence[idx], shift);
+        }
+    }
+    const int pad = 3;
+    const int lag = fg->ar_coeff_lag;
+    const int8_t *const coeffs = fg->ar_coeffs_uv[uv];
+    for (int row = pad; row < height; row++) {
+        for (int col = pad; col < width - pad; col++) {
+            int acc = 0;
+            int k = 0; // index of the next coefficient, consumed in raster order
+            for (int dy = -lag; dy <= 0; dy++) {
+                // On the current row only samples left of the centre take part
+                const int last_dx = dy ? lag : -1;
+                for (int dx = -lag; dx <= last_dx; dx++)
+                    acc += coeffs[k++] * buf[row + dy][col + dx];
+            }
+            // The coefficient following the neighbours weights the luma grain,
+            // averaged over the luma samples covered by this chroma sample
+            if (fg->num_y_points) {
+                const int luma_x = ((col - pad) << ss_hor) + pad;
+                const int luma_y = ((row - pad) << ss_ver) + pad;
+                int luma = 0;
+                for (int i = 0; i <= ss_ver; i++)
+                    for (int j = 0; j <= ss_hor; j++)
+                        luma += buf_y[luma_y + i][luma_x + j];
+                acc += round2(luma, ss_hor + ss_ver) * coeffs[k];
+            }
+            const int filtered = buf[row][col] + round2(acc, fg->ar_coeff_shift);
+            buf[row][col] = iclip(filtered, GRAIN_MIN, GRAIN_MAX);
+        }
+    }
+}

+// Builds the scaling lookup table of one plane by piecewise-linear
+// interpolation between `num` control points. points[i][0] is the x position
+// (an 8-bit pixel value, scaled up to BITDEPTH here) and points[i][1] the
+// scaling value at that position. Entries before the first / after the last
+// point repeat that point's value. Without any points the table is all zero.
+static void generate_scaling(const uint8_t points[][2], int num,
+                             uint8_t scaling[SCALING_SIZE])
+{
+    const int shift_x = BITDEPTH - 8;
+    // No control points: nothing to interpolate (and points[num - 1] below
+    // would be out of bounds), so the scaling function is zero everywhere
+    if (num <= 0) {
+        for (int i = 0; i < SCALING_SIZE; i++)
+            scaling[i] = 0;
+        return;
+    }
+    // Fill up the preceding entries with the initial value
+    for (int i = 0; i < points[0][0] << shift_x; i++)
+        scaling[i] = points[0][1];
+    // Linearly interpolate the values in the middle
+    for (int i = 0; i < num - 1; i++) {
+        const int bx = points[i][0] << shift_x;
+        const int by = points[i][1];
+        const int ex = points[i+1][0] << shift_x;
+        const int ey = points[i+1][1];
+        const int dx = ex - bx;
+        const int dy = ey - by;
+        // Points are expected to have strictly increasing x; skip degenerate
+        // segments instead of dividing by zero on malformed input
+        if (dx <= 0)
+            continue;
+        // 16.16 fixed-point slope. The reciprocal of dx is computed (and
+        // truncated) first and only then multiplied by dy, as the AV1
+        // specification prescribes
+        const int delta = dy * ((0x10000 + (dx >> 1)) / dx);
+        for (int x = 0; x < dx; x++) {
+            const int v = by + ((x * delta + 0x8000) >> 16);
+            scaling[bx + x] = v;
+        }
+    }
+    // Fill up the remaining entries with the final value
+    for (int i = points[num - 1][0] << shift_x; i < SCALING_SIZE; i++)
+        scaling[i] = points[num - 1][1];
+}

+// Reads one grain sample for position (x, y) of a block. offsets[bx][by] holds
+// the random value that selects the window inside the grain LUT; bx/by = 1
+// pick the left/top neighbour's window, shifted by one block so that it lines
+// up with the overlapped region. subx/suby are the chroma subsampling shifts.
+static inline entry sample_lut(entry grain_lut[GRAIN_HEIGHT][GRAIN_WIDTH],
+                               int offsets[2][2], int subx, int suby,
+                               int bx, int by, int x, int y)
+{
+    const int rnd = offsets[bx][by];
+    // High nibble selects the column offset, low nibble the row offset
+    const int col0 = 3 + (2 >> subx) * (3 + (rnd >> 4));
+    const int row0 = 3 + (2 >> suby) * (3 + (rnd & 0xF));
+    const int row = row0 + y + (BLOCK_SIZE >> suby) * by;
+    const int col = col0 + x + (BLOCK_SIZE >> subx) * bx;
+    return grain_lut[row][col];
+}

+// Applies luma grain to block row `row_num` (BLOCK_SIZE picture lines): pixels
+// are read from `in`, scaled grain is added and the clipped result is written
+// to `out`. With overlap enabled, the first two lines/columns of a block are
+// blended with the grain of the neighbouring block above/left.
+// Side effect: the padding of `in` right of and below the visible picture that
+// is covered by this block row is zeroed, so that every source pixel is a
+// valid index into `scaling`.
+static void apply_to_row_y(Dav1dPicture *const out, const Dav1dPicture *const in,
+                           entry grain_lut[GRAIN_HEIGHT][GRAIN_WIDTH],
+                           uint8_t scaling[SCALING_SIZE], int row_num)
+{
+    const Dav1dFilmGrainData *const data = &out->p.film_grain;
+    const int rows = 1 + (data->overlap_flag && row_num > 0);
+    int min_value, max_value;
+    if (data->clip_to_restricted_range) {
+        min_value = 16 << (BITDEPTH - 8);
+        max_value = 235 << (BITDEPTH - 8);
+    } else {
+        min_value = 0;
+        max_value = (1 << BITDEPTH) - 1;
+    }
+    // seed[0] contains the current row, seed[1] contains the previous
+    unsigned seed[2];
+    for (int i = 0; i < rows; i++) {
+        seed[i] = data->seed;
+        seed[i] ^= (((row_num - i) * 37  + 178) & 0xFF) << 8;
+        seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF);
+    }
+    const ptrdiff_t stride = out->stride[0];
+    assert(stride % (BLOCK_SIZE * sizeof(pixel)) == 0);
+    assert(stride == in->stride[0]);
+    // Strides are byte counts, so plane addressing is done on byte pointers;
+    // arithmetic on void pointers is a GNU extension and not valid ISO C
+    uint8_t *const src_plane = (uint8_t *) in->data[0];
+    uint8_t *const src_row = src_plane + stride * row_num * BLOCK_SIZE;
+    uint8_t *const dst_row = (uint8_t *) out->data[0] + stride * row_num * BLOCK_SIZE;
+    // edge extend source pixels
+    const int row_len = (out->p.w + BLOCK_SIZE - 1) & ~(BLOCK_SIZE - 1);
+    for (int x = out->p.w; x < row_len; x++) {
+        for (int y = 0; y < BLOCK_SIZE; y++) {
+            pixel *src = (pixel *) (src_row + y * stride + x * sizeof(pixel));
+            *src = 0;
+        }
+    }
+    const int row_h = (row_num + 1) * BLOCK_SIZE;
+    for (int y = out->p.h; y < row_h; y++)
+        memset(src_plane + stride * y, 0, row_len * sizeof(pixel));
+    int offsets[2 /* col offset */][2 /* row offset */];
+    // process this row in BLOCK_SIZE^2 blocks
+    for (int bx = 0; bx < out->p.w; bx += BLOCK_SIZE) {
+        if (data->overlap_flag && bx) {
+            // shift previous offsets left
+            for (int i = 0; i < rows; i++)
+                offsets[1][i] = offsets[0][i];
+        }
+        // update current offsets
+        for (int i = 0; i < rows; i++)
+            offsets[0][i] = get_random_number(8, &seed[i]);
+        // x/y block offsets to compensate for overlapped regions
+        const int ystart = data->overlap_flag && row_num ? 2 : 0;
+        const int xstart = data->overlap_flag && bx      ? 2 : 0;
+        // blend weights { neighbouring block, current block } for the two
+        // overlapped lines/columns
+        static const int w[2][2] = { { 27, 17 }, { 17, 27 } };
+        // scales `grain` by the LUT entry of the source pixel at (x, y) of the
+        // current block and stores the clipped sum in the destination
+#define add_noise_y(x, y, grain)                                                \
+        do {                                                                    \
+            const pixel *src = (const pixel *)                                  \
+                (src_row + (y) * stride + (bx + (x)) * sizeof(pixel));          \
+            pixel *dst = (pixel *)                                              \
+                (dst_row + (y) * stride + (bx + (x)) * sizeof(pixel));          \
+            int noise = round2(scaling[ *src ] * (grain), data->scaling_shift); \
+            *dst = iclip(*src + noise, min_value, max_value);                   \
+        } while (0)
+        for (int y = ystart; y < BLOCK_SIZE; y++) {
+            // Non-overlapped image region (straightforward)
+            for (int x = xstart; x < BLOCK_SIZE; x++) {
+                int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
+                add_noise_y(x, y, grain);
+            }
+            // Special case for overlapped column
+            for (int x = 0; x < xstart; x++) {
+                int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
+                int old   = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y);
+                grain = round2(old * w[x][0] + grain * w[x][1], 5);
+                grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
+                add_noise_y(x, y, grain);
+            }
+        }
+        for (int y = 0; y < ystart; y++) {
+            // Special case for overlapped row (sans corner)
+            for (int x = xstart; x < BLOCK_SIZE; x++) {
+                int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
+                int old   = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y);
+                grain = round2(old * w[y][0] + grain * w[y][1], 5);
+                grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
+                add_noise_y(x, y, grain);
+            }
+            // Special case for doubly-overlapped corner
+            for (int x = 0; x < xstart; x++) {
+                // Blend the top pixel with the top left block
+                int top = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y);
+                int old = sample_lut(grain_lut, offsets, 0, 0, 1, 1, x, y);
+                top = round2(old * w[x][0] + top * w[x][1], 5);
+                top = iclip(top, GRAIN_MIN, GRAIN_MAX);
+                // Blend the current pixel with the left block
+                int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
+                old = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y);
+                grain = round2(old * w[x][0] + grain * w[x][1], 5);
+                grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
+                // Mix the two rows together and apply grain
+                grain = round2(top * w[y][0] + grain * w[y][1], 5);
+                grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
+                add_noise_y(x, y, grain);
+            }
+        }
+#undef add_noise_y
+    }
+}

+// Applies grain to block row `row_num` of chroma plane `uv` (0 or 1): chroma
+// pixels are read from `in` and the clipped result is written to `out`. The
+// scaling LUT is indexed either by the co-located luma value
+// (chroma_scaling_from_luma) or by a weighted mix of luma and chroma.
+// Luma is always taken from `in`: the scaling must see the picture without
+// grain, and the luma plane of `out` may not have been written yet.
+// Side effect: the padding of `in`'s chroma plane right of and below the
+// visible picture that is covered by this block row is zeroed.
+static void apply_to_row_uv(Dav1dPicture *const out, const Dav1dPicture *const in,
+                            entry grain_lut[GRAIN_HEIGHT][GRAIN_WIDTH],
+                            uint8_t scaling[SCALING_SIZE], int uv, int row_num)
+{
+    const Dav1dFilmGrainData *const data = &out->p.film_grain;
+    const int rows = 1 + (data->overlap_flag && row_num > 0);
+    int min_value, max_value;
+    if (data->clip_to_restricted_range) {
+        min_value = 16 << (BITDEPTH - 8);
+        if (out->p.mtrx == DAV1D_MC_IDENTITY) {
+            max_value = 235 << (BITDEPTH - 8);
+        } else {
+            max_value = 240 << (BITDEPTH - 8);
+        }
+    } else {
+        min_value = 0;
+        max_value = (1 << BITDEPTH) - 1;
+    }
+    const int sx = in->p.layout != DAV1D_PIXEL_LAYOUT_I444;
+    const int sy = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
+    // Chroma plane and block dimensions. Plane sizes are rounded up so that
+    // the last column/row of a picture with odd luma dimensions counts as
+    // picture content rather than as padding
+    const int cw = (out->p.w + sx) >> sx;
+    const int ch = (out->p.h + sy) >> sy;
+    const int bw = BLOCK_SIZE >> sx;
+    const int bh = BLOCK_SIZE >> sy;
+    // seed[0] contains the current row, seed[1] contains the previous
+    unsigned seed[2];
+    for (int i = 0; i < rows; i++) {
+        seed[i] = data->seed;
+        seed[i] ^= (((row_num - i) * 37  + 178) & 0xFF) << 8;
+        seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF);
+    }
+    const ptrdiff_t stride = out->stride[1];
+    assert(stride % (BLOCK_SIZE * sizeof(pixel)) == 0);
+    assert(stride == in->stride[1]);
+    const ptrdiff_t luma_stride = in->stride[0];
+    const int by = row_num * bh;
+    // Strides are byte counts, so plane addressing is done on byte pointers;
+    // arithmetic on void pointers is a GNU extension and not valid ISO C
+    uint8_t *const src_plane = (uint8_t *) in->data[1 + uv];
+    uint8_t *const dst_row = (uint8_t *) out->data[1 + uv] + stride * by;
+    uint8_t *const src_row = src_plane + stride * by;
+    const uint8_t *const luma_row =
+        (const uint8_t *) in->data[0] + luma_stride * row_num * BLOCK_SIZE;
+    // edge extend source pixels
+    const int row_len = (cw + bw - 1) & ~(bw - 1);
+    for (int x = cw; x < row_len; x++) {
+        for (int y = 0; y < bh; y++) {
+            pixel *src = (pixel *) (src_row + y * stride + x * sizeof(pixel));
+            *src = 0;
+        }
+    }
+    const int row_h = (row_num + 1) * bh;
+    for (int y = ch; y < row_h; y++)
+        memset(src_plane + stride * y, 0, row_len * sizeof(pixel));
+    int offsets[2 /* col offset */][2 /* row offset */];
+    // process this row in BLOCK_SIZE^2 blocks (subsampled)
+    for (int bx = 0; bx < cw; bx += bw) {
+        if (data->overlap_flag && bx) {
+            // shift previous offsets left
+            for (int i = 0; i < rows; i++)
+                offsets[1][i] = offsets[0][i];
+        }
+        // update current offsets
+        for (int i = 0; i < rows; i++)
+            offsets[0][i] = get_random_number(8, &seed[i]);
+        // x/y block offsets to compensate for overlapped regions
+        const int ystart = data->overlap_flag && row_num ? (2 >> sy) : 0;
+        const int xstart = data->overlap_flag && bx      ? (2 >> sx) : 0;
+        // blend weights { neighbouring block, current block }; a subsampled
+        // direction has a single overlapped line/column only
+        static const int w[2 /* sub */][2 /* off */][2] = {
+            { { 27, 17 }, { 17, 27 } },
+            { { 23, 22 } },
+        };
+        // derives the scaling index for chroma position (x, y) of the current
+        // block, scales `grain` by it and stores the clipped sum
+#define add_noise_uv(x, y, grain)                                               \
+        do {                                                                    \
+            const int lx = (bx + (x)) << sx;                                    \
+            const int ly = (y) << sy;                                           \
+            const pixel *luma = (const pixel *)                                 \
+                (luma_row + ly * luma_stride + lx * sizeof(pixel));             \
+            pixel avg = luma[0];                                                \
+            if (sx && lx + 1 < out->p.w)                                        \
+                avg = (avg + luma[1] + 1) >> 1;                                 \
+                                                                                \
+            const pixel *src = (const pixel *)                                  \
+                (src_row + (y) * stride + (bx + (x)) * sizeof(pixel));          \
+            pixel *dst = (pixel *)                                              \
+                (dst_row + (y) * stride + (bx + (x)) * sizeof(pixel));          \
+            int val = avg;                                                      \
+            if (!data->chroma_scaling_from_luma) {                              \
+                int combined = avg * data->uv_luma_mult[uv] +                   \
+                               *src * data->uv_mult[uv];                        \
+                val = iclip_pixel( (combined >> 6) +                            \
+                                   (data->uv_offset[uv] << (BITDEPTH - 8)) );   \
+            }                                                                   \
+                                                                                \
+            int noise = round2(scaling[ val ] * (grain), data->scaling_shift);  \
+            *dst = iclip(*src + noise, min_value, max_value);                   \
+        } while (0)
+        for (int y = ystart; y < bh; y++) {
+            // Non-overlapped image region (straightforward)
+            for (int x = xstart; x < bw; x++) {
+                int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
+                add_noise_uv(x, y, grain);
+            }
+            // Special case for overlapped column
+            for (int x = 0; x < xstart; x++) {
+                int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
+                int old   = sample_lut(grain_lut, offsets, sx, sy, 1, 0, x, y);
+                grain = (old * w[sx][x][0] + grain * w[sx][x][1] + 16) >> 5;
+                grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
+                add_noise_uv(x, y, grain);
+            }
+        }
+        for (int y = 0; y < ystart; y++) {
+            // Special case for overlapped row (sans corner)
+            for (int x = xstart; x < bw; x++) {
+                int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
+                int old   = sample_lut(grain_lut, offsets, sx, sy, 0, 1, x, y);
+                grain = (old * w[sy][y][0] + grain * w[sy][y][1] + 16) >> 5;
+                grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
+                add_noise_uv(x, y, grain);
+            }
+            // Special case for doubly-overlapped corner
+            for (int x = 0; x < xstart; x++) {
+                // Blend the top pixel with the top left block
+                int top = sample_lut(grain_lut, offsets, sx, sy, 0, 1, x, y);
+                int old = sample_lut(grain_lut, offsets, sx, sy, 1, 1, x, y);
+                top = (old * w[sx][x][0] + top * w[sx][x][1] + 16) >> 5;
+                top = iclip(top, GRAIN_MIN, GRAIN_MAX);
+                // Blend the current pixel with the left block
+                int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
+                old = sample_lut(grain_lut, offsets, sx, sy, 1, 0, x, y);
+                grain = (old * w[sx][x][0] + grain * w[sx][x][1] + 16) >> 5;
+                grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
+                // Mix the two rows together and apply to image
+                grain = (top * w[sy][y][0] + grain * w[sy][y][1] + 16) >> 5;
+                grain = iclip(grain, GRAIN_MIN, GRAIN_MAX);
+                add_noise_uv(x, y, grain);
+            }
+        }
+#undef add_noise_uv
+    }
+}

+// Synthesizes film grain for a whole picture: pixels are read from `in`, the
+// result is written to `out`. The grain parameters come from
+// out->p.film_grain. Planes that receive no grain are copied over unchanged.
+// Processing happens in whole BLOCK_SIZE blocks, so the planes of both
+// pictures have to be allocated with room for complete blocks -- presumably
+// guaranteed by the picture allocator, TODO confirm.
+void bitfn(dav1d_apply_grain)(Dav1dPicture *const out,
+                              const Dav1dPicture *const in)
+{
+    const Dav1dFilmGrainData *const data = &out->p.film_grain;
+    entry grain_lut[3][GRAIN_HEIGHT][GRAIN_WIDTH];
+    // Zero-initialized: a LUT that is used without having any scaling points
+    // (chroma_scaling_from_luma without luma points) then yields no noise
+    // instead of whatever happens to be on the stack
+    uint8_t scaling[3][SCALING_SIZE] = {{0}};
+    // Generate grain LUTs as needed
+    generate_grain_y(out, grain_lut[0]); // always needed
+    if (data->num_uv_points[0] || data->chroma_scaling_from_luma)
+        generate_grain_uv(out, 0, grain_lut[1], grain_lut[0]);
+    if (data->num_uv_points[1] || data->chroma_scaling_from_luma)
+        generate_grain_uv(out, 1, grain_lut[2], grain_lut[0]);
+    // Generate scaling LUTs as needed
+    if (data->num_y_points)
+        generate_scaling(data->y_points, data->num_y_points, scaling[0]);
+    if (data->num_uv_points[0])
+        generate_scaling(data->uv_points[0], data->num_uv_points[0], scaling[1]);
+    if (data->num_uv_points[1])
+        generate_scaling(data->uv_points[1], data->num_uv_points[1], scaling[2]);
+    // Synthesize grain for the affected planes. The number of block rows is
+    // rounded up so that a partial row at the bottom is processed as well
+    const int rows = (out->p.h + BLOCK_SIZE - 1) / BLOCK_SIZE;
+    for (int row = 0; row < rows; row++) {
+        if (data->num_y_points)
+            apply_to_row_y(out, in, grain_lut[0], scaling[0], row);
+        if (data->chroma_scaling_from_luma) {
+            apply_to_row_uv(out, in, grain_lut[1], scaling[0], 0, row);
+            apply_to_row_uv(out, in, grain_lut[2], scaling[0], 1, row);
+        } else {
+            if (data->num_uv_points[0])
+                apply_to_row_uv(out, in, grain_lut[1], scaling[1], 0, row);
+            if (data->num_uv_points[1])
+                apply_to_row_uv(out, in, grain_lut[2], scaling[2], 1, row);
+        }
+    }
+    // Copy over the non-modified planes
+    // TODO: eliminate in favor of per-plane refs
+    if (!data->num_y_points) {
+        assert(out->stride[0] == in->stride[0]);
+        memcpy(out->data[0], in->data[0], out->p.h * out->stride[0]);
+    }
+    for (int i = 0; i < 2; i++) {
+        if (!data->num_uv_points[i] && !data->chroma_scaling_from_luma) {
+            const int suby = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
+            // Round up: an odd luma height still has a last chroma row
+            const int chroma_h = (out->p.h + suby) >> suby;
+            assert(out->stride[1] == in->stride[1]);
+            memcpy(out->data[1+i], in->data[1+i],
+                   chroma_h * out->stride[1]);
+        }
+    }
+}

--- a/src/internal.h

+++ b/src/internal.h

@@ -115,6 +115,7 @@

     } intra_edge;

     Dav1dPicAllocator allocator;

+    int apply_grain;

};

 struct Dav1dFrameContext {

--- a/src/lib.c

+++ b/src/lib.c

@@ -43,6 +43,7 @@

 #include "src/ref.h"

 #include "src/thread_task.h"

 #include "src/wedge.h"

+#include "src/film_grain.h"

 static void init_internal(void) {

     dav1d_init_wedge_masks();

@@ -57,6 +58,7 @@

 void dav1d_default_settings(Dav1dSettings *const s) {

     s->n_frame_threads = 1;

     s->n_tile_threads = 1;

+    s->apply_grain = 1;

     s->allocator.cookie = NULL;

     s->allocator.alloc_picture_callback = default_picture_allocator;

     s->allocator.release_picture_callback = default_picture_release;

@@ -84,6 +86,7 @@

     memset(c, 0, sizeof(*c));

     c->allocator = s->allocator;

+    c->apply_grain = s->apply_grain;

     c->n_fc = s->n_frame_threads;

     c->fc = dav1d_alloc_aligned(sizeof(*c->fc) * s->n_frame_threads, 32);

     if (!c->fc) goto error;

@@ -170,6 +173,39 @@

     return 0;

+// Hands the decoded picture `in` over to the caller through `out`. Without
+// grain to apply (disabled in the context, or no scaling points in the
+// picture) the reference is simply moved. Otherwise grain is rendered into a
+// newly allocated picture and the reference held by `in` is released.
+// Returns 0 on success or the negative error of the allocation.
+static int output_image(Dav1dContext *const c, Dav1dPicture *const out,
+                        Dav1dPicture *const in)
+{
+    const Dav1dFilmGrainData *const fg = &in->p.film_grain;
+    const int grain_wanted = c->apply_grain &&
+        (fg->num_y_points || fg->num_uv_points[0] || fg->num_uv_points[1]);
+    // If there is nothing to be done, skip the allocation/copy
+    if (!grain_wanted) {
+        dav1d_picture_move_ref(out, in);
+        return 0;
+    }
+    // Apply film grain to a new copy of the image to avoid corrupting refs
+    const int res = dav1d_picture_alloc_copy(out, in);
+    if (res < 0)
+        return res;
+    if (out->p.bpc == 8)
+        dav1d_apply_grain_8bpc(out, in);
+    else if (out->p.bpc == 10)
+        dav1d_apply_grain_10bpc(out, in);
+    else
+        assert(!"apply_grain: missing bit depth");
+    dav1d_picture_unref(in);
+    return 0;
+}

 int dav1d_get_picture(Dav1dContext *const c, Dav1dPicture *const out)

     int res;

@@ -220,16 +256,12 @@

         in->sz -= res;

         in->data += res;

         if (!in->sz) dav1d_data_unref(in);

-        if (c->out.data[0]) {

-            dav1d_picture_move_ref(out, &c->out);

-            return 0;

-        }

+        if (c->out.data[0])

+            break;

-    if (c->out.data[0]) {

-        dav1d_picture_move_ref(out, &c->out);

-        return 0;

-    }

+    if (c->out.data[0])

+        return output_image(c, out, &c->out);

     return -EAGAIN;

--- a/src/meson.build

+++ b/src/meson.build

@@ -62,7 +62,8 @@

     'cdef_tmpl.c',

     'lr_apply_tmpl.c',

     'looprestoration_tmpl.c',

-    'recon_tmpl.c'

+    'recon_tmpl.c',

+    'film_grain_tmpl.c',

 # libdav1d entrypoint source files

--- a/src/picture.c

+++ b/src/picture.c

@@ -180,6 +180,21 @@

     return res;

+// Allocates `dst` with the dimensions, layout and bit depth of `src`, using
+// the allocator that is stored with src's reference. On success src's poc and
+// picture parameters are copied to dst; pixel data is not copied.
+// Returns the result of dav1d_picture_alloc() (0 on success).
+int dav1d_picture_alloc_copy(Dav1dPicture *const dst,
+                             const Dav1dPicture *const src)
+{
+    struct pic_ctx_context *const pic_ctx = src->ref->user_data;
+    const int res = dav1d_picture_alloc(dst, src->p.w, src->p.h, src->p.layout,
+                                        src->p.bpc, &pic_ctx->allocator);
+    if (res)
+        return res;
+    dst->poc = src->poc;
+    dst->p = src->p;
+    return 0;
+}

 void dav1d_picture_ref(Dav1dPicture *const dst, const Dav1dPicture *const src) {

     validate_input(dst != NULL);

     validate_input(dst->data[0] == NULL);

--- a/src/picture.h

+++ b/src/picture.h

@@ -64,6 +64,11 @@

                                Dav1dPicAllocator *);

/**

+ * Allocate a picture with identical metadata to an existing picture.
+ * Only the metadata of src (poc and picture parameters) is copied to dst;
+ * the pixel data of the new picture is left for the caller to fill in.
+ * Returns 0 on success, otherwise the error of the underlying allocation.
+ */
+int dav1d_picture_alloc_copy(Dav1dPicture *dst, const Dav1dPicture *src);

+/**

  * Create a copy of a picture.

*/

 void dav1d_picture_ref(Dav1dPicture *dst, const Dav1dPicture *src);

--- a/src/tables.c

+++ b/src/tables.c

@@ -872,3 +872,194 @@

     31, 29, 28, 26, 24, 23, 21, 20, 19, 17, 16, 14, 13, 12, 11,  9,

      8,  7,  6,  5,  4,  4,  3,  2,  0,  0,  0,  0,  0,  0,  0,  0,

};

+// Taken from the spec. Range is [-2048, 2047], mean is 0 and stddev is 512

+const int16_t dav1d_gaussian_sequence[2048] = {

+    56,    568,   -180,  172,   124,   -84,   172,   -64,   -900,  24,   820,

+    224,   1248,  996,   272,   -8,    -916,  -388,  -732,  -104,  -188, 800,

+    112,   -652,  -320,  -376,  140,   -252,  492,   -168,  44,    -788, 588,

+    -584,  500,   -228,  12,    680,   272,   -476,  972,   -100,  652,  368,

+    432,   -196,  -720,  -192,  1000,  -332,  652,   -136,  -552,  -604, -4,

+    192,   -220,  -136,  1000,  -52,   372,   -96,   -624,  124,   -24,  396,

+    540,   -12,   -104,  640,   464,   244,   -208,  -84,   368,   -528, -740,

+    248,   -968,  -848,  608,   376,   -60,   -292,  -40,   -156,  252,  -292,

+    248,   224,   -280,  400,   -244,  244,   -60,   76,    -80,   212,  532,

+    340,   128,   -36,   824,   -352,  -60,   -264,  -96,   -612,  416,  -704,

+    220,   -204,  640,   -160,  1220,  -408,  900,   336,   20,    -336, -96,

+    -792,  304,   48,    -28,   -1232, -1172, -448,  104,   -292,  -520, 244,

+    60,    -948,  0,     -708,  268,   108,   356,   -548,  488,   -344, -136,

+    488,   -196,  -224,  656,   -236,  -1128, 60,    4,     140,   276,  -676,

+    -376,  168,   -108,  464,   8,     564,   64,    240,   308,   -300, -400,

+    -456,  -136,  56,    120,   -408,  -116,  436,   504,   -232,  328,  844,

+    -164,  -84,   784,   -168,  232,   -224,  348,   -376,  128,   568,  96,

+    -1244, -288,  276,   848,   832,   -360,  656,   464,   -384,  -332, -356,

+    728,   -388,  160,   -192,  468,   296,   224,   140,   -776,  -100, 280,

+    4,     196,   44,    -36,   -648,  932,   16,    1428,  28,    528,  808,

+    772,   20,    268,   88,    -332,  -284,  124,   -384,  -448,  208,  -228,

+    -1044, -328,  660,   380,   -148,  -300,  588,   240,   540,   28,   136,

+    -88,   -436,  256,   296,   -1000, 1400,  0,     -48,   1056,  -136, 264,

+    -528,  -1108, 632,   -484,  -592,  -344,  796,   124,   -668,  -768, 388,

+    1296,  -232,  -188,  -200,  -288,  -4,    308,   100,   -168,  256,  -500,

+    204,   -508,  648,   -136,  372,   -272,  -120,  -1004, -552,  -548, -384,

+    548,   -296,  428,   -108,  -8,    -912,  -324,  -224,  -88,   -112, -220,

+    -100,  996,   -796,  548,   360,   -216,  180,   428,   -200,  -212, 148,

+    96,    148,   284,   216,   -412,  -320,  120,   -300,  -384,  -604, -572,

+    -332,  -8,    -180,  -176,  696,   116,   -88,   628,   76,    44,   -516,

+    240,   -208,  -40,   100,   -592,  344,   -308,  -452,  -228,  20,   916,

+    -1752, -136,  -340,  -804,  140,   40,    512,   340,   248,   184,  -492,

+    896,   -156,  932,   -628,  328,   -688,  -448,  -616,  -752,  -100, 560,

+    -1020, 180,   -800,  -64,   76,    576,   1068,  396,   660,   552,  -108,

+    -28,   320,   -628,  312,   -92,   -92,   -472,  268,   16,    560,  516,

+    -672,  -52,   492,   -100,  260,   384,   284,   292,   304,   -148, 88,

+    -152,  1012,  1064,  -228,  164,   -376,  -684,  592,   -392,  156,  196,

+    -524,  -64,   -884,  160,   -176,  636,   648,   404,   -396,  -436, 864,

+    424,   -728,  988,   -604,  904,   -592,  296,   -224,  536,   -176, -920,

+    436,   -48,   1176,  -884,  416,   -776,  -824,  -884,  524,   -548, -564,

+    -68,   -164,  -96,   692,   364,   -692,  -1012, -68,   260,   -480, 876,

+    -1116, 452,   -332,  -352,  892,   -1088, 1220,  -676,  12,    -292, 244,

+    496,   372,   -32,   280,   200,   112,   -440,  -96,   24,    -644, -184,

+    56,    -432,  224,   -980,  272,   -260,  144,   -436,  420,   356,  364,

+    -528,  76,    172,   -744,  -368,  404,   -752,  -416,  684,   -688, 72,

+    540,   416,   92,    444,   480,   -72,   -1416, 164,   -1172, -68,  24,

+    424,   264,   1040,  128,   -912,  -524,  -356,  64,    876,   -12,  4,

+    -88,   532,   272,   -524,  320,   276,   -508,  940,   24,    -400, -120,

+    756,   60,    236,   -412,  100,   376,   -484,  400,   -100,  -740, -108,

+    -260,  328,   -268,  224,   -200,  -416,  184,   -604,  -564,  -20,  296,

+    60,    892,   -888,  60,    164,   68,    -760,  216,   -296,  904,  -336,

+    -28,   404,   -356,  -568,  -208,  -1480, -512,  296,   328,   -360, -164,

+    -1560, -776,  1156,  -428,  164,   -504,  -112,  120,   -216,  -148, -264,

+    308,   32,    64,    -72,   72,    116,   176,   -64,   -272,  460,  -536,

+    -784,  -280,  348,   108,   -752,  -132,  524,   -540,  -776,  116,  -296,

+    -1196, -288,  -560,  1040,  -472,  116,   -848,  -1116, 116,   636,  696,

+    284,   -176,  1016,  204,   -864,  -648,  -248,  356,   972,   -584, -204,

+    264,   880,   528,   -24,   -184,  116,   448,   -144,  828,   524,  212,

+    -212,  52,    12,    200,   268,   -488,  -404,  -880,  824,   -672, -40,

+    908,   -248,  500,   716,   -576,  492,   -576,  16,    720,   -108, 384,

+    124,   344,   280,   576,   -500,  252,   104,   -308,  196,   -188, -8,

+    1268,  296,   1032,  -1196, 436,   316,   372,   -432,  -200,  -660, 704,

+    -224,  596,   -132,  268,   32,    -452,  884,   104,   -1008, 424,  -1348,

+    -280,  4,     -1168, 368,   476,   696,   300,   -8,    24,    180,  -592,

+    -196,  388,   304,   500,   724,   -160,  244,   -84,   272,   -256, -420,

+    320,   208,   -144,  -156,  156,   364,   452,   28,    540,   316,  220,

+    -644,  -248,  464,   72,    360,   32,    -388,  496,   -680,  -48,  208,

+    -116,  -408,  60,    -604,  -392,  548,   -840,  784,   -460,  656,  -544,

+    -388,  -264,  908,   -800,  -628,  -612,  -568,  572,   -220,  164,  288,

+    -16,   -308,  308,   -112,  -636,  -760,  280,   -668,  432,   364,  240,

+    -196,  604,   340,   384,   196,   592,   -44,   -500,  432,   -580, -132,

+    636,   -76,   392,   4,     -412,  540,   508,   328,   -356,  -36,  16,

+    -220,  -64,   -248,  -60,   24,    -192,  368,   1040,  92,    -24,  -1044,

+    -32,   40,    104,   148,   192,   -136,  -520,  56,    -816,  -224, 732,

+    392,   356,   212,   -80,   -424,  -1008, -324,  588,   -1496, 576,  460,

+    -816,  -848,  56,    -580,  -92,   -1372, -112,  -496,  200,   364,  52,

+    -140,  48,    -48,   -60,   84,    72,    40,    132,   -356,  -268, -104,

+    -284,  -404,  732,   -520,  164,   -304,  -540,  120,   328,   -76,  -460,

+    756,   388,   588,   236,   -436,  -72,   -176,  -404,  -316,  -148, 716,

+    -604,  404,   -72,   -88,   -888,  -68,   944,   88,    -220,  -344, 960,

+    472,   460,   -232,  704,   120,   832,   -228,  692,   -508,  132,  -476,

+    844,   -748,  -364,  -44,   1116,  -1104, -1056, 76,    428,   552,  -692,

+    60,    356,   96,    -384,  -188,  -612,  -576,  736,   508,   892,  352,

+    -1132, 504,   -24,   -352,  324,   332,   -600,  -312,  292,   508,  -144,

+    -8,    484,   48,    284,   -260,  -240,  256,   -100,  -292,  -204, -44,

+    472,   -204,  908,   -188,  -1000, -256,  92,    1164,  -392,  564,  356,

+    652,   -28,   -884,  256,   484,   -192,  760,   -176,  376,   -524, -452,

+    -436,  860,   -736,  212,   124,   504,   -476,  468,   76,    -472, 552,

+    -692,  -944,  -620,  740,   -240,  400,   132,   20,    192,   -196, 264,

+    -668,  -1012, -60,   296,   -316,  -828,  76,    -156,  284,   -768, -448,

+    -832,  148,   248,   652,   616,   1236,  288,   -328,  -400,  -124, 588,

+    220,   520,   -696,  1032,  768,   -740,  -92,   -272,  296,   448,  -464,

+    412,   -200,  392,   440,   -200,  264,   -152,  -260,  320,   1032, 216,

+    320,   -8,    -64,   156,   -1016, 1084,  1172,  536,   484,   -432, 132,

+    372,   -52,   -256,  84,    116,   -352,  48,    116,   304,   -384, 412,

+    924,   -300,  528,   628,   180,   648,   44,    -980,  -220,  1320, 48,

+    332,   748,   524,   -268,  -720,  540,   -276,  564,   -344,  -208, -196,

+    436,   896,   88,    -392,  132,   80,    -964,  -288,  568,   56,   -48,

+    -456,  888,   8,     552,   -156,  -292,  948,   288,   128,   -716, -292,

+    1192,  -152,  876,   352,   -600,  -260,  -812,  -468,  -28,   -120, -32,

+    -44,   1284,  496,   192,   464,   312,   -76,   -516,  -380,  -456, -1012,

+    -48,   308,   -156,  36,    492,   -156,  -808,  188,   1652,  68,   -120,

+    -116,  316,   160,   -140,  352,   808,   -416,  592,   316,   -480, 56,

+    528,   -204,  -568,  372,   -232,  752,   -344,  744,   -4,    324,  -416,

+    -600,  768,   268,   -248,  -88,   -132,  -420,  -432,  80,    -288, 404,

+    -316,  -1216, -588,  520,   -108,  92,    -320,  368,   -480,  -216, -92,

+    1688,  -300,  180,   1020,  -176,  820,   -68,   -228,  -260,  436,  -904,

+    20,    40,    -508,  440,   -736,  312,   332,   204,   760,   -372, 728,

+    96,    -20,   -632,  -520,  -560,  336,   1076,  -64,   -532,  776,  584,

+    192,   396,   -728,  -520,  276,   -188,  80,    -52,   -612,  -252, -48,

+    648,   212,   -688,  228,   -52,   -260,  428,   -412,  -272,  -404, 180,

+    816,   -796,  48,    152,   484,   -88,   -216,  988,   696,   188,  -528,

+    648,   -116,  -180,  316,   476,   12,    -564,  96,    476,   -252, -364,

+    -376,  -392,  556,   -256,  -576,  260,   -352,  120,   -16,   -136, -260,

+    -492,  72,    556,   660,   580,   616,   772,   436,   424,   -32,  -324,

+    -1268, 416,   -324,  -80,   920,   160,   228,   724,   32,    -516, 64,

+    384,   68,    -128,  136,   240,   248,   -204,  -68,   252,   -932, -120,

+    -480,  -628,  -84,   192,   852,   -404,  -288,  -132,  204,   100,  168,

+    -68,   -196,  -868,  460,   1080,  380,   -80,   244,   0,     484,  -888,

+    64,    184,   352,   600,   460,   164,   604,   -196,  320,   -64,  588,

+    -184,  228,   12,    372,   48,    -848,  -344,  224,   208,   -200, 484,

+    128,   -20,   272,   -468,  -840,  384,   256,   -720,  -520,  -464, -580,

+    112,   -120,  644,   -356,  -208,  -608,  -528,  704,   560,   -424, 392,

+    828,   40,    84,    200,   -152,  0,     -144,  584,   280,   -120, 80,

+    -556,  -972,  -196,  -472,  724,   80,    168,   -32,   88,    160,  -688,

+    0,     160,   356,   372,   -776,  740,   -128,  676,   -248,  -480, 4,

+    -364,  96,    544,   232,   -1032, 956,   236,   356,   20,    -40,  300,

+    24,    -676,  -596,  132,   1120,  -104,  532,   -1096, 568,   648,  444,

+    508,   380,   188,   -376,  -604,  1488,  424,   24,    756,   -220, -192,

+    716,   120,   920,   688,   168,   44,    -460,  568,   284,   1144, 1160,

+    600,   424,   888,   656,   -356,  -320,  220,   316,   -176,  -724, -188,

+    -816,  -628,  -348,  -228,  -380,  1012,  -452,  -660,  736,   928,  404,

+    -696,  -72,   -268,  -892,  128,   184,   -344,  -780,  360,   336,  400,

+    344,   428,   548,   -112,  136,   -228,  -216,  -820,  -516,  340,  92,

+    -136,  116,   -300,  376,   -244,  100,   -316,  -520,  -284,  -12,  824,

+    164,   -548,  -180,  -128,  116,   -924,  -828,  268,   -368,  -580, 620,

+    192,   160,   0,     -1676, 1068,  424,   -56,   -360,  468,   -156, 720,

+    288,   -528,  556,   -364,  548,   -148,  504,   316,   152,   -648, -620,

+    -684,  -24,   -376,  -384,  -108,  -920,  -1032, 768,   180,   -264, -508,

+    -1268, -260,  -60,   300,   -240,  988,   724,   -376,  -576,  -212, -736,

+    556,   192,   1092,  -620,  -880,  376,   -56,   -4,    -216,  -32,  836,

+    268,   396,   1332,  864,   -600,  100,   56,    -412,  -92,   356,  180,

+    884,   -468,  -436,  292,   -388,  -804,  -704,  -840,  368,   -348, 140,

+    -724,  1536,  940,   372,   112,   -372,  436,   -480,  1136,  296,  -32,

+    -228,  132,   -48,   -220,  868,   -1016, -60,   -1044, -464,  328,  916,

+    244,   12,    -736,  -296,  360,   468,   -376,  -108,  -92,   788,  368,

+    -56,   544,   400,   -672,  -420,  728,   16,    320,   44,    -284, -380,

+    -796,  488,   132,   204,   -596,  -372,  88,    -152,  -908,  -636, -572,

+    -624,  -116,  -692,  -200,  -56,   276,   -88,   484,   -324,  948,  864,

+    1000,  -456,  -184,  -276,  292,   -296,  156,   676,   320,   160,  908,

+    -84,   -1236, -288,  -116,  260,   -372,  -644,  732,   -756,  -96,  84,

+    344,   -520,  348,   -688,  240,   -84,   216,   -1044, -136,  -676, -396,

+    -1500, 960,   -40,   176,   168,   1516,  420,   -504,  -344,  -364, -360,

+    1216,  -940,  -380,  -212,  252,   -660,  -708,  484,   -444,  -152, 928,

+    -120,  1112,  476,   -260,  560,   -148,  -344,  108,   -196,  228,  -288,

+    504,   560,   -328,  -88,   288,   -1008, 460,   -228,  468,   -836, -196,

+    76,    388,   232,   412,   -1168, -716,  -644,  756,   -172,  -356, -504,

+    116,   432,   528,   48,    476,   -168,  -608,  448,   160,   -532, -272,

+    28,    -676,  -12,   828,   980,   456,   520,   104,   -104,  256,  -344,

+    -4,    -28,   -368,  -52,   -524,  -572,  -556,  -200,  768,   1124, -208,

+    -512,  176,   232,   248,   -148,  -888,  604,   -600,  -304,  804,  -156,

+    -212,  488,   -192,  -804,  -256,  368,   -360,  -916,  -328,  228,  -240,

+    -448,  -472,  856,   -556,  -364,  572,   -12,   -156,  -368,  -340, 432,

+    252,   -752,  -152,  288,   268,   -580,  -848,  -592,  108,   -76,  244,

+    312,   -716,  592,   -80,   436,   360,   4,     -248,  160,   516,  584,

+    732,   44,    -468,  -280,  -292,  -156,  -588,  28,    308,   912,  24,

+    124,   156,   180,   -252,  944,   -924,  -772,  -520,  -428,  -624, 300,

+    -212,  -1144, 32,    -724,  800,   -1128, -212,  -1288, -848,  180,  -416,

+    440,   192,   -576,  -792,  -76,   -1080, 80,    -532,  -352,  -132, 380,

+    -820,  148,   1112,  128,   164,   456,   700,   -924,  144,   -668, -384,

+    648,   -832,  508,   552,   -52,   -100,  -656,  208,   -568,  748,  -88,

+    680,   232,   300,   192,   -408,  -1012, -152,  -252,  -268,  272,  -876,

+    -664,  -648,  -332,  -136,  16,    12,    1152,  -28,   332,   -536, 320,

+    -672,  -460,  -316,  532,   -260,  228,   -40,   1052,  -816,  180,  88,

+    -496,  -556,  -672,  -368,  428,   92,    356,   404,   -408,  252,  196,

+    -176,  -556,  792,   268,   32,    372,   40,    96,    -332,  328,  120,

+    372,   -900,  -40,   472,   -264,  -592,  952,   128,   656,   112,  664,

+    -232,  420,   4,     -344,  -464,  556,   244,   -416,  -32,   252,  0,

+    -412,  188,   -696,  508,   -476,  324,   -1096, 656,   -312,  560,  264,

+    -136,  304,   160,   -64,   -580,  248,   336,   -720,  560,   -348, -288,

+    -276,  -196,  -500,  852,   -544,  -236,  -1128, -992,  -776,  116,  56,

+    52,    860,   884,   212,   -12,   168,   1020,  512,   -552,  924,  -148,

+    716,   188,   164,   -340,  -520,  -184,  880,   -152,  -680,  -208, -1156,

+    -300,  -528,  -472,  364,   100,   -744,  -1056, -32,   540,   280,  144,

+    -676,  -32,   -232,  -280,  -224,  96,    568,   -76,   172,   148,  148,

+    104,   32,    -296,  -32,   788,   -80,   32,    -16,   280,   288,  944,

+    428,   -484

+};

--- a/src/tables.h

+++ b/src/tables.h

@@ -119,4 +119,6 @@

 extern const uint8_t dav1d_obmc_masks[64];

+extern const int16_t dav1d_gaussian_sequence[2048]; // for film grain synthesis

 #endif /* __DAV1D_SRC_TABLES_H__ */

--- a/tools/dav1d_cli_parse.c

+++ b/tools/dav1d_cli_parse.c

@@ -48,6 +48,7 @@

     ARG_FRAME_THREADS,

     ARG_TILE_THREADS,

     ARG_VERIFY,

+    ARG_FILM_GRAIN,

};

 static const struct option long_opts[] = {

@@ -62,6 +63,7 @@

     { "framethreads",   1, NULL, ARG_FRAME_THREADS },

     { "tilethreads",    1, NULL, ARG_TILE_THREADS },

     { "verify",         1, NULL, ARG_VERIFY },

+    { "filmgrain",      1, NULL, ARG_FILM_GRAIN },

     { NULL,             0, NULL, 0 },

};

@@ -86,6 +88,7 @@

             " --version/-v:        print version and exit\n"

             " --framethreads $num: number of frame threads (default: 1)\n"

             " --tilethreads $num:  number of tile threads (default: 1)\n"

+            " --filmgrain $num:    enable film grain application (default: 1, except if muxer is md5)\n"

             " --verify $md5:       verify decoded md5. implies --muxer md5, no output\n");

     exit(1);

@@ -124,6 +127,7 @@

     memset(cli_settings, 0, sizeof(*cli_settings));

     dav1d_default_settings(lib_settings);

+    int grain_specified = 0;

     while ((o = getopt_long(argc, argv, short_opts, long_opts, NULL)) >= 0) {

         switch (o) {

@@ -159,6 +163,11 @@

         case ARG_VERIFY:

             cli_settings->verify = optarg;

             break;

+        case ARG_FILM_GRAIN:

+            lib_settings->apply_grain =

+                !!parse_unsigned(optarg, ARG_FILM_GRAIN, argv[0]);

+            grain_specified = 1;

+            break;

         case 'v':

             fprintf(stderr, "%s\n", dav1d_version());

             exit(0);

@@ -176,6 +185,12 @@

         cli_settings->outputfile = "-";

         if (!cli_settings->muxer)

             cli_settings->muxer = "md5";

+    }

+    if (!grain_specified && cli_settings->muxer &&

+        !strcmp(cli_settings->muxer, "md5"))

+    {

+        lib_settings->apply_grain = 0;

     if (!cli_settings->inputfile)