ref: b9d4630c6dfb9bfeac7c3fa5aa59217670595f7b
parent: 79c4aa95cd1f0fd849e130aa282c632d51fb70da
author: Ronald S. Bultje <[email protected]>
date: Sun Sep 1 07:18:46 EDT 2019
Split out film grain block functions into a DSPContext
--- a/src/decode.c
+++ b/src/decode.c
@@ -42,6 +42,7 @@
#include "src/decode.h"
#include "src/dequant_tables.h"
#include "src/env.h"
+#include "src/film_grain.h"
#include "src/log.h"
#include "src/qm.h"
#include "src/recon.h"
@@ -3190,6 +3191,7 @@
dav1d_loop_filter_dsp_init_##bd##bpc(&dsp->lf); \
dav1d_loop_restoration_dsp_init_##bd##bpc(&dsp->lr); \
dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
+ dav1d_film_grain_dsp_init_##bd##bpc(&dsp->fg); \
break
#if CONFIG_8BPC
case 8:
--- /dev/null
+++ b/src/fg_apply.h
@@ -1,0 +1,41 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_SRC_FG_APPLY_H
+#define DAV1D_SRC_FG_APPLY_H
+
+#include "dav1d/picture.h"
+
+#include "common/bitdepth.h"
+
+#include "src/film_grain.h"
+
+bitfn_decls(void dav1d_apply_grain, const Dav1dFilmGrainDSPContext *const dsp, /* film grain function table */
+ Dav1dPicture *const out, /* destination picture; receives the result */
+ const Dav1dPicture *const in); /* source picture */
+
+#endif /* DAV1D_SRC_FG_APPLY_H */
--- /dev/null
+++ b/src/fg_apply_tmpl.c
@@ -1,0 +1,176 @@
+/*
+ * Copyright © 2018, Niklas Haas
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdint.h>
+
+#include "dav1d/picture.h"
+
+#include "common.h"
+#include "common/intops.h"
+#include "common/bitdepth.h"
+
+#include "fg_apply.h"
+
+/*
+ * Build the scaling lookup table for one plane from a piecewise-linear
+ * function.
+ *
+ * bitdepth: sample bit depth; entries [0, 1 << bitdepth) of `scaling` are
+ *           written.
+ * points:   `num` (x, y) pairs. x is shifted left by (bitdepth - 8) to obtain
+ *           a table index; x must be strictly increasing from one point to
+ *           the next because the slope is computed by dividing by the x
+ *           distance.
+ * num:      number of entries in `points`.
+ * scaling:  output table.
+ *
+ * Entries below the first point take the first point's y, entries from the
+ * last point onwards take the last point's y. If there are no points at all,
+ * the table is zero-filled.
+ */
+static void generate_scaling(const int bitdepth,
+                             const uint8_t points[][2], const int num,
+                             uint8_t scaling[SCALING_SIZE])
+{
+    const int shift_x = bitdepth - 8;
+    const int scaling_size = 1 << bitdepth;
+    const int pad = 1 << shift_x;
+
+    // No points: nothing to interpolate, and points[num - 1] below would be
+    // read out of bounds. Produce an all-zero table instead.
+    if (num <= 0) {
+        for (int i = 0; i < scaling_size; i++)
+            scaling[i] = 0;
+        return;
+    }
+
+    // Fill up the preceding entries with the initial value
+    const int first_idx = points[0][0] << shift_x;
+    for (int i = 0; i < first_idx; i++)
+        scaling[i] = points[0][1];
+
+    // Linearly interpolate the values in the middle. The slope is kept in
+    // 16.16 fixed point. At bit depths above 8 only every pad-th entry is
+    // written here; the entries in between are filled in further down.
+    for (int i = 0; i < num - 1; i++) {
+        const int bx = points[i][0];
+        const int by = points[i][1];
+        const int ex = points[i+1][0];
+        const int ey = points[i+1][1];
+        const int dx = ex - bx;
+        const int dy = ey - by;
+        assert(dx > 0); // x must increase, otherwise the division below traps
+        const int delta = dy * ((0x10000 + (dx >> 1)) / dx);
+        for (int x = 0; x < dx; x++) {
+            const int v = by + ((x * delta + 0x8000) >> 16);
+            scaling[(bx + x) << shift_x] = v;
+        }
+    }
+
+    // Fill up the remaining entries with the final value
+    for (int i = points[num - 1][0] << shift_x; i < scaling_size; i++)
+        scaling[i] = points[num - 1][1];
+
+    if (pad <= 1) return;
+
+    // Bit depths above 8: interpolate between the entries written by the
+    // loop above, which are `pad` indices apart
+    const int rnd = pad >> 1;
+    for (int i = 0; i < num - 1; i++) {
+        const int bx = points[i][0] << shift_x;
+        const int ex = points[i+1][0] << shift_x;
+        const int dx = ex - bx;
+        for (int x = 0; x < dx; x += pad) {
+            const int range = scaling[bx + x + pad] - scaling[bx + x];
+            for (int n = 1; n < pad; n++) {
+                scaling[bx + x + n] = scaling[bx + x] + ((range * n + rnd) >> shift_x);
+            }
+        }
+    }
+}
+
+#ifndef UNIT_TEST
+/*
+ * Apply film grain to the picture `in` and store the result in `out`.
+ *
+ * dsp: film grain function table; supplies the grain LUT generators and the
+ *      per-row grain application functions.
+ * out: destination picture. The grain parameters are taken from
+ *      out->frame_hdr->film_grain.data.
+ * in:  source picture; must have the same plane strides as `out`.
+ *
+ * Planes that receive no grain are copied from `in` to `out` unchanged. The
+ * remaining planes are processed in rows of BLOCK_SIZE luma lines.
+ */
+void bitfn(dav1d_apply_grain)(const Dav1dFilmGrainDSPContext *const dsp,
+                              Dav1dPicture *const out,
+                              const Dav1dPicture *const in)
+{
+    const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
+
+    entry grain_lut[3][GRAIN_HEIGHT][GRAIN_WIDTH];
+    uint8_t scaling[3][SCALING_SIZE];
+#if BITDEPTH != 8
+    // Not referenced by name below; presumably picked up by the
+    // HIGHBD_TAIL_SUFFIX macro in the dsp calls
+    const int bitdepth_max = (1 << out->p.bpc) - 1;
+#endif
+
+    // Generate grain LUTs as needed
+    dsp->generate_grain_y(grain_lut[0], data HIGHBD_TAIL_SUFFIX); // always needed
+    if (data->num_uv_points[0] || data->chroma_scaling_from_luma)
+        dsp->generate_grain_uv[in->p.layout - 1](grain_lut[1], grain_lut[0],
+                                                 data, 0 HIGHBD_TAIL_SUFFIX);
+    if (data->num_uv_points[1] || data->chroma_scaling_from_luma)
+        dsp->generate_grain_uv[in->p.layout - 1](grain_lut[2], grain_lut[0],
+                                                 data, 1 HIGHBD_TAIL_SUFFIX);
+
+    // Generate scaling LUTs as needed. With chroma_scaling_from_luma the
+    // chroma planes look up scaling[0], so it must hold defined values even
+    // when there are no luma points; an all-zero table is used in that case.
+    if (data->num_y_points)
+        generate_scaling(in->p.bpc, data->y_points, data->num_y_points, scaling[0]);
+    else if (data->chroma_scaling_from_luma)
+        memset(scaling[0], 0, sizeof(scaling[0]));
+    if (data->num_uv_points[0])
+        generate_scaling(in->p.bpc, data->uv_points[0], data->num_uv_points[0], scaling[1]);
+    if (data->num_uv_points[1])
+        generate_scaling(in->p.bpc, data->uv_points[1], data->num_uv_points[1], scaling[2]);
+
+    // Copy over the non-modified planes
+    // TODO: eliminate in favor of per-plane refs
+    assert(out->stride[0] == in->stride[0]);
+    if (!data->num_y_points) {
+        memcpy(out->data[0], in->data[0], out->p.h * out->stride[0]);
+    }
+
+    const int ss_y = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
+    if (in->p.layout != DAV1D_PIXEL_LAYOUT_I400) {
+        assert(out->stride[1] == in->stride[1]);
+        // Chroma plane height, rounded up so that the last chroma line of an
+        // odd-height 4:2:0 picture is copied as well
+        const int chroma_h = (out->p.h + ss_y) >> ss_y;
+        for (int i = 0; i < 2; i++) {
+            if (!data->num_uv_points[i] && !data->chroma_scaling_from_luma) {
+                memcpy(out->data[1+i], in->data[1+i],
+                       chroma_h * out->stride[1]);
+            }
+        }
+    }
+
+    // Synthesize grain for the affected planes, one row of blocks at a time
+    const int rows = (out->p.h + BLOCK_SIZE - 1) / BLOCK_SIZE;
+    const int is_id = out->seq_hdr->mtrx == DAV1D_MC_IDENTITY;
+    for (int row = 0; row < rows; row++) {
+        const pixel *const luma_src =
+            ((pixel *) in->data[0]) + row * BLOCK_SIZE * PXSTRIDE(in->stride[0]);
+        // Number of luma lines in this row; the last row may be shorter
+        const int luma_bh = imin(out->p.h - row * BLOCK_SIZE, BLOCK_SIZE);
+
+        if (data->num_y_points) {
+            dsp->fgy_32x32xn(((pixel *) out->data[0]) + row * BLOCK_SIZE * PXSTRIDE(out->stride[0]),
+                             luma_src, out->stride[0], data,
+                             out->p.w, scaling[0], grain_lut[0], luma_bh, row HIGHBD_TAIL_SUFFIX);
+        }
+
+        // Chroma line count (rounded up) and pixel offset of this row
+        const int bh = (luma_bh + ss_y) >> ss_y;
+        const ptrdiff_t uv_off = (row * BLOCK_SIZE * PXSTRIDE(out->stride[1])) >> ss_y;
+        for (int pl = 0; pl < 2; pl++) {
+            // With chroma_scaling_from_luma both chroma planes get grain and
+            // use the luma scaling table; otherwise a plane is only
+            // processed if it has scaling points of its own
+            if (!data->chroma_scaling_from_luma && !data->num_uv_points[pl])
+                continue;
+            const uint8_t *const plane_scaling =
+                data->chroma_scaling_from_luma ? scaling[0] : scaling[1 + pl];
+            dsp->fguv_32x32xn[in->p.layout - 1](((pixel *) out->data[1 + pl]) + uv_off,
+                                                ((const pixel *) in->data[1 + pl]) + uv_off,
+                                                in->stride[1], luma_src,
+                                                in->stride[0], out->p.w, bh,
+                                                data, grain_lut[1 + pl],
+                                                plane_scaling, pl, row, is_id
+                                                HIGHBD_TAIL_SUFFIX);
+        }
+    }
+}
+#endif
--- a/src/film_grain.h
+++ b/src/film_grain.h
@@ -28,9 +28,57 @@
#ifndef DAV1D_SRC_FILM_GRAIN_H
#define DAV1D_SRC_FILM_GRAIN_H
-#include "dav1d/dav1d.h"
+#include "common/bitdepth.h"
-bitfn_decls(void dav1d_apply_grain, Dav1dPicture *const out,
- const Dav1dPicture *const in);
+#include "src/levels.h"
+
+#define GRAIN_WIDTH 82 /* columns in a grain lookup table */
+#define GRAIN_HEIGHT 73 /* rows in a grain lookup table */
+#define BLOCK_SIZE 32 /* grain is applied in blocks of BLOCK_SIZE x BLOCK_SIZE luma pixels */
+#if !defined(BITDEPTH) || BITDEPTH == 8
+#define SCALING_SIZE 256 /* entries in a scaling lookup table */
+typedef int8_t entry; /* element type of a grain lookup table */
+#else
+#define SCALING_SIZE 4096 /* entries in a scaling lookup table */
+typedef int16_t entry; /* element type of a grain lookup table */
+#endif
+
+#define decl_generate_grain_y_fn(name) /* luma grain generator: writes the grain table buf from the parameters in data */ \
+void (name)(entry buf[GRAIN_HEIGHT][GRAIN_WIDTH], \
+ const Dav1dFilmGrainData *const data HIGHBD_DECL_SUFFIX)
+typedef decl_generate_grain_y_fn(*generate_grain_y_fn); /* pointer to a luma grain generator */
+
+#define decl_generate_grain_uv_fn(name) /* chroma grain generator: writes buf for chroma plane uv (0 or 1); buf_y is the read-only luma grain table */ \
+void (name)(entry buf[GRAIN_HEIGHT][GRAIN_WIDTH], \
+ const entry buf_y[GRAIN_HEIGHT][GRAIN_WIDTH], \
+ const Dav1dFilmGrainData *const data, const int uv HIGHBD_DECL_SUFFIX)
+typedef decl_generate_grain_uv_fn(*generate_grain_uv_fn); /* pointer to a chroma grain generator */
+
+#define decl_fgy_32x32xn_fn(name) /* applies luma grain to one row of blocks: pw is the picture width, bh the number of lines, row_num the block-row index */ \
+void (name)(pixel *dst_row, const pixel *src_row, ptrdiff_t stride, \
+ const Dav1dFilmGrainData *data, \
+ int pw, const uint8_t scaling[SCALING_SIZE], \
+ const entry grain_lut[GRAIN_HEIGHT][GRAIN_WIDTH], \
+ int bh, int row_num HIGHBD_DECL_SUFFIX)
+typedef decl_fgy_32x32xn_fn(*fgy_32x32xn_fn); /* pointer to a luma grain application function */
+
+#define decl_fguv_32x32xn_fn(name) /* applies chroma grain to one row of blocks of plane uv_pl (0 or 1): pw is the luma width, bh the number of chroma lines, is_id is nonzero for identity matrix coefficients */ \
+void (name)(pixel *dst_row, const pixel *src_row, ptrdiff_t stride, \
+ const pixel *luma_row, ptrdiff_t luma_stride, int pw, int bh, \
+ const Dav1dFilmGrainData *data, \
+ const entry grain_lut[GRAIN_HEIGHT][GRAIN_WIDTH], \
+ const uint8_t scaling[SCALING_SIZE], \
+ int uv_pl, int row_num, int is_id HIGHBD_DECL_SUFFIX)
+typedef decl_fguv_32x32xn_fn(*fguv_32x32xn_fn); /* pointer to a chroma grain application function */
+
+typedef struct Dav1dFilmGrainDSPContext { /* function table for film grain synthesis */
+ generate_grain_y_fn generate_grain_y; /* luma grain table generator */
+ generate_grain_uv_fn generate_grain_uv[3]; /* chroma grain table generators, indexed by pixel layout - 1 */
+
+ fgy_32x32xn_fn fgy_32x32xn; /* luma grain application, one row of blocks per call */
+ fguv_32x32xn_fn fguv_32x32xn[3]; /* chroma grain application, indexed by pixel layout - 1 */
+} Dav1dFilmGrainDSPContext;
+
+bitfn_decls(void dav1d_film_grain_dsp_init, Dav1dFilmGrainDSPContext *c); /* fills c with the film grain function pointers */
#endif /* DAV1D_SRC_FILM_GRAIN_H */
--- a/src/film_grain_tmpl.c
+++ b/src/film_grain_tmpl.c
@@ -26,38 +26,16 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include "config.h"
-
-#include <stdint.h>
-
-#include "common.h"
+#include "common/attributes.h"
#include "common/intops.h"
-#include "common/bitdepth.h"
-#include "tables.h"
#include "film_grain.h"
+#include "tables.h"
-#if BITDEPTH == 8
-typedef int8_t entry;
-#else
-typedef int16_t entry;
-#endif
+#define SUB_GRAIN_WIDTH 44
+#define SUB_GRAIN_HEIGHT 38
-enum {
- GRAIN_WIDTH = 82,
- GRAIN_HEIGHT = 73,
- SUB_GRAIN_WIDTH = 44,
- SUB_GRAIN_HEIGHT = 38,
- SUB_GRAIN_OFFSET = 6,
- BLOCK_SIZE = 32,
-#if BITDEPTH == 8
- SCALING_SIZE = 256
-#else
- SCALING_SIZE = 4096
-#endif
-};
-
-static inline int get_random_number(const int bits, unsigned *state) {
+static inline int get_random_number(const int bits, unsigned *const state) {
const int r = *state;
unsigned bit = ((r >> 0) ^ (r >> 1) ^ (r >> 3) ^ (r >> 12)) & 1;
*state = (r >> 1) | (bit << 15);
@@ -69,13 +47,14 @@
return (x + ((1 << shift) >> 1)) >> shift;
}
-static void generate_grain_y(const Dav1dPicture *const in,
- entry buf[GRAIN_HEIGHT][GRAIN_WIDTH])
+static void generate_grain_y_c(entry buf[GRAIN_HEIGHT][GRAIN_WIDTH],
+ const Dav1dFilmGrainData *const data
+ HIGHBD_DECL_SUFFIX)
{
- const Dav1dFilmGrainData *data = &in->frame_hdr->film_grain.data;
+ const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
unsigned seed = data->seed;
- const int shift = 12 - in->p.bpc + data->grain_scale_shift;
- const int grain_ctr = 128 << (in->p.bpc - 8);
+ const int shift = 4 - bitdepth_min_8 + data->grain_scale_shift;
+ const int grain_ctr = 128 << bitdepth_min_8;
const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
for (int y = 0; y < GRAIN_HEIGHT; y++) {
@@ -100,25 +79,24 @@
}
}
- int grain = buf[y][x] + round2(sum, data->ar_coeff_shift);
+ const int grain = buf[y][x] + round2(sum, data->ar_coeff_shift);
buf[y][x] = iclip(grain, grain_min, grain_max);
}
}
}
-static void generate_grain_uv(const Dav1dPicture *const in, int uv,
- entry buf[GRAIN_HEIGHT][GRAIN_WIDTH],
- entry buf_y[GRAIN_HEIGHT][GRAIN_WIDTH])
+static NOINLINE void
+generate_grain_uv_c(entry buf[GRAIN_HEIGHT][GRAIN_WIDTH],
+ const entry buf_y[GRAIN_HEIGHT][GRAIN_WIDTH],
+ const Dav1dFilmGrainData *const data, const int uv,
+ const int subx, const int suby HIGHBD_DECL_SUFFIX)
{
- const Dav1dFilmGrainData *data = &in->frame_hdr->film_grain.data;
+ const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
unsigned seed = data->seed ^ (uv ? 0x49d8 : 0xb524);
- const int shift = 12 - in->p.bpc + data->grain_scale_shift;
- const int grain_ctr = 128 << (in->p.bpc - 8);
+ const int shift = 4 - bitdepth_min_8 + data->grain_scale_shift;
+ const int grain_ctr = 128 << bitdepth_min_8;
const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
- const int subx = in->p.layout != DAV1D_PIXEL_LAYOUT_I444;
- const int suby = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
-
const int chromaW = subx ? SUB_GRAIN_WIDTH : GRAIN_WIDTH;
const int chromaH = suby ? SUB_GRAIN_HEIGHT : GRAIN_HEIGHT;
@@ -166,56 +144,18 @@
}
}
-static void generate_scaling(const int bitdepth,
- const uint8_t points[][2], int num,
- uint8_t scaling[SCALING_SIZE])
-{
- const int shift_x = bitdepth - 8;
- const int scaling_size = 1 << bitdepth;
- const int pad = 1 << shift_x;
-
- // Fill up the preceding entries with the initial value
- for (int i = 0; i < points[0][0] << shift_x; i++)
- scaling[i] = points[0][1];
-
- // Linearly interpolate the values in the middle
- for (int i = 0; i < num - 1; i++) {
- const int bx = points[i][0];
- const int by = points[i][1];
- const int ex = points[i+1][0];
- const int ey = points[i+1][1];
- const int dx = ex - bx;
- const int dy = ey - by;
- const int delta = dy * ((0x10000 + (dx >> 1)) / dx);
- for (int x = 0; x < dx; x++) {
- const int v = by + ((x * delta + 0x8000) >> 16);
- scaling[(bx + x) << shift_x] = v;
- }
- }
-
- // Fill up the remaining entries with the final value
- for (int i = points[num - 1][0] << shift_x; i < scaling_size; i++)
- scaling[i] = points[num - 1][1];
-
- if (pad > 1) {
- const int rnd = pad >> 1;
- for (int i = 0; i < num - 1; i++) {
- const int bx = points[i][0] << shift_x;
- const int ex = points[i+1][0] << shift_x;
- const int dx = ex - bx;
- for (int x = 0; x < dx; x += pad) {
- const int range = scaling[bx + x + pad] - scaling[bx + x];
- for (int n = 1; n < pad; n++) {
- scaling[bx + x + n] = scaling[bx + x] + ((range * n + rnd) >> shift_x);
- }
- }
- }
- }
-}
+#define gnuv_ss_fn(nm, ss_x, ss_y) /* defines generate_grain_uv_<nm>_c, a wrapper calling generate_grain_uv_c with fixed subsampling flags */ \
+static decl_generate_grain_uv_fn(generate_grain_uv_##nm##_c) { \
+ generate_grain_uv_c(buf, buf_y, data, uv, ss_x, ss_y HIGHBD_TAIL_SUFFIX); \
+}
+gnuv_ss_fn(420, 1, 1); /* subsampled horizontally and vertically */
+gnuv_ss_fn(422, 1, 0); /* subsampled horizontally only */
+gnuv_ss_fn(444, 0, 0); /* no subsampling */
+
// samples from the correct block of a grain LUT, while taking into account the
// offsets provided by the offsets cache
-static inline entry sample_lut(entry grain_lut[GRAIN_HEIGHT][GRAIN_WIDTH],
+static inline entry sample_lut(const entry grain_lut[GRAIN_HEIGHT][GRAIN_WIDTH],
int offsets[2][2], int subx, int suby,
int bx, int by, int x, int y)
{
@@ -226,13 +166,15 @@
[offx + x + (BLOCK_SIZE >> subx) * bx];
}
-static void apply_to_row_y(Dav1dPicture *const out, const Dav1dPicture *const in,
- entry grain_lut[GRAIN_HEIGHT][GRAIN_WIDTH],
- uint8_t scaling[SCALING_SIZE], int row_num)
+static void fgy_32x32xn_c(pixel *const dst_row, const pixel *const src_row,
+ const ptrdiff_t stride,
+ const Dav1dFilmGrainData *const data, const int pw,
+ const uint8_t scaling[SCALING_SIZE],
+ const entry grain_lut[GRAIN_HEIGHT][GRAIN_WIDTH],
+ const int bh, const int row_num HIGHBD_DECL_SUFFIX)
{
- const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
const int rows = 1 + (data->overlap_flag && row_num > 0);
- const int bitdepth_min_8 = in->p.bpc - 8;
+ const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
const int grain_ctr = 128 << bitdepth_min_8;
const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
@@ -242,7 +184,11 @@
max_value = 235 << bitdepth_min_8;
} else {
min_value = 0;
- max_value = (1U << in->p.bpc) - 1;
+#if BITDEPTH == 8
+ max_value = 0xff;
+#else
+ max_value = bitdepth_max;
+#endif
}
// seed[0] contains the current row, seed[1] contains the previous
@@ -253,18 +199,13 @@
seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF);
}
- const ptrdiff_t stride = out->stride[0];
assert(stride % (BLOCK_SIZE * sizeof(pixel)) == 0);
- assert(stride == in->stride[0]);
- pixel *const src_row = (pixel *) in->data[0] + PXSTRIDE(stride) * row_num * BLOCK_SIZE;
- pixel *const dst_row = (pixel *) out->data[0] + PXSTRIDE(stride) * row_num * BLOCK_SIZE;
int offsets[2 /* col offset */][2 /* row offset */];
// process this row in BLOCK_SIZE^2 blocks
- const int bh = imin(out->p.h - row_num * BLOCK_SIZE, BLOCK_SIZE);
- for (int bx = 0; bx < out->p.w; bx += BLOCK_SIZE) {
- const int bw = imin(BLOCK_SIZE, out->p.w - bx);
+ for (int bx = 0; bx < pw; bx += BLOCK_SIZE) {
+ const int bw = imin(BLOCK_SIZE, pw - bx);
if (data->overlap_flag && bx) {
// shift previous offsets left
@@ -282,11 +223,11 @@
static const int w[2][2] = { { 27, 17 }, { 17, 27 } };
-#define add_noise_y(x, y, grain) \
- pixel *src = src_row + (y) * PXSTRIDE(stride) + (bx + (x)); \
- pixel *dst = dst_row + (y) * PXSTRIDE(stride) + (bx + (x)); \
- int noise = round2(scaling[ *src ] * (grain), data->scaling_shift); \
- *dst = iclip(*src + noise, min_value, max_value);
+#define add_noise_y(x, y, grain) \
+ const pixel *const src = src_row + (y) * PXSTRIDE(stride) + (x) + bx; \
+ pixel *const dst = dst_row + (y) * PXSTRIDE(stride) + (x) + bx; \
+ const int noise = round2(scaling[ *src ] * (grain), data->scaling_shift); \
+ *dst = iclip(*src + noise, min_value, max_value);
for (int y = ystart; y < bh; y++) {
// Non-overlapped image region (straightforward)
@@ -338,14 +279,18 @@
}
}
-static void apply_to_row_uv(Dav1dPicture *const out, const Dav1dPicture *const in,
- entry grain_lut[GRAIN_HEIGHT][GRAIN_WIDTH],
- uint8_t scaling[SCALING_SIZE], int uv, int row_num)
+static NOINLINE void
+fguv_32x32xn_c(pixel *const dst_row, const pixel *const src_row,
+ const ptrdiff_t stride, const pixel *const luma_row,
+ const ptrdiff_t luma_stride, const int pw, const int bh,
+ const Dav1dFilmGrainData *const data,
+ const entry grain_lut[GRAIN_HEIGHT][GRAIN_WIDTH],
+ const uint8_t scaling[SCALING_SIZE],
+ const int uv, const int row_num, const int is_id,
+ const int sx, const int sy HIGHBD_DECL_SUFFIX)
{
- const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
const int rows = 1 + (data->overlap_flag && row_num > 0);
- const int bitdepth_max = (1 << in->p.bpc) - 1;
- const int bitdepth_min_8 = in->p.bpc - 8;
+ const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
const int grain_ctr = 128 << bitdepth_min_8;
const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
@@ -352,19 +297,16 @@
int min_value, max_value;
if (data->clip_to_restricted_range) {
min_value = 16 << bitdepth_min_8;
- if (out->seq_hdr->mtrx == DAV1D_MC_IDENTITY) {
- max_value = 235 << bitdepth_min_8;
- } else {
- max_value = 240 << bitdepth_min_8;
- }
+ max_value = (is_id ? 235 : 240) << bitdepth_min_8;
} else {
min_value = 0;
+#if BITDEPTH == 8
+ max_value = 0xff;
+#else
max_value = bitdepth_max;
+#endif
}
- const int sx = in->p.layout != DAV1D_PIXEL_LAYOUT_I444;
- const int sy = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
-
// seed[0] contains the current row, seed[1] contains the previous
unsigned seed[2];
for (int i = 0; i < rows; i++) {
@@ -373,21 +315,13 @@
seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF);
}
- const ptrdiff_t stride = out->stride[1];
assert(stride % (BLOCK_SIZE * sizeof(pixel)) == 0);
- assert(stride == in->stride[1]);
- const int by = row_num * (BLOCK_SIZE >> sy);
- pixel *const dst_row = (pixel *) out->data[1 + uv] + PXSTRIDE(stride) * by;
- pixel *const src_row = (pixel *) in->data[1 + uv] + PXSTRIDE(stride) * by;
- pixel *const luma_row = (pixel *) in->data[0] + PXSTRIDE(in->stride[0]) * row_num * BLOCK_SIZE;
-
int offsets[2 /* col offset */][2 /* row offset */];
// process this row in BLOCK_SIZE^2 blocks (subsampled)
- const int bh = (imin(out->p.h - row_num * BLOCK_SIZE, BLOCK_SIZE) + sy) >> sy;
- for (int bx = 0; bx < (out->p.w + sx) >> sx; bx += BLOCK_SIZE >> sx) {
- const int bw = (imin(BLOCK_SIZE, out->p.w - (bx << sx)) + sx) >> sx;
+ for (int bx = 0; bx < (pw + sx) >> sx; bx += BLOCK_SIZE >> sx) {
+ const int bw = (imin(BLOCK_SIZE, pw - (bx << sx)) + sx) >> sx;
if (data->overlap_flag && bx) {
// shift previous offsets left
for (int i = 0; i < rows; i++)
@@ -407,25 +341,23 @@
{ { 23, 22 } },
};
-#define add_noise_uv(x, y, grain) \
- const int lx = (bx + x) << sx; \
- const int ly = y << sy; \
- pixel *luma = luma_row + ly * PXSTRIDE(in->stride[0]) + lx; \
- pixel avg = luma[0]; \
- if (sx && lx + 1 < out->p.w) \
- avg = (avg + luma[1] + 1) >> 1; \
- \
- pixel *src = src_row + (y) * PXSTRIDE(stride) + (bx + (x)); \
- pixel *dst = dst_row + (y) * PXSTRIDE(stride) + (bx + (x)); \
- int val = avg; \
- if (!data->chroma_scaling_from_luma) { \
- int combined = avg * data->uv_luma_mult[uv] + \
- *src * data->uv_mult[uv]; \
- val = iclip_pixel( (combined >> 6) + \
- (data->uv_offset[uv] * (1 << bitdepth_min_8)) ); \
- } \
- \
- int noise = round2(scaling[ val ] * (grain), data->scaling_shift); \
+#define add_noise_uv(x, y, grain) \
+ const int lx = (bx + x) << sx; \
+ const int ly = y << sy; \
+ const pixel *const luma = luma_row + ly * PXSTRIDE(luma_stride) + lx; \
+ pixel avg = luma[0]; \
+ if (sx && lx + 1 < pw) \
+ avg = (avg + luma[1] + 1) >> 1; \
+ const pixel *const src = src_row + (y) * PXSTRIDE(stride) + (bx + (x)); \
+ pixel *const dst = dst_row + (y) * PXSTRIDE(stride) + (bx + (x)); \
+ int val = avg; \
+ if (!data->chroma_scaling_from_luma) { \
+ const int combined = avg * data->uv_luma_mult[uv] + \
+ *src * data->uv_mult[uv]; \
+ val = iclip_pixel( (combined >> 6) + \
+ (data->uv_offset[uv] * (1 << bitdepth_min_8)) ); \
+ } \
+ const int noise = round2(scaling[ val ] * (grain), data->scaling_shift); \
*dst = iclip(*src + noise, min_value, max_value);
for (int y = ystart; y < bh; y++) {
@@ -478,61 +410,25 @@
}
}
-void bitfn(dav1d_apply_grain)(Dav1dPicture *const out,
- const Dav1dPicture *const in)
-{
- const Dav1dFilmGrainData *const data = &out->frame_hdr->film_grain.data;
+#define fguv_ss_fn(nm, ss_x, ss_y) /* defines fguv_32x32xn_<nm>_c, a wrapper calling fguv_32x32xn_c with fixed subsampling flags */ \
+static decl_fguv_32x32xn_fn(fguv_32x32xn_##nm##_c) { \
+ fguv_32x32xn_c(dst_row, src_row, stride, luma_row, luma_stride, pw, bh, \
+ data, grain_lut, scaling, uv_pl, row_num, is_id, ss_x, ss_y \
+ HIGHBD_TAIL_SUFFIX); \
+}
- entry grain_lut[3][GRAIN_HEIGHT][GRAIN_WIDTH];
- uint8_t scaling[3][SCALING_SIZE];
+fguv_ss_fn(420, 1, 1); /* subsampled horizontally and vertically */
+fguv_ss_fn(422, 1, 0); /* subsampled horizontally only */
+fguv_ss_fn(444, 0, 0); /* no subsampling */
- // Generate grain LUTs as needed
- generate_grain_y(out, grain_lut[0]); // always needed
- if (data->num_uv_points[0] || data->chroma_scaling_from_luma)
- generate_grain_uv(out, 0, grain_lut[1], grain_lut[0]);
- if (data->num_uv_points[1] || data->chroma_scaling_from_luma)
- generate_grain_uv(out, 1, grain_lut[2], grain_lut[0]);
+COLD void bitfn(dav1d_film_grain_dsp_init)(Dav1dFilmGrainDSPContext *const c) { /* installs the C implementations into c */
+ c->generate_grain_y = generate_grain_y_c;
+ c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = generate_grain_uv_420_c; /* per-layout tables are indexed by layout - 1 */
+ c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = generate_grain_uv_422_c;
+ c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = generate_grain_uv_444_c;
- // Generate scaling LUTs as needed
- if (data->num_y_points)
- generate_scaling(in->p.bpc, data->y_points, data->num_y_points, scaling[0]);
- if (data->num_uv_points[0])
- generate_scaling(in->p.bpc, data->uv_points[0], data->num_uv_points[0], scaling[1]);
- if (data->num_uv_points[1])
- generate_scaling(in->p.bpc, data->uv_points[1], data->num_uv_points[1], scaling[2]);
-
- // Copy over the non-modified planes
- // TODO: eliminate in favor of per-plane refs
- if (!data->num_y_points) {
- assert(out->stride[0] == in->stride[0]);
- memcpy(out->data[0], in->data[0], out->p.h * out->stride[0]);
- }
-
- if (in->p.layout != DAV1D_PIXEL_LAYOUT_I400) {
- for (int i = 0; i < 2; i++) {
- if (!data->num_uv_points[i] && !data->chroma_scaling_from_luma) {
- const int suby = in->p.layout == DAV1D_PIXEL_LAYOUT_I420;
- assert(out->stride[1] == in->stride[1]);
- memcpy(out->data[1+i], in->data[1+i],
- (out->p.h >> suby) * out->stride[1]);
- }
- }
- }
-
- // Synthesize grain for the affected planes
- int rows = (out->p.h + 31) >> 5;
- for (int row = 0; row < rows; row++) {
- if (data->num_y_points)
- apply_to_row_y(out, in, grain_lut[0], scaling[0], row);
-
- if (data->chroma_scaling_from_luma) {
- apply_to_row_uv(out, in, grain_lut[1], scaling[0], 0, row);
- apply_to_row_uv(out, in, grain_lut[2], scaling[0], 1, row);
- } else {
- if (data->num_uv_points[0])
- apply_to_row_uv(out, in, grain_lut[1], scaling[1], 0, row);
- if (data->num_uv_points[1])
- apply_to_row_uv(out, in, grain_lut[2], scaling[2], 1, row);
- }
- }
+ c->fgy_32x32xn = fgy_32x32xn_c;
+ c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = fguv_32x32xn_420_c; /* same layout - 1 indexing as generate_grain_uv */
+ c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = fguv_32x32xn_422_c;
+ c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = fguv_32x32xn_444_c;
}
--- a/src/internal.h
+++ b/src/internal.h
@@ -42,6 +42,7 @@
#include "src/cdf.h"
#include "src/data.h"
#include "src/env.h"
+#include "src/film_grain.h"
#include "src/intra_edge.h"
#include "src/ipred.h"
#include "src/itx.h"
@@ -57,6 +58,7 @@
#include "src/thread.h"
typedef struct Dav1dDSPContext {
+ Dav1dFilmGrainDSPContext fg;
Dav1dIntraPredDSPContext ipred;
Dav1dMCDSPContext mc;
Dav1dInvTxfmDSPContext itx;
--- a/src/lib.c
+++ b/src/lib.c
@@ -37,6 +37,7 @@
#include "common/mem.h"
#include "common/validate.h"
+#include "src/fg_apply.h"
#include "src/internal.h"
#include "src/log.h"
#include "src/obu.h"
@@ -44,7 +45,6 @@
#include "src/ref.h"
#include "src/thread_task.h"
#include "src/wedge.h"
-#include "src/film_grain.h"
static COLD void init_internal(void) {
dav1d_init_wedge_masks();
@@ -290,13 +290,13 @@
switch (out->p.bpc) {
#if CONFIG_8BPC
case 8:
- dav1d_apply_grain_8bpc(out, in);
+ dav1d_apply_grain_8bpc(&c->dsp[0].fg, out, in);
break;
#endif
#if CONFIG_16BPC
case 10:
case 12:
- dav1d_apply_grain_16bpc(out, in);
+ dav1d_apply_grain_16bpc(&c->dsp[(out->p.bpc >> 1) - 4].fg, out, in);
break;
#endif
default:
--- a/src/meson.build
+++ b/src/meson.build
@@ -55,6 +55,7 @@
libdav1d_tmpl_sources = files(
'cdef_apply_tmpl.c',
'cdef_tmpl.c',
+ 'fg_apply_tmpl.c',
'film_grain_tmpl.c',
'ipred_prepare_tmpl.c',
'ipred_tmpl.c',