ref: 14072e733465b034644dd08cfaffb3bf7ac0a310
parent: 8bfd7f2f06dc413180f7c7c795ee3b801df68601
author: Henrik Gramner <[email protected]>
date: Sun Sep 23 13:20:46 EDT 2018
Downshift mc subpel multiplier constants. Downshift all the constants by one bit, and reduce the rounding shift by one. This is mathematically equivalent since all constants are a multiple of two, but allows for using 16-bit intermediates in the 1st pass of the 8-tap filter.
--- a/src/mc.c
+++ b/src/mc.c
@@ -102,7 +102,7 @@
src -= src_stride * 3;
do {
for (int x = 0; x < w; x++)
- mid_ptr[x] = FILTER_8TAP_RND(src, x, fh, 1, 3);
+ mid_ptr[x] = FILTER_8TAP_RND(src, x, fh, 1, 2);
mid_ptr += 128;
src += src_stride;
@@ -111,7 +111,7 @@
mid_ptr = mid + 128 * 3;
do {
for (int x = 0; x < w; x++)
- dst[x] = FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, 11);
+ dst[x] = FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, 10);
mid_ptr += 128;
dst += dst_stride;
@@ -119,7 +119,7 @@
} else {
do {
for (int x = 0; x < w; x++) {
- const int px = FILTER_8TAP_RND(src, x, fh, 1, 3);
+ const int px = FILTER_8TAP_RND(src, x, fh, 1, 2);
dst[x] = iclip_pixel((px + 8) >> 4);
}
@@ -130,7 +130,7 @@
} else if (fv) {
do {
for (int x = 0; x < w; x++)
- dst[x] = FILTER_8TAP_CLIP(src, x, fv, src_stride, 7);
+ dst[x] = FILTER_8TAP_CLIP(src, x, fv, src_stride, 6);
dst += dst_stride;
src += src_stride;
@@ -155,7 +155,7 @@
src -= src_stride * 3;
do {
for (int x = 0; x < w; x++)
- mid_ptr[x] = FILTER_8TAP_RND(src, x, fh, 1, 3);
+ mid_ptr[x] = FILTER_8TAP_RND(src, x, fh, 1, 2);
mid_ptr += 128;
src += src_stride;
@@ -164,7 +164,7 @@
mid_ptr = mid + 128 * 3;
do {
for (int x = 0; x < w; x++)
- tmp[x] = FILTER_8TAP_RND(mid_ptr, x, fv, 128, 7);
+ tmp[x] = FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6);
mid_ptr += 128;
tmp += w;
@@ -172,7 +172,7 @@
} else {
do {
for (int x = 0; x < w; x++)
- tmp[x] = FILTER_8TAP_RND(src, x, fh, 1, 3);
+ tmp[x] = FILTER_8TAP_RND(src, x, fh, 1, 2);
tmp += w;
src += src_stride;
@@ -181,7 +181,7 @@
} else if (fv) {
do {
for (int x = 0; x < w; x++)
- tmp[x] = FILTER_8TAP_RND(src, x, fv, src_stride, 3);
+ tmp[x] = FILTER_8TAP_RND(src, x, fv, src_stride, 2);
tmp += w;
src += src_stride;
--- a/src/tables.c
+++ b/src/tables.c
@@ -29,6 +29,8 @@
#include <stdint.h>
+#include "common/attributes.h"
+
#include "src/levels.h"
#include "src/tables.h"
@@ -562,89 +564,88 @@
293, 273, 256, 241, 228, 216, 205, 195, 186, 178, 171, 164,
};
-const int8_t dav1d_mc_subpel_filters[5][15][8] = {
+ALIGN(const int8_t dav1d_mc_subpel_filters[5][15][8], 8 ) = {
[FILTER_8TAP_REGULAR] = {
- { 0, 2, -6, 126, 8, -2, 0, 0 },
- { 0, 2, -10, 122, 18, -4, 0, 0 },
- { 0, 2, -12, 116, 28, -8, 2, 0 },
- { 0, 2, -14, 110, 38, -10, 2, 0 },
- { 0, 2, -14, 102, 48, -12, 2, 0 },
- { 0, 2, -16, 94, 58, -12, 2, 0 },
- { 0, 2, -14, 84, 66, -12, 2, 0 },
- { 0, 2, -14, 76, 76, -14, 2, 0 },
- { 0, 2, -12, 66, 84, -14, 2, 0 },
- { 0, 2, -12, 58, 94, -16, 2, 0 },
- { 0, 2, -12, 48, 102, -14, 2, 0 },
- { 0, 2, -10, 38, 110, -14, 2, 0 },
- { 0, 2, -8, 28, 116, -12, 2, 0 },
- { 0, 0, -4, 18, 122, -10, 2, 0 },
- { 0, 0, -2, 8, 126, -6, 2, 0 }
- }, [FILTER_8TAP_SHARP] = {
- { -2, 2, -6, 126, 8, -2, 2, 0 },
- { -2, 6, -12, 124, 16, -6, 4, -2 },
- { -2, 8, -18, 120, 26, -10, 6, -2 },
- { -4, 10, -22, 116, 38, -14, 6, -2 },
- { -4, 10, -22, 108, 48, -18, 8, -2 },
- { -4, 10, -24, 100, 60, -20, 8, -2 },
- { -4, 10, -24, 90, 70, -22, 10, -2 },
- { -4, 12, -24, 80, 80, -24, 12, -4 },
- { -2, 10, -22, 70, 90, -24, 10, -4 },
- { -2, 8, -20, 60, 100, -24, 10, -4 },
- { -2, 8, -18, 48, 108, -22, 10, -4 },
- { -2, 6, -14, 38, 116, -22, 10, -4 },
- { -2, 6, -10, 26, 120, -18, 8, -2 },
- { -2, 4, -6, 16, 124, -12, 6, -2 },
- { 0, 2, -2, 8, 126, -6, 2, -2 }
+ { 0, 1, -3, 63, 4, -1, 0, 0 },
+ { 0, 1, -5, 61, 9, -2, 0, 0 },
+ { 0, 1, -6, 58, 14, -4, 1, 0 },
+ { 0, 1, -7, 55, 19, -5, 1, 0 },
+ { 0, 1, -7, 51, 24, -6, 1, 0 },
+ { 0, 1, -8, 47, 29, -6, 1, 0 },
+ { 0, 1, -7, 42, 33, -6, 1, 0 },
+ { 0, 1, -7, 38, 38, -7, 1, 0 },
+ { 0, 1, -6, 33, 42, -7, 1, 0 },
+ { 0, 1, -6, 29, 47, -8, 1, 0 },
+ { 0, 1, -6, 24, 51, -7, 1, 0 },
+ { 0, 1, -5, 19, 55, -7, 1, 0 },
+ { 0, 1, -4, 14, 58, -6, 1, 0 },
+ { 0, 0, -2, 9, 61, -5, 1, 0 },
+ { 0, 0, -1, 4, 63, -3, 1, 0 }
}, [FILTER_8TAP_SMOOTH] = {
- { 0, 2, 28, 62, 34, 2, 0, 0 },
- { 0, 0, 26, 62, 36, 4, 0, 0 },
- { 0, 0, 22, 62, 40, 4, 0, 0 },
- { 0, 0, 20, 60, 42, 6, 0, 0 },
- { 0, 0, 18, 58, 44, 8, 0, 0 },
- { 0, 0, 16, 56, 46, 10, 0, 0 },
- { 0, -2, 16, 54, 48, 12, 0, 0 },
- { 0, -2, 14, 52, 52, 14, -2, 0 },
- { 0, 0, 12, 48, 54, 16, -2, 0 },
- { 0, 0, 10, 46, 56, 16, 0, 0 },
- { 0, 0, 8, 44, 58, 18, 0, 0 },
- { 0, 0, 6, 42, 60, 20, 0, 0 },
- { 0, 0, 4, 40, 62, 22, 0, 0 },
- { 0, 0, 4, 36, 62, 26, 0, 0 },
- { 0, 0, 2, 34, 62, 28, 2, 0 },
- },
+ { 0, 1, 14, 31, 17, 1, 0, 0 },
+ { 0, 0, 13, 31, 18, 2, 0, 0 },
+ { 0, 0, 11, 31, 20, 2, 0, 0 },
+ { 0, 0, 10, 30, 21, 3, 0, 0 },
+ { 0, 0, 9, 29, 22, 4, 0, 0 },
+ { 0, 0, 8, 28, 23, 5, 0, 0 },
+ { 0, -1, 8, 27, 24, 6, 0, 0 },
+ { 0, -1, 7, 26, 26, 7, -1, 0 },
+ { 0, 0, 6, 24, 27, 8, -1, 0 },
+ { 0, 0, 5, 23, 28, 8, 0, 0 },
+ { 0, 0, 4, 22, 29, 9, 0, 0 },
+ { 0, 0, 3, 21, 30, 10, 0, 0 },
+ { 0, 0, 2, 20, 31, 11, 0, 0 },
+ { 0, 0, 2, 18, 31, 13, 0, 0 },
+ { 0, 0, 1, 17, 31, 14, 1, 0 }
+ }, [FILTER_8TAP_SHARP] = {
+ { -1, 1, -3, 63, 4, -1, 1, 0 },
+ { -1, 3, -6, 62, 8, -3, 2, -1 },
+ { -1, 4, -9, 60, 13, -5, 3, -1 },
+ { -2, 5, -11, 58, 19, -7, 3, -1 },
+ { -2, 5, -11, 54, 24, -9, 4, -1 },
+ { -2, 5, -12, 50, 30, -10, 4, -1 },
+ { -2, 5, -12, 45, 35, -11, 5, -1 },
+ { -2, 6, -12, 40, 40, -12, 6, -2 },
+ { -1, 5, -11, 35, 45, -12, 5, -2 },
+ { -1, 4, -10, 30, 50, -12, 5, -2 },
+ { -1, 4, -9, 24, 54, -11, 5, -2 },
+ { -1, 3, -7, 19, 58, -11, 5, -2 },
+ { -1, 3, -5, 13, 60, -9, 4, -1 },
+ { -1, 2, -3, 8, 62, -6, 3, -1 },
+ { 0, 1, -1, 4, 63, -3, 1, -1 }
/* width <= 4 */
- [3 + FILTER_8TAP_REGULAR] = {
- { 0, 0, -4, 126, 8, -2, 0, 0 },
- { 0, 0, -8, 122, 18, -4, 0, 0 },
- { 0, 0, -10, 116, 28, -6, 0, 0 },
- { 0, 0, -12, 110, 38, -8, 0, 0 },
- { 0, 0, -12, 102, 48, -10, 0, 0 },
- { 0, 0, -14, 94, 58, -10, 0, 0 },
- { 0, 0, -12, 84, 66, -10, 0, 0 },
- { 0, 0, -12, 76, 76, -12, 0, 0 },
- { 0, 0, -10, 66, 84, -12, 0, 0 },
- { 0, 0, -10, 58, 94, -14, 0, 0 },
- { 0, 0, -10, 48, 102, -12, 0, 0 },
- { 0, 0, -8, 38, 110, -12, 0, 0 },
- { 0, 0, -6, 28, 116, -10, 0, 0 },
- { 0, 0, -4, 18, 122, -8, 0, 0 },
- { 0, 0, -2, 8, 126, -4, 0, 0 }
+ }, [3 + FILTER_8TAP_REGULAR] = {
+ { 0, 0, -2, 63, 4, -1, 0, 0 },
+ { 0, 0, -4, 61, 9, -2, 0, 0 },
+ { 0, 0, -5, 58, 14, -3, 0, 0 },
+ { 0, 0, -6, 55, 19, -4, 0, 0 },
+ { 0, 0, -6, 51, 24, -5, 0, 0 },
+ { 0, 0, -7, 47, 29, -5, 0, 0 },
+ { 0, 0, -6, 42, 33, -5, 0, 0 },
+ { 0, 0, -6, 38, 38, -6, 0, 0 },
+ { 0, 0, -5, 33, 42, -6, 0, 0 },
+ { 0, 0, -5, 29, 47, -7, 0, 0 },
+ { 0, 0, -5, 24, 51, -6, 0, 0 },
+ { 0, 0, -4, 19, 55, -6, 0, 0 },
+ { 0, 0, -3, 14, 58, -5, 0, 0 },
+ { 0, 0, -2, 9, 61, -4, 0, 0 },
+ { 0, 0, -1, 4, 63, -2, 0, 0 }
}, [3 + FILTER_8TAP_SMOOTH] = {
- { 0, 0, 30, 62, 34, 2, 0, 0 },
- { 0, 0, 26, 62, 36, 4, 0, 0 },
- { 0, 0, 22, 62, 40, 4, 0, 0 },
- { 0, 0, 20, 60, 42, 6, 0, 0 },
- { 0, 0, 18, 58, 44, 8, 0, 0 },
- { 0, 0, 16, 56, 46, 10, 0, 0 },
- { 0, 0, 14, 54, 48, 12, 0, 0 },
- { 0, 0, 12, 52, 52, 12, 0, 0 },
- { 0, 0, 12, 48, 54, 14, 0, 0 },
- { 0, 0, 10, 46, 56, 16, 0, 0 },
- { 0, 0, 8, 44, 58, 18, 0, 0 },
- { 0, 0, 6, 42, 60, 20, 0, 0 },
- { 0, 0, 4, 40, 62, 22, 0, 0 },
- { 0, 0, 4, 36, 62, 26, 0, 0 },
- { 0, 0, 2, 34, 62, 30, 0, 0 }
+ { 0, 0, 15, 31, 17, 1, 0, 0 },
+ { 0, 0, 13, 31, 18, 2, 0, 0 },
+ { 0, 0, 11, 31, 20, 2, 0, 0 },
+ { 0, 0, 10, 30, 21, 3, 0, 0 },
+ { 0, 0, 9, 29, 22, 4, 0, 0 },
+ { 0, 0, 8, 28, 23, 5, 0, 0 },
+ { 0, 0, 7, 27, 24, 6, 0, 0 },
+ { 0, 0, 6, 26, 26, 6, 0, 0 },
+ { 0, 0, 6, 24, 27, 7, 0, 0 },
+ { 0, 0, 5, 23, 28, 8, 0, 0 },
+ { 0, 0, 4, 22, 29, 9, 0, 0 },
+ { 0, 0, 3, 21, 30, 10, 0, 0 },
+ { 0, 0, 2, 20, 31, 11, 0, 0 },
+ { 0, 0, 2, 18, 31, 13, 0, 0 },
+ { 0, 0, 1, 17, 31, 15, 0, 0 }
}
};