ref: afe901a624990ad98090c1a15f9d599eaff3a4cf
parent: dfadb6df41acd02daddcae0f42c64f3e25fcb4b0
author: Henrik Gramner <[email protected]>
date: Wed Oct 9 19:14:16 EDT 2019
Simplify ipred_z C code
--- a/src/ipred_tmpl.c
+++ b/src/ipred_tmpl.c
@@ -324,44 +324,37 @@
}
}
-static int get_filter_strength(const unsigned blk_wh, const unsigned d,
- const int type)
-{
- int strength = 0;
-
- if (type == 0) {
- if (blk_wh <= 8) {
- if (d >= 56) strength = 1;
- } else if (blk_wh <= 12) {
- if (d >= 40) strength = 1;
- } else if (blk_wh <= 16) {
- if (d >= 40) strength = 1;
- } else if (blk_wh <= 24) {
- if (d >= 8) strength = 1;
- if (d >= 16) strength = 2;
- if (d >= 32) strength = 3;
- } else if (blk_wh <= 32) {
- if (d >= 1) strength = 1;
- if (d >= 4) strength = 2;
- if (d >= 32) strength = 3;
+static int get_filter_strength(const int wh, const int angle, const int is_sm) {
+ if (is_sm) {
+ if (wh <= 8) {
+ if (angle >= 64) return 2;
+ if (angle >= 40) return 1;
+ } else if (wh <= 16) {
+ if (angle >= 48) return 2;
+ if (angle >= 20) return 1;
+ } else if (wh <= 24) {
+ if (angle >= 4) return 3;
} else {
- if (d >= 1) strength = 3;
+ return 3;
}
} else {
- if (blk_wh <= 8) {
- if (d >= 40) strength = 1;
- if (d >= 64) strength = 2;
- } else if (blk_wh <= 16) {
- if (d >= 20) strength = 1;
- if (d >= 48) strength = 2;
- } else if (blk_wh <= 24) {
- if (d >= 4) strength = 3;
+ if (wh <= 8) {
+ if (angle >= 56) return 1;
+ } else if (wh <= 16) {
+ if (angle >= 40) return 1;
+ } else if (wh <= 24) {
+ if (angle >= 32) return 3;
+ if (angle >= 16) return 2;
+ if (angle >= 8) return 1;
+ } else if (wh <= 32) {
+ if (angle >= 32) return 3;
+ if (angle >= 4) return 2;
+ return 1;
} else {
- if (d >= 1) strength = 3;
+ return 3;
}
}
-
- return strength;
+ return 0;
}
static void filter_edge(pixel *const out, const int sz,
@@ -451,12 +444,12 @@
for (int y = 0, xpos = dx; y < height;
y++, dst += PXSTRIDE(stride), xpos += dx)
{
- const int frac = (xpos >> 1) & 0x1F;
+ const int frac = xpos & 0x3E;
for (int x = 0, base = xpos >> 6; x < width; x++, base += base_inc) {
if (base < max_base_x) {
- const int v = top[base] * (32 - frac) + top[base + 1] * frac;
- dst[x] = iclip_pixel((v + 16) >> 5);
+ const int v = top[base] * (64 - frac) + top[base + 1] * frac;
+ dst[x] = (v + 32) >> 6;
} else {
pixel_set(&dst[x], top[max_base_x], width - x);
break;
@@ -518,30 +511,29 @@
}
*topleft = *topleft_in;
- const int min_base_x = -(1 + upsample_above);
const int base_inc_x = 1 + upsample_above;
const pixel *const left = &topleft[-(1 + upsample_left)];
- const pixel *const top = &topleft[1 + upsample_above];
- for (int y = 0, xpos = -dx; y < height;
+ for (int y = 0, xpos = ((1 + upsample_above) << 6) - dx; y < height;
y++, xpos -= dx, dst += PXSTRIDE(stride))
{
int base_x = xpos >> 6;
- const int frac_x = (xpos >> 1) & 0x1F;
+ const int frac_x = xpos & 0x3E;
for (int x = 0, ypos = (y << (6 + upsample_left)) - dy; x < width;
x++, base_x += base_inc_x, ypos -= dy)
{
int v;
-
- if (base_x >= min_base_x) {
- v = top[base_x] * (32 - frac_x) + top[base_x + 1] * frac_x;
+ if (base_x >= 0) {
+ v = topleft[base_x] * (64 - frac_x) +
+ topleft[base_x + 1] * frac_x;
} else {
const int base_y = ypos >> 6;
assert(base_y >= -(1 + upsample_left));
- const int frac_y = (ypos >> 1) & 0x1F;
- v = left[-base_y] * (32 - frac_y) + left[-(base_y + 1)] * frac_y;
+ const int frac_y = ypos & 0x3E;
+ v = left[-base_y] * (64 - frac_y) +
+ left[-(base_y + 1)] * frac_y;
}
- dst[x] = iclip_pixel((v + 16) >> 5);
+ dst[x] = (v + 32) >> 6;
}
}
}
@@ -588,13 +580,13 @@
}
const int base_inc = 1 + upsample_left;
for (int x = 0, ypos = dy; x < width; x++, ypos += dy) {
- const int frac = (ypos >> 1) & 0x1F;
+ const int frac = ypos & 0x3E;
for (int y = 0, base = ypos >> 6; y < height; y++, base += base_inc) {
if (base < max_base_y) {
- const int v = left[-base] * (32 - frac) +
+ const int v = left[-base] * (64 - frac) +
left[-(base + 1)] * frac;
- dst[y * PXSTRIDE(stride) + x] = iclip_pixel((v + 16) >> 5);
+ dst[y * PXSTRIDE(stride) + x] = (v + 32) >> 6;
} else {
do {
dst[y * PXSTRIDE(stride) + x] = left[-max_base_y];
--- a/src/x86/ipred.asm
+++ b/src/x86/ipred.asm
@@ -1361,11 +1361,11 @@
shr r5d, 6 ; base3
movhps xm0, [rsp+r5]
vpblendd m1, m2, 0xc0
- pand m2, m4, m6 ; frac << 1
+ pand m2, m4, m6 ; frac
vpblendd m0, m1, 0xf0
- psubw m1, m5, m2 ; (32 - frac) << 1
+ psubw m1, m5, m2 ; 64-frac
psllw m2, 8
- por m1, m2 ; (32-frac, frac) << 1
+ por m1, m2 ; 64-frac, frac
pmaddubsw m0, m1
paddw m6, m7 ; xpos += dx
pmulhrsw m0, m3
@@ -1462,12 +1462,12 @@
shr r5d, 6 ; base3
movhps xm0, [tlq+r5]
vpblendd m1, m2, 0xc0
- pand m2, m4, m6 ; frac << 1
+ pand m2, m4, m6 ; frac
vpblendd m0, m1, 0xf0
- psubw m1, m5, m2 ; (32 - frac) << 1
+ psubw m1, m5, m2 ; 64-frac
psllw m2, 8
pshufb m0, m8
- por m1, m2 ; (32-frac, frac) << 1
+ por m1, m2 ; 64-frac, frac
pmaddubsw m0, m1
pcmpgtw m1, m9, m6 ; base < max_base_x
pmulhrsw m0, m3
@@ -2299,12 +2299,12 @@
sar r5, 6 ; base3
movhps xm0, [tlq+r5]
vpblendd m1, m2, 0xc0
- pand m2, m4, m6 ; frac << 1
+ pand m2, m4, m6 ; frac
vpblendd m0, m1, 0xf0
- psubw m1, m5, m2 ; (32 - frac) << 1
+ psubw m1, m5, m2 ; 64-frac
psllw m2, 8
pshufb m0, m8
- por m1, m2 ; (32-frac, frac) << 1
+ por m1, m2 ; 64-frac, frac
pmaddubsw m0, m1
pcmpgtw m1, m9, m6 ; base < max_base_y
pmulhrsw m0, m3