shithub: dav1d

Download patch

ref: 69dae683fdce08dabf406787cdd8d1f76a9f0437
parent: a9315f5fde02530f64358375c3d2444a506b3a58
author: Henrik Gramner <[email protected]>
date: Thu Sep 5 18:50:28 EDT 2019

x86: Fix buffer overread in mc put

For w <= 32 we can't process more than two rows per loop iteration.

Credit to OSS-Fuzz.

--- a/src/x86/mc.asm
+++ b/src/x86/mc.asm
@@ -170,8 +170,6 @@
 .put:
     movzx                wd, word [t2+wq*2+table_offset(put,)]
     add                  wq, t2
-    lea                  t1, [ssq*3]
-    lea                  t2, [dsq*3]
     jmp                  wq
 .put_w2:
     movzx               t0d, word [srcq+ssq*0]
@@ -194,11 +192,11 @@
     jg .put_w4
     RET
 .put_w8:
-    movq                 m0, [srcq+ssq*0]
-    movq                 m1, [srcq+ssq*1]
+    mov                  t0, [srcq+ssq*0]
+    mov                  t1, [srcq+ssq*1]
     lea                srcq, [srcq+ssq*2]
-    movq       [dstq+dsq*0], m0
-    movq       [dstq+dsq*1], m1
+    mov        [dstq+dsq*0], t0
+    mov        [dstq+dsq*1], t1
     lea                dstq, [dstq+dsq*2]
     sub                  hd, 2
     jg .put_w8
@@ -206,15 +204,11 @@
 .put_w16:
     movu                 m0, [srcq+ssq*0]
     movu                 m1, [srcq+ssq*1]
-    movu                 m2, [srcq+ssq*2]
-    movu                 m3, [srcq+t1   ]
-    lea                srcq, [srcq+ssq*4]
+    lea                srcq, [srcq+ssq*2]
     mova       [dstq+dsq*0], m0
     mova       [dstq+dsq*1], m1
-    mova       [dstq+dsq*2], m2
-    mova       [dstq+t2   ], m3
-    lea                dstq, [dstq+dsq*4]
-    sub                  hd, 4
+    lea                dstq, [dstq+dsq*2]
+    sub                  hd, 2
     jg .put_w16
     RET
 INIT_YMM avx2
@@ -221,15 +215,11 @@
 .put_w32:
     movu                 m0, [srcq+ssq*0]
     movu                 m1, [srcq+ssq*1]
-    movu                 m2, [srcq+ssq*2]
-    movu                 m3, [srcq+t1   ]
-    lea                srcq, [srcq+ssq*4]
+    lea                srcq, [srcq+ssq*2]
     mova       [dstq+dsq*0], m0
     mova       [dstq+dsq*1], m1
-    mova       [dstq+dsq*2], m2
-    mova       [dstq+t2   ], m3
-    lea                dstq, [dstq+dsq*4]
-    sub                  hd, 4
+    lea                dstq, [dstq+dsq*2]
+    sub                  hd, 2
     jg .put_w32
     RET
 .put_w64:
--- a/src/x86/mc_ssse3.asm
+++ b/src/x86/mc_ssse3.asm
@@ -177,7 +177,6 @@
 .put:
     movzx                wd, word [t0+wq*2+table_offset(put,)]
     add                  wq, t0
-    lea                  r6, [ssq*3]
     RESTORE_DSQ_32       t0
     jmp                  wq
 .put_w2:
@@ -211,20 +210,14 @@
     jg .put_w8
     RET
 .put_w16:
-    lea                  r4, [dsq*3]
-.put_w16_in:
     movu                 m0, [srcq+ssq*0]
     movu                 m1, [srcq+ssq*1]
-    movu                 m2, [srcq+ssq*2]
-    movu                 m3, [srcq+r6   ]
-    lea                srcq, [srcq+ssq*4]
+    lea                srcq, [srcq+ssq*2]
     mova       [dstq+dsq*0], m0
     mova       [dstq+dsq*1], m1
-    mova       [dstq+dsq*2], m2
-    mova       [dstq+r4   ], m3
-    lea                dstq, [dstq+dsq*4]
-    sub                  hd, 4
-    jg .put_w16_in
+    lea                dstq, [dstq+dsq*2]
+    sub                  hd, 2
+    jg .put_w16
     RET
 .put_w32:
     movu                 m0, [srcq+ssq*0+16*0]