shithub: dav1d

Download patch

ref: 36f76f424f410f1d0427a9df55a34d8485e2ec8e
parent: 502e5b92ecb00d6993440ee82afb3c34657a8c7f
author: Ronald S. Bultje <[email protected]>
date: Mon Dec 17 04:06:20 EST 2018

Remove carriage return characters

--- a/include/compat/msvc/stdatomic.h
+++ b/include/compat/msvc/stdatomic.h
@@ -23,10 +23,10 @@
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
-
-#ifndef MSCVER_STDATOMIC_H_
-#define MSCVER_STDATOMIC_H_
-
+
+#ifndef MSCVER_STDATOMIC_H_
+#define MSCVER_STDATOMIC_H_
+
 #if !defined(__cplusplus) && defined(_MSC_VER)
 
 #pragma warning(push)
@@ -37,34 +37,34 @@
 #  include_next <stdatomic.h>
 #else /* ! stdatomic.h */
 
-#include <windows.h>
-
-#include "common/attributes.h"
-
-typedef volatile LONG  __declspec(align(32)) atomic_int;
-typedef volatile ULONG __declspec(align(32)) atomic_uint;
-
-typedef enum {
-    memory_order_relaxed,
-    memory_order_acquire
-} msvc_atomic_memory_order;
-
-#define atomic_init(p_a, v)           do { *(p_a) = (v); } while(0)
-#define atomic_store(p_a, v)          InterlockedExchange((LONG*)p_a, v)
-#define atomic_load(p_a)              InterlockedCompareExchange((LONG*)p_a, 0, 0)
-#define atomic_load_explicit(p_a, mo) atomic_load(p_a)
-
-/*
- * TODO use a special call to increment/decrement
- * using InterlockedIncrement/InterlockedDecrement
- */
-#define atomic_fetch_add(p_a, inc)    InterlockedExchangeAdd(p_a, inc)
-#define atomic_fetch_sub(p_a, dec)    InterlockedExchangeAdd(p_a, -(dec))
-
-#endif /* ! stdatomic.h */
-
-#pragma warning(pop)
-
-#endif /* !defined(__cplusplus) && defined(_MSC_VER) */
-
-#endif /* MSCVER_STDATOMIC_H_ */
+#include <windows.h>
+
+#include "common/attributes.h"
+
+typedef volatile LONG  __declspec(align(32)) atomic_int;
+typedef volatile ULONG __declspec(align(32)) atomic_uint;
+
+typedef enum {
+    memory_order_relaxed,
+    memory_order_acquire
+} msvc_atomic_memory_order;
+
+#define atomic_init(p_a, v)           do { *(p_a) = (v); } while(0)
+#define atomic_store(p_a, v)          InterlockedExchange((LONG*)p_a, v)
+#define atomic_load(p_a)              InterlockedCompareExchange((LONG*)p_a, 0, 0)
+#define atomic_load_explicit(p_a, mo) atomic_load(p_a)
+
+/*
+ * TODO use a special call to increment/decrement
+ * using InterlockedIncrement/InterlockedDecrement
+ */
+#define atomic_fetch_add(p_a, inc)    InterlockedExchangeAdd(p_a, inc)
+#define atomic_fetch_sub(p_a, dec)    InterlockedExchangeAdd(p_a, -(dec))
+
+#endif /* ! stdatomic.h */
+
+#pragma warning(pop)
+
+#endif /* !defined(__cplusplus) && defined(_MSC_VER) */
+
+#endif /* MSCVER_STDATOMIC_H_ */
--- a/src/x86/itx_ssse3.asm
+++ b/src/x86/itx_ssse3.asm
@@ -31,76 +31,76 @@
 
 deint_shuf: db  0,  1,  4,  5,  8,  9, 12, 13,  2,  3,  6,  7, 10, 11, 14, 15
 
-qw_2896x8:      times 8 dw  2896*8
-qw_1567_m3784:  times 4 dw  1567, -3784
-qw_3784_1567:   times 4 dw  3784,  1567
-
-qw_1321_3803:   times 4 dw  1321,  3803
-qw_2482_m1321:  times 4 dw  2482, -1321
-qw_3344_2482:   times 4 dw  3344,  2482
-qw_3344_m3803:  times 4 dw  3344, -3803
-qw_m6688_m3803: times 4 dw -6688, -3803
-qw_3344x8:      times 8 dw  3344*8
-qw_5793x4:      times 8 dw  5793*4
-
-pd_2048:        times 4 dd  2048
-qw_2048:        times 8 dw  2048
-
+qw_2896x8:      times 8 dw  2896*8
+qw_1567_m3784:  times 4 dw  1567, -3784
+qw_3784_1567:   times 4 dw  3784,  1567
+
+qw_1321_3803:   times 4 dw  1321,  3803
+qw_2482_m1321:  times 4 dw  2482, -1321
+qw_3344_2482:   times 4 dw  3344,  2482
+qw_3344_m3803:  times 4 dw  3344, -3803
+qw_m6688_m3803: times 4 dw -6688, -3803
+qw_3344x8:      times 8 dw  3344*8
+qw_5793x4:      times 8 dw  5793*4
+
+pd_2048:        times 4 dd  2048
+qw_2048:        times 8 dw  2048
+
 iadst4_dconly1a: times 2 dw 10568, 19856, 26752, 30424
-iadst4_dconly1b: times 2 dw 30424, 26752, 19856, 10568
+iadst4_dconly1b: times 2 dw 30424, 26752, 19856, 10568
 iadst4_dconly2a: dw 10568, 10568, 10568, 10568, 19856, 19856, 19856, 19856
-iadst4_dconly2b: dw 26752, 26752, 26752, 26752, 30424, 30424, 30424, 30424
-
-SECTION .text
-
-%define m(x) mangle(private_prefix %+ _ %+ x %+ SUFFIX)
-
-%macro ITX4_END 4-5 2048 ; row[1-4], rnd
-%if %5
-    mova                 m2, [qw_%5]
-    pmulhrsw             m0, m2
-    pmulhrsw             m1, m2
-%endif
-    lea                  r2, [dstq+strideq*2]
-%assign %%i 1
-%rep 4
-    %if %1 & 2
-        CAT_XDEFINE %%row_adr, %%i, r2   + strideq*(%1&1)
-    %else
-        CAT_XDEFINE %%row_adr, %%i, dstq + strideq*(%1&1)
-    %endif
-    %assign %%i %%i + 1
-    %rotate 1
-%endrep
-
-    movd                 m2, [%%row_adr1]       ;dst0
-    movd                 m4, [%%row_adr2]       ;dst1
-    punpckldq            m2, m4                 ;high: dst1 :low: dst0
-    movd                 m3, [%%row_adr3]       ;dst2
-    movd                 m4, [%%row_adr4]       ;dst3
-    punpckldq            m3, m4                 ;high: dst3 :low: dst2
-
-    pxor                 m4, m4
-    punpcklbw            m2, m4                 ;extend byte to word
-    punpcklbw            m3, m4                 ;extend byte to word
-
-    paddw                m0, m2                 ;high: dst1 + out1 ;low: dst0 + out0
-    paddw                m1, m3                 ;high: dst3 + out3 ;low: dst2 + out2
-
-    packuswb             m0, m1                 ;high->low: dst3 + out3, dst2 + out2, dst1 + out1, dst0 + out0
-
-    movd       [%%row_adr1], m0                 ;store dst0 + out0
-    pshuflw              m1, m0, q1032
-    movd       [%%row_adr2], m1                 ;store dst1 + out1
-    punpckhqdq           m0, m0
-    movd       [%%row_adr3], m0                 ;store dst2 + out2
-    psrlq                m0, 32
-    movd       [%%row_adr4], m0                 ;store dst3 + out3
-
-    ret
-%endmacro
-
-
+iadst4_dconly2b: dw 26752, 26752, 26752, 26752, 30424, 30424, 30424, 30424
+
+SECTION .text
+
+%define m(x) mangle(private_prefix %+ _ %+ x %+ SUFFIX)
+
+%macro ITX4_END 4-5 2048 ; row[1-4], rnd
+%if %5
+    mova                 m2, [qw_%5]
+    pmulhrsw             m0, m2
+    pmulhrsw             m1, m2
+%endif
+    lea                  r2, [dstq+strideq*2]
+%assign %%i 1
+%rep 4
+    %if %1 & 2
+        CAT_XDEFINE %%row_adr, %%i, r2   + strideq*(%1&1)
+    %else
+        CAT_XDEFINE %%row_adr, %%i, dstq + strideq*(%1&1)
+    %endif
+    %assign %%i %%i + 1
+    %rotate 1
+%endrep
+
+    movd                 m2, [%%row_adr1]       ;dst0
+    movd                 m4, [%%row_adr2]       ;dst1
+    punpckldq            m2, m4                 ;high: dst1 :low: dst0
+    movd                 m3, [%%row_adr3]       ;dst2
+    movd                 m4, [%%row_adr4]       ;dst3
+    punpckldq            m3, m4                 ;high: dst3 :low: dst2
+
+    pxor                 m4, m4
+    punpcklbw            m2, m4                 ;extend byte to word
+    punpcklbw            m3, m4                 ;extend byte to word
+
+    paddw                m0, m2                 ;high: dst1 + out1 ;low: dst0 + out0
+    paddw                m1, m3                 ;high: dst3 + out3 ;low: dst2 + out2
+
+    packuswb             m0, m1                 ;high->low: dst3 + out3, dst2 + out2, dst1 + out1, dst0 + out0
+
+    movd       [%%row_adr1], m0                 ;store dst0 + out0
+    pshuflw              m1, m0, q1032
+    movd       [%%row_adr2], m1                 ;store dst1 + out1
+    punpckhqdq           m0, m0
+    movd       [%%row_adr3], m0                 ;store dst2 + out2
+    psrlq                m0, 32
+    movd       [%%row_adr4], m0                 ;store dst3 + out3
+
+    ret
+%endmacro
+
+
 ; flags: 1 = swap, 2: coef_regs
 %macro ITX_MUL2X_PACK 5-6 0 ; dst/src, tmp[1], rnd, coef[1-2], flags
 %if %6 & 2
@@ -118,27 +118,27 @@
     psrad                m%2, 12
     psrad                m%1, 12
     packssdw             m%1, m%2
-%endmacro
-
-%macro IDCT4_1D_PACKED 0-1   ;qw_2896x8
-    punpckhwd            m2, m0, m1           ;unpacked in1 in3
-    psubw                m3, m0, m1
-    paddw                m0, m1
-    punpcklqdq           m0, m3               ;high: in0-in2 ;low: in0+in2
-
-    mova                 m3, [pd_2048]
-    ITX_MUL2X_PACK 2, 1, 3, 1567, 3784
-
-%if %0 == 1
-    pmulhrsw             m0, m%1
-%else
-    pmulhrsw             m0, [qw_2896x8]     ;high: t1 ;low: t0
-%endif
-
+%endmacro
+
+%macro IDCT4_1D_PACKED 0-1   ;qw_2896x8
+    punpckhwd            m2, m0, m1           ;unpacked in1 in3
+    psubw                m3, m0, m1
+    paddw                m0, m1
+    punpcklqdq           m0, m3               ;high: in0-in2 ;low: in0+in2
+
+    mova                 m3, [pd_2048]
+    ITX_MUL2X_PACK 2, 1, 3, 1567, 3784
+
+%if %0 == 1
+    pmulhrsw             m0, m%1
+%else
+    pmulhrsw             m0, [qw_2896x8]     ;high: t1 ;low: t0
+%endif
+
     psubsw               m1, m0, m2          ;high: out2 ;low: out3
     paddsw               m0, m2              ;high: out1 ;low: out0
-%endmacro
-
+%endmacro
+
 %macro IADST4_1D_PACKED 0
     punpcklwd            m2, m0, m1                ;unpacked in0 in2
     punpckhwd            m3, m0, m1                ;unpacked in1 in3
@@ -166,27 +166,27 @@
     psrad                m2, 12                    ;out3
     packssdw             m0, m5                    ;high: out1 ;low: out0
     packssdw             m2, m2                    ;high: out3 ;low: out3
-%endmacro
-
-%macro INV_TXFM_FN 4 ; type1, type2, fast_thresh, size
-cglobal inv_txfm_add_%1_%2_%4, 4, 5, 0, dst, stride, coeff, eob, tx2
-    %undef cmp
-    lea tx2q, [m(i%2_%4_internal).pass2]
-%if %3 > 0
-    cmp                  eobd, %3
-    jle %%end
-%elif %3 == 0
-    test                 eobd, eobd
-    jz %%end
-%endif
+%endmacro
+
+%macro INV_TXFM_FN 4 ; type1, type2, fast_thresh, size
+cglobal inv_txfm_add_%1_%2_%4, 4, 5, 0, dst, stride, coeff, eob, tx2
+    %undef cmp
+    lea tx2q, [m(i%2_%4_internal).pass2]
+%if %3 > 0
+    cmp                  eobd, %3
+    jle %%end
+%elif %3 == 0
+    test                 eobd, eobd
+    jz %%end
+%endif
     call i%1_%4_internal
-    RET
-ALIGN function_align
-%%end:
-%endmacro
-
-%macro INV_TXFM_4X4_FN 2-3 -1 ; type1, type2, fast_thresh
-    INV_TXFM_FN          %1, %2, %3, 4x4
+    RET
+ALIGN function_align
+%%end:
+%endmacro
+
+%macro INV_TXFM_4X4_FN 2-3 -1 ; type1, type2, fast_thresh
+    INV_TXFM_FN          %1, %2, %3, 4x4
 %ifidn %1_%2, dct_identity
     mova                 m0, [qw_2896x8]
     pmulhrsw             m0, [coeffq]
@@ -238,35 +238,35 @@
     RET
 %endif
 %endif
-%endmacro
-
-
-INIT_XMM ssse3
-
-cglobal idct_4x4_internal, 0, 0, 4, dst, stride, coeff, eob, tx2
-    mova                 m0, [coeffq+16*0]      ;high: in1 ;low: in0
-    mova                 m1, [coeffq+16*1]      ;high: in3 ;low in2
-
-    IDCT4_1D_PACKED
-
-    mova                 m2, [deint_shuf]
-    shufps               m3, m0, m1, q1331
-    shufps               m0, m1, q0220
-    pshufb               m0, m2                 ;high: in1 ;low: in0
-    pshufb               m1, m3, m2             ;high: in3 ;low :in2
+%endmacro
+
+
+INIT_XMM ssse3
+
+cglobal idct_4x4_internal, 0, 0, 4, dst, stride, coeff, eob, tx2
+    mova                 m0, [coeffq+16*0]      ;high: in1 ;low: in0
+    mova                 m1, [coeffq+16*1]      ;high: in3 ;low in2
+
+    IDCT4_1D_PACKED
+
+    mova                 m2, [deint_shuf]
+    shufps               m3, m0, m1, q1331
+    shufps               m0, m1, q0220
+    pshufb               m0, m2                 ;high: in1 ;low: in0
+    pshufb               m1, m3, m2             ;high: in3 ;low :in2
     jmp                tx2q
 
-.pass2:
-    IDCT4_1D_PACKED
-
-    pxor                 m2, m2
-    mova      [coeffq+16*0], m2
-    mova      [coeffq+16*1], m2                 ;memset(coeff, 0, sizeof(*coeff) * sh * sw);
-
-    ITX4_END     0, 1, 3, 2
-
-INV_TXFM_4X4_FN dct, dct, 0
-
+.pass2:
+    IDCT4_1D_PACKED
+
+    pxor                 m2, m2
+    mova      [coeffq+16*0], m2
+    mova      [coeffq+16*1], m2                 ;memset(coeff, 0, sizeof(*coeff) * sh * sw);
+
+    ITX4_END     0, 1, 3, 2
+
+INV_TXFM_4X4_FN dct, dct, 0
+
 cglobal iadst_4x4_internal, 0, 0, 6, dst, stride, coeff, eob, tx2
     mova                 m0, [coeffq+16*0]
     mova                 m1, [coeffq+16*1]
@@ -391,4 +391,4 @@
     IWHT4_1D_PACKED
 
     shufpd               m0, m2, 0x01
-    ITX4_END              0, 3, 2, 1, 0
+    ITX4_END              0, 3, 2, 1, 0