shithub: dav1d

Download patch

ref: d5d560ff367f111e0fd62664e9473643d2afbaff
parent: 2989cb3f79e34d561dd2311532b1cb9b182e1170
author: Sigrid Haflínudóttir <[email protected]>
date: Sat Sep 5 12:58:14 EDT 2020

fix atomics and a few other things to make decoding work

--- a/src/av19.c
+++ b/src/av19.c
@@ -1,14 +1,48 @@
 #include "dav1d.h"
 #include "tools/input/input.h"
+#include <draw.h>
+#include <memdraw.h>
+#include <keyboard.h>
 
-int mainstacksize = 65536*8;
+int mainstacksize = 512*1024;
 
+static uchar *buf;
+static int nfr;
+
+/* First call (*oim == nil): allocate a w×h RGB24 image and a 3-byte-per-pixel buf. Returns 0, or -1 if either allocation fails. */
+static int
+dav1d_loadimage(Image **oim, Dav1dPicture *p)
+{
+	int w, h;
+
+	w = p->p.w;
+	h = p->p.h;
+	if(*oim == nil){
+		*oim = allocimage(display, Rect(0,0,w,h), RGB24, 0, 0);
+		if(*oim != nil && (buf = malloc(w * h * 3)) == nil){
+			freeimage(*oim);	/* no buffer: drop the image so a later call retries both */
+			*oim = nil;
+		}
+		if(*oim == nil)
+			return -1;
+	}
+	/* FIXME convert YUV → RGB24 */
+	return 0;
+}
+
+static int
+info(void *, char *)	/* registered via threadnotify() in threadmain; both arguments are ignored */
+{
+	print("frames: %d\n", nfr);	/* nfr is the running count of decoded pictures */
+	return 0;	/* NOTE(review): presumably 0 means "note not handled" per thread(2) — confirm */
+}
+
 void
 threadmain(int argc, char **argv)
 {
 	Dav1dSettings av1s;
 	Dav1dContext *c;
-	Dav1dPicture p;
+	Dav1dPicture *p;
 	Dav1dData data;
 	DemuxerContext *dc;
 	unsigned fps[2], timebase[2], total;
@@ -17,30 +51,35 @@
 	if(argc != 2)
 		sysfatal("usage");
 
-	dav1d_default_settings(&av1s);
-	c = nil;
 	if(input_open(&dc, "ivf", argv[1], fps, &total, timebase) < 0)
 		sysfatal("input_open");
-	av1s.n_frame_threads = 1;
-	av1s.n_tile_threads = 1;
-	if(dav1d_open(&c, &av1s) != 0)
-		sysfatal("dav1d_open");
-
 	if(input_read(dc, &data) < 0)
 		sysfatal("input_read");
 
+	dav1d_default_settings(&av1s);
+	av1s.n_frame_threads = 1; // FIXME threads
+	av1s.n_tile_threads = 1; // FIXME threads
+
+	if(dav1d_open(&c, &av1s) != 0)
+		sysfatal("dav1d_open");
+
+	threadnotify(info, 1);
+	nfr = 0;
 	do{
-	    fprint(2, "data.sz: %zd\n", data.sz);
-		memset(&p, 0, sizeof(p));
 		res = dav1d_send_data(c, &data);
 		if(res < 0 && res != DAV1D_ERR(EAGAIN))
 			sysfatal("dav1d_send_data: %d", res);
-		fprint(2, "dav1d_send_data: %d\n", res);
-		if((res = dav1d_get_picture(c, &p)) < 0){
-			if(res != DAV1D_ERR(EAGAIN))
-				sysfatal("dav1d_get_picture");
+		else{
+			if((p = calloc(1, sizeof(*p))) == nil)
+				sysfatal("calloc: %r");
+			if((res = dav1d_get_picture(c, p)) >= 0){
+				dav1d_picture_unref(p);
+				nfr++;
+			}else if(res != DAV1D_ERR(EAGAIN))
+				sysfatal("dav1d_get_picture");
+			/* p is per-iteration scratch: free it whether or not a frame came out */
+			free(p);
 		}
-		fprint(2, "dav1d_get_picture: %d\n", res);
 	}while(data.sz > 0 || input_read(dc, &data) == 0);
 
 	if(data.sz > 0)
--- a/src/cdef.h
+++ b/src/cdef.h
@@ -60,7 +60,7 @@
 int (name)(const pixel *dst, ptrdiff_t dst_stride, unsigned *var HIGHBD_DECL_SUFFIX)
 typedef decl_cdef_dir_fn(*cdef_dir_fn);
 
-typedef struct  {
+typedef struct Dav1dCdefDSPContext {
     cdef_dir_fn dir;
     cdef_fn fb[3 /* 444/luma, 422, 420 */];
 } Dav1dCdefDSPContext;
--- a/src/decode.c
+++ b/src/decode.c
@@ -700,7 +700,6 @@
                     const enum BlockPartition bp,
                     const enum EdgeFlags intra_edge_flags)
 {
-	static int decode_b_index = 0;
     Dav1dTileState *const ts = t->ts;
     const Dav1dFrameContext *const f = t->f;
     Av1Block b_mem, *const b = f->frame_thread.pass ?
--- a/src/levels.h
+++ b/src/levels.h
@@ -243,6 +243,7 @@
     INTER_INTRA_WEDGE,
 };
 
+#pragma pack on
 typedef union mv {
     struct {
         int16_t y, x;
@@ -249,6 +250,7 @@
     };
     uint32_t n;
 } mv;
+#pragma pack off
 
 enum MotionMode {
     MM_TRANSLATION,
--- a/src/lib.c
+++ b/src/lib.c
@@ -489,7 +489,7 @@
             f->frame_thread.die = 1;
             pthread_cond_signal(&f->frame_thread.td.cond);
             pthread_mutex_unlock(&f->frame_thread.td.lock);
-            pthread_join(&f->frame_thread.td.thread, NULL);
+            pthread_join(f->frame_thread.td.thread, NULL);
             freep(&f->frame_thread.b);
             dav1d_freep_aligned(&f->frame_thread.pal_idx);
             dav1d_freep_aligned(&f->frame_thread.cf);
@@ -516,7 +516,7 @@
             for (int m = 0; m < f->n_tc; m++) {
                 Dav1dTileContext *const t = &f->tc[m];
                 if (f->n_tc > 1 && t->tile_thread.td.inited) {
-                    pthread_join(&t->tile_thread.td.thread, NULL);
+                    pthread_join(t->tile_thread.td.thread, NULL);
                     pthread_mutex_destroy(&t->tile_thread.td.lock);
                     pthread_cond_destroy(&t->tile_thread.td.cond);
                 }
--- a/src/mkfile
+++ b/src/mkfile
@@ -1,6 +1,6 @@
 </$objtype/mkfile
 
-CFLAGS=$CFLAGS -I../include/dav1d -I.. -I../include -Iplan9 -p -D__plan9__ -DBITDEPTH=8
+CFLAGS=$CFLAGS -DNDEBUG -I../include/dav1d -I.. -I../include -Iplan9 -p -D__plan9__ -DBITDEPTH=8
 
 HFILES=\
 
--- a/src/mkfile.lib
+++ b/src/mkfile.lib
@@ -1,13 +1,18 @@
 </$objtype/mkfile
 
 LIB=/$objtype/lib/libdav1d.a
-CFLAGS=$CFLAGS -I.. -I../include -I../include/dav1d -Iplan9 -p -D__plan9__
 
+CFLAGS=$CFLAGS -DNDEBUG -I../include/dav1d -I.. -I../include -Iplan9 -p -D__plan9__ -DBITDEPTH=8
+
 HFILES=\
 
 OFILES=\
+	annexb.$O\
+	av19.$O\
 	cdef_apply_tmpl.$O\
+	cdef_apply_tmpl16.$O\
 	cdef_tmpl.$O\
+	cdef_tmpl16.$O\
 	cdf.$O\
 	cpu.$O\
 	data.$O\
@@ -14,36 +19,54 @@
 	decode.$O\
 	dequant_tables.$O\
 	fg_apply_tmpl.$O\
+	fg_apply_tmpl16.$O\
 	film_grain_tmpl.$O\
+	film_grain_tmpl16.$O\
 	getbits.$O\
+	input.$O\
 	intra_edge.$O\
 	ipred_prepare_tmpl.$O\
+	ipred_prepare_tmpl16.$O\
 	ipred_tmpl.$O\
+	ipred_tmpl16.$O\
 	itx_1d.$O\
 	itx_tmpl.$O\
+	itx_tmpl16.$O\
+	ivf.$O\
 	lf_apply_tmpl.$O\
+	lf_apply_tmpl16.$O\
 	lf_mask.$O\
 	lib.$O\
 	log.$O\
 	loopfilter_tmpl.$O\
+	loopfilter_tmpl16.$O\
 	looprestoration_tmpl.$O\
+	looprestoration_tmpl16.$O\
 	lr_apply_tmpl.$O\
+	lr_apply_tmpl16.$O\
 	mc_tmpl.$O\
+	mc_tmpl16.$O\
 	msac.$O\
 	obu.$O\
 	picture.$O\
+	plan9_builtins.$O\
+	plan9_thread.$O\
 	qm.$O\
 	recon_tmpl.$O\
+	recon_tmpl16.$O\
 	ref.$O\
 	refmvs.$O\
 	scan.$O\
+	section5.$O\
 	tables.$O\
 	thread_task.$O\
 	warpmv.$O\
 	wedge.$O\
-	plan9_thread.$O\
 
 /sys/include/%.h:
 	cp ../include/$stem.h /sys/include/$stem.h
 
 </sys/src/cmd/mksyslib
+
+%16.$O: %.c
+	$CC $CFLAGS '-DBITDEPTH=16' -o $target $prereq
--- a/src/obu.c
+++ b/src/obu.c
@@ -548,9 +548,8 @@
             }
         }
         for (int i = 0; i < 7; i++) {
-            if (!hdr->frame_ref_short_signaling){
+            if (!hdr->frame_ref_short_signaling)
                 hdr->refidx[i] = dav1d_get_bits(gb, 3);
-            }
             if (seqhdr->frame_id_numbers_present)
                 dav1d_get_bits(gb, seqhdr->delta_frame_id_n_bits);
         }
@@ -920,7 +919,7 @@
         const unsigned poc = hdr->frame_offset;
         unsigned off_before = 0xFFFFFFFFU;
         int off_after = -1;
-        int off_before_idx = 0, off_after_idx = 0;;
+        int off_before_idx = 0, off_after_idx = 0;
         for (int i = 0; i < 7; i++) {
             if (!c->refs[hdr->refidx[i]].p.p.data[0]) return DAV1D_ERR(EINVAL);
             const unsigned refpoc = c->refs[hdr->refidx[i]].p.p.frame_hdr->frame_offset;
--- a/src/picture.c
+++ b/src/picture.c
@@ -31,15 +31,6 @@
 #include <stdint.h>
 #ifndef __plan9__
 #include <stdio.h>
-#else
-#define errno -1
-static char *
-strerror(int)
-{
-	static char e[ERRMAX];
-	errstr(e, sizeof(e));
-	return e;
-}
 #endif
 #include <stdlib.h>
 #include <string.h>
@@ -160,7 +151,11 @@
     if (!(p->ref = dav1d_ref_wrap(p->data[0], free_buffer, pic_ctx))) {
         p_allocator->release_picture_callback(p, p_allocator->cookie);
         free(pic_ctx);
+#ifndef __plan9__
         dav1d_log(c, "Failed to wrap picture: %s\n", strerror(errno));
+#else
+        dav1d_log(c, "Failed to wrap picture\n");
+#endif
         return DAV1D_ERR(ENOMEM);
     }
 
--- a/src/plan9/plan9.h
+++ b/src/plan9/plan9.h
@@ -11,12 +11,12 @@
 #include <stdio.h>
 #include </sys/include/thread.h>
 
-#define EINVAL -1
-#define ENOMEM -2
-#define ENOPROTOOPT -3
-#define ERANGE -4
-#define EAGAIN -5
-#define EIO -6
+#define EINVAL -15
+#define ENOMEM -16
+#define ENOPROTOOPT -17
+#define ERANGE -18
+#define EAGAIN -19
+#define EIO -20
 
 typedef s8int int8_t;
 typedef u8int uint8_t;
@@ -24,14 +24,14 @@
 typedef u16int uint16_t;
 typedef s32int int32_t;
 typedef u32int uint32_t;
-typedef s64int int64_t;
-typedef u64int uint64_t;
-typedef uintptr size_t;
-typedef intptr ptrdiff_t;
-typedef intptr intptr_t;
+typedef vlong int64_t;
+typedef uvlong uint64_t;
+typedef uvlong size_t;
+typedef vlong ptrdiff_t;
+typedef vlong intptr_t;
 typedef uintptr uintptr_t;
 typedef long atomic_int;
-typedef ulong atomic_uint;
+typedef long atomic_uint;
 
 #define __func__ "no."
 #define __attribute__(a)
@@ -41,18 +41,19 @@
 #define INT_MIN (-INT_MAX-1)
 #define INT16_MIN ((int16_t)0x8000)
 #define INT16_MAX 0x7fff
-#define INT64_MIN ((int64_t)0x8000000000000000LL)
+#define INT64_MIN ((int64_t)0x8000000000000000ULL)
 #define SHRT_MAX 0x7fff
 #define SHRT_MIN (-SHRT_MAX-1)
-#define PRIu64 "llu"
+#define PRIu64 "zu"
 
 #define memory_order_acquire 0
+#define memory_order_relaxed 1
 #define atomic_init(p,v) do { *(p) = (v); } while(0)
 #define atomic_store(p,v) do { *(p) = (v); } while(0)
 #define atomic_load(p) *(p)
 #define atomic_load_explicit(p,o) *(p)
-#define atomic_fetch_add(p,i) (*(p) += i)
-#define atomic_fetch_sub(p,i) (*(p) -= i)
+#define atomic_fetch_add(i, v) (ainc(i)-1) /* v is ignored, ainc() is assumed to step by exactly 1: only valid for v == 1 */
+#define atomic_fetch_sub(i, v) (adec(i)+1) /* v is ignored, adec() is assumed to step by exactly 1: only valid for v == 1 */
 
 int __builtin_ctz(unsigned int x);
 int __builtin_clz(unsigned int x);
@@ -61,6 +62,6 @@
 #define _aligned_malloc(sz, align) mallocalign(sz, align, 0, 0)
 #define _aligned_free(p) free(p)
 
-#define llabs(a) ((vlong)(a)<0?-(vlong)(a):(vlong)(a))
+#define llabs(a) ((a)<0?-(a):(a))
 
 #endif
--- a/src/plan9_builtins.c
+++ b/src/plan9_builtins.c
@@ -1,3 +1,5 @@
+/* FIXME bring back amd64 assembly for this */
+
 int
 __builtin_ctz(unsigned int x)
 {
--- a/src/plan9_thread.c
+++ b/src/plan9_thread.c
@@ -26,7 +26,7 @@
 {
 	uint stack;
 
-	stack = attr->stack_size > 65536 ? attr->stack_size : 65536;
+	stack = attr->stack_size > mainstacksize ? attr->stack_size : mainstacksize;
 	thread->waitchan = chancreate(sizeof(void*), 0);
 	thread->func = func;
 	thread->arg = arg;
@@ -36,7 +36,7 @@
 }
 
 int
-pthread_join(pthread_t *thread, void **res)
+dav1d_pthread_join(pthread_t *thread, void **res)
 {
 	// FIXME this is wrong ofc
 	if (thread->waitchan != nil) {
--- a/src/thread.h
+++ b/src/thread.h
@@ -165,9 +165,10 @@
 } pthread_attr_t;
 
 void dav1d_set_thread_name(const char *const name);
+int dav1d_pthread_join(pthread_t *thread, void **res);
 int pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*func)(void*), void *arg);
-int pthread_join(pthread_t *thread, void **res);
 int pthread_once(pthread_once_t *once_control, void (*init_routine)(void));
+#define pthread_join(thread, res) dav1d_pthread_join(&(thread), res)
 
 static int pthread_attr_init(pthread_attr_t *const attr) {
     attr->stack_size = 0;
--- a/tests/checkasm/cdef.c
+++ b/tests/checkasm/cdef.c
@@ -27,8 +27,10 @@
 
 #include "tests/checkasm/checkasm.h"
 
+#ifndef __plan9__
 #include <string.h>
 #include <stdio.h>
+#endif
 
 #include "common/dump.h"
 
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -26,14 +26,36 @@
  */
 #include "tests/checkasm/checkasm.h"
 
+#ifndef __plan9__
 #include <math.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <string.h>
+#endif
 
 #include "src/cpu.h"
 
-#ifdef _WIN32
+#ifdef __plan9__	/* stand-ins for the libc/POSIX names checkasm.c uses */
+#define fabsf fabs
+#define exit(x) exits((x) ? "error" : nil)	/* argument parenthesised so any expression is safe */
+#define isatty(x) 0
+#define SIGILL 0
+#define SIGFPE 0
+#define SIG_DFL 0
+#define SIGBUS 0
+#define SIGSEGV 0
+#define COLOR_RED    1
+#define COLOR_GREEN  2
+#define COLOR_YELLOW 3
+static unsigned get_seed(void) {
+	return time(0);	/* seed from the current time */
+}
+void checkasm_simd_warmup(void);
+void checkasm_warmup_avx2(void) {}	/* empty stub */
+void checkasm_warmup_avx512(void) {}	/* empty stub */
+static void signal(int x, void(*h)(int)){USED(x); USED(h); }	/* no-op: handlers are never installed */
+
+#elif defined(_WIN32)
 #include <windows.h>
 #define COLOR_RED    FOREGROUND_RED
 #define COLOR_GREEN  FOREGROUND_GREEN
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -28,6 +28,8 @@
 #ifndef DAV1D_TESTS_CHECKASM_CHECKASM_H
 #define DAV1D_TESTS_CHECKASM_CHECKASM_H
 
+#pragma lib "libdav1d.a"
+
 #include "config.h"
 
 #include <stdint.h>
@@ -41,7 +43,9 @@
 #define checkasm_save_context() RtlCaptureContext(&checkasm_context_buf)
 #define checkasm_load_context() RtlRestoreContext(&checkasm_context_buf, NULL)
 #else
+#ifndef __plan9__
 #include <setjmp.h>
+#endif
 #define checkasm_context jmp_buf
 #define checkasm_save_context() setjmp(checkasm_context_buf)
 #define checkasm_load_context() longjmp(checkasm_context_buf, 1)
--- a/tests/checkasm/ipred.c
+++ b/tests/checkasm/ipred.c
@@ -29,7 +29,9 @@
 #include "src/ipred.h"
 #include "src/levels.h"
 
+#ifndef __plan9__
 #include <stdio.h>
+#endif
 
 static const char *const intra_pred_mode_names[N_IMPL_INTRA_PRED_MODES] = {
     [DC_PRED]       = "dc",
--- a/tests/checkasm/itx.c
+++ b/tests/checkasm/itx.c
@@ -27,7 +27,9 @@
 
 #include "tests/checkasm/checkasm.h"
 
+#ifndef __plan9__
 #include <math.h>
+#endif
 
 #include "src/itx.h"
 #include "src/levels.h"
--- /dev/null
+++ b/tests/checkasm/mkfile
@@ -1,0 +1,28 @@
+</$objtype/mkfile
+
+CFLAGS=$CFLAGS -I../../src/plan9 -I../.. -I../../include -I../../include/dav1d -p -D__plan9__ -DBITDEPTH=8
+
+HFILES=\
+
+OFILES=\
+	cdef.$O\
+	cdef16.$O\
+	checkasm.$O\
+	filmgrain.$O\
+	filmgrain16.$O\
+	ipred.$O\
+	ipred16.$O\
+	itx.$O\
+	itx16.$O\
+	loopfilter.$O\
+	loopfilter16.$O\
+	looprestoration.$O\
+	looprestoration16.$O\
+	mc.$O\
+	mc16.$O\
+	msac.$O\
+
+</sys/src/cmd/mkone
+
+%16.$O: %.c
+	$CC $CFLAGS '-DBITDEPTH=16' -o $target $prereq
--- a/tests/checkasm/msac.c
+++ b/tests/checkasm/msac.c
@@ -30,8 +30,10 @@
 #include "src/cpu.h"
 #include "src/msac.h"
 
+#ifndef __plan9__
 #include <stdio.h>
 #include <string.h>
+#endif
 
 #define BUF_SIZE 8192
 
--- a/tools/dav1d.c
+++ b/tools/dav1d.c
@@ -220,6 +220,7 @@
         nspf = (uint64_t)(1000000000.0 / cli_settings.realtime_fps);
     }
     tfirst = get_time_nanos();
+	dav1d_debug(c);
 
     do {
         memset(&p, 0, sizeof(p));