shithub: pdffs

Download patch

ref: a9516693e7142a658f4e3ea190272f4cd73b24be
parent: ef6cdd0d40612067504d1587aa4e64206fe1ea8a
author: Sigrid Haflínudóttir <[email protected]>
date: Sat Aug 29 12:29:56 EDT 2020

add and use flate filter

--- /dev/null
+++ b/buffer.c
@@ -1,0 +1,169 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "pdf.h"
+
+enum {
+	Bufmin = 128, /* initial allocation, in bytes */
+	Bufmax = 0x7fffffff, /* largest total size accepted (an int must hold it) */
+};
+
+/*
+ * Make sure there is room for sz more bytes after the b->sz bytes
+ * already stored.  The capacity starts at Bufmin and doubles until
+ * the request fits; newly allocated space is zeroed.  Returns 0 on
+ * success and -1 if sz is negative, if the total would exceed
+ * Bufmax, or if the allocation fails.
+ */
+static int
+bufgrow(Buffer *b, int sz)
+{
+	uchar *r;
+	int maxsz, need;
+
+	if(sz < 0 || sz > Bufmax - b->sz){
+		werrstr("buffer: bad size %d", sz);
+		return -1;
+	}
+	if(b->maxsz < 1){
+		if((b->b = mallocz(Bufmin, 1)) == nil)
+			return -1;
+		b->maxsz = Bufmin;
+	}
+	need = b->sz + sz;
+	for(maxsz = b->maxsz; maxsz < need; maxsz *= 2){
+		if(maxsz > Bufmax/2){ /* doubling again would overflow */
+			maxsz = need;
+			break;
+		}
+	}
+	if(maxsz != b->maxsz){
+		if((r = realloc(b->b, maxsz)) == nil)
+			return -1;
+		memset(r+b->maxsz, 0, maxsz-b->maxsz);
+		b->b = r;
+		b->maxsz = maxsz;
+	}
+
+	return 0;
+}
+
+/*
+ * Initialize b.  With d == nil the buffer starts out empty and is
+ * writable.  Otherwise it becomes a read-only view of the sz bytes
+ * at d, which buffree will not free.
+ */
+void
+bufinit(Buffer *b, uchar *d, int sz)
+{
+	memset(b, 0, sizeof(*b));
+	if(d != nil){
+		b->b = d;
+		b->sz = sz;
+		b->ro = 1;
+	}
+}
+
+/* Free the data of b unless it is a read-only view set up by bufinit. */
+void
+buffree(Buffer *b)
+{
+	if(b->ro == 0)
+		free(b->b);
+}
+
+/* Return non-zero once the read offset has reached the end of the data. */
+int
+bufeof(Buffer *b)
+{
+	return b->off == b->sz;
+}
+
+/* Return a pointer to the stored data and put its size in *sz. */
+uchar *
+bufdata(Buffer *b, int *sz)
+{
+	*sz = b->sz;
+	return b->b;
+}
+
+/*
+ * Append exactly sz bytes read from the Biobuf bio.  Returns 0 on
+ * success and -1 if the buffer cannot grow or Bread stops early;
+ * bytes read before the failure stay in the buffer.  Reading into a
+ * read-only buffer is fatal, as it is for bufput.
+ */
+int
+bufreadn(Buffer *b, void *bio, int sz)
+{
+	int n, end;
+
+	if(b->ro)
+		sysfatal("bufreadn on readonly buffer");
+	if(bufgrow(b, sz) != 0)
+		return -1;
+	for(end = b->sz+sz; b->sz < end; b->sz += n){
+		if((n = Bread(bio, b->b+b->sz, sz)) < 1)
+			return -1;
+		sz -= n;
+	}
+	return 0;
+}
+
+/*
+ * Append the sz bytes at d.  Returns sz, or -1 if the buffer
+ * cannot grow.  Writing to a read-only buffer is fatal.
+ */
+int
+bufput(Buffer *b, uchar *d, int sz)
+{
+	if(b->ro)
+		sysfatal("bufferput on readonly buffer");
+	if(bufgrow(b, sz) != 0)
+		return -1;
+
+	memmove(b->b+b->sz, d, sz);
+	b->sz += sz;
+
+	return sz;
+}
+
+/*
+ * Copy up to sz bytes from the read offset into d and advance the
+ * offset.  Returns the number of bytes copied, which is 0 once the
+ * data is exhausted; b->eof records whether that happened.
+ */
+int
+bufget(Buffer *b, uchar *d, int sz)
+{
+	if(sz == 0)
+		return 0;
+
+	if(b->off > b->sz)
+		sysfatal("buffer: off(%d) > sz(%d)", b->off, b->sz);
+	if(sz > b->sz - b->off)
+		sz = b->sz - b->off;
+	memmove(d, b->b+b->off, sz);
+	b->off += sz;
+	b->eof = sz == 0;
+
+	return sz;
+}
+
+/* Write a hex dump of the whole buffer, 16 bytes per line, to fd 2. */
+void
+bufdump(Buffer *b)
+{
+	Biobuf bio;
+	int i, j;
+
+	Binit(&bio, 2, OWRITE);
+	Bprint(&bio, "%d bytes:\n", b->sz);
+	for(i = 0; i < b->sz;){
+		Bprint(&bio, "%04x\t", i);
+		for(j = 0; i < b->sz && j < 16; j++, i++)
+			Bprint(&bio, "%02x%s", b->b[i], (j+1)&7 ? " " : "  ");
+		Bprint(&bio, "\n");
+	}
+	Bterm(&bio);
+}
--- a/dict.c
+++ b/dict.c
@@ -69,7 +69,7 @@
 {
 	int i;
 
-	if(o == nil || o->type != Odict || name == nil)
+	if(o == nil || (o->type != Ostream && o->type != Odict) || name == nil)
 		return nil;
 	for(i = 0; i < o->dict.nkv && strcmp(name, o->dict.kv[i].key) != 0; i++);
 
--- /dev/null
+++ b/f_flate.c
@@ -1,0 +1,52 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <flate.h>
+#include "pdf.h"
+
+/* Output callback for inflatezlib: append the n bytes at d to the Buffer aux. */
+static int
+bw(void *aux, void *d, int n)
+{
+	Buffer *out;
+
+	out = aux;
+	return bufput(out, d, n);
+}
+
+/* Input callback for inflatezlib: next byte of the Buffer aux, or -1 when none is left. */
+static int
+bget(void *aux)
+{
+	uchar c;
+
+	if(bufget(aux, &c, 1) != 1)
+		return -1;
+	return c;
+}
+
+/*
+ * FlateDecode: inflate the zlib data in bi into bo.  inflatezlib is
+ * called again for as long as input remains after a successful run.
+ * Returns 0 on success; on failure returns -1 with errstr set to the
+ * flate error text.  aux is unused.
+ */
+int
+fFlate(void *aux, Buffer *bi, Buffer *bo)
+{
+	int r;
+
+	USED(aux);
+
+	for(;;){
+		r = inflatezlib(bo, bw, bi, bget);
+		if(r != FlateOk){
+			werrstr("%s", flateerr(r));
+			return -1;
+		}
+		if(bufeof(bi))
+			break;
+	}
+
+	return 0;
+}
--- a/filter.c
+++ b/filter.c
@@ -3,17 +3,90 @@
 #include <bio.h>
 #include "pdf.h"
 
-/*
-7.4
+/* 7.4 Filters */
 
-ASCIIHex
-ASCII85
-LZW
-Flate
-RunLength
-CCITTFax
-JBIG2
-DCT
-JPX
-Crypt
-*/
+/*
+ * A stream filter.  filters[] holds one template per known filter;
+ * filteropen returns a heap copy of the matching template.
+ */
+struct Filter {
+	char *name; /* name as it appears in a stream's Filter entry */
+	int (*readall)(void *aux, Buffer *bi, Buffer *bo); /* decode bi into bo, 0 on success; nil if not implemented */
+	int (*open)(Filter *f, Object *o); /* optional setup hook, non-zero on failure */
+	void (*close)(Filter *f); /* optional cleanup hook */
+	void *aux; /* handed to readall */
+};
+
+int fFlate(void *aux, Buffer *bi, Buffer *bo);
+
+/* Names of the standard filters (7.4, table 6).  A nil readall means "known but not implemented". */
+static Filter filters[] = {
+	{"ASCII85Decode", nil, nil, nil},
+	{"ASCIIHexDecode", nil, nil, nil},
+	{"CCITTFaxDecode", nil, nil, nil},
+	{"Crypt", nil, nil, nil}, /* the one standard filter without a "Decode" suffix */
+	{"DCTDecode", nil, nil, nil},
+	{"FlateDecode", fFlate},
+	{"JBIG2Decode", nil, nil, nil},
+	{"JPXDecode", nil, nil, nil},
+	{"LZWDecode", nil, nil, nil},
+	{"RunLengthDecode", nil, nil, nil},
+};
+
+/*
+ * Look up the filter called name and return a fresh instance of it,
+ * after running its open hook (if any) with the stream object o.
+ * Returns nil if the name is unknown or the filter is not implemented
+ * (errstr says which), if the allocation fails, or if the open hook
+ * fails.  Release the result with filterclose.
+ */
+Filter *
+filteropen(char *name, Object *o)
+{
+	int i;
+	Filter *f;
+
+	for(i = 0; i < nelem(filters) && strcmp(filters[i].name, name) != 0; i++);
+	if(i >= nelem(filters)){
+		werrstr("no such filter %q", name);
+		return nil;
+	}
+	if(filters[i].readall == nil){
+		werrstr("filter %q not implemented", name);
+		return nil;
+	}
+	if((f = malloc(sizeof(*f))) == nil)
+		return nil;
+	memmove(f, &filters[i], sizeof(*f));
+	if(f->open != nil && f->open(f, o) != 0){
+		free(f);
+		return nil;
+	}
+
+	return f;
+}
+
+/*
+ * Decode all of bi into bo with filter f.  Returns 0 on success and
+ * -1, with the filter name prepended to errstr, on failure.  A
+ * summary of the buffer sizes is printed to fd 2 on success.
+ */
+int
+filterrun(Filter *f, Buffer *bi, Buffer *bo)
+{
+	if(f->readall(f->aux, bi, bo) != 0){
+		werrstr("filter[%s]: %r", f->name);
+		return -1;
+	}
+	fprint(2, "filter[%s]: %d → %d %d\n", f->name, bi->sz, bo->sz, bo->off);
+	return 0;
+}
+
+/* Run the filter's close hook, if it has one, and free the instance. */
+void
+filterclose(Filter *f)
+{
+	if(f->close != nil)
+		f->close(f);
+	free(f);
+}
--- a/main.c
+++ b/main.c
@@ -22,7 +22,7 @@
 	Object *v;
 
 	quotefmtinstall();
-	deflateinit();
+	inflateinit();
 
 	ARGBEGIN{
 	default:
--- a/mkfile
+++ b/mkfile
@@ -5,8 +5,10 @@
 
 OFILES=\
 	array.$O\
+	buffer.$O\
 	dict.$O\
 	eval.$O\
+	f_flate.$O\
 	filter.$O\
 	main.$O\
 	misc.$O\
@@ -14,6 +16,7 @@
 	object.$O\
 	pdf.$O\
 	pdfs.$O\
+	stream.$O\
 	string.$O\
 
 HFILES=\
--- a/object.c
+++ b/object.c
@@ -13,6 +13,31 @@
 	.type = Onull,
 };
 
+static char *otypes[] = { /* printable name for each Object type; indexed by o->type in Tfmt */
+	[Obool] = "bool",
+	[Onum] = "num",
+	[Ostr] = "str",
+	[Oname] = "name",
+	[Oarray] = "array",
+	[Odict] = "dict",
+	[Ostream] = "stream",
+	[Onull] = "null",
+	[Oindir] = "indir",
+};
+
+int
+Tfmt(Fmt *f) /* format routine: prints the type name of an Object* argument */
+{
+	Object *obj;
+
+	obj = va_arg(f->args, Object*);
+	if(obj == nil || obj == &null)
+		return fmtprint(f, "null");
+	if(obj->type >= 0 && obj->type < nelem(otypes)) /* known type: use its table name */
+		return fmtprint(f, "%s", otypes[obj->type]);
+	return fmtprint(f, "????");
+}
+
 /* General function to parse an object of any type. */
 Object *
 pdfobj(Pdf *pdf, void *b)
@@ -49,8 +74,8 @@
 						goto err;
 					}
 					o->type = Ostream;
-					o->stream.length = m->num;
-					o->stream.offset = Boffset(b);
+					o->stream.len = m->num;
+					o->stream.off = Boffset(b);
 					return o;
 				}
 				Bseek(b, off, 0);
--- a/pdf.c
+++ b/pdf.c
@@ -75,9 +75,7 @@
 static int
 trailerread(Pdf *pdf)
 {
-	int i;
 	Object *o;
-	KeyValue *kv;
 
 	if((o = pdfobj(pdf, pdf->bio)) == nil)
 		goto err;
@@ -87,12 +85,8 @@
 		goto err;
 	}
 
-	for(i = 0, kv = o->dict.kv; i < o->dict.nkv; i++, kv++){
-		if(strcmp(kv->key, "Root") == 0)
-			pdf->root = kv->value;
-		else if(strcmp(kv->key, "Info") == 0)
-			pdf->info = kv->value;
-	}
+	pdf->root = pdfdictget(o, "Root");
+	pdf->info = pdfdictget(o, "Info");
 	pdfobjfree(o);
 	o = nil;
 
@@ -101,8 +95,6 @@
 		werrstr("no root");
 		goto err;
 	}
-	if(pdfeval(pdf, pdf->root) != 0 || pdfeval(pdf, pdf->info) != 0)
-		goto err;
 
 	return 0;
 err:
@@ -121,7 +113,10 @@
 	int nxref; /* 7.5.4 xref subsection number of objects */
 	int xreftb; /* 7.5.4 xref table offset from the beginning of the file */
 	int i, n, off;
+	Stream *stream;
 
+	fmtinstall('T', Tfmt);
+
 	b = nil;
 	o = nil;
 	if((pdf = calloc(1, sizeof(*pdf))) == nil || (b = Bfdopen(fd, OREAD)) == nil)
@@ -196,10 +191,16 @@
 		}
 	}else if(isdigit(tmp[0])){ /* could be 7.5.8 xref stream (since PDF 1.5) */
 		Bseek(b, xreftb, 0);
-		if((o = pdfobj(pdf, b)) == nil || pdfeval(pdf, o) != 0)
-			goto err;
-		
+		if((o = pdfobj(pdf, b)) == nil || (stream = streamopen(pdf, o)) == nil)
+			goto badxref;
+		streamclose(stream);
+		pdf->root = pdfdictget(o, "Root");
+		pdf->info = pdfdictget(o, "Info");
 	}
+	if(pdfeval(pdf, pdf->root) != 0 || pdfeval(pdf, pdf->info) != 0)
+		goto err;
+	fprint(2, "root %T\n", pdf->root);
+	fprint(2, "info %T\n", pdf->info);
 
 	return pdf;
 err:
--- a/pdf.h
+++ b/pdf.h
@@ -10,12 +10,24 @@
 	Oindir,  /* 7.3.10 */
 };
 
+typedef struct Buffer Buffer;
+typedef struct Filter Filter;
 typedef struct KeyValue KeyValue;
 typedef struct Object Object;
 typedef struct Pdf Pdf;
-typedef struct Xref Xref;
 typedef struct Stream Stream;
+typedef struct Xref Xref;
+#pragma incomplete Filter
 
+struct Buffer { /* byte buffer that is appended to at the end and read from an offset */
+	uchar *b; /* the data */
+	int ro; /* set by bufinit when wrapping caller data: buffree leaves b alone, bufput refuses to write */
+	int maxsz; /* bytes allocated for b by bufgrow; 0 before the first allocation */
+	int sz; /* bytes of valid data in b */
+	int off; /* read position, advanced by bufget */
+	int eof; /* non-zero when the most recent bufget found no data left */
+};
+
 struct Object {
 	int type;
 	union {
@@ -45,8 +57,8 @@
 		struct {
 			KeyValue *kv;
 			int nkv;
-			u32int length; /* packed */
-			u32int offset;
+			u32int len; /* packed */
+			u32int off;
 		}stream;
 	};
 };
@@ -72,9 +84,8 @@
 };
 
 struct Stream {
-	Biobuf;
-	Object *o;
-	u32int offset;
+	Buffer buf; /* stream data after streamopen has run the filters */
+	void *bio; /* Biobuf that reads from buf; streamopen allocates it right behind the Stream */
 };
 
 Pdf *pdfopen(int fd);
@@ -96,3 +107,19 @@
 
 Stream *streamopen(Pdf *pdf, Object *o);
 void streamclose(Stream *s);
+
+Filter *filteropen(char *name, Object *o);
+int filterrun(Filter *f, Buffer *bi, Buffer *bo);
+void filterclose(Filter *f);
+
+void bufinit(Buffer *b, uchar *d, int sz);
+void buffree(Buffer *b);
+int bufeof(Buffer *b);
+uchar *bufdata(Buffer *b, int *sz);
+int bufreadn(Buffer *b, void *bio, int sz);
+int bufput(Buffer *b, uchar *d, int sz);
+int bufget(Buffer *b, uchar *d, int sz);
+void bufdump(Buffer *b);
+
+#pragma varargck type "T" Object *
+int Tfmt(Fmt *f);
--- /dev/null
+++ b/stream.c
@@ -1,0 +1,109 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "pdf.h"
+
+/*
+ * Read callback installed on a Stream's Biobuf by streamopen.  The
+ * Biobuf is allocated directly behind its Stream, so stepping back
+ * by sizeof(Stream) finds the stream whose buffer supplies the data.
+ */
+static int
+bufiof(Biobufhdr *b, void *data, long n)
+{
+	Stream *s;
+
+	s = (void*)((char*)b - sizeof(*s));
+
+	return bufget(&s->buf, data, n);
+}
+
+/*
+ * Open the stream object o for reading.  The stream.len bytes at
+ * stream.off are read from pdf->bio and passed through the filters
+ * named by the object's Filter entry (one name or an array of names).
+ * The decoded data is held in memory and can be read through s->bio.
+ * Returns nil on failure; release the result with streamclose.
+ */
+Stream *
+streamopen(Pdf *pdf, Object *o)
+{
+	Stream *s;
+	Buffer b, x;
+	Object *of, **flts;
+	Filter *f;
+	int i, nflts, r;
+
+	s = nil;
+	if(o == nil || pdfeval(pdf, o) != 0 || o->type != Ostream) /* FIXME open a string object as a stream as well? */
+		return nil;
+
+	bufinit(&b, nil, 0);
+	if(Bseek(pdf->bio, o->stream.off, 0) != o->stream.off)
+		return nil;
+	if(bufreadn(&b, pdf->bio, o->stream.len) < 0)
+		goto err;
+	bufdump(&b);
+
+	/* see if there are any filters */
+	if((of = pdfdictget(o, "Filter")) != nil){
+		if(pdfeval(pdf, of) != 0)
+			goto err;
+		if(of->type == Oname){ /* one filter */
+			flts = &of;
+			nflts = 1;
+		}else if(of->type == Oarray){ /* array of filters */
+			flts = of->array.e;
+			nflts = of->array.ne;
+		}else{
+			werrstr("filters type invalid (%T)", of);
+			goto err;
+		}
+
+		/* 7.3.8.2: the array lists filters in the order they are applied when decoding */
+		for(i = 0; i < nflts; i++){
+			if(flts[i]->type != Oname){
+				werrstr("filter type invalid (%T)", flts[i]);
+				goto err;
+			}
+			if((f = filteropen(flts[i]->name, o)) == nil)
+				goto err;
+			bufinit(&x, nil, 0);
+			r = filterrun(f, &b, &x);
+			filterclose(f); /* done with the filter either way; don't leak it */
+			if(r != 0){
+				buffree(&x);
+				goto err;
+			}
+			buffree(&b);
+			b = x;
+		}
+	}
+
+	if((s = calloc(1, sizeof(*s)+sizeof(Biobuf))) == nil){
+		buffree(&b);
+		return nil;
+	}
+	s->bio = (uchar*)(s+1); /* the Biobuf sits right behind the Stream, see bufiof */
+	s->buf = b;
+	Binit(s->bio, Bfildes(pdf->bio), OREAD);
+	Biofn(s->bio, bufiof);
+
+	bufdump(&s->buf);
+
+	return s;
+err:
+	werrstr("stream: %r");
+	buffree(&b);
+	free(s);
+	return nil;
+}
+
+/* Release a stream returned by streamopen together with its data. */
+void
+streamclose(Stream *s)
+{
+	buffree(&s->buf);
+	Bterm(s->bio);
+	free(s);
+}