/*
 * shithub: pdffs
 *
 * ref: 17128cefa8384e9433de8a725686b4e544a83308
 * dir: /object.c/
 *
 * View raw version
 */
#include <u.h>
#include <libc.h>
#include <ctype.h>
#include <bio.h>
#include "pdf.h"

/*
 * Forward declarations of the per-type parsers that pdfobj dispatches to.
 * pdfobj rewinds the stream to the object's first character before calling
 * them and treats a nil return as "nothing parsed".
 */
Object *pdfstring(Biobuf *b);	/* used for '(' and for a single '<' */
Object *pdfname(Biobuf *b);	/* used for '/' */
Object *pdfarray(Pdf *pdf, Biobuf *b);	/* used for '[' */
Object *pdfdict(Pdf *pdf, Biobuf *b);	/* used for "<<" */

/*
 * General function to parse an object of any type.
 *
 * Skips leading whitespace on b and dispatches on the first character:
 *   "<<"       dictionary (pdfdict); turned into an Ostream when it is
 *              followed by the "stream" keyword and a newline
 *   '<', '('   string (pdfstring)
 *   '/'        name (pdfname)
 *   '['        array (pdfarray)
 *   n, t, f    the keywords null, true and false
 *   digit      a number, an indirect reference "id gen R", or
 *              "id gen obj" followed by the object itself
 *
 * Returns the parsed object, or nil on failure with the error string
 * prefixed by "object: ".  The null keyword yields the address of the
 * global null object instead of a fresh allocation.
 */
Object *
pdfobj(Pdf *pdf, Biobuf *b)
{
	Object *o, *o2;
	vlong off;
	int c, tf;
	Xref xref;
	char s[16];

	o = o2 = nil;
	do; while(isws(c = Bgetc(b)));
	if(c < 0)
		goto err;

	switch(c){
	case '<': /* dictionary or a string */
		c = Bgetc(b);
		if(c == '<'){
			/* rewind over "<<" so pdfdict sees the whole token */
			Bseek(b, -2, 1);
			if((o = pdfdict(pdf, b)) != nil){
				/* check for attached stream */
				off = Boffset(b);
				do; while(isws(Bgetc(b)));
				Bungetc(b);
				if(Bread(b, s, 7) == 7 && memcmp(s, "stream", 6) == 0 && isws(c = s[6])){
					/* there IS a stream; the keyword must be followed by \n or \r\n */
					if(c == '\r' && (c = Bgetc(b)) < 0)
						goto err;
					if(c != '\n'){
						werrstr("stream has no newline after dict");
						goto err;
					}
					o->stream.off = Boffset(b);
					o->type = Ostream;
					o->stream.len = dictint(o, "Length");
					return o;
				}
				/* plain dictionary: undo the look-ahead */
				Bseek(b, off, 0);
				return o;
			}
			/* NOTE(review): a failed pdfdict falls through to pdfstring below — confirm this is intended */
		}
		Bungetc(b);
		/* fall through */

	case '(':
		Bungetc(b);
		if((o = pdfstring(b)) != nil)
			o->pdf = pdf;
		return o;

	case '/':
		Bungetc(b);
		if((o = pdfname(b)) != nil)
			o->pdf = pdf;
		return o;

	case '[':
		Bungetc(b);
		if((o = pdfarray(pdf, b)) != nil)
			o->pdf = pdf;
		return o;

	case 'n':
		off = Boffset(b);
		if(Bgetc(b) == 'u' && Bgetc(b) == 'l' && Bgetc(b) == 'l' && (isws(c = Bgetc(b)) || isdelim(c))){
			Bungetc(b);
			return &null;
		}
		Bseek(b, off, 0);
		c = 'n'; /* report the character that actually started the token */
		goto unexpected;

	case 't':
		off = Boffset(b);
		tf = 1;
		if(Bgetc(b) == 'r' && Bgetc(b) == 'u' && Bgetc(b) == 'e' && (isws(c = Bgetc(b)) || isdelim(c)))
			goto bool;
		Bseek(b, off, 0);
		c = 't';
		goto unexpected;

	case 'f':
		off = Boffset(b);
		tf = 0;
		if(Bgetc(b) == 'a' && Bgetc(b) == 'l' && Bgetc(b) == 's' && Bgetc(b) == 'e' && (isws(c = Bgetc(b)) || isdelim(c)))
			goto bool;
		Bseek(b, off, 0);
		c = 'f';
		goto unexpected;
bool:
		/* put back the whitespace/delimiter that terminated the keyword */
		Bungetc(b);
		if((o = calloc(1, sizeof(*o))) == nil)
			goto err;
		o->type = Obool;
		o->pdf = pdf;
		o->bool = tf;
		return o;

	default:
		/* NOTE(review): numbers starting with '+', '-' or '.' are rejected here — confirm whether they should be accepted */
		if(!isdigit(c)){
unexpected:
			Bungetc(b);
			werrstr("unexpected char '%c'", c);
			goto err;
		}
		 /* it could be a number or an indirect object */
		Bungetc(b);
		if((o = calloc(1, sizeof(*o))) == nil)
			goto err;
		o->pdf = pdf;
		/* NOTE(review): the return value of Bgetd is not checked */
		Bgetd(b, &o->num); /* get the first number */
		off = Boffset(b); /* seek here if not an indirect object later */

		if((o2 = pdfobj(pdf, b)) != nil && o2->type == Onum){ /* second object is number too */
			do; while(isws(c = Bgetc(b)));
			if(c < 0)
				goto err;
			if(c == 'R'){ /* indirect object */
				o->type = Oindir;
				o->indir.id = o->num;
				o->indir.gen = o2->num;
				pdfobjfree(o2);
				return o;
			}
			if(c == 'o' && Bgetc(b) == 'b' && Bgetc(b) == 'j'){ /* object */
				xref.id = o->num;
				xref.gen = o2->num;
				/* FIXME put into a map */
				pdfobjfree(o);
				pdfobjfree(o2);
				/* o2 is gone; clear it so the err path does not free it again */
				o2 = nil;
				/* a failed body parse is an error, not "just a number" */
				if((o = pdfobj(pdf, b)) == nil)
					goto err;
				return o;
			}
		}

		/*
		 * just a number, go back and return it; whatever was parsed
		 * as look-ahead is no longer needed and must not leak
		 */
		pdfobjfree(o2);
		o2 = nil;
		o->type = Onum;
		if(Bseek(b, off, 0) != off){
			werrstr("seek failed");
			goto err;
		}
		return o;
	}

err:
	werrstr("object: %r");
	pdfobjfree(o);
	pdfobjfree(o2);
	return nil;
}

/*
 * Drop one reference to o and release it once no references remain.
 *
 * A nil o is ignored.  The reference count is decremented first and the
 * object is only torn down when the count falls below zero.  Objects of
 * type Onull are never handed to free().  Strings and names release their
 * character buffer; arrays release each element and then the element
 * vector; dictionaries and streams release each key and value and then
 * the key/value vector.
 */
void
pdfobjfree(Object *o)
{
	int n;

	if(o == nil)
		return;
	o->ref--;
	if(o->ref >= 0)
		return;
	if(o->type == Onull)
		return;

	if(o->type == Ostr || o->type == Oname){
		free(o->str);
	}else if(o->type == Oarray){
		n = 0;
		while(n < o->array.ne){
			pdfobjfree(o->array.e[n]);
			n++;
		}
		free(o->array.e);
	}else if(o->type == Odict || o->type == Ostream){
		n = 0;
		while(n < o->dict.nkv){
			free(o->dict.kv[n].key);
			pdfobjfree(o->dict.kv[n].value);
			n++;
		}
		free(o->dict.kv);
	}
	/* Obool, Onum and Oindir have nothing released besides the Object itself */

	free(o);
}

/*
 * Take an additional reference to o.
 *
 * Increments the reference count that pdfobjfree decrements and returns o
 * so the call can be used inline.  A nil o is passed through untouched,
 * matching pdfobjfree, which also accepts nil.
 */
Object *
pdfref(Object *o)
{
	if(o != nil)
		o->ref++;
	return o;
}