shithub: femtolisp

Download patch

ref: 40cff81550d8ba5692868723c2c336916f768057
parent: 2cf5187ca998708d1b55f13cc9b4c22a5362f8d0
author: JeffBezanson <[email protected]>
date: Mon Mar 2 22:16:30 EST 2009

more reorganization of LLT
adding io.readuntil, io.readline
improvements to ios_readprep


--- a/femtolisp/builtins.c
+++ b/femtolisp/builtins.c
@@ -14,6 +14,7 @@
 #include <errno.h>
 #include "llt.h"
 #include "flisp.h"
+#include "random.h"
 
 size_t llength(value_t v)
 {
--- a/femtolisp/cvalues.c
+++ b/femtolisp/cvalues.c
@@ -838,7 +838,7 @@
     // directly-callable values are assumed not to move for
     // evaluator performance, so put builtin func metadata on the
     // unmanaged heap
-    cvalue_t *buf = malloc_aligned(nw * sizeof(value_t), 8);
+    cvalue_t *buf = malloc(nw * sizeof(value_t));
     memcpy(buf, ptr(gf), nw*sizeof(value_t));
     return tagptr(buf, TAG_BUILTIN);
     */
--- a/femtolisp/flisp.c
+++ b/femtolisp/flisp.c
@@ -202,8 +202,8 @@
     symbol_t *sym;
     size_t len = strlen(str);
 
-    sym = (symbol_t*)malloc_aligned(sizeof(symbol_t)-sizeof(void*) + len + 1,
-                                    8);
+    sym = (symbol_t*)malloc(sizeof(symbol_t)-sizeof(void*) + len + 1);
+    assert(((uptrint_t)sym & 0x7) == 0); // make sure malloc aligns 8
     sym->left = sym->right = NULL;
     if (str[0] == ':') {
         value_t s = tagptr(sym, TAG_SYM);
@@ -502,7 +502,7 @@
     // more space to fill next time. if we grew tospace last time,
     // grow the other half of the heap this time to catch up.
     if (grew || ((lim-curheap) < (int)(heapsize/5)) || mustgrow) {
-        temp = realloc_aligned(tospace, grew ? heapsize : heapsize*2, 16);
+        temp = realloc(tospace, grew ? heapsize : heapsize*2);
         if (temp == NULL)
             raise(memory_exception_value);
         tospace = temp;
@@ -1442,8 +1442,8 @@
 
     llt_init();
 
-    fromspace = malloc_aligned(heapsize, 16);
-    tospace   = malloc_aligned(heapsize, 16);
+    fromspace = malloc(heapsize);
+    tospace   = malloc(heapsize);
     curheap = fromspace;
     lim = curheap+heapsize-sizeof(cons_t);
     consflags = bitvector_new(heapsize/sizeof(cons_t), 1);
--- a/femtolisp/iostream.c
+++ b/femtolisp/iostream.c
@@ -239,6 +239,30 @@
     return FL_T;
 }
 
+value_t fl_ioreaduntil(value_t *args, u_int32_t nargs)  // builtin io.readuntil: args[0] = source stream, args[1] = delimiter
+{
+    argcount("io.readuntil", nargs, 2);
+    value_t str = cvalue_string(80);  // result string; presumably created with 80 bytes of storage -- confirm
+    cvalue_t *cv = (cvalue_t*)ptr(str);
+    char *data = cv_data(cv);
+    ios_t dest;
+    ios_mem(&dest, 0);
+    ios_setbuf(&dest, data, 80, 0);  // hand dest the string's own data as its 80-byte buffer
+    char delim = (char)toulong(args[1], "io.readuntil");  // delimiter value is narrowed to a single char
+    ios_t *src = toiostream(args[0], "io.readuntil");
+    size_t n = ios_copyuntil(&dest, src, delim);  // n = byte count reported by ios_copyuntil
+    cv->len = n;
+    if (dest.buf != data) {
+        // outgrew initial space
+        cv->data = dest.buf;  // dest no longer uses the string's buffer; point the string at dest's
+        cv_autorelease(cv);  // presumably arranges for that buffer to be released with cv -- confirm
+    }
+    ((char*)cv->data)[n] = '\0';  // NOTE(review): assumes the buffer has room for byte n -- confirm
+    if (n == 0 && ios_eof(src))
+        return FL_F;  // nothing copied and the source is at EOF
+    return str;
+}
+
 static builtinspec_t iostreamfunc_info[] = {
     { "iostream?", fl_iostreamp },
     { "dump", fl_dump },
@@ -254,6 +278,7 @@
     { "io.discardbuffer", fl_iopurge },
     { "io.read", fl_ioread },
     { "io.write", fl_iowrite },
+    { "io.readuntil", fl_ioreaduntil },
     { NULL, NULL }
 };
 
--- a/femtolisp/system.lsp
+++ b/femtolisp/system.lsp
@@ -560,6 +560,8 @@
 		(trim-start s at-start 0 L)
 		(trim-end   s at-end   L))))
 
+(define (io.readline s) (io.readuntil s #byte(0xA)))  ; io.readuntil on s with byte 0xA (newline) as the delimiter
+
 (define (repl)
   (define (prompt)
     (princ "> ") (io.flush *output-stream*)
--- a/femtolisp/todo
+++ b/femtolisp/todo
@@ -858,7 +858,8 @@
 *io.write     - (io.write s cvalue)
 *io.read      - (io.read s ctype [len])
  io.getc      - get utf8 character(s)
- io.readline
+*io.readline
+*io.readuntil
  io.copy      - (io.copy to from [nbytes])
  io.copyuntil - (io.copy to from byte)
  io.pos       - (io.pos s [set-pos])
--- a/llt/Makefile
+++ b/llt/Makefile
@@ -1,8 +1,9 @@
 CC = gcc
 
-SRCS = bitvector.c hashing.c socket.c timefuncs.c utils.c dblprint.c ptrhash.c \
+SRCS = bitvector.c hashing.c socket.c timefuncs.c dblprint.c ptrhash.c \
 	utf8.c ios.c operators.c cplxprint.c dirpath.c htable.c \
-	bitvector-ops.c fp.c int2str.c dump.c
+	bitvector-ops.c fp.c int2str.c dump.c random.c bswap.c memalign.c \
+	swapreverse.c
 OBJS = $(SRCS:%.c=%.o)
 DOBJS = $(SRCS:%.c=%.do)
 TARGET = libllt.a
--- /dev/null
+++ b/llt/bswap.c
@@ -1,0 +1,87 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+#include <alloca.h>
+#include "dtypes.h"
+#include "utils.h"
+
+void bswap_buffer(byte_t *data, size_t sz, size_t npts)  // reverse the byte order of each of npts sz-byte elements, in place
+{
+    size_t i, b;
+    byte_t *el;
+    byte_t temp;
+
+    if (sz <= 1)
+        return;  // 0- and 1-byte elements have no byte order to reverse
+
+    switch (sz) {
+    case 8:
+        for(i=0; i < npts; i++) {
+            ((u_int64_t*)data)[i] = bswap_64(((u_int64_t*)data)[i]);  // NOTE(review): accessed as u_int64_t; presumably data is suitably aligned -- confirm
+        }
+        break;
+    case 4:
+        for(i=0; i < npts; i++) {
+            ((u_int32_t*)data)[i] = bswap_32(((u_int32_t*)data)[i]);
+        }
+        break;
+    case 2:
+        for(i=0; i < npts; i++) {
+            ((u_int16_t*)data)[i] = bswap_16(((u_int16_t*)data)[i]);
+        }
+        break;
+    default:
+        for(i=0; i < sz * npts; i += sz) {  // i is the byte offset of the current element
+            el = data + i;
+            for(b=0; b < sz/2; b++) {  // swap mirrored byte pairs within the element
+                temp = el[b];
+                el[b] = el[sz-b-1];
+                el[sz-b-1] = temp;
+            }
+        }
+    }
+}
+
+void bswap(byte_t *s, size_t n)  // reverse the byte order of the single n-byte value at s, in place
+{
+    unsigned int i;
+    char temp;
+
+    switch (n) {
+    case 8:
+        *(u_int64_t*)s = bswap_64(*(u_int64_t*)s); break;  // NOTE(review): accessed as u_int64_t; presumably s is suitably aligned -- confirm
+    case 4:
+        *(u_int32_t*)s = bswap_32(*(u_int32_t*)s); break;
+    case 2:
+        *(u_int16_t*)s = bswap_16(*(u_int16_t*)s); break;
+    case 1:
+        break;  // one byte: already its own reverse
+    default:
+        for(i=0; i < n/2; i++) {  // other sizes: swap mirrored byte pairs
+            temp = s[i];
+            s[i] = s[n-i-1];
+            s[n-i-1] = temp;
+        }
+    }
+}
+
+void bswap_to(byte_t *dest, byte_t *src, size_t n)  // write the n bytes at src to dest in reversed byte order; src is not modified
+{
+    unsigned int i;
+
+    switch (n) {
+    case 8:
+        *(u_int64_t*)dest = bswap_64(*(u_int64_t*)src); break;  // NOTE(review): word-sized accesses; presumably both pointers are suitably aligned -- confirm
+    case 4:
+        *(u_int32_t*)dest = bswap_32(*(u_int32_t*)src); break;
+    case 2:
+        *(u_int16_t*)dest = bswap_16(*(u_int16_t*)src); break;
+    case 1:
+        *dest = *src; break;  // nothing to reorder, but the byte must still reach dest like every other size
+    default:
+        for(i=0; i < n; i++) {  // generic sizes: dest and src must not overlap here
+            dest[i] = src[n-i-1];
+        }
+    }
+}
+
--- a/llt/fp.c
+++ b/llt/fp.c
@@ -86,25 +86,3 @@
         return 1;
     return 0;
 }
-
-double randn()
-{
-    double s, vre, vim, ure, uim;
-    static double next = -42;
-
-    if (next != -42) {
-        s = next;
-        next = -42;
-        return s;
-    }
-    do {
-        ure = rand_double();
-        uim = rand_double();
-        vre = 2*ure - 1;
-        vim = 2*uim - 1;
-        s = vre*vre + vim*vim;
-    } while (s >= 1);
-    s = sqrt(-2*log(s)/s);
-    next = s * vre;
-    return s * vim;
-}
--- a/llt/hashing.c
+++ b/llt/hashing.c
@@ -1,5 +1,5 @@
 /*
-  Hashing and random numbers
+  Hashing
 */
 #include <stdlib.h>
 #include <stdio.h>
@@ -10,6 +10,7 @@
 #include "hashing.h"
 #include "timefuncs.h"
 #include "ios.h"
+#include "random.h"
 
 uint_t nextipow2(uint_t i)
 {
@@ -74,35 +75,6 @@
 
     hashlittle2(buf, n, &c, &b);
     return c;
-}
-
-#include "mt19937ar.c"
-
-double rand_double()
-{
-    union ieee754_double d;
-
-    d.ieee.mantissa0 = random();
-    d.ieee.mantissa1 = random();
-    d.ieee.negative = 0;
-    d.ieee.exponent = IEEE754_DOUBLE_BIAS + 0;    /* 2^0 */
-    return d.d - 1.0;
-}
-
-float rand_float()
-{
-    union ieee754_float f;
-
-    f.ieee.mantissa = random();
-    f.ieee.negative = 0;
-    f.ieee.exponent = IEEE754_FLOAT_BIAS + 0;     /* 2^0 */
-    return f.f - 1.0;
-}
-
-void randomize()
-{
-    u_int64_t tm = i64time();
-    init_by_array((unsigned long*)&tm, 2);
 }
 
 double D_PNAN;
--- a/llt/hashing.h
+++ b/llt/hashing.h
@@ -12,14 +12,5 @@
 #endif
 u_int64_t memhash(char* buf, size_t n);
 u_int32_t memhash32(char* buf, size_t n);
-#define random() genrand_int32()
-#define srandom(n) init_genrand(n)
-double rand_double();
-float rand_float();
-double randn();
-u_int64_t i64time();
-void randomize();
-unsigned long genrand_int32();
-void init_genrand(unsigned long s);
 
 #endif
--- a/llt/ios.c
+++ b/llt/ios.c
@@ -309,8 +309,17 @@
     if (space >= n || s->bm == bm_mem || s->fd == -1)
         return space;
     if (s->maxsize < s->bpos+n) {
-        if (_buf_realloc(s, s->maxsize + n)==NULL)
-            return space;
+        // it won't fit. grow buffer or move data back.
+        if (n <= s->maxsize && space <= ((s->maxsize)>>5)) {
+            if (space)
+                memmove(s->buf, s->buf+s->bpos, space);
+            s->size -= s->bpos;
+            s->bpos = 0;
+        }
+        else {
+            if (_buf_realloc(s, s->bpos + n)==NULL)
+                return space;
+        }
     }
     size_t got;
     int result = _os_read(s->fd, s->buf+s->size, s->maxsize - s->size, &got);
@@ -617,6 +626,34 @@
 int ios_copyall(ios_t *to, ios_t *from)
 {
     return ios_copy_(to, from, 0, 1);
+}
+
+#define LINE_CHUNK_SIZE 160
+
+size_t ios_copyuntil(ios_t *to, ios_t *from, char delim)  // copy bytes from 'from' to 'to' through the first delim; returns the sum of ios_write results
+{
+    size_t total = 0, avail;
+    if (!ios_eof(from)) {
+        do {
+            avail = ios_readprep(from, LINE_CHUNK_SIZE);  // ask for a chunk to be buffered; avail = bytes actually available
+            size_t written;
+            char *pd = (char*)memchr(from->buf+from->bpos, delim, avail);  // search only the buffered bytes
+            if (pd == NULL) {
+                written = ios_write(to, from->buf+from->bpos, avail);  // no delimiter yet: copy everything available
+                from->bpos += avail;  // NOTE(review): advances by avail even if written < avail -- confirm intended
+                total += written;
+            }
+            else {
+                size_t ntowrite = pd - (from->buf+from->bpos) + 1;  // +1 so the delimiter itself is copied
+                written = ios_write(to, from->buf+from->bpos, ntowrite);
+                from->bpos += ntowrite;
+                total += written;
+                return total;  // delimiter found: stop without touching _eof
+            }
+        } while (!ios_eof(from) && avail >= LINE_CHUNK_SIZE);  // a short chunk or EOF ends the search
+    }
+    from->_eof = 1;  // reached only when no delimiter was found
+    return total;
 }
 
 static void _ios_init(ios_t *s)
--- a/llt/ios.h
+++ b/llt/ios.h
@@ -80,6 +80,7 @@
 void ios_bswap(ios_t *s, int bswap);
 int ios_copy(ios_t *to, ios_t *from, size_t nbytes);
 int ios_copyall(ios_t *to, ios_t *from);
+size_t ios_copyuntil(ios_t *to, ios_t *from, char delim);
 // ensure at least n bytes are buffered if possible. returns # available.
 size_t ios_readprep(ios_t *from, size_t n);
 //void ios_lock(ios_t *s);
@@ -112,7 +113,6 @@
 int ios_ungetutf8(ios_t *s, uint32_t wc);
 int ios_getstringz(ios_t *dest, ios_t *src);
 int ios_getstringn(ios_t *dest, ios_t *src, size_t nchars);
-int ios_readline(ios_t *dest, ios_t *s, char delim);
 int ios_getline(ios_t *s, char **pbuf, size_t *psz);
 
 // discard data buffered for reading
--- /dev/null
+++ b/llt/memalign.c
@@ -1,0 +1,50 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+#include <alloca.h>
+#include "dtypes.h"
+#include "utils.h"
+
+#define ALIGNED_TO_ACTUAL(p) (((char*)(p)) - ((long*)(p))[-1])  // aligned pointer -> raw block, using the offset word stored just below p
+
+static void *aligned_ptr(char *ptr, size_t align_size)  // pick an aligned address inside raw block ptr and record the way back
+{
+    char *ptr2, *aligned_ptr;
+
+    ptr2 = ptr + sizeof(long);  // skip one long so there is room for the offset word below the result
+    aligned_ptr = (char*)ALIGN(((uptrint_t)ptr2), align_size);  // ALIGN presumably rounds up to a multiple of align_size -- confirm
+
+    ((long*)aligned_ptr)[-1] = (long)(aligned_ptr - ptr);  // distance back to ptr; ALIGNED_TO_ACTUAL reads this word
+
+    return aligned_ptr;
+}
+
+/* align_size has to be a power of two */
+void *malloc_aligned(size_t size, size_t align_size)  // allocate size bytes via malloc and return an aligned_ptr() result; NULL on failure
+{
+    char *ptr;
+
+    ptr = (char*)malloc(size + align_size-1 + sizeof(long));  // extra room: alignment slack plus the offset word
+    if (ptr == NULL)
+        return NULL;
+
+    return aligned_ptr(ptr, align_size);  // result must be released with free_aligned, not free
+}
+
+void free_aligned(void *ptr)  // release a pointer produced by aligned_ptr(); reads the word below ptr, so ptr must not be NULL
+{
+    free(ALIGNED_TO_ACTUAL(ptr));  // recover the address malloc/realloc returned and free that
+}
+
+void *realloc_aligned(void *ptr, size_t size, size_t align_size)  // resize an aligned block (ptr may be NULL); NULL on failure, old block kept
+{
+    char *pnew, *al;
+    long off = ptr ? ((long*)ptr)[-1] : 0;  // payload offset inside the old raw block
+
+    pnew = realloc(ptr ? (char*)ptr - off : NULL,
+                   size + align_size-1 + sizeof(long));
+    if (pnew == NULL) return NULL;
+    al = (char*)ALIGN(((uptrint_t)(pnew + sizeof(long))), align_size);  // same address aligned_ptr() will pick
+    if (ptr && al - pnew != off) memmove(al, pnew + off, size);  // realloc kept the data at the old offset; re-seat it first
+    return aligned_ptr(pnew, align_size);  // writes the new offset word (after the move, so it cannot clobber data)
+}
--- /dev/null
+++ b/llt/random.c
@@ -1,0 +1,63 @@
+/*
+  random numbers
+*/
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include "ieee754.h"
+#include "dtypes.h"
+#include "utils.h"
+#include "random.h"
+#include "timefuncs.h"
+#include "ios.h"
+
+#include "mt19937ar.c"
+
+double rand_double()  // double in [0,1) assembled directly from random() bits
+{
+    union ieee754_double d;
+
+    d.ieee.mantissa0 = random();  // random() is a macro for genrand_int32() (random.h); each field keeps only the bits it can hold
+    d.ieee.mantissa1 = random();
+    d.ieee.negative = 0;
+    d.ieee.exponent = IEEE754_DOUBLE_BIAS + 0;    /* 2^0 */
+    return d.d - 1.0;  // d.d lies in [1,2) with exponent 2^0; shift down to [0,1)
+}
+
+float rand_float()  // float in [0,1) assembled directly from random() bits
+{
+    union ieee754_float f;
+
+    f.ieee.mantissa = random();  // only the bits that fit the mantissa field are kept
+    f.ieee.negative = 0;
+    f.ieee.exponent = IEEE754_FLOAT_BIAS + 0;     /* 2^0 */
+    return f.f - 1.0;  // f.f lies in [1,2); shift down to [0,1)
+}
+
+double randn()  // normally distributed value; each computation yields two, the second is cached in static state
+{
+    double s, vre, vim, ure, uim;
+    static double next = -42;  // -42 is the "no cached value" marker; static, so not reentrant
+
+    if (next != -42) {
+        s = next;  // hand out the value saved by the previous call
+        next = -42;
+        return s;
+    }
+    do {
+        ure = rand_double();
+        uim = rand_double();
+        vre = 2*ure - 1;  // map [0,1) to [-1,1)
+        vim = 2*uim - 1;
+        s = vre*vre + vim*vim;
+    } while (s >= 1);  // reject points outside the unit disc; NOTE(review): s == 0 is not rejected and would reach log(0)
+    s = sqrt(-2*log(s)/s);  // polar-method scale factor
+    next = s * vre;  // keep one value for the next call
+    return s * vim;
+}
+
+void randomize()  // seed the generator from the current i64time() value
+{
+    u_int64_t tm = i64time();
+    init_by_array((unsigned long[2]){ tm & 0xffffffffUL, tm >> 32 }, 2);  // two 32-bit halves; casting &tm overran it where long is 64-bit
+}
--- /dev/null
+++ b/llt/random.h
@@ -1,0 +1,14 @@
+#ifndef __LLTRANDOM_H_
+#define __LLTRANDOM_H_
+
+#define random() genrand_int32()
+#define srandom(n) init_genrand(n)
+double rand_double();  // double in [0,1) built from two random() draws
+float rand_float();  // float in [0,1) built from one random() draw
+double randn();  // normally distributed value (see random.c)
+void randomize();  // seeds the generator from i64time()
+unsigned long genrand_int32();  // not defined in random.c itself; presumably comes from the included mt19937ar.c -- confirm
+void init_genrand(unsigned long s);
+u_int64_t i64time();  // NOTE(review): uses u_int64_t, so a header defining it must be included first
+
+#endif
--- /dev/null
+++ b/llt/swapreverse.c
@@ -1,0 +1,187 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+#include <alloca.h>
+#include "dtypes.h"
+#include "utils.h"
+
+void memswap(char *a, char *b, size_t sz)  // exchange the sz bytes at a with the sz bytes at b
+{
+    int8_t i8;
+    int32_t i32;
+    int32_t *a4, *b4;
+
+    if (sz < 4) {
+        while (sz--) {  // short regions: plain byte-by-byte swap
+            i8 = *a;
+            *a++ = *b;
+            *b++ = i8;
+        }
+    }
+    else {
+        while (sz & 0x3) {  // swap leading bytes until the remaining count is a multiple of 4
+            i8 = *a;
+            *a++ = *b;
+            *b++ = i8;
+            sz--;
+        }
+        a4 = (int32_t*)a;  // NOTE(review): nothing here checks that a and b are 4-byte aligned -- confirm callers guarantee it
+        b4 = (int32_t*)b;
+        sz >>= 2;  // sz now counts 32-bit words
+        while (sz--) {
+            i32 = *a4;
+            *a4++ = *b4;
+            *b4++ = i32;
+        }
+    }
+}
+
+void memreverse(char *a, size_t n, size_t elsz)  // reverse, in place, the order of the n elements of elsz bytes each at a
+{
+    int64_t i64, *pi64;
+    int32_t i32, *pi32;
+    int16_t i16, *pi16;
+    int8_t i8;
+    size_t i;
+    char *temp;
+    size_t eli, tot;
+
+    if (n==0 || elsz==0) return;  // nothing to reverse
+    switch(elsz) {
+    case 16:
+        pi64 = (int64_t*)a;  // a 16-byte element is handled as two consecutive 64-bit halves
+        for(i=0; i < n/2; i++) {
+            i64 = pi64[2*i];
+            pi64[2*i] = pi64[2*(n-i-1)];
+            pi64[2*(n-i-1)] = i64;
+
+            i64 = pi64[2*i+1];
+            pi64[2*i+1] = pi64[2*(n-i-1)+1];
+            pi64[2*(n-i-1)+1] = i64;
+        }
+        break;
+    case 8:
+        pi64 = (int64_t*)a;
+        for(i=0; i < n/2; i++) {  // swap element i with its mirror n-i-1
+            i64 = pi64[i];
+            pi64[i] = pi64[n-i-1];
+            pi64[n-i-1] = i64;
+        }
+        break;
+    case 4:
+        pi32 = (int32_t*)a;
+        for(i=0; i < n/2; i++) {
+            i32 = pi32[i];
+            pi32[i] = pi32[n-i-1];
+            pi32[n-i-1] = i32;
+        }
+        break;
+    case 2:
+        pi16 = (int16_t*)a;
+        for(i=0; i < n/2; i++) {
+            i16 = pi16[i];
+            pi16[i] = pi16[n-i-1];
+            pi16[n-i-1] = i16;
+        }
+        break;
+    case 1:
+        for(i=0; i < n/2; i++) {
+            i8 = a[i];
+            a[i] = a[n-i-1];
+            a[n-i-1] = i8;
+        }
+        break;
+    default:
+        tot = n*elsz;  // total byte length of the array
+        if (elsz < 4097)
+            temp = alloca(elsz);  // elements below 4097 bytes get a stack scratch buffer
+        else
+            temp = malloc(elsz);  // larger elements use a heap scratch buffer
+
+        if (temp != NULL) {  // if malloc failed the array is silently left unreversed
+            for(i=0, eli=0; i < n/2; i++, eli+=elsz) {  // eli = byte offset of element i
+                memcpy(temp, &a[eli], elsz);
+                memcpy(&a[eli], &a[tot-eli-elsz], elsz);
+                memcpy(&a[tot-eli-elsz], temp, elsz);
+            }
+
+            if (elsz >= 4097)
+                free(temp);  // only the malloc'd buffer is freed
+        }
+        break;
+    }
+}
+
+void memreverse_to(char *dest, char *a, size_t n, size_t elsz)  // write the n elsz-byte elements of a into dest in reverse order; a is only read
+{
+    int64_t *pi64, *di64;
+    int32_t *pi32, *di32;
+    int16_t *pi16, *di16;
+    size_t i;
+    size_t eli, tot;
+    if (n==0 || elsz==0) return;  // nothing to copy; dest is left untouched
+    switch(elsz) {
+    case 16:
+        pi64 = (int64_t*)a;  // a 16-byte element is handled as two consecutive 64-bit halves
+        di64 = (int64_t*)dest;
+        for(i=0; i < n/2; i++) {
+            di64[2*i] = pi64[2*(n-i-1)];
+            di64[2*(n-i-1)] = pi64[2*i];
+
+            di64[2*i+1] = pi64[2*(n-i-1)+1];
+            di64[2*(n-i-1)+1] = pi64[2*i+1];
+        }
+        if (n&0x1) {  // odd count: the middle element stays in place and is copied straight across
+            di64[2*i] = pi64[2*i];
+            di64[2*i+1] = pi64[2*i+1];
+        }
+        break;
+    case 8:
+        pi64 = (int64_t*)a;
+        di64 = (int64_t*)dest;
+        for(i=0; i < n/2; i++) {  // fill dest from both ends at once
+            di64[i] = pi64[n-i-1];
+            di64[n-i-1] = pi64[i];
+        }
+        if (n&0x1)
+            di64[i] = pi64[i];  // middle element (i == n/2 after the loop)
+        break;
+    case 4:
+        pi32 = (int32_t*)a;
+        di32 = (int32_t*)dest;
+        for(i=0; i < n/2; i++) {
+            di32[i] = pi32[n-i-1];
+            di32[n-i-1] = pi32[i];
+        }
+        if (n&0x1)
+            di32[i] = pi32[i];
+        break;
+    case 2:
+        pi16 = (int16_t*)a;
+        di16 = (int16_t*)dest;
+        for(i=0; i < n/2; i++) {
+            di16[i] = pi16[n-i-1];
+            di16[n-i-1] = pi16[i];
+        }
+        if (n&0x1)
+            di16[i] = pi16[i];
+        break;
+    case 1:
+        for(i=0; i < n/2; i++) {
+            dest[i] = a[n-i-1];
+            dest[n-i-1] = a[i];
+        }
+        if (n&0x1)
+            dest[i] = a[i];
+        break;
+    default:
+        tot = n*elsz;  // total byte length of the array
+        for(i=0, eli=0; i < n/2; i++, eli+=elsz) {  // eli = byte offset of element i
+            memcpy(&dest[eli], &a[tot - eli - elsz], elsz);
+            memcpy(&dest[tot - eli - elsz], &a[eli], elsz);
+        }
+        if (n&0x1)
+            memcpy(&dest[eli], &a[eli], elsz);  // middle element; eli is its offset after the loop
+        break;
+    }
+}
--- a/llt/utils.c
+++ /dev/null
@@ -1,311 +1,0 @@
-#include <stdlib.h>
-#include <string.h>
-#include <stddef.h>
-#include <alloca.h>
-#include "dtypes.h"
-#include "utils.h"
-
-void memswap(char *a, char *b, size_t sz)
-{
-    int8_t i8;
-    int32_t i32;
-    int32_t *a4, *b4;
-
-    if (sz < 4) {
-        while (sz--) {
-            i8 = *a;
-            *a++ = *b;
-            *b++ = i8;
-        }
-    }
-    else {
-        while (sz & 0x3) {
-            i8 = *a;
-            *a++ = *b;
-            *b++ = i8;
-            sz--;
-        }
-        a4 = (int32_t*)a;
-        b4 = (int32_t*)b;
-        sz >>= 2;
-        while (sz--) {
-            i32 = *a4;
-            *a4++ = *b4;
-            *b4++ = i32;
-        }
-    }
-}
-
-void memreverse(char *a, size_t n, size_t elsz)
-{
-    int64_t i64, *pi64;
-    int32_t i32, *pi32;
-    int16_t i16, *pi16;
-    int8_t i8;
-    size_t i;
-    char *temp;
-    size_t eli, tot;
-
-    if (n==0 || elsz==0) return;
-    switch(elsz) {
-    case 16:
-        pi64 = (int64_t*)a;
-        for(i=0; i < n/2; i++) {
-            i64 = pi64[2*i];
-            pi64[2*i] = pi64[2*(n-i-1)];
-            pi64[2*(n-i-1)] = i64;
-
-            i64 = pi64[2*i+1];
-            pi64[2*i+1] = pi64[2*(n-i-1)+1];
-            pi64[2*(n-i-1)+1] = i64;
-        }
-        break;
-    case 8:
-        pi64 = (int64_t*)a;
-        for(i=0; i < n/2; i++) {
-            i64 = pi64[i];
-            pi64[i] = pi64[n-i-1];
-            pi64[n-i-1] = i64;
-        }
-        break;
-    case 4:
-        pi32 = (int32_t*)a;
-        for(i=0; i < n/2; i++) {
-            i32 = pi32[i];
-            pi32[i] = pi32[n-i-1];
-            pi32[n-i-1] = i32;
-        }
-        break;
-    case 2:
-        pi16 = (int16_t*)a;
-        for(i=0; i < n/2; i++) {
-            i16 = pi16[i];
-            pi16[i] = pi16[n-i-1];
-            pi16[n-i-1] = i16;
-        }
-        break;
-    case 1:
-        for(i=0; i < n/2; i++) {
-            i8 = a[i];
-            a[i] = a[n-i-1];
-            a[n-i-1] = i8;
-        }
-        break;
-    default:
-        tot = n*elsz;
-        if (elsz < 4097)
-            temp = alloca(elsz);
-        else
-            temp = malloc(elsz);
-
-        if (temp != NULL) {
-            for(i=0, eli=0; i < n/2; i++, eli+=elsz) {
-                memcpy(temp, &a[eli], elsz);
-                memcpy(&a[eli], &a[tot-eli-elsz], elsz);
-                memcpy(&a[tot-eli-elsz], temp, elsz);
-            }
-
-            if (elsz >= 4097)
-                free(temp);
-        }
-        break;
-    }
-}
-
-void memreverse_to(char *dest, char *a, size_t n, size_t elsz)
-{
-    int64_t *pi64, *di64;
-    int32_t *pi32, *di32;
-    int16_t *pi16, *di16;
-    size_t i;
-    size_t eli, tot;
-    if (n==0 || elsz==0) return;
-    switch(elsz) {
-    case 16:
-        pi64 = (int64_t*)a;
-        di64 = (int64_t*)dest;
-        for(i=0; i < n/2; i++) {
-            di64[2*i] = pi64[2*(n-i-1)];
-            di64[2*(n-i-1)] = pi64[2*i];
-
-            di64[2*i+1] = pi64[2*(n-i-1)+1];
-            di64[2*(n-i-1)+1] = pi64[2*i+1];
-        }
-        if (n&0x1) {
-            di64[2*i] = pi64[2*i];
-            di64[2*i+1] = pi64[2*i+1];
-        }
-        break;
-    case 8:
-        pi64 = (int64_t*)a;
-        di64 = (int64_t*)dest;
-        for(i=0; i < n/2; i++) {
-            di64[i] = pi64[n-i-1];
-            di64[n-i-1] = pi64[i];
-        }
-        if (n&0x1)
-            di64[i] = pi64[i];
-        break;
-    case 4:
-        pi32 = (int32_t*)a;
-        di32 = (int32_t*)dest;
-        for(i=0; i < n/2; i++) {
-            di32[i] = pi32[n-i-1];
-            di32[n-i-1] = pi32[i];
-        }
-        if (n&0x1)
-            di32[i] = pi32[i];
-        break;
-    case 2:
-        pi16 = (int16_t*)a;
-        di16 = (int16_t*)dest;
-        for(i=0; i < n/2; i++) {
-            di16[i] = pi16[n-i-1];
-            di16[n-i-1] = pi16[i];
-        }
-        if (n&0x1)
-            di16[i] = pi16[i];
-        break;
-    case 1:
-        for(i=0; i < n/2; i++) {
-            dest[i] = a[n-i-1];
-            dest[n-i-1] = a[i];
-        }
-        if (n&0x1)
-            dest[i] = a[i];
-        break;
-    default:
-        tot = n*elsz;
-        for(i=0, eli=0; i < n/2; i++, eli+=elsz) {
-            memcpy(&dest[eli], &a[tot - eli - elsz], elsz);
-            memcpy(&dest[tot - eli - elsz], &a[eli], elsz);
-        }
-        if (n&0x1)
-            memcpy(&dest[eli], &a[eli], elsz);
-        break;
-    }
-}
-
-void bswap_buffer(byte_t *data, size_t sz, size_t npts)
-{
-    size_t i, b;
-    byte_t *el;
-    byte_t temp;
-
-    if (sz <= 1)
-        return;
-
-    switch (sz) {
-    case 8:
-        for(i=0; i < npts; i++) {
-            ((u_int64_t*)data)[i] = bswap_64(((u_int64_t*)data)[i]);
-        }
-        break;
-    case 4:
-        for(i=0; i < npts; i++) {
-            ((u_int32_t*)data)[i] = bswap_32(((u_int32_t*)data)[i]);
-        }
-        break;
-    case 2:
-        for(i=0; i < npts; i++) {
-            ((u_int16_t*)data)[i] = bswap_16(((u_int16_t*)data)[i]);
-        }
-        break;
-    default:
-        for(i=0; i < sz * npts; i += sz) {
-            el = data + i;
-            for(b=0; b < sz/2; b++) {
-                temp = el[b];
-                el[b] = el[sz-b-1];
-                el[sz-b-1] = temp;
-            }
-        }
-    }
-}
-
-void bswap(byte_t *s, size_t n)
-{
-    unsigned int i;
-    char temp;
-
-    switch (n) {
-    case 8:
-        *(u_int64_t*)s = bswap_64(*(u_int64_t*)s); break;
-    case 4:
-        *(u_int32_t*)s = bswap_32(*(u_int32_t*)s); break;
-    case 2:
-        *(u_int16_t*)s = bswap_16(*(u_int16_t*)s); break;
-    case 1:
-        break;
-    default:
-        for(i=0; i < n/2; i++) {
-            temp = s[i];
-            s[i] = s[n-i-1];
-            s[n-i-1] = temp;
-        }
-    }
-}
-
-void bswap_to(byte_t *dest, byte_t *src, size_t n)
-{
-    unsigned int i;
-
-    switch (n) {
-    case 8:
-        *(u_int64_t*)dest = bswap_64(*(u_int64_t*)src); break;
-    case 4:
-        *(u_int32_t*)dest = bswap_32(*(u_int32_t*)src); break;
-    case 2:
-        *(u_int16_t*)dest = bswap_16(*(u_int16_t*)src); break;
-    case 1:
-        break;
-    default:
-        for(i=0; i < n; i++) {
-            dest[i] = src[n-i-1];
-        }
-    }
-}
-
-#define ALIGNED_TO_ACTUAL(p) (((char*)p) - ((long*)p)[-1])
-
-static void *aligned_ptr(char *ptr, size_t align_size)
-{
-    char *ptr2, *aligned_ptr;
-
-    ptr2 = ptr + sizeof(long);
-    aligned_ptr = (char*)ALIGN(((uptrint_t)ptr2), align_size);
-
-    ((long*)aligned_ptr)[-1] = (long)(aligned_ptr - ptr);
-
-    return aligned_ptr;
-}
-
-/* align_size has to be a power of two */
-void *malloc_aligned(size_t size, size_t align_size)
-{
-    char *ptr;
-
-    ptr = (char*)malloc(size + align_size-1 + sizeof(long));
-    if (ptr == NULL)
-        return NULL;
-
-    return aligned_ptr(ptr, align_size);
-}
-
-void free_aligned(void *ptr)
-{
-    free(ALIGNED_TO_ACTUAL(ptr));
-}
-
-void *realloc_aligned(void *ptr, size_t size, size_t align_size)
-{
-    char *pnew;
-
-    if (ptr != NULL)
-        ptr = ALIGNED_TO_ACTUAL(ptr);
-    pnew = realloc(ptr, size + align_size-1 + sizeof(long));
-    if (pnew == NULL)
-        return NULL;
-
-    return aligned_ptr(pnew, align_size);
-}