shithub: opus

--- a/libcelt/celt.c

+++ b/libcelt/celt.c

@@ -600,7 +600,7 @@

       curr = tf_res[i];

    if (LM!=0)

-      ec_enc_bits(enc, tf_select, 1);

+      ec_enc_bit_logp(enc, tf_select, 1);

    for (i=start;i<end;i++)

       tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];

    /*printf("%d %d ", isTransient, tf_select); for(i=0;i<end;i++)printf("%d ", tf_res[i]);printf("\n");*/

@@ -617,7 +617,7 @@

       curr = tf_res[i];

    if (LM!=0)

-      tf_select = ec_dec_bits(dec, 1);

+      tf_select = ec_dec_bit_logp(dec, 1);

    else

       tf_select = 0;

    for (i=start;i<end;i++)

@@ -1953,7 +1953,7 @@

    deemphasis(out_syn, pcm, N, C, st->mode->preemph, st->preemph_memD);

    st->loss_count = 0;

    RESTORE_STACK;

-   if (ec_dec_get_error(dec))

+   if (ec_dec_tell(dec,0) > 8*len || ec_dec_get_error(dec))

       return CELT_CORRUPTED_DATA;

    else

       return CELT_OK;

--- a/libcelt/entcode.h

+++ b/libcelt/entcode.h

@@ -34,28 +34,31 @@

 #if !defined(_entcode_H)

 # define _entcode_H (1)

 # include <limits.h>

+# include <stddef.h>

 # include "ecintrin.h"

-typedef celt_int32 ec_int32;

-typedef celt_uint32 ec_uint32;

+typedef celt_int32            ec_int32;

+typedef celt_uint32           ec_uint32;

+typedef size_t                ec_window;

 typedef struct ec_byte_buffer ec_byte_buffer;

-/*The number of bits to code at a time when coding bits directly.*/

-# define EC_UNIT_BITS  (8)

-/*The mask for the given bits.*/

-# define EC_UNIT_MASK  ((1U<<EC_UNIT_BITS)-1)

+/*This must be at least 32 bits.*/

+# define EC_WINDOW_SIZE ((int)sizeof(ec_window)*CHAR_BIT)

+/*The number of bits to use for the range-coded part of unsigned integers.*/

+# define EC_UINT_BITS   (8)

 /*Simple libogg1-style buffer.*/

 struct ec_byte_buffer{

   unsigned char *buf;

-  unsigned char *ptr;

-  unsigned char *end_ptr;

+  ec_uint32      offs;

+  ec_uint32      end_offs;

   ec_uint32      storage;

};

@@ -62,26 +65,25 @@

 /*Encoding functions.*/

 void ec_byte_writeinit_buffer(ec_byte_buffer *_b, unsigned char *_buf, ec_uint32 _size);

 void ec_byte_shrink(ec_byte_buffer *_b, ec_uint32 _size);

-int ec_byte_write1(ec_byte_buffer *_b,unsigned _value);

+int ec_byte_write(ec_byte_buffer *_b,unsigned _value);

 int ec_byte_write_at_end(ec_byte_buffer *_b,unsigned _value);

+int ec_byte_write_done(ec_byte_buffer *_b,int _start_bits_available,

+ unsigned _end_byte,int _end_bits_used);

 /*Decoding functions.*/

 void ec_byte_readinit(ec_byte_buffer *_b,unsigned char *_buf,ec_uint32 _bytes);

-unsigned char ec_byte_look_at_end(ec_byte_buffer *_b);

-int ec_byte_look4(ec_byte_buffer *_b,ec_uint32 *_val);

-void ec_byte_adv1(ec_byte_buffer *_b);

-void ec_byte_adv4(ec_byte_buffer *_b);

-int ec_byte_read1(ec_byte_buffer *_b);

+int ec_byte_read(ec_byte_buffer *_b);

+unsigned char ec_byte_read_from_end(ec_byte_buffer *_b);

 /*Shared functions.*/

 static inline void ec_byte_reset(ec_byte_buffer *_b){

-   _b->ptr=_b->buf;

+  _b->offs=_b->end_offs=0;

 static inline ec_uint32 ec_byte_bytes(ec_byte_buffer *_b){

-   return _b->ptr-_b->buf;

+  return _b->offs;

 static inline unsigned char *ec_byte_get_buffer(ec_byte_buffer *_b){

-   return _b->buf;

+  return _b->buf;

 int ec_ilog(ec_uint32 _v);

--- a/libcelt/entdec.c

+++ b/libcelt/entdec.c

@@ -39,50 +39,39 @@

 #include "arch.h"

 void ec_byte_readinit(ec_byte_buffer *_b,unsigned char *_buf,ec_uint32 _bytes){

-  _b->buf=_b->ptr=_buf;

+  _b->buf=_buf;

+  _b->offs=_b->end_offs=0;

   _b->storage=_bytes;

-  _b->end_ptr=_b->buf+_bytes-1;

-unsigned char ec_byte_look_at_end(ec_byte_buffer *_b){

-  celt_assert2 (_b->end_ptr >= _b->buf, "Trying to read raw bits before the beginning of the stream");

-  return *(_b->end_ptr--);

+int ec_byte_read(ec_byte_buffer *_b){

+  return _b->offs<_b->storage?_b->buf[_b->offs++]:0;

-void ec_byte_adv1(ec_byte_buffer *_b){

-  _b->ptr++;

+unsigned char ec_byte_read_from_end(ec_byte_buffer *_b){

+  return _b->end_offs<_b->storage?_b->buf[_b->storage-++(_b->end_offs)]:0;

-int ec_byte_read1(ec_byte_buffer *_b){

-  ptrdiff_t endbyte;

-  endbyte=_b->ptr-_b->buf;

-  if(endbyte>=_b->storage)return -1;

-  else return *(_b->ptr++);

-}

 ec_uint32 ec_dec_uint(ec_dec *_this,ec_uint32 _ft){

-  unsigned  ft;

-  unsigned  s;

-  int       ftb;

+  unsigned ft;

+  unsigned s;

+  int      ftb;

   /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/

   celt_assert(_ft>1);

   _ft--;

   ftb=EC_ILOG(_ft);

-  if(ftb>EC_UNIT_BITS){

+  if(ftb>EC_UINT_BITS){

     ec_uint32 t;

-    ftb-=EC_UNIT_BITS;

+    ftb-=EC_UINT_BITS;

     ft=(unsigned)(_ft>>ftb)+1;

     s=ec_decode(_this,ft);

     ec_dec_update(_this,s,s+1,ft);

-    t=s;

-    t = t<<ftb|ec_dec_bits(_this,ftb&(1<<ftb)-1);

-    if (t>_ft)

-    {

-      _this->error |= 1;

-      t = _ft;

-    }

-    return t;

-  } else {

+    t=s<<ftb|ec_dec_bits(_this,ftb);

+    if(t<=_ft)return t;

+    _this->error=1;

+    return _ft;

+  }

+  else{

     _ft++;

     s=ec_decode(_this,(unsigned)_ft);

     ec_dec_update(_this,s,s+1,(unsigned)_ft);

@@ -90,7 +79,6 @@

-int ec_dec_get_error(ec_dec *_this)

-{

-  return _this->error || (ec_dec_tell(_this,0) > 8*_this->buf->storage);

+int ec_dec_get_error(ec_dec *_this){

+  return _this->error;

--- a/libcelt/entdec.h

+++ b/libcelt/entdec.h

@@ -31,6 +31,7 @@

 #if !defined(_entdec_H)

 # define _entdec_H (1)

+# include <limits.h>

 # include "entcode.h"

@@ -52,12 +53,13 @@

    ec_uint32       dif;

    /*Normalization factor.*/

    ec_uint32       nrm;

-   /*Byte that will be written at the end*/

-   unsigned char   end_byte;

-   /*Number of valid bits in end_byte*/

-   int             end_bits_left;

-   int             nb_end_bits;

-   /*Nonzero if an error occurred*/

+   /*Bits that were written at the end.*/

+   ec_window       end_window;

+   /*Number of valid bits in end_window.*/

+   int             nend_bits;

+   /*The total number of whole bits read.*/

+   int             nbits_total;

+   /*Nonzero if an error occurred.*/

    int             error;

};

@@ -101,7 +103,7 @@

   The bits must have been encoded with ec_enc_bits().

   No call to ec_dec_update() is necessary after this call.

   _ftb: The number of bits to extract.

-        This must be at least one, and no more than 32.

+        This must be between 0 and 25, inclusive.

   Return: The decoded bits.*/

 ec_uint32 ec_dec_bits(ec_dec *_this,unsigned _ftb);

 /*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.

@@ -137,7 +139,7 @@

            rounding error is in the positive direction).*/

 ec_uint32 ec_dec_tell(ec_dec *_this,int _b);

-/*Returns a nonzero value if any error has been detected during decoding*/

+/*Return: A nonzero value if any error has been detected during decoding.*/

 int ec_dec_get_error(ec_dec *_this);

 #endif

--- a/libcelt/entenc.c

+++ b/libcelt/entenc.c

@@ -29,53 +29,60 @@

    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

-#ifdef HAVE_CONFIG_H

-#include "config.h"

+#if defined(HAVE_CONFIG_H)

+# include "config.h"

 #endif

 #include "os_support.h"

 #include "entenc.h"

 #include "arch.h"

-void ec_byte_writeinit_buffer(ec_byte_buffer *_b, unsigned char *_buf, ec_uint32 _size){

-  _b->ptr=_b->buf=_buf;

-  _b->end_ptr=_b->buf+_size-1;

+void ec_byte_writeinit_buffer(ec_byte_buffer *_b,

+ unsigned char *_buf,ec_uint32 _size){

+  _b->buf=_buf;

+  _b->end_offs=_b->offs=0;

   _b->storage=_size;

-void ec_byte_shrink(ec_byte_buffer *_b, ec_uint32 _size){

-   int i;

-   int d;

-   int N;

-   d = _b->storage-_size;

-   N = _b->storage-(_b->end_ptr-_b->buf)-1;

-   /* Copy "raw bytes" */

-   _b->end_ptr=_b->buf+_size-1-N;

-   for (i=0;i<N;i++)

-      _b->end_ptr[i+1] = _b->end_ptr[i+1+d];

-   _b->storage=_size;

+void ec_byte_shrink(ec_byte_buffer *_b,ec_uint32 _size){

+  celt_assert(_b->offs+_b->end_offs<=_size);

+  CELT_MOVE(_b->buf+_size-_b->end_offs,

+   _b->buf+_b->storage-_b->end_offs,_b->end_offs);

+  _b->storage=_size;

-int ec_byte_write1(ec_byte_buffer *_b,unsigned _value){

-  ptrdiff_t endbyte;

-  endbyte=_b->ptr-_b->buf;

-  if(endbyte>=_b->storage){

-    return 1;

-  } else {

-    *(_b->ptr++)=(unsigned char)_value;

-    return 0;

-  }

+int ec_byte_write(ec_byte_buffer *_b,unsigned _value){

+  if(_b->offs+_b->end_offs>=_b->storage)return -1;

+  _b->buf[_b->offs++]=(unsigned char)_value;

+  return 0;

 int ec_byte_write_at_end(ec_byte_buffer *_b,unsigned _value){

-  if (_b->end_ptr < _b->ptr)

-  {

-    return 1;

-  } else {

-    *(_b->end_ptr--)=(unsigned char)_value;

-    return 0;

+  if(_b->offs+_b->end_offs>=_b->storage)return -1;

+  _b->buf[_b->storage-++(_b->end_offs)]=(unsigned char)_value;

+  return 0;

+}

+int ec_byte_write_done(ec_byte_buffer *_b,int _start_bits_available,

+ unsigned _end_byte,int _end_bits_used){

+  int ret;

+  CELT_MEMSET(_b->buf+_b->offs,0,_b->storage-_b->offs-_b->end_offs);

+  ret=0;

+  if(_end_bits_used>0){

+    if(_b->offs+_b->end_offs>=_b->storage){

+      /*If there's no range coder data at all, give up.*/

+      if(_b->end_offs>=_b->storage)return -1;

+      /*If we've busted, don't add too many extra bits to the last byte; it

+         would corrupt the range coder data, and that's more important.*/

+      if(_start_bits_available<_end_bits_used){

+        _end_bits_used=_start_bits_available;

+        _end_byte&=(1<_start_bits_available)-1;

+        ret=-1;

+      }

+    }

+    _b->buf[_b->storage-_b->end_offs-1]|=_end_byte;

+  return ret;

 void ec_enc_uint(ec_enc *_this,ec_uint32 _fl,ec_uint32 _ft){

@@ -86,18 +93,16 @@

   celt_assert(_ft>1);

   _ft--;

   ftb=EC_ILOG(_ft);

-  if(ftb>EC_UNIT_BITS){

-    ftb-=EC_UNIT_BITS;

+  if(ftb>EC_UINT_BITS){

+    ftb-=EC_UINT_BITS;

     ft=(_ft>>ftb)+1;

     fl=(unsigned)(_fl>>ftb);

     ec_encode(_this,fl,fl+1,ft);

-    ec_enc_bits(_this,_fl&(1<<ftb)-1,ftb);

-  } else {

-    ec_encode(_this,_fl,_fl+1,_ft+1);

+    ec_enc_bits(_this,_fl&((ec_uint32)1<<ftb)-1,ftb);

+  else ec_encode(_this,_fl,_fl+1,_ft+1);

-int ec_enc_get_error(ec_enc *_this)

-{

+int ec_enc_get_error(ec_enc *_this){

   return _this->error;

--- a/libcelt/entenc.h

+++ b/libcelt/entenc.h

@@ -52,12 +52,13 @@

    ec_uint32       rng;

    /*The low end of the current range (inclusive).*/

    ec_uint32       low;

-   /*Byte that will be written at the end*/

-   unsigned char   end_byte;

-   /*Number of valid bits in end_byte*/

-   int             end_bits_left;

-   int             nb_end_bits;

-   /*Nonzero if an error occurred*/

+   /*Bits that will be written at the end.*/

+   ec_window       end_window;

+   /*Number of valid bits in end_window.*/

+   int             nend_bits;

+   /*The total number of whole bits written.*/

+   int             nbits_total;

+   /*Nonzero if an error occurred.*/

    int             error;

};

@@ -84,7 +85,7 @@

 /*Encodes a sequence of raw bits in the stream.

   _fl:  The bits to encode.

   _ftb: The number of bits to encode.

-        This must be at least one, and no more than 32.*/

+        This must be between 0 and 25, inclusive.*/

 void ec_enc_bits(ec_enc *_this,ec_uint32 _fl,unsigned _ftb);

 /*Encodes a raw unsigned integer in the stream.

--- a/libcelt/quant_bands.c

+++ b/libcelt/quant_bands.c

@@ -207,7 +207,6 @@

          bits_left = budget-(int)ec_enc_tell(enc, 0)-3*C*(end-i);

          if (i!=start && bits_left < 30)

-            qi = IMAX(-1,qi);

             if (bits_left < 24)

                qi = IMIN(1, qi);

             if (bits_left < 16)

@@ -274,6 +273,8 @@

       ec_enc enc_intra_state;

       ec_byte_buffer buf_intra_state;

       int tell_intra;

+      ec_uint32 nstart_bytes;

+      ec_uint32 nintra_bytes;

       VARDECL(unsigned char, intra_bits);

       tell_intra = ec_enc_tell(enc, 3);

@@ -281,9 +282,13 @@

       enc_intra_state = *enc;

       buf_intra_state = *(enc->buf);

-      ALLOC(intra_bits, buf_intra_state.ptr-buf_start_state.ptr, unsigned char);

+      nstart_bytes = ec_byte_bytes(&buf_start_state);

+      nintra_bytes = ec_byte_bytes(&buf_intra_state);

+      ALLOC(intra_bits, nintra_bytes-nstart_bytes, unsigned char);

       /* Copy bits from intra bit-stream */

-      CELT_COPY(intra_bits, buf_start_state.ptr, buf_intra_state.ptr-buf_start_state.ptr);

+      CELT_COPY(intra_bits,

+            ec_byte_get_buffer(&buf_intra_state) + nstart_bytes,

+            nintra_bytes - nstart_bytes);

       *enc = enc_start_state;

       *(enc->buf) = buf_start_state;

@@ -295,8 +300,9 @@

          *enc = enc_intra_state;

          *(enc->buf) = buf_intra_state;

-         /* Copy bits from to bit-stream */

-         CELT_COPY(buf_start_state.ptr, intra_bits, buf_intra_state.ptr-buf_start_state.ptr);

+         /* Copy intra bits to bit-stream */

+         CELT_COPY(ec_byte_get_buffer(&buf_intra_state) + nstart_bytes,

+               intra_bits, nintra_bytes - nstart_bytes);

          CELT_COPY(oldEBands, oldEBands_intra, C*end);

          CELT_COPY(error, error_intra, C*end);

--- a/libcelt/rangedec.c

+++ b/libcelt/rangedec.c

@@ -97,22 +97,6 @@

}*/

-/*Gets the next byte of input.

-  After all the bytes in the current packet have been consumed, and the extra

-   end code returned if needed, this function will continue to return zero each

-   time it is called.

-  Return: The next byte of input.*/

-static int ec_dec_in(ec_dec *_this){

-  int ret;

-  ret=ec_byte_read1(_this->buf);

-  if(ret<0){

-    ret=0;

-    /*Needed to keep oc_dec_tell() operating correctly.*/

-    ec_byte_adv1(_this->buf);

-  }

-  return ret;

-}

 /*Normalizes the contents of dif and rng so that rng lies entirely in the

    high-order symbol.*/

 static inline void ec_dec_normalize(ec_dec *_this){

@@ -119,13 +103,14 @@

   /*If the range is too small, rescale it and input some bits.*/

   while(_this->rng<=EC_CODE_BOT){

     int sym;

+    _this->nbits_total+=EC_SYM_BITS;

     _this->rng<<=EC_SYM_BITS;

     /*Use up the remaining bits from our last symbol.*/

-    sym=_this->rem<<EC_CODE_EXTRA;

+    sym=_this->rem;

     /*Read the next value from the input.*/

-    _this->rem=ec_dec_in(_this);

+    _this->rem=ec_byte_read(_this->buf);

     /*Take the rest of the bits we need from this new symbol.*/

-    sym|=_this->rem>>EC_SYM_BITS-EC_CODE_EXTRA;

+    sym=(sym<<EC_SYM_BITS|_this->rem)>>EC_SYM_BITS-EC_CODE_EXTRA;

     /*And subtract them from dif, capped to be less than EC_CODE_TOP.*/

     _this->dif=(_this->dif<<EC_SYM_BITS)+(EC_SYM_MAX&~sym)&EC_CODE_TOP-1;

@@ -133,14 +118,17 @@

 void ec_dec_init(ec_dec *_this,ec_byte_buffer *_buf){

   _this->buf=_buf;

-  _this->rem=ec_dec_in(_this);

+  _this->rem=ec_byte_read(_buf);

   _this->rng=1U<<EC_CODE_EXTRA;

   _this->dif=_this->rng-1-(_this->rem>>EC_SYM_BITS-EC_CODE_EXTRA);

   /*Normalize the interval.*/

   ec_dec_normalize(_this);

-  _this->end_byte=0; /* Required for platforms that have chars > 8 bits */

-  _this->end_bits_left=0;

-  _this->nb_end_bits=0;

+  _this->end_window=0;

+  _this->nend_bits=0;

+  /*This is the offset from which ec_enc_tell() will subtract partial bits.

+    This must be after the initial ec_dec_normalize(), or you will have to

+     compensate for the bits that are read there.*/

+  _this->nbits_total=EC_CODE_BITS+1;

   _this->error=0;

@@ -159,28 +147,11 @@

    return (1<<_bits)-EC_MINI(s+1,1<<_bits);

-unsigned ec_dec_bits(ec_dec *_this,unsigned bits){

-  unsigned value=0;

-  int count=0;

-  _this->nb_end_bits += bits;

-  while (bits>=_this->end_bits_left)

-  {

-    value |= _this->end_byte>>(8-_this->end_bits_left)<<count;

-    count += _this->end_bits_left;

-    bits -= _this->end_bits_left;

-    _this->end_byte=ec_byte_look_at_end(_this->buf);

-    _this->end_bits_left = 8;

-  }

-  value |= ((_this->end_byte>>(8-_this->end_bits_left))&((1<<bits)-1))<<count;

-  _this->end_bits_left -= bits;

-  return value;

-}

 void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft){

   ec_uint32 s;

-  s=IMUL32(_this->nrm,(_ft-_fh));

+  s=IMUL32(_this->nrm,_ft-_fh);

   _this->dif-=s;

-  _this->rng=_fl>0?IMUL32(_this->nrm,(_fh-_fl)):_this->rng-s;

+  _this->rng=_fl>0?IMUL32(_this->nrm,_fh-_fl):_this->rng-s;

   ec_dec_normalize(_this);

@@ -237,17 +208,45 @@

   return val-1;

+ec_uint32 ec_dec_bits(ec_dec *_this,unsigned _bits){

+  ec_window window;

+  int       available;

+  int       ret;

+  window=_this->end_window;

+  available=_this->nend_bits;

+  if(available<_bits){

+    do{

+      window|=(ec_window)ec_byte_read_from_end(_this->buf)<<available;

+      available+=EC_SYM_BITS;

+    }

+    while(available<=EC_WINDOW_SIZE-EC_SYM_BITS);

+  }

+  ret=(ec_uint32)window&((ec_uint32)1<<_bits)-1;

+  window>>=_bits;

+  available-=_bits;

+  _this->end_window=window;

+  _this->nend_bits=available;

+  _this->nbits_total+=_bits;

+  return ret;

+}

 ec_uint32 ec_dec_tell(ec_dec *_this,int _b){

+  ec_uint32 nbits;

   ec_uint32 r;

   int       l;

-  ec_uint32      nbits;

-  nbits=(ec_byte_bytes(_this->buf)-(EC_CODE_BITS+EC_SYM_BITS-1)/EC_SYM_BITS)*

-   EC_SYM_BITS;

   /*To handle the non-integral number of bits still left in the decoder state,

-     we compute the number of bits of low that must be encoded to ensure that

-     the value is inside the range for any possible subsequent bits.*/

-  nbits+=EC_CODE_BITS+1+_this->nb_end_bits;

-  nbits<<=_b;

+     we compute the worst-case number of bits of low that must be encoded to

+     ensure that the value is inside the range for any possible subsequent

+     bits.

+    The computation here is independent of low itself (the decoder does not

+     even track that value), even though the real number of bits used after

+     ec_enc_done() may be 1 smaller if rng is a power of two and the

+     corresponding trailing bits of low are all zeros.

+    If we did try to track that special case, then coding a value with a

+     probability of 1/(1<<n) might sometimes appear to use more than n bits.

+    This may help explain the surprising result that a newly initialized

+     decoder claims to have used 1 bit.*/

+  nbits=_this->nbits_total<<_b;

   l=EC_ILOG(_this->rng);

   r=_this->rng>>l-16;

   while(_b-->0){

--- a/libcelt/rangeenc.c

+++ b/libcelt/rangeenc.c

@@ -83,11 +83,11 @@

     carry=_c>>EC_SYM_BITS;

     /*Don't output a byte on the first write.

       This compare should be taken care of by branch-prediction thereafter.*/

-    if(_this->rem>=0)_this->error|=ec_byte_write1(_this->buf,_this->rem+carry);

+    if(_this->rem>=0)_this->error|=ec_byte_write(_this->buf,_this->rem+carry);

     if(_this->ext>0){

       unsigned sym;

       sym=EC_SYM_MAX+carry&EC_SYM_MAX;

-      do _this->error|=ec_byte_write1(_this->buf,sym);

+      do _this->error|=ec_byte_write(_this->buf,sym);

       while(--(_this->ext)>0);

     _this->rem=_c&EC_SYM_MAX;

@@ -102,6 +102,7 @@

     /*Move the next-to-high-order symbol into the high-order position.*/

     _this->low=_this->low<<EC_SYM_BITS&EC_CODE_TOP-1;

     _this->rng<<=EC_SYM_BITS;

+    _this->nbits_total+=EC_SYM_BITS;

@@ -111,9 +112,10 @@

   _this->ext=0;

   _this->low=0;

   _this->rng=EC_CODE_TOP;

-  _this->end_byte=0;

-  _this->end_bits_left=8;

-  _this->nb_end_bits=0;

+  _this->end_window=0;

+  _this->nend_bits=0;

+  /*This is the offset from which ec_enc_tell() will subtract partial bits.*/

+  _this->nbits_total=EC_CODE_BITS+1;

   _this->error=0;

@@ -129,69 +131,82 @@

 void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits){

-   ec_uint32 r;

-   r=_this->rng>>_bits;

-   if(_fl>0){

-      _this->low+=_this->rng-IMUL32(r,((1<<_bits)-_fl));

-      _this->rng=IMUL32(r,(_fh-_fl));

-   }

-   else _this->rng-=IMUL32(r,((1<<_bits)-_fh));

-   ec_enc_normalize(_this);

+  ec_uint32 r;

+  r=_this->rng>>_bits;

+  if(_fl>0){

+    _this->low+=_this->rng-IMUL32(r,((1<<_bits)-_fl));

+    _this->rng=IMUL32(r,(_fh-_fl));

+  }

+  else _this->rng-=IMUL32(r,((1<<_bits)-_fh));

+  ec_enc_normalize(_this);

 /*The probability of having a "one" is given in 1/65536.*/

 void ec_enc_bit_prob(ec_enc *_this,int _val,unsigned _prob){

-   ec_uint32 r;

-   ec_uint32 s;

-   ec_uint32 l;

-   r=_this->rng;

-   l=_this->low;

-   s=(r>>16)*_prob;

-   r-=s;

-   if(_val)_this->low=l+r;

-   _this->rng=_val?s:r;

-   ec_enc_normalize(_this);

+  ec_uint32 r;

+  ec_uint32 s;

+  ec_uint32 l;

+  r=_this->rng;

+  l=_this->low;

+  s=(r>>16)*_prob;

+  r-=s;

+  if(_val)_this->low=l+r;

+  _this->rng=_val?s:r;

+  ec_enc_normalize(_this);

 /*The probability of having a "one" is 1/(1<<_logp).*/

 void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp){

-   ec_uint32 r;

-   ec_uint32 s;

-   ec_uint32 l;

-   r=_this->rng;

-   l=_this->low;

-   s=r>>_logp;

-   r-=s;

-   if(_val)_this->low=l+r;

-   _this->rng=_val?s:r;

-   ec_enc_normalize(_this);

+  ec_uint32 r;

+  ec_uint32 s;

+  ec_uint32 l;

+  r=_this->rng;

+  l=_this->low;

+  s=r>>_logp;

+  r-=s;

+  if(_val)_this->low=l+r;

+  _this->rng=_val?s:r;

+  ec_enc_normalize(_this);

-void ec_enc_bits(ec_enc *_this,unsigned _fl,unsigned bits){

-  _this->nb_end_bits += bits;

-  while (bits >= _this->end_bits_left)

-  {

-    _this->end_byte |= (_fl<<(8-_this->end_bits_left)) & 0xff;

-    _fl >>= _this->end_bits_left;

-    _this->error|=ec_byte_write_at_end(_this->buf, _this->end_byte);

-    _this->end_byte = 0;

-    bits -= _this->end_bits_left;

-    _this->end_bits_left = 8;

+void ec_enc_bits(ec_enc *_this,ec_uint32 _fl,unsigned _bits){

+  ec_window window;

+  int       used;

+  window=_this->end_window;

+  used=_this->nend_bits;

+  if(used+_bits>EC_WINDOW_SIZE){

+    do{

+      _this->error|=

+       ec_byte_write_at_end(_this->buf,(unsigned)window&EC_SYM_MAX);

+      window>>=EC_SYM_BITS;

+      used-=EC_SYM_BITS;

+    }

+    while(used>=EC_SYM_BITS);

-  _this->end_byte |= (_fl<<(8-_this->end_bits_left)) & 0xff;

-  _this->end_bits_left -= bits;

+  window|=(ec_window)_fl<<used;

+  used+=_bits;

+  _this->end_window=window;

+  _this->nend_bits=used;

+  _this->nbits_total+=_bits;

 ec_uint32 ec_enc_tell(ec_enc *_this,int _b){

+  ec_uint32 nbits;

   ec_uint32 r;

   int       l;

-  ec_uint32      nbits;

-  nbits=(ec_byte_bytes(_this->buf)+(_this->rem>=0)+_this->ext)*EC_SYM_BITS;

-  /*To handle the non-integral number of bits still left in the encoder state,

-     we compute the number of bits of low that must be encoded to ensure that

-     the value is inside the range for any possible subsequent bits.*/

-  nbits+=EC_CODE_BITS+1+_this->nb_end_bits;

-  nbits<<=_b;

+  /*To handle the non-integral number of bits still left in the decoder state,

+     we compute the worst-case number of bits of low that must be encoded to

+     ensure that the value is inside the range for any possible subsequent

+     bits.

+    The computation here is independent of low itself (the decoder does not

+     even track that value), even though the real number of bits used after

+     ec_enc_done() may be 1 smaller if rng is a power of two and the

+     corresponding trailing bits of low are all zeros.

+    If we did try to track that special case, then coding a value with a

+     probability of 1/(1<<n) might sometimes appear to use more than n bits.

+    This may help explain the surprising result that a newly initialized

+     encoder claims to have used 1 bit.*/

+  nbits=_this->nbits_total<<_b;

   l=EC_ILOG(_this->rng);

   r=_this->rng>>l-16;

   while(_b-->0){

@@ -205,6 +220,8 @@

 void ec_enc_done(ec_enc *_this){

+  ec_window window;

+  int       used;

   ec_uint32 msk;

   ec_uint32 end;

   int       l;

@@ -224,15 +241,16 @@

     l-=EC_SYM_BITS;

   /*If we have a buffered byte flush it into the output buffer.*/

-  if(_this->rem>=0||_this->ext>0){

-    ec_enc_carry_out(_this,0);

-    _this->rem=-1;

+  if(_this->rem>=0||_this->ext>0)ec_enc_carry_out(_this,0);

+  /*If we have buffered extra bits, flush them as well.*/

+  window=_this->end_window;

+  used=_this->nend_bits;

+  while(used>=EC_SYM_BITS){

+    _this->error|=

+     ec_byte_write_at_end(_this->buf,(unsigned)window&EC_SYM_MAX);

+    window>>=EC_SYM_BITS;

+    used-=EC_SYM_BITS;

-  {

-    unsigned char *ptr = _this->buf->ptr;

-    while (ptr<= _this->buf->end_ptr)

-      *ptr++ = 0;

-    if (_this->end_bits_left != 8)

-      *_this->buf->end_ptr |= _this->end_byte;

-  }

+  /*Clear any excess space and add any remaining extra bits to the last byte.*/

+  if(!_this->error)_this->error=ec_byte_write_done(_this->buf,-l,window,used);

--- a/tests/ectest.c

+++ b/tests/ectest.c

@@ -77,7 +77,7 @@

   fprintf(stderr,

    "Encoded %0.2lf bits of entropy to %0.2lf bits (%0.3lf%% wasted).\n",

    entropy,ldexp(nbits,-4),100*(nbits-ldexp(entropy,4))/nbits);

-  fprintf(stderr,"Packed to %li bytes.\n",(long)(buf.ptr-buf.buf));

+  fprintf(stderr,"Packed to %li bytes.\n",(long)ec_byte_bytes(&buf));

   ec_byte_readinit(&buf,ptr,DATA_SIZE);

   ec_dec_init(&dec,&buf);

   for(ft=2;ft<1024;ft++){

@@ -109,6 +109,7 @@

   fprintf(stderr,"Testing random streams... Random seed: %u (%.4X)\n", seed, rand() % 65536);

   for(i=0;i<409600;i++){

     unsigned *data;

+    unsigned *tell;

     int       j;

     int tell_bits;

     int zeros;

@@ -115,9 +116,11 @@

     ft=rand()/((RAND_MAX>>(rand()%11))+1)+10;

     sz=rand()/((RAND_MAX>>(rand()%9))+1);

     data=(unsigned *)malloc(sz*sizeof(*data));

+    tell=(unsigned *)malloc((sz+1)*sizeof(*tell));

     ec_byte_writeinit_buffer(&buf, ptr, DATA_SIZE2);

     ec_enc_init(&enc,&buf);

     zeros = rand()%13==0;

+    tell[0]=ec_enc_tell(&enc, 3);

     for(j=0;j<sz;j++){

       if (zeros)

         data[j]=0;

@@ -124,6 +127,7 @@

       else

         data[j]=rand()%ft;

       ec_enc_uint(&enc,data[j],ft);

+      tell[j+1]=ec_enc_tell(&enc, 3);

     if (rand()%2==0)

       while(ec_enc_tell(&enc, 0)%8 != 0)

@@ -130,6 +134,11 @@

         ec_enc_uint(&enc, rand()%2, 2);

     tell_bits = ec_enc_tell(&enc, 0);

     ec_enc_done(&enc);

+    if(tell_bits!=ec_enc_tell(&enc,0)){

+      fprintf(stderr,"tell() changed after ec_enc_done(): %i instead of %i (Random seed: %u)\n",

+       ec_enc_tell(&enc,0),tell_bits,seed);

+      ret=-1;

+    }

     if ((tell_bits+7)/8 < ec_byte_bytes(&buf))

       fprintf (stderr, "tell() lied, there's %i bytes instead of %d (Random seed: %u)\n",

@@ -139,6 +148,11 @@

     tell_bits -= 8*ec_byte_bytes(&buf);

     ec_byte_readinit(&buf,ptr,DATA_SIZE2);

     ec_dec_init(&dec,&buf);

+    if(ec_dec_tell(&dec,3)!=tell[0]){

+      fprintf(stderr,

+       "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",

+       0,ec_dec_tell(&dec,3),tell[0],seed);

+    }

     for(j=0;j<sz;j++){

       sym=ec_dec_uint(&dec,ft);

       if(sym!=data[j]){

@@ -146,6 +160,11 @@

          "Decoded %i instead of %i with ft of %i at position %i of %i (Random seed: %u).\n",

          sym,data[j],ft,j,sz,seed);

         ret=-1;

+      }

+      if(ec_dec_tell(&dec,3)!=tell[j+1]){

+        fprintf(stderr,

+         "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",

+         j+1,ec_dec_tell(&dec,3),tell[j+1],seed);

     free(data);