ref: 31348844081c53b1bab49f751187a2baf7c37333
parent: 06c67dbce2f214d58981ce4ed84bc2ec06d57bd8
author: Jean-Marc Valin <[email protected]>
date: Thu Feb 14 06:55:01 EST 2008
Speeded up cwrsi and icwrs by at least an order of magnitude. Now using the recursive definition of ncwrs instead of computing it from scratch every time.
--- a/libcelt/cwrs.c
+++ b/libcelt/cwrs.c
@@ -30,6 +30,50 @@
#include <stdlib.h>
#include "cwrs.h"
+static celt_uint64_t update_ncwrs64(celt_uint64_t *nc, int len, int nc0)
+{
+ int i;
+ celt_uint64_t mem;
+
+ mem = nc[0];
+ nc[0] = nc0;
+ for (i=1;i<len;i++)
+ {
+ celt_uint64_t tmp = nc[i]+nc[i-1]+mem;
+ mem = nc[i];
+ nc[i] = tmp;
+ }
+}
+
+static celt_uint64_t reverse_ncwrs64(celt_uint64_t *nc, int len, int nc0)
+{
+ int i;
+ celt_uint64_t mem;
+
+ mem = nc[0];
+ nc[0] = nc0;
+ for (i=1;i<len;i++)
+ {
+ celt_uint64_t tmp = nc[i]-nc[i-1]-mem;
+ mem = nc[i];
+ nc[i] = tmp;
+ }
+}
+
+/* Optional implementation of ncwrs64 using update_ncwrs64(). It's slightly
+ slower than the standard ncwrs64(), but it could still be useful.
+celt_uint64_t ncwrs64_opt(int _n,int _m)
+{
+ int i;
+ celt_uint64_t ret;
+ celt_uint64_t nc[_n+1];
+ for (i=0;i<_n+1;i++)
+ nc[i] = 1;
+ for (i=0;i<_m;i++)
+ update_ncwrs64(nc, _n+1, 0);
+ return nc[_n];
+}*/
+
/*Returns the numer of ways of choosing _m elements from a set of size _n with
replacement when a sign bit is needed for each unique element.*/
#if 0
@@ -161,12 +205,19 @@
void cwrsi64(int _n,int _m,celt_uint64_t _i,int *_x,int *_s){
int j;
int k;
+ celt_uint64_t nc[_n+1];
+ for (j=0;j<_n+1;j++)
+ nc[j] = 1;
+ for (k=0;k<_m-1;k++)
+ update_ncwrs64(nc, _n+1, 0);
for(k=j=0;k<_m;k++){
celt_uint64_t pn;
celt_uint64_t p;
celt_uint64_t t;
- p=ncwrs64(_n-j,_m-k-1);
- pn=ncwrs64(_n-j-1,_m-k-1);
+ /*p=ncwrs64(_n-j,_m-k-1);
+ pn=ncwrs64(_n-j-1,_m-k-1);*/
+ p=nc[_n-j];
+ pn=nc[_n-j-1];
p+=pn;
if(k>0){
t=p>>1;
@@ -176,7 +227,8 @@
_i-=p;
j++;
p=pn;
- pn=ncwrs64(_n-j-1,_m-k-1);
+ /*pn=ncwrs64(_n-j-1,_m-k-1);*/
+ pn=nc[_n-j-1];
p+=pn;
}
t=p>>1;
@@ -183,6 +235,10 @@
_s[k]=_i>=t;
_x[k]=j;
if(_s[k])_i-=t;
+ if (k<_m-2)
+ reverse_ncwrs64(nc, _n+1, 0);
+ else
+ reverse_ncwrs64(nc, _n+1, 1);
}
}
@@ -194,12 +250,19 @@
celt_uint64_t i;
int j;
int k;
+ celt_uint64_t nc[_n+1];
+ for (j=0;j<_n+1;j++)
+ nc[j] = 1;
+ for (k=0;k<_m-1;k++)
+ update_ncwrs64(nc, _n+1, 0);
i=0;
for(k=j=0;k<_m;k++){
celt_uint64_t pn;
celt_uint64_t p;
- p=ncwrs64(_n-j,_m-k-1);
- pn=ncwrs64(_n-j-1,_m-k-1);
+ /*p=ncwrs64(_n-j,_m-k-1);
+ pn=ncwrs64(_n-j-1,_m-k-1);*/
+ p=nc[_n-j];
+ pn=nc[_n-j-1];
p+=pn;
if(k>0)p>>=1;
while(j<_x[k]){
@@ -206,10 +269,15 @@
i+=p;
j++;
p=pn;
- pn=ncwrs64(_n-j-1,_m-k-1);
+ /*pn=ncwrs64(_n-j-1,_m-k-1);*/
+ pn=nc[_n-j-1];
p+=pn;
}
if((k==0||_x[k]!=_x[k-1])&&_s[k])i+=p>>1;
+ if (k<_m-2)
+ reverse_ncwrs64(nc, _n+1, 0);
+ else
+ reverse_ncwrs64(nc, _n+1, 1);
}
return i;
}