Commit 254da44b authored by Reimar Döffinger

flacenc: calculate lower sum levels in-place.

Should improve cache usage and reduce stack usage.
Also reduces the number of copies in case many levels
have the same number of bits.
Signed-off-by: Reimar Döffinger <Reimar.Doeffinger@gmx.de>
parent abbcc68e
...@@ -609,10 +609,10 @@ static uint64_t calc_optimal_rice_params(RiceContext *rc, int porder, ...@@ -609,10 +609,10 @@ static uint64_t calc_optimal_rice_params(RiceContext *rc, int porder,
} }
static void calc_sums(int pmin, int pmax, uint32_t *data, int n, int pred_order, static void calc_sum_top(int pmax, uint32_t *data, int n, int pred_order,
uint64_t sums[][MAX_PARTITIONS]) uint64_t sums[MAX_PARTITIONS])
{ {
int i, j; int i;
int parts; int parts;
uint32_t *res, *res_end; uint32_t *res, *res_end;
...@@ -624,17 +624,18 @@ static void calc_sums(int pmin, int pmax, uint32_t *data, int n, int pred_order, ...@@ -624,17 +624,18 @@ static void calc_sums(int pmin, int pmax, uint32_t *data, int n, int pred_order,
uint64_t sum = 0; uint64_t sum = 0;
while (res < res_end) while (res < res_end)
sum += *(res++); sum += *(res++);
sums[pmax][i] = sum; sums[i] = sum;
res_end += n >> pmax; res_end += n >> pmax;
} }
/* sums for lower levels */
for (i = pmax - 1; i >= pmin; i--) {
parts = (1 << i);
for (j = 0; j < parts; j++)
sums[i][j] = sums[i+1][2*j] + sums[i+1][2*j+1];
}
} }
static void calc_sum_next(int level, uint64_t sums[MAX_PARTITIONS])
{
int i;
int parts = (1 << level);
for (i = 0; i < parts; i++)
sums[i] = sums[2*i] + sums[2*i+1];
}
static uint64_t calc_rice_params(RiceContext *rc, int pmin, int pmax, static uint64_t calc_rice_params(RiceContext *rc, int pmin, int pmax,
int32_t *data, int n, int pred_order) int32_t *data, int n, int pred_order)
...@@ -644,7 +645,7 @@ static uint64_t calc_rice_params(RiceContext *rc, int pmin, int pmax, ...@@ -644,7 +645,7 @@ static uint64_t calc_rice_params(RiceContext *rc, int pmin, int pmax,
int opt_porder; int opt_porder;
RiceContext tmp_rc; RiceContext tmp_rc;
uint32_t *udata; uint32_t *udata;
uint64_t sums[MAX_PARTITION_ORDER+1][MAX_PARTITIONS]; uint64_t sums[MAX_PARTITIONS];
av_assert1(pmin >= 0 && pmin <= MAX_PARTITION_ORDER); av_assert1(pmin >= 0 && pmin <= MAX_PARTITION_ORDER);
av_assert1(pmax >= 0 && pmax <= MAX_PARTITION_ORDER); av_assert1(pmax >= 0 && pmax <= MAX_PARTITION_ORDER);
...@@ -656,16 +657,19 @@ static uint64_t calc_rice_params(RiceContext *rc, int pmin, int pmax, ...@@ -656,16 +657,19 @@ static uint64_t calc_rice_params(RiceContext *rc, int pmin, int pmax,
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
udata[i] = (2*data[i]) ^ (data[i]>>31); udata[i] = (2*data[i]) ^ (data[i]>>31);
calc_sums(pmin, pmax, udata, n, pred_order, sums); calc_sum_top(pmax, udata, n, pred_order, sums);
opt_porder = pmin; opt_porder = pmin;
bits[pmin] = UINT32_MAX; bits[pmin] = UINT32_MAX;
for (i = pmin; i <= pmax; i++) { for (i = pmax; ; ) {
bits[i] = calc_optimal_rice_params(&tmp_rc, i, sums[i], n, pred_order); bits[i] = calc_optimal_rice_params(&tmp_rc, i, sums, n, pred_order);
if (bits[i] <= bits[opt_porder]) { if (bits[i] < bits[opt_porder]) {
opt_porder = i; opt_porder = i;
*rc = tmp_rc; *rc = tmp_rc;
} }
if (i == pmin)
break;
calc_sum_next(--i, sums);
} }
av_freep(&udata); av_freep(&udata);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment