ref: 9d8decc16274b4c2ed1b7c0636b7666a1c57f3d0
parent: e665d0bdd919e143997fb73b3cadbc086889bb2f
parent: ad0ac045d5a5d53a47520018d12a6951b0e8416f
author: James Zern <[email protected]>
date: Thu Sep 17 18:00:23 EDT 2015
Merge changes from topic 'tile-thread-cleanup'. * changes: vp9/decode_tiles_mt: move frame count accum from loop; VP9Decoder: remove duplicate tile_worker_info; vp9/decode_tiles_mt: move some inits from inner loop; vp9_accumulate_frame_counts: pass counts directly
--- a/vp9/common/vp9_thread_common.c
+++ b/vp9/common/vp9_thread_common.c
@@ -318,21 +318,21 @@
}
// Accumulate frame counts.
-void vp9_accumulate_frame_counts(VP9_COMMON *cm, FRAME_COUNTS *counts,
- int is_dec) {
+void vp9_accumulate_frame_counts(FRAME_COUNTS *accum,
+ const FRAME_COUNTS *counts, int is_dec) {
int i, j, k, l, m;
for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
for (j = 0; j < INTRA_MODES; j++)
- cm->counts.y_mode[i][j] += counts->y_mode[i][j];
+ accum->y_mode[i][j] += counts->y_mode[i][j];
for (i = 0; i < INTRA_MODES; i++)
for (j = 0; j < INTRA_MODES; j++)
- cm->counts.uv_mode[i][j] += counts->uv_mode[i][j];
+ accum->uv_mode[i][j] += counts->uv_mode[i][j];
for (i = 0; i < PARTITION_CONTEXTS; i++)
for (j = 0; j < PARTITION_TYPES; j++)
- cm->counts.partition[i][j] += counts->partition[i][j];
+ accum->partition[i][j] += counts->partition[i][j];
if (is_dec) {
int n;
@@ -341,10 +341,10 @@
for (k = 0; k < REF_TYPES; k++)
for (l = 0; l < COEF_BANDS; l++)
for (m = 0; m < COEFF_CONTEXTS; m++) {
- cm->counts.eob_branch[i][j][k][l][m] +=
+ accum->eob_branch[i][j][k][l][m] +=
counts->eob_branch[i][j][k][l][m];
for (n = 0; n < UNCONSTRAINED_NODES + 1; n++)
- cm->counts.coef[i][j][k][l][m][n] +=
+ accum->coef[i][j][k][l][m][n] +=
counts->coef[i][j][k][l][m][n];
}
} else {
@@ -353,64 +353,64 @@
for (k = 0; k < REF_TYPES; k++)
for (l = 0; l < COEF_BANDS; l++)
for (m = 0; m < COEFF_CONTEXTS; m++)
- cm->counts.eob_branch[i][j][k][l][m] +=
+ accum->eob_branch[i][j][k][l][m] +=
counts->eob_branch[i][j][k][l][m];
- // In the encoder, cm->counts.coef is only updated at frame
+ // In the encoder, coef is only updated at frame
// level, so not need to accumulate it here.
// for (n = 0; n < UNCONSTRAINED_NODES + 1; n++)
- // cm->counts.coef[i][j][k][l][m][n] +=
+ // accum->coef[i][j][k][l][m][n] +=
// counts->coef[i][j][k][l][m][n];
}
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
for (j = 0; j < SWITCHABLE_FILTERS; j++)
- cm->counts.switchable_interp[i][j] += counts->switchable_interp[i][j];
+ accum->switchable_interp[i][j] += counts->switchable_interp[i][j];
for (i = 0; i < INTER_MODE_CONTEXTS; i++)
for (j = 0; j < INTER_MODES; j++)
- cm->counts.inter_mode[i][j] += counts->inter_mode[i][j];
+ accum->inter_mode[i][j] += counts->inter_mode[i][j];
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
for (j = 0; j < 2; j++)
- cm->counts.intra_inter[i][j] += counts->intra_inter[i][j];
+ accum->intra_inter[i][j] += counts->intra_inter[i][j];
for (i = 0; i < COMP_INTER_CONTEXTS; i++)
for (j = 0; j < 2; j++)
- cm->counts.comp_inter[i][j] += counts->comp_inter[i][j];
+ accum->comp_inter[i][j] += counts->comp_inter[i][j];
for (i = 0; i < REF_CONTEXTS; i++)
for (j = 0; j < 2; j++)
for (k = 0; k < 2; k++)
- cm->counts.single_ref[i][j][k] += counts->single_ref[i][j][k];
+ accum->single_ref[i][j][k] += counts->single_ref[i][j][k];
for (i = 0; i < REF_CONTEXTS; i++)
for (j = 0; j < 2; j++)
- cm->counts.comp_ref[i][j] += counts->comp_ref[i][j];
+ accum->comp_ref[i][j] += counts->comp_ref[i][j];
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
for (j = 0; j < TX_SIZES; j++)
- cm->counts.tx.p32x32[i][j] += counts->tx.p32x32[i][j];
+ accum->tx.p32x32[i][j] += counts->tx.p32x32[i][j];
for (j = 0; j < TX_SIZES - 1; j++)
- cm->counts.tx.p16x16[i][j] += counts->tx.p16x16[i][j];
+ accum->tx.p16x16[i][j] += counts->tx.p16x16[i][j];
for (j = 0; j < TX_SIZES - 2; j++)
- cm->counts.tx.p8x8[i][j] += counts->tx.p8x8[i][j];
+ accum->tx.p8x8[i][j] += counts->tx.p8x8[i][j];
}
for (i = 0; i < TX_SIZES; i++)
- cm->counts.tx.tx_totals[i] += counts->tx.tx_totals[i];
+ accum->tx.tx_totals[i] += counts->tx.tx_totals[i];
for (i = 0; i < SKIP_CONTEXTS; i++)
for (j = 0; j < 2; j++)
- cm->counts.skip[i][j] += counts->skip[i][j];
+ accum->skip[i][j] += counts->skip[i][j];
for (i = 0; i < MV_JOINTS; i++)
- cm->counts.mv.joints[i] += counts->mv.joints[i];
+ accum->mv.joints[i] += counts->mv.joints[i];
for (k = 0; k < 2; k++) {
- nmv_component_counts *comps = &cm->counts.mv.comps[k];
- nmv_component_counts *comps_t = &counts->mv.comps[k];
+ nmv_component_counts *const comps = &accum->mv.comps[k];
+ const nmv_component_counts *const comps_t = &counts->mv.comps[k];
for (i = 0; i < 2; i++) {
comps->sign[i] += comps_t->sign[i];
--- a/vp9/common/vp9_thread_common.h
+++ b/vp9/common/vp9_thread_common.h
@@ -55,8 +55,8 @@
VPxWorker *workers, int num_workers,
VP9LfSync *lf_sync);
-void vp9_accumulate_frame_counts(struct VP9Common *cm,
- struct FRAME_COUNTS *counts, int is_dec);
+void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum,
+ const struct FRAME_COUNTS *counts, int is_dec);
#ifdef __cplusplus
} // extern "C"
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1563,9 +1563,10 @@
return vpx_reader_find_end(&tile_data->bit_reader);
}
-static int tile_worker_hook(TileWorkerData *const tile_data,
- const TileInfo *const tile) {
+static int tile_worker_hook(TileWorkerData *const tile_data, void *unused) {
+ const TileInfo *const tile = &tile_data->xd.tile;
int mi_row, mi_col;
+ (void)unused;
if (setjmp(tile_data->error_info.jmp)) {
tile_data->error_info.setjmp = 0;
@@ -1628,8 +1629,6 @@
CHECK_MEM_ERROR(cm, pbi->tile_worker_data,
vpx_memalign(32, num_threads *
sizeof(*pbi->tile_worker_data)));
- CHECK_MEM_ERROR(cm, pbi->tile_worker_info,
- vpx_malloc(num_threads * sizeof(*pbi->tile_worker_info)));
for (i = 0; i < num_threads; ++i) {
VPxWorker *const worker = &pbi->tile_workers[i];
++pbi->num_tile_workers;
@@ -1645,10 +1644,15 @@
// Reset tile decoding hook
for (n = 0; n < num_workers; ++n) {
VPxWorker *const worker = &pbi->tile_workers[n];
+ TileWorkerData *const tile_data = &pbi->tile_worker_data[n];
winterface->sync(worker);
+ tile_data->pbi = pbi;
+ tile_data->xd = pbi->mb;
+ tile_data->xd.counts =
+ cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts;
worker->hook = (VPxWorkerHook)tile_worker_hook;
- worker->data1 = &pbi->tile_worker_data[n];
- worker->data2 = &pbi->tile_worker_info[n];
+ worker->data1 = tile_data;
+ worker->data2 = NULL;
}
// Note: this memset assumes above_context[0], [1] and [2]
@@ -1698,16 +1702,10 @@
for (i = 0; i < num_workers && n < tile_cols; ++i) {
VPxWorker *const worker = &pbi->tile_workers[i];
TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
- TileInfo *const tile = (TileInfo*)worker->data2;
TileBuffer *const buf = &tile_buffers[0][n];
- tile_data->pbi = pbi;
- tile_data->xd = pbi->mb;
tile_data->xd.corrupted = 0;
- tile_data->xd.counts = cm->frame_parallel_decoding_mode ?
- 0 : &tile_data->counts;
vp9_zero(tile_data->dqcoeff);
- vp9_tile_init(tile, cm, 0, buf->col);
vp9_tile_init(&tile_data->xd.tile, cm, 0, buf->col);
setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
&tile_data->bit_reader, pbi->decrypt_cb,
@@ -1742,14 +1740,15 @@
bit_reader_end = vpx_reader_find_end(&tile_data->bit_reader);
final_worker = -1;
}
+ }
- // Accumulate thread frame counts.
- if (n >= tile_cols && !cm->frame_parallel_decoding_mode) {
- for (i = 0; i < num_workers; ++i) {
- TileWorkerData *const tile_data =
- (TileWorkerData*)pbi->tile_workers[i].data1;
- vp9_accumulate_frame_counts(cm, &tile_data->counts, 1);
- }
+ // Accumulate thread frame counts.
+ if (!cm->frame_parallel_decoding_mode) {
+ int i;
+ for (i = 0; i < num_workers; ++i) {
+ TileWorkerData *const tile_data =
+ (TileWorkerData*)pbi->tile_workers[i].data1;
+ vp9_accumulate_frame_counts(&cm->counts, &tile_data->counts, 1);
}
}
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -134,7 +134,6 @@
vpx_get_worker_interface()->end(worker);
}
vpx_free(pbi->tile_worker_data);
- vpx_free(pbi->tile_worker_info);
vpx_free(pbi->tile_workers);
if (pbi->num_tile_workers > 0) {
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -65,7 +65,6 @@
VPxWorker lf_worker;
VPxWorker *tile_workers;
TileWorkerData *tile_worker_data;
- TileInfo *tile_worker_info;
int num_tile_workers;
TileData *tile_data;
--- a/vp9/encoder/vp9_ethread.c
+++ b/vp9/encoder/vp9_ethread.c
@@ -192,7 +192,7 @@
// Accumulate counters.
if (i < cpi->num_workers - 1) {
- vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0);
+ vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0);
accumulate_rd_opt(&cpi->td, thread_data->td);
}
}