53         uint32_t v32 = v * 0x01010101;
 
   62         uint64_t v64 = v * 0x0101010101010101ULL;
 
   68         uint32_t v32 = v * 0x01010101;
 
   83         0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
 
   86         0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
 
   94     int row = 
td->row, col = 
td->col, row7 = 
td->row7;
 
   95     enum TxfmMode max_tx = max_tx_for_bl_bp[
b->bs];
 
   98     int have_a = row > 0, have_l = col > 
td->tile_col_start;
 
  101     if (!
s->s.h.segmentation.enabled) {
 
  103     } 
else if (
s->s.h.keyframe || 
s->s.h.intraonly) {
 
  104         b->seg_id = !
s->s.h.segmentation.update_map ? 0 :
 
  106     } 
else if (!
s->s.h.segmentation.update_map ||
 
  107                (
s->s.h.segmentation.temporal &&
 
  109                     s->s.h.segmentation.pred_prob[
s->above_segpred_ctx[col] +
 
  110                                     td->left_segpred_ctx[row7]]))) {
 
  117             for (y = 0; y < h4; y++) {
 
  118                 int idx_base = (y + row) * 8 * 
s->sb_cols + col;
 
  119                 for (x = 0; x < w4; x++)
 
  128         memset(&
s->above_segpred_ctx[col], 1, w4);
 
  129         memset(&
td->left_segpred_ctx[row7], 1, h4);
 
  132                                      s->s.h.segmentation.prob);
 
  134         memset(&
s->above_segpred_ctx[col], 0, w4);
 
  135         memset(&
td->left_segpred_ctx[row7], 0, h4);
 
  137     if (
s->s.h.segmentation.enabled &&
 
  138         (
s->s.h.segmentation.update_map || 
s->s.h.keyframe || 
s->s.h.intraonly)) {
 
  140                   bw4, bh4, 8 * 
s->sb_cols, 
b->seg_id);
 
  143     b->skip = 
s->s.h.segmentation.enabled &&
 
  144         s->s.h.segmentation.feat[
b->seg_id].skip_enabled;
 
  146         int c = 
td->left_skip_ctx[row7] + 
s->above_skip_ctx[col];
 
  148         td->counts.skip[
c][
b->skip]++;
 
  151     if (
s->s.h.keyframe || 
s->s.h.intraonly) {
 
  153     } 
else if (
s->s.h.segmentation.enabled && 
s->s.h.segmentation.feat[
b->seg_id].ref_enabled) {
 
  154         b->intra = !
s->s.h.segmentation.feat[
b->seg_id].ref_val;
 
  158         if (have_a && have_l) {
 
  159             c = 
s->above_intra_ctx[col] + 
td->left_intra_ctx[row7];
 
  162             c = have_a ? 2 * 
s->above_intra_ctx[col] :
 
  163                 have_l ? 2 * 
td->left_intra_ctx[row7] : 0;
 
  166         td->counts.intra[
c][
bit]++;
 
  174                 c = (
s->above_skip_ctx[col] ? max_tx :
 
  175                      s->above_txfm_ctx[col]) +
 
  176                     (
td->left_skip_ctx[row7] ? max_tx :
 
  177                      td->left_txfm_ctx[row7]) > max_tx;
 
  179                 c = 
s->above_skip_ctx[col] ? 1 :
 
  180                     (
s->above_txfm_ctx[col] * 2 > max_tx);
 
  183             c = 
td->left_skip_ctx[row7] ? 1 :
 
  184                 (
td->left_txfm_ctx[row7] * 2 > max_tx);
 
  196             td->counts.tx32p[
c][
b->tx]++;
 
  202             td->counts.tx16p[
c][
b->tx]++;
 
  206             td->counts.tx8p[
c][
b->tx]++;
 
  213         b->tx = 
FFMIN(max_tx, 
s->s.h.txfmmode);
 
  216     if (
s->s.h.keyframe || 
s->s.h.intraonly) {
 
  217         uint8_t *
a = &
s->above_mode_ctx[col * 2];
 
  218         uint8_t *l = &
td->left_mode_ctx[(row7) << 1];
 
  236                 b->mode[1] = 
b->mode[0];
 
  250                     b->mode[3] = 
b->mode[2];
 
  253                 b->mode[2] = 
b->mode[0];
 
  256                 b->mode[3] = 
b->mode[1];
 
  263             b->mode[1] = 
b->mode[0];
 
  270     } 
else if (
b->intra) {
 
  274                                           s->prob.p.y_mode[0]);
 
  275             td->counts.y_mode[0][
b->mode[0]]++;
 
  278                                               s->prob.p.y_mode[0]);
 
  279                 td->counts.y_mode[0][
b->mode[1]]++;
 
  281                 b->mode[1] = 
b->mode[0];
 
  285                                               s->prob.p.y_mode[0]);
 
  286                 td->counts.y_mode[0][
b->mode[2]]++;
 
  289                                                   s->prob.p.y_mode[0]);
 
  290                     td->counts.y_mode[0][
b->mode[3]]++;
 
  292                     b->mode[3] = 
b->mode[2];
 
  295                 b->mode[2] = 
b->mode[0];
 
  296                 b->mode[3] = 
b->mode[1];
 
  299             static const uint8_t size_group[10] = {
 
  300                 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
 
  302             int sz = size_group[
b->bs];
 
  305                                           s->prob.p.y_mode[sz]);
 
  308             b->mode[3] = 
b->mode[0];
 
  309             td->counts.y_mode[sz][
b->mode[3]]++;
 
  312                                      s->prob.p.uv_mode[
b->mode[3]]);
 
  313         td->counts.uv_mode[
b->mode[3]][
b->uvmode]++;
 
  315         static const uint8_t inter_mode_ctx_lut[14][14] = {
              /*
               * Context lookup for inter-mode coding.  Used further down as
               * inter_mode_ctx_lut[above_mode_ctx value][left_mode_ctx value];
               * the result is the context `c` that selects
               * s->prob.p.mv_mode[c] and td->counts.mv_mode[c].
               *
               * Structure of the values: any pairing where both indices are
               * in 0..9 yields 6; pairing an index in 0..9 with one in 10..13
               * yields 5; only when both indices are in 10..13 does the table
               * produce contexts 0..4.
               *
               * NOTE(review): presumably indices 0..9 are intra prediction
               * modes and 10..13 the inter modes (mode values are biased by
               * 10 in `b->mode[0] - 10` below) -- confirm against the mode
               * enum.
               */
 
  316             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
 
  317             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
 
  318             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
 
  319             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
 
  320             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
 
  321             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
 
  322             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
 
  323             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
 
  324             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
 
  325             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
 
  326             { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
 
  327             { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
 
  328             { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
 
  329             { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
 
  332         if (
s->s.h.segmentation.enabled && 
s->s.h.segmentation.feat[
b->seg_id].ref_enabled) {
 
  333             av_assert2(
s->s.h.segmentation.feat[
b->seg_id].ref_val != 0);
 
  335             b->ref[0] = 
s->s.h.segmentation.feat[
b->seg_id].ref_val - 1;
 
  346                         if (
s->above_comp_ctx[col] && 
td->left_comp_ctx[row7]) {
 
  348                         } 
else if (
s->above_comp_ctx[col]) {
 
  349                             c = 2 + (
td->left_intra_ctx[row7] ||
 
  350                                      td->left_ref_ctx[row7] == 
s->s.h.fixcompref);
 
  351                         } 
else if (
td->left_comp_ctx[row7]) {
 
  352                             c = 2 + (
s->above_intra_ctx[col] ||
 
  353                                      s->above_ref_ctx[col] == 
s->s.h.fixcompref);
 
  355                             c = (!
s->above_intra_ctx[col] &&
 
  356                                  s->above_ref_ctx[col] == 
s->s.h.fixcompref) ^
 
  357                                 (!
td->left_intra_ctx[row7] &&
 
                                 /*
                                  * NOTE(review): among the visible lines of
                                  * this function this is the only left-context
                                  * access indexed with `row & 7`; all others
                                  * use `row7`, which is initialised from
                                  * td->row7.  Presumably td->row7 == (row & 7),
                                  * making the two equivalent -- confirm, and
                                  * prefer row7 here for consistency.
                                  */
  358                                  td->left_ref_ctx[row & 7] == 
s->s.h.fixcompref);
 
  361                         c = 
s->above_comp_ctx[col] ? 3 :
 
  362                         (!
s->above_intra_ctx[col] && 
s->above_ref_ctx[col] == 
s->s.h.fixcompref);
 
  365                     c = 
td->left_comp_ctx[row7] ? 3 :
 
  366                     (!
td->left_intra_ctx[row7] && 
td->left_ref_ctx[row7] == 
s->s.h.fixcompref);
 
  371                 td->counts.comp[
c][
b->comp]++;
 
  378                 int fix_idx = 
s->s.h.signbias[
s->s.h.fixcompref], var_idx = !fix_idx, 
c, 
bit;
 
  380                 b->ref[fix_idx] = 
s->s.h.fixcompref;
 
  384                         if (
s->above_intra_ctx[col]) {
 
  385                             if (
td->left_intra_ctx[row7]) {
 
  388                                 c = 1 + 2 * (
td->left_ref_ctx[row7] != 
s->s.h.varcompref[1]);
 
  390                         } 
else if (
td->left_intra_ctx[row7]) {
 
  391                             c = 1 + 2 * (
s->above_ref_ctx[col] != 
s->s.h.varcompref[1]);
 
  393                             int refl = 
td->left_ref_ctx[row7], refa = 
s->above_ref_ctx[col];
 
  395                             if (refl == refa && refa == 
s->s.h.varcompref[1]) {
 
  397                             } 
else if (!
td->left_comp_ctx[row7] && !
s->above_comp_ctx[col]) {
 
  398                                 if ((refa == 
s->s.h.fixcompref && refl == 
s->s.h.varcompref[0]) ||
 
  399                                     (refl == 
s->s.h.fixcompref && refa == 
s->s.h.varcompref[0])) {
 
  402                                     c = (refa == refl) ? 3 : 1;
 
  404                             } 
else if (!
td->left_comp_ctx[row7]) {
 
  405                                 if (refa == 
s->s.h.varcompref[1] && refl != 
s->s.h.varcompref[1]) {
 
  408                                     c = (refl == 
s->s.h.varcompref[1] &&
 
  409                                          refa != 
s->s.h.varcompref[1]) ? 2 : 4;
 
  411                             } 
else if (!
s->above_comp_ctx[col]) {
 
  412                                 if (refl == 
s->s.h.varcompref[1] && refa != 
s->s.h.varcompref[1]) {
 
  415                                     c = (refa == 
s->s.h.varcompref[1] &&
 
  416                                          refl != 
s->s.h.varcompref[1]) ? 2 : 4;
 
  419                                 c = (refl == refa) ? 4 : 2;
 
  423                         if (
s->above_intra_ctx[col]) {
 
  425                         } 
else if (
s->above_comp_ctx[col]) {
 
  426                             c = 4 * (
s->above_ref_ctx[col] != 
s->s.h.varcompref[1]);
 
  428                             c = 3 * (
s->above_ref_ctx[col] != 
s->s.h.varcompref[1]);
 
  432                     if (
td->left_intra_ctx[row7]) {
 
  434                     } 
else if (
td->left_comp_ctx[row7]) {
 
  435                         c = 4 * (
td->left_ref_ctx[row7] != 
s->s.h.varcompref[1]);
 
  437                         c = 3 * (
td->left_ref_ctx[row7] != 
s->s.h.varcompref[1]);
 
  443                 b->ref[var_idx] = 
s->s.h.varcompref[
bit];
 
  444                 td->counts.comp_ref[
c][
bit]++;
 
  448                 if (have_a && !
s->above_intra_ctx[col]) {
 
  449                     if (have_l && !
td->left_intra_ctx[row7]) {
 
  450                         if (
td->left_comp_ctx[row7]) {
 
  451                             if (
s->above_comp_ctx[col]) {
 
  452                                 c = 1 + (!
s->s.h.fixcompref || !
td->left_ref_ctx[row7] ||
 
  453                                          !
s->above_ref_ctx[col]);
 
  455                                 c = (3 * !
s->above_ref_ctx[col]) +
 
  456                                     (!
s->s.h.fixcompref || !
td->left_ref_ctx[row7]);
 
  458                         } 
else if (
s->above_comp_ctx[col]) {
 
  459                             c = (3 * !
td->left_ref_ctx[row7]) +
 
  460                                 (!
s->s.h.fixcompref || !
s->above_ref_ctx[col]);
 
  462                             c = 2 * !
td->left_ref_ctx[row7] + 2 * !
s->above_ref_ctx[col];
 
  464                     } 
else if (
s->above_intra_ctx[col]) {
                        /*
                         * NOTE(review): judging by the listing's indentation,
                         * this branch sits inside the earlier
                         * `if (have_a && !s->above_intra_ctx[col])` body.  If
                         * so, s->above_intra_ctx[col] is known to be zero
                         * here and this branch can never be taken -- confirm
                         * against the complete function before relying on or
                         * removing it.
                         */
 
  466                     } 
else if (
s->above_comp_ctx[col]) {
 
  467                         c = 1 + (!
s->s.h.fixcompref || !
s->above_ref_ctx[col]);
 
  469                         c = 4 * (!
s->above_ref_ctx[col]);
 
  471                 } 
else if (have_l && !
td->left_intra_ctx[row7]) {
 
                    /*
                     * NOTE(review): the `else if` directly above already
                     * required !td->left_intra_ctx[row7], so the test below
                     * cannot succeed and its body appears unreachable as
                     * written; only the following `else if` / `else` arms can
                     * run.
                     */
  472                     if (
td->left_intra_ctx[row7]) {
 
  474                     } 
else if (
td->left_comp_ctx[row7]) {
 
  475                         c = 1 + (!
s->s.h.fixcompref || !
td->left_ref_ctx[row7]);
 
  477                         c = 4 * (!
td->left_ref_ctx[row7]);
 
  483                 td->counts.single_ref[
c][0][
bit]++;
 
  490                             if (
td->left_intra_ctx[row7]) {
 
  491                                 if (
s->above_intra_ctx[col]) {
 
  493                                 } 
else if (
s->above_comp_ctx[col]) {
 
  494                                     c = 1 + 2 * (
s->s.h.fixcompref == 1 ||
 
  495                                                  s->above_ref_ctx[col] == 1);
 
  496                                 } 
else if (!
s->above_ref_ctx[col]) {
 
  499                                     c = 4 * (
s->above_ref_ctx[col] == 1);
 
  501                             } 
else if (
s->above_intra_ctx[col]) {
 
  502                                 if (
td->left_intra_ctx[row7]) {
 
  504                                 } 
else if (
td->left_comp_ctx[row7]) {
 
  505                                     c = 1 + 2 * (
s->s.h.fixcompref == 1 ||
 
  506                                                  td->left_ref_ctx[row7] == 1);
 
  507                                 } 
else if (!
td->left_ref_ctx[row7]) {
 
  510                                     c = 4 * (
td->left_ref_ctx[row7] == 1);
 
  512                             } 
else if (
s->above_comp_ctx[col]) {
 
  513                                 if (
td->left_comp_ctx[row7]) {
 
  514                                     if (
td->left_ref_ctx[row7] == 
s->above_ref_ctx[col]) {
 
  515                                         c = 3 * (
s->s.h.fixcompref == 1 ||
 
  516                                                  td->left_ref_ctx[row7] == 1);
 
  520                                 } 
else if (!
td->left_ref_ctx[row7]) {
 
  521                                     c = 1 + 2 * (
s->s.h.fixcompref == 1 ||
 
  522                                                  s->above_ref_ctx[col] == 1);
 
  524                                     c = 3 * (
td->left_ref_ctx[row7] == 1) +
 
  525                                     (
s->s.h.fixcompref == 1 || 
s->above_ref_ctx[col] == 1);
 
  527                             } 
else if (
td->left_comp_ctx[row7]) {
 
  528                                 if (!
s->above_ref_ctx[col]) {
 
  529                                     c = 1 + 2 * (
s->s.h.fixcompref == 1 ||
 
  530                                                  td->left_ref_ctx[row7] == 1);
 
  532                                     c = 3 * (
s->above_ref_ctx[col] == 1) +
 
  533                                     (
s->s.h.fixcompref == 1 || 
td->left_ref_ctx[row7] == 1);
 
  535                             } 
else if (!
s->above_ref_ctx[col]) {
 
  536                                 if (!
td->left_ref_ctx[row7]) {
 
  539                                     c = 4 * (
td->left_ref_ctx[row7] == 1);
 
  541                             } 
else if (!
td->left_ref_ctx[row7]) {
 
  542                                 c = 4 * (
s->above_ref_ctx[col] == 1);
 
  544                                 c = 2 * (
td->left_ref_ctx[row7] == 1) +
 
  545                                     2 * (
s->above_ref_ctx[col] == 1);
 
  548                             if (
s->above_intra_ctx[col] ||
 
  549                                 (!
s->above_comp_ctx[col] && !
s->above_ref_ctx[col])) {
 
  551                             } 
else if (
s->above_comp_ctx[col]) {
 
  552                                 c = 3 * (
s->s.h.fixcompref == 1 || 
s->above_ref_ctx[col] == 1);
 
  554                                 c = 4 * (
s->above_ref_ctx[col] == 1);
 
  558                         if (
td->left_intra_ctx[row7] ||
 
  559                             (!
td->left_comp_ctx[row7] && !
td->left_ref_ctx[row7])) {
 
  561                         } 
else if (
td->left_comp_ctx[row7]) {
 
  562                             c = 3 * (
s->s.h.fixcompref == 1 || 
td->left_ref_ctx[row7] == 1);
 
  564                             c = 4 * (
td->left_ref_ctx[row7] == 1);
 
  570                     td->counts.single_ref[
c][1][
bit]++;
 
  577             if (
s->s.h.segmentation.enabled && 
s->s.h.segmentation.feat[
b->seg_id].skip_enabled) {
 
  583                 static const uint8_t off[10] = {
 
  584                     3, 0, 0, 1, 0, 0, 0, 0, 0, 0
 
  589                 int c = inter_mode_ctx_lut[
s->above_mode_ctx[col + off[
b->bs]]]
 
  590                                           [
td->left_mode_ctx[row7 + off[
b->bs]]];
 
  593                                               s->prob.p.mv_mode[
c]);
 
  596                 b->mode[3] = 
b->mode[0];
 
  597                 td->counts.mv_mode[
c][
b->mode[0] - 10]++;
 
  604             if (have_a && 
s->above_mode_ctx[col] >= 
NEARESTMV) {
 
  605                 if (have_l && 
td->left_mode_ctx[row7] >= 
NEARESTMV) {
 
  606                     c = 
s->above_filter_ctx[col] == 
td->left_filter_ctx[row7] ?
 
  607                         td->left_filter_ctx[row7] : 3;
 
  609                     c = 
s->above_filter_ctx[col];
 
  611             } 
else if (have_l && 
td->left_mode_ctx[row7] >= 
NEARESTMV) {
 
  612                 c = 
td->left_filter_ctx[row7];
 
  618                                          s->prob.p.filter[
c]);
 
  619             td->counts.filter[
c][filter_id]++;
 
  622             b->filter = 
s->s.h.filtermode;
 
  626             int c = inter_mode_ctx_lut[
s->above_mode_ctx[col]][
td->left_mode_ctx[row7]];
 
  629                                           s->prob.p.mv_mode[
c]);
 
  630             td->counts.mv_mode[
c][
b->mode[0] - 10]++;
 
  635                                               s->prob.p.mv_mode[
c]);
 
  636                 td->counts.mv_mode[
c][
b->mode[1] - 10]++;
 
  639                 b->mode[1] = 
b->mode[0];
 
  646                                               s->prob.p.mv_mode[
c]);
 
  647                 td->counts.mv_mode[
c][
b->mode[2] - 10]++;
 
  652                                                   s->prob.p.mv_mode[
c]);
 
  653                     td->counts.mv_mode[
c][
b->mode[3] - 10]++;
 
  656                     b->mode[3] = 
b->mode[2];
 
  661                 b->mode[2] = 
b->mode[0];
 
  664                 b->mode[3] = 
b->mode[1];
 
  678         vref = 
b->ref[
b->comp ? 
s->s.h.signbias[
s->s.h.varcompref[0]] : 0];
 
  682 #define SPLAT_CTX(var, val, n) \ 
  684     case 1:  var = val;                                    break; \ 
  685     case 2:  AV_WN16A(&var, val *             0x0101);     break; \ 
  686     case 4:  AV_WN32A(&var, val *         0x01010101);     break; \ 
  687     case 8:  AV_WN64A(&var, val * 0x0101010101010101ULL);  break; \ 
  689         uint64_t v64 = val * 0x0101010101010101ULL; \ 
  690         AV_WN64A(              &var,     v64); \ 
  691         AV_WN64A(&((uint8_t *) &var)[8], v64); \ 
  696 #define SPLAT_CTX(var, val, n) \ 
  698     case 1:  var = val;                         break; \ 
  699     case 2:  AV_WN16A(&var, val *     0x0101);  break; \ 
  700     case 4:  AV_WN32A(&var, val * 0x01010101);  break; \ 
  702         uint32_t v32 = val * 0x01010101; \ 
  703         AV_WN32A(              &var,     v32); \ 
  704         AV_WN32A(&((uint8_t *) &var)[4], v32); \ 
  708         uint32_t v32 = val * 0x01010101; \ 
  709         AV_WN32A(              &var,      v32); \ 
  710         AV_WN32A(&((uint8_t *) &var)[4],  v32); \ 
  711         AV_WN32A(&((uint8_t *) &var)[8],  v32); \ 
  712         AV_WN32A(&((uint8_t *) &var)[12], v32); \ 
  719 #define SET_CTXS(perf, dir, off, n) \ 
  721         SPLAT_CTX(perf->dir##_skip_ctx[off],      b->skip,          n); \ 
  722         SPLAT_CTX(perf->dir##_txfm_ctx[off],      b->tx,            n); \ 
  723         SPLAT_CTX(perf->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \ 
  724         if (!s->s.h.keyframe && !s->s.h.intraonly) { \ 
  725             SPLAT_CTX(perf->dir##_intra_ctx[off], b->intra,   n); \ 
  726             SPLAT_CTX(perf->dir##_comp_ctx[off],  b->comp,    n); \ 
  727             SPLAT_CTX(perf->dir##_mode_ctx[off],  b->mode[3], n); \ 
  729                 SPLAT_CTX(perf->dir##_ref_ctx[off], vref, n); \ 
  730                 if (s->s.h.filtermode == FILTER_SWITCHABLE) { \ 
  731                     SPLAT_CTX(perf->dir##_filter_ctx[off], filter_id, n); \ 
  736     case 1: 
SET_CTXS(
s, above, col, 1); 
break;
 
  737     case 2: 
SET_CTXS(
s, above, col, 2); 
break;
 
  738     case 4: 
SET_CTXS(
s, above, col, 4); 
break;
 
  739     case 8: 
SET_CTXS(
s, above, col, 8); 
break;
 
  750     if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
 
  754             AV_COPY32(&
td->left_mv_ctx[row7 * 2 + 0][0], &
b->mv[1][0]);
 
  755             AV_COPY32(&
td->left_mv_ctx[row7 * 2 + 0][1], &
b->mv[1][1]);
 
  756             AV_WN32A(&
td->left_mv_ctx[row7 * 2 + 1][0], mv0);
 
  757             AV_WN32A(&
td->left_mv_ctx[row7 * 2 + 1][1], mv1);
 
  758             AV_COPY32(&
s->above_mv_ctx[col * 2 + 0][0], &
b->mv[2][0]);
 
  759             AV_COPY32(&
s->above_mv_ctx[col * 2 + 0][1], &
b->mv[2][1]);
 
  760             AV_WN32A(&
s->above_mv_ctx[col * 2 + 1][0], mv0);
 
  761             AV_WN32A(&
s->above_mv_ctx[col * 2 + 1][1], mv1);
 
  765             for (n = 0; n < w4 * 2; n++) {
 
  766                 AV_WN32A(&
s->above_mv_ctx[col * 2 + n][0], mv0);
 
  767                 AV_WN32A(&
s->above_mv_ctx[col * 2 + n][1], mv1);
 
  769             for (n = 0; n < h4 * 2; n++) {
 
  770                 AV_WN32A(&
td->left_mv_ctx[row7 * 2 + n][0], mv0);
 
  771                 AV_WN32A(&
td->left_mv_ctx[row7 * 2 + n][1], mv1);
 
  777     for (y = 0; y < h4; y++) {
 
  778         int x, o = (row + y) * 
s->sb_cols * 8 + col;
 
  782             for (x = 0; x < w4; x++) {
 
  786         } 
else if (
b->comp) {
 
  787             for (x = 0; x < w4; x++) {
 
  788                 mv[x].ref[0] = 
b->ref[0];
 
  789                 mv[x].ref[1] = 
b->ref[1];
 
  794             for (x = 0; x < w4; x++) {
 
  795                 mv[x].ref[0] = 
b->ref[0];
 
  806                         int is_tx32x32, 
int is8bitsperpixel, 
int bpp, 
unsigned (*cnt)[6][3],
 
  807                         unsigned (*eob)[6][2], 
uint8_t (*p)[6][11],
 
  808                         int nnz, 
const int16_t *scan, 
const int16_t (*nb)[2],
 
  809                         const int16_t *band_counts, int16_t *qmul)
 
  811     int i = 0, band = 0, band_left = band_counts[band];
 
  819         eob[band][nnz][
val]++;
 
  827                 band_left = band_counts[++band];
 
  829             nnz            = (1 + cache[nb[
i][0]] + cache[nb[
i][1]]) >> 1;
 
  879                     if (!is8bitsperpixel) {
 
  904 #define STORE_COEF(c, i, v) do { \ 
  905     if (is8bitsperpixel) { \ 
  908         AV_WN32A(&c[i * 2], v); \ 
  912             band_left = band_counts[++band];
 
  917         nnz = (1 + cache[nb[
i][0]] + cache[nb[
i][1]]) >> 1;
 
  919     } 
while (++
i < n_coeffs);
 
  925                                 unsigned (*cnt)[6][3], 
unsigned (*eob)[6][2],
 
  926                                 uint8_t (*p)[6][11], 
int nnz, 
const int16_t *scan,
 
  927                                 const int16_t (*nb)[2], 
const int16_t *band_counts,
 
  931                                    nnz, scan, nb, band_counts, qmul);
 
  935                                   unsigned (*cnt)[6][3], 
unsigned (*eob)[6][2],
 
  936                                   uint8_t (*p)[6][11], 
int nnz, 
const int16_t *scan,
 
  937                                   const int16_t (*nb)[2], 
const int16_t *band_counts,
 
  941                                    nnz, scan, nb, band_counts, qmul);
 
  945                                  unsigned (*cnt)[6][3], 
unsigned (*eob)[6][2],
 
  946                                  uint8_t (*p)[6][11], 
int nnz, 
const int16_t *scan,
 
  947                                  const int16_t (*nb)[2], 
const int16_t *band_counts,
 
  951                                    nnz, scan, nb, band_counts, qmul);
 
  955                                    unsigned (*cnt)[6][3], 
unsigned (*eob)[6][2],
 
  956                                    uint8_t (*p)[6][11], 
int nnz, 
const int16_t *scan,
 
  957                                    const int16_t (*nb)[2], 
const int16_t *band_counts,
 
  961                                    nnz, scan, nb, band_counts, qmul);
 
  968     int row = 
td->row, col = 
td->col;
 
  969     uint8_t (*p)[6][11] = 
s->prob.coef[
b->tx][0 ][!
b->intra];
 
  970     unsigned (*
c)[6][3] = 
td->counts.coef[
b->tx][0 ][!
b->intra];
 
  971     unsigned (*e)[6][2] = 
td->counts.eob[
b->tx][0 ][!
b->intra];
 
  973     int end_x = 
FFMIN(2 * (
s->cols - col), w4);
 
  974     int end_y = 
FFMIN(2 * (
s->rows - row), h4);
 
  975     int n, pl, x, y, 
ret;
 
  976     int16_t (*qmul)[2] = 
s->s.h.segmentation.feat[
b->seg_id].qmul;
 
  977     int tx = 4 * 
s->s.h.lossless + 
b->tx;
 
  982     uint8_t *
a = &
s->above_y_nnz_ctx[col * 2];
 
  983     uint8_t *l = &
td->left_y_nnz_ctx[(row & 7) << 1];
 
  984     static const int16_t band_counts[4][8] = {
        /*
         * One row per transform-size index; the rows are selected below with
         * band_counts[b->tx] (luma) and band_counts[b->uvtx] (chroma).
         *
         * Each row lists six band lengths.  The last one is written as
         * "total - sum of the first five" (1+2+3+4+3 = 13, 1+2+3+4+11 = 21),
         * so the rows sum to 16, 64, 256 and 1024 respectively.
         * NOTE(review): presumably these totals are the coefficient counts of
         * 4x4, 8x8, 16x16 and 32x32 transforms -- confirm.
         *
         * The rows are declared with 8 slots but only 6 are initialised; the
         * remaining two elements are zero by C's aggregate-initialisation
         * rules.
         */
 
  985         { 1, 2, 3, 4,  3,   16 - 13 },
 
  986         { 1, 2, 3, 4, 11,   64 - 21 },
 
  987         { 1, 2, 3, 4, 11,  256 - 21 },
 
  988         { 1, 2, 3, 4, 11, 1024 - 21 },
 
  990     const int16_t *y_band_counts = band_counts[
b->tx];
 
  991     const int16_t *uv_band_counts = band_counts[
b->uvtx];
 
  992     int bytesperpixel = is8bitsperpixel ? 1 : 2;
 
  995 #define MERGE(la, end, step, rd) \ 
  996     for (n = 0; n < end; n += step) \ 
  998 #define MERGE_CTX(step, rd) \ 
 1000         MERGE(l, end_y, step, rd); \ 
 1001         MERGE(a, end_x, step, rd); \ 
 1004 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \ 
 1005     for (n = 0, y = 0; y < end_y; y += step) { \ 
 1006         for (x = 0; x < end_x; x += step, n += step * step) { \ 
 1007             enum TxfmType txtp = ff_vp9_intra_txfm_type[b->mode[mode_index]]; \ 
 1008             ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \ 
 1009                                     (td, td->block + 16 * n * bytesperpixel, 16 * step * step, \ 
 1010                                      c, e, p, a[x] + l[y], yscans[txtp], \ 
 1011                                      ynbs[txtp], y_band_counts, qmul[0]); \ 
 1012             a[x] = l[y] = !!ret; \ 
 1013             total_coeff |= !!ret; \ 
 1015                 AV_WN16A(&td->eob[n], ret); \ 
 1022 #define SPLAT(la, end, step, cond) \ 
 1024         for (n = 1; n < end; n += step) \ 
 1025             la[n] = la[n - 1]; \ 
 1026     } else if (step == 4) { \ 
 1028             for (n = 0; n < end; n += step) \ 
 1029                 AV_WN32A(&la[n], la[n] * 0x01010101); \ 
 1031             for (n = 0; n < end; n += step) \ 
 1032                 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \ 
 1036             if (HAVE_FAST_64BIT) { \ 
 1037                 for (n = 0; n < end; n += step) \ 
 1038                     AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \ 
 1040                 for (n = 0; n < end; n += step) { \ 
 1041                     uint32_t v32 = la[n] * 0x01010101; \ 
 1042                     AV_WN32A(&la[n],     v32); \ 
 1043                     AV_WN32A(&la[n + 4], v32); \ 
 1047             for (n = 0; n < end; n += step) \ 
 1048                 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \ 
 1051 #define SPLAT_CTX(step) \ 
 1053         SPLAT(a, end_x, step, end_x == w4); \ 
 1054         SPLAT(l, end_y, step, end_y == h4); \ 
 1079 #define DECODE_UV_COEF_LOOP(step, v) \ 
 1080     for (n = 0, y = 0; y < end_y; y += step) { \ 
 1081         for (x = 0; x < end_x; x += step, n += step * step) { \ 
 1082             ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \ 
 1083                                     (td, td->uvblock[pl] + 16 * n * bytesperpixel, \ 
 1084                                      16 * step * step, c, e, p, a[x] + l[y], \ 
 1085                                      uvscan, uvnb, uv_band_counts, qmul[1]); \ 
 1086             a[x] = l[y] = !!ret; \ 
 1087             total_coeff |= !!ret; \ 
 1089                 AV_WN16A(&td->uveob[pl][n], ret); \ 
 1091                 td->uveob[pl][n] = ret; \ 
 1096     p = 
s->prob.coef[
b->uvtx][1 ][!
b->intra];
 
 1097     c = 
td->counts.coef[
b->uvtx][1 ][!
b->intra];
 
 1098     e = 
td->counts.eob[
b->uvtx][1 ][!
b->intra];
 
 1103     for (pl = 0; pl < 2; pl++) {
 
 1104         a = &
s->above_uv_nnz_ctx[pl][col << !
s->ss_h];
 
 1105         l = &
td->left_uv_nnz_ctx[pl][(row & 7) << !
s->ss_v];
 
 1142                                         int row_and_7, 
int col_and_7,
 
 1143                                         int w, 
int h, 
int col_end, 
int row_end,
 
 1146     static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
 
 1147     static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
 
 1159     if (tx == 
TX_4X4 && (ss_v | ss_h)) {
 
 1174     if (tx == 
TX_4X4 && !skip_inter) {
 
 1175         int t = 1 << col_and_7, m_col = (t << 
w) - t, y;
 
 1177         int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
 
 1179         for (y = row_and_7; y < 
h + row_and_7; y++) {
 
 1180             int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
 
 1182             mask[0][y][1] |= m_row_8;
 
 1183             mask[0][y][2] |= m_row_4;
 
 1194             if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
 
 1195                 mask[1][y][col_mask_id] |= (t << (
w - 1)) - t;
 
 1197                 mask[1][y][col_mask_id] |= m_col;
 
 1200                 mask[0][y][3] |= m_col;
 
 1202                 if (ss_h && (col_end & 1))
 
 1203                     mask[1][y][3] |= (t << (
w - 1)) - t;
 
 1205                     mask[1][y][3] |= m_col;
 
 1209         int y, t = 1 << col_and_7, m_col = (t << 
w) - t;
 
 1212             int mask_id = (tx == 
TX_8X8);
 
 1213             int l2 = tx + ss_h - 1, step1d;
 
 1214             static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
 
 1215             int m_row = m_col & masks[l2];
 
 1219             if (ss_h && tx > 
TX_8X8 && (
w ^ (
w - 1)) == 1) {
 
 1220                 int m_row_16 = ((t << (
w - 1)) - t) & masks[l2];
 
 1221                 int m_row_8 = m_row - m_row_16;
 
 1223                 for (y = row_and_7; y < 
h + row_and_7; y++) {
 
 1224                     mask[0][y][0] |= m_row_16;
 
 1225                     mask[0][y][1] |= m_row_8;
 
 1228                 for (y = row_and_7; y < 
h + row_and_7; y++)
 
 1229                     mask[0][y][mask_id] |= m_row;
 
 1234             if (ss_v && tx > 
TX_8X8 && (
h ^ (
h - 1)) == 1) {
 
 1235                 for (y = row_and_7; y < 
h + row_and_7 - 1; y += step1d)
 
 1236                     mask[1][y][0] |= m_col;
 
 1237                 if (y - row_and_7 == 
h - 1)
 
 1238                     mask[1][y][1] |= m_col;
 
 1240                 for (y = row_and_7; y < 
h + row_and_7; y += step1d)
 
 1241                     mask[1][y][mask_id] |= m_col;
 
 1243         } 
else if (tx != 
TX_4X4) {
 
 1246             mask_id = (tx == 
TX_8X8) || (
h == ss_v);
 
 1247             mask[1][row_and_7][mask_id] |= m_col;
 
 1248             mask_id = (tx == 
TX_8X8) || (
w == ss_h);
 
 1249             for (y = row_and_7; y < 
h + row_and_7; y++)
 
 1250                 mask[0][y][mask_id] |= t;
 
 1252             int t8 = t & wide_filter_col_mask[ss_h], 
t4 = t - 
t8;
 
 1254             for (y = row_and_7; y < 
h + row_and_7; y++) {
 
 1258             mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
 
 1264                          VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
 
 1270     int bytesperpixel = 
s->bytesperpixel;
 
 1280     td->min_mv.x = -(128 + col * 64);
 
 1281     td->min_mv.y = -(128 + row * 64);
 
 1282     td->max_mv.x = 128 + (
s->cols - col - w4) * 64;
 
 1283     td->max_mv.y = 128 + (
s->rows - row - h4) * 64;
 
 1290         b->uvtx = 
b->tx - ((
s->ss_h && w4 * 2 == (1 << 
b->tx)) ||
 
 1291                            (
s->ss_v && h4 * 2 == (1 << 
b->tx)));
 
 1293         if (
td->block_structure) {
 
 1294             td->block_structure[
td->nb_block_structure].row = row;
 
 1295             td->block_structure[
td->nb_block_structure].col = col;
 
 1296             td->block_structure[
td->nb_block_structure].block_size_idx_x = 
av_log2(w4);
 
 1297             td->block_structure[
td->nb_block_structure].block_size_idx_y = 
av_log2(h4);
 
 1298             td->nb_block_structure++;
 
 1304             if (bytesperpixel == 1) {
 
 1309             if (!has_coeffs && 
b->bs <= 
BS_8x8 && !
b->intra) {
 
 1311                 memset(&
s->above_skip_ctx[col], 1, w4);
 
 1312                 memset(&
td->left_skip_ctx[
td->row7], 1, h4);
 
 1315             int row7 = 
td->row7;
 
 1317 #define SPLAT_ZERO_CTX(v, n) \ 
 1319     case 1:  v = 0;          break; \ 
 1320     case 2:  AV_ZERO16(&v);  break; \ 
 1321     case 4:  AV_ZERO32(&v);  break; \ 
 1322     case 8:  AV_ZERO64(&v);  break; \ 
 1323     case 16: AV_ZERO128(&v); break; \ 
 1325 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \ 
 1327         SPLAT_ZERO_CTX(dir##_y_##var[off * 2], n * 2); \ 
 1328         if (s->ss_##dir2) { \ 
 1329             SPLAT_ZERO_CTX(dir##_uv_##var[0][off], n); \ 
 1330             SPLAT_ZERO_CTX(dir##_uv_##var[1][off], n); \ 
 1332             SPLAT_ZERO_CTX(dir##_uv_##var[0][off * 2], n * 2); \ 
 1333             SPLAT_ZERO_CTX(dir##_uv_##var[1][off * 2], n * 2); \ 
 1353             s->td[0].block += w4 * h4 * 64 * bytesperpixel;
 
 1354             s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (
s->ss_h + 
s->ss_v);
 
 1355             s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (
s->ss_h + 
s->ss_v);
 
 1356             s->td[0].eob += 4 * w4 * h4;
 
 1357             s->td[0].uveob[0] += 4 * w4 * h4 >> (
s->ss_h + 
s->ss_v);
 
 1358             s->td[0].uveob[1] += 4 * w4 * h4 >> (
s->ss_h + 
s->ss_v);
 
 1367     emu[0] = (col + w4) * 8 * bytesperpixel > 
f->linesize[0] ||
 
 1368              (row + h4) > 
s->rows;
 
 1369     emu[1] = ((col + w4) * 8 >> 
s->ss_h) * bytesperpixel > 
f->linesize[1] ||
 
 1370              (row + h4) > 
s->rows;
 
 1372         td->dst[0] = 
td->tmp_y;
 
 1375         td->dst[0] = 
f->data[0] + yoff;
 
 1376         td->y_stride = 
f->linesize[0];
 
 1379         td->dst[1] = 
td->tmp_uv[0];
 
 1380         td->dst[2] = 
td->tmp_uv[1];
 
 1381         td->uv_stride = 128;
 
 1383         td->dst[1] = 
f->data[1] + uvoff;
 
 1384         td->dst[2] = 
f->data[2] + uvoff;
 
 1385         td->uv_stride = 
f->linesize[1];
 
 1388         if (
s->s.h.bpp > 8) {
 
 1394         if (
s->s.h.bpp > 8) {
 
 1401         int w = 
FFMIN(
s->cols - col, w4) * 8, 
h = 
FFMIN(
s->rows - row, h4) * 8, n, o = 0;
 
 1403         for (n = 0; o < 
w; n++) {
 
 1408                 s->dsp.mc[n][0][0][0][0](
f->data[0] + yoff + o * bytesperpixel, 
f->linesize[0],
 
 1409                                          td->tmp_y + o * bytesperpixel, 128, 
h, 0, 0);
 
 1415         int w = 
FFMIN(
s->cols - col, w4) * 8 >> 
s->ss_h;
 
 1416         int h = 
FFMIN(
s->rows - row, h4) * 8 >> 
s->ss_v, n, o = 0;
 
 1418         for (n = 
s->ss_h; o < 
w; n++) {
 
 1423                 s->dsp.mc[n][0][0][0][0](
f->data[1] + uvoff + o * bytesperpixel, 
f->linesize[1],
 
 1424                                          td->tmp_uv[0] + o * bytesperpixel, 128, 
h, 0, 0);
 
 1425                 s->dsp.mc[n][0][0][0][0](
f->data[2] + uvoff + o * bytesperpixel, 
f->linesize[2],
 
 1426                                          td->tmp_uv[1] + o * bytesperpixel, 128, 
h, 0, 0);
 
 1433     if (
s->s.h.filter.level &&
 
 1434         (lvl = 
s->s.h.segmentation.feat[
b->seg_id].lflvl[
b->intra ? 0 : 
b->ref[0] + 1]
 
 1435                                                       [
b->mode[3] != 
ZEROMV]) > 0) {
 
 1436         int x_end = 
FFMIN(
s->cols - col, w4), y_end = 
FFMIN(
s->rows - row, h4);
 
 1437         int skip_inter = !
b->intra && 
b->skip, col7 = 
td->col7, row7 = 
td->row7;
 
 1440         mask_edges(lflvl->
mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, 
b->tx, skip_inter);
 
 1441         if (
s->ss_h || 
s->ss_v)
 
 1443                        s->cols & 1 && col + w4 >= 
s->cols ? 
s->cols & 7 : 0,
 
 1444                        s->rows & 1 && row + h4 >= 
s->rows ? 
s->rows & 7 : 0,
 
 1445                        b->uvtx, skip_inter);
 
 1450         s->td[0].block += w4 * h4 * 64 * bytesperpixel;
 
 1451         s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (
s->ss_v + 
s->ss_h);
 
 1452         s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (
s->ss_v + 
s->ss_h);
 
 1453         s->td[0].eob += 4 * w4 * h4;
 
 1454         s->td[0].uveob[0] += 4 * w4 * h4 >> (
s->ss_v + 
s->ss_h);
 
 1455         s->td[0].uveob[1] += 4 * w4 * h4 >> (
s->ss_v + 
s->ss_h);