24 #include "config_components.h"
/* 24-bit sync code prefixing a VP9 uncompressed frame header
 * (bytes 0x49 0x83 0x42 read MSB-first).
 * NOTE(review): value matches the VP9 spec's frame_sync_code — confirm
 * against the bitstream specification if touched. */
#define VP9_SYNCCODE 0x498342
67 for (
i = 0;
i < n;
i++)
105 f->segmentation_map =
NULL;
117 sz = 64 *
s->sb_cols *
s->sb_rows;
118 if (sz !=
s->frame_extradata_pool_size) {
122 if (!
s->frame_extradata_pool) {
123 s->frame_extradata_pool_size = 0;
127 s->frame_extradata_pool_size = sz;
135 f->segmentation_map =
f->extradata;
164 src->hwaccel_picture_private);
/*
 * Upper bound on the number of hardware-acceleration pixel formats that
 * can be offered for a VP9 stream; each enabled hwaccel contributes one
 * slot at compile time.
 * NOTE(review): D3D11VA is counted twice — presumably it exposes two
 * pixel formats (e.g. array-texture and non-array variants); confirm
 * against the format list built where this bound is consumed.
 */
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
                     CONFIG_VP9_D3D12VA_HWACCEL + \
                     CONFIG_VP9_NVDEC_HWACCEL + \
                     CONFIG_VP9_VAAPI_HWACCEL + \
                     CONFIG_VP9_VDPAU_HWACCEL + \
                     CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)
181 int bytesperpixel =
s->bytesperpixel,
ret, cols, rows;
186 if (!(
s->pix_fmt ==
s->gf_fmt &&
w ==
s->w &&
h ==
s->h)) {
190 switch (
s->pix_fmt) {
193 #if CONFIG_VP9_DXVA2_HWACCEL
196 #if CONFIG_VP9_D3D11VA_HWACCEL
200 #if CONFIG_VP9_D3D12VA_HWACCEL
203 #if CONFIG_VP9_NVDEC_HWACCEL
206 #if CONFIG_VP9_VAAPI_HWACCEL
209 #if CONFIG_VP9_VDPAU_HWACCEL
212 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
217 #if CONFIG_VP9_NVDEC_HWACCEL
220 #if CONFIG_VP9_VAAPI_HWACCEL
223 #if CONFIG_VP9_VDPAU_HWACCEL
230 #if CONFIG_VP9_VAAPI_HWACCEL
237 #if CONFIG_VP9_VAAPI_HWACCEL
243 *fmtp++ =
s->pix_fmt;
251 s->gf_fmt =
s->pix_fmt;
259 if (
s->intra_pred_data[0] && cols ==
s->cols && rows ==
s->rows &&
s->pix_fmt ==
s->last_fmt)
262 s->last_fmt =
s->pix_fmt;
263 s->sb_cols = (
w + 63) >> 6;
264 s->sb_rows = (
h + 63) >> 6;
265 s->cols = (
w + 7) >> 3;
266 s->rows = (
h + 7) >> 3;
/*
 * Carve one per-superblock-column context array out of the single buffer
 * whose cursor is the local `p` in the enclosing function: point `var`
 * (cast to `type`) at the current position, then advance `p` by
 * s->sb_cols * n bytes-per-element. Only meaningful where locals `s` and
 * `p` are in scope.
 *
 * Fix: the original expanded to TWO bare statements, so using it as the
 * sole body of an unbraced `if`/`for` would silently drop the pointer
 * advance outside the conditional (CERT PRE10-C). Wrapped in
 * do { ... } while (0); all visible call sites are plain statements
 * followed by `;`, so behavior there is unchanged.
 */
#define assign(var, type, n) \
    do { \
        var = (type) p; \
        p += s->sb_cols * (n) * sizeof(*var); \
    } while (0)
273 p =
av_malloc(
s->sb_cols * (128 + 192 * bytesperpixel +
274 lflvl_len *
sizeof(*
s->lflvl) + 16 *
sizeof(*
s->above_mv_ctx)));
277 assign(
s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
278 assign(
s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
279 assign(
s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
280 assign(
s->above_y_nnz_ctx, uint8_t *, 16);
281 assign(
s->above_mode_ctx, uint8_t *, 16);
283 assign(
s->above_uv_nnz_ctx[0], uint8_t *, 16);
284 assign(
s->above_uv_nnz_ctx[1], uint8_t *, 16);
285 assign(
s->above_partition_ctx, uint8_t *, 8);
286 assign(
s->above_skip_ctx, uint8_t *, 8);
287 assign(
s->above_txfm_ctx, uint8_t *, 8);
288 assign(
s->above_segpred_ctx, uint8_t *, 8);
289 assign(
s->above_intra_ctx, uint8_t *, 8);
290 assign(
s->above_comp_ctx, uint8_t *, 8);
291 assign(
s->above_ref_ctx, uint8_t *, 8);
292 assign(
s->above_filter_ctx, uint8_t *, 8);
297 for (
i = 0;
i <
s->active_tile_cols;
i++)
301 if (
s->s.h.bpp !=
s->last_bpp) {
304 s->last_bpp =
s->s.h.bpp;
314 int chroma_blocks, chroma_eobs, bytesperpixel =
s->bytesperpixel;
317 if (
td->b_base &&
td->block_base &&
s->block_alloc_using_2pass ==
s->s.frames[
CUR_FRAME].uses_2pass)
321 chroma_blocks = 64 * 64 >> (
s->ss_h +
s->ss_v);
322 chroma_eobs = 16 * 16 >> (
s->ss_h +
s->ss_v);
324 int sbs =
s->sb_cols *
s->sb_rows;
327 td->block_base =
av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
328 16 * 16 + 2 * chroma_eobs) * sbs);
329 if (!
td->b_base || !
td->block_base)
331 td->uvblock_base[0] =
td->block_base + sbs * 64 * 64 * bytesperpixel;
332 td->uvblock_base[1] =
td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
333 td->eob_base = (uint8_t *) (
td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
334 td->uveob_base[0] =
td->eob_base + 16 * 16 * sbs;
335 td->uveob_base[1] =
td->uveob_base[0] + chroma_eobs * sbs;
339 if (!
td->block_structure)
343 for (
i = 1;
i <
s->active_tile_cols;
i++)
346 for (
i = 0;
i <
s->active_tile_cols;
i++) {
348 s->td[
i].block_base =
av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
349 16 * 16 + 2 * chroma_eobs);
350 if (!
s->td[
i].b_base || !
s->td[
i].block_base)
352 s->td[
i].uvblock_base[0] =
s->td[
i].block_base + 64 * 64 * bytesperpixel;
353 s->td[
i].uvblock_base[1] =
s->td[
i].uvblock_base[0] + chroma_blocks * bytesperpixel;
354 s->td[
i].eob_base = (uint8_t *) (
s->td[
i].uvblock_base[1] + chroma_blocks * bytesperpixel);
355 s->td[
i].uveob_base[0] =
s->td[
i].eob_base + 16 * 16;
356 s->td[
i].uveob_base[1] =
s->td[
i].uveob_base[0] + chroma_eobs;
360 if (!
s->td[
i].block_structure)
365 s->block_alloc_using_2pass =
s->s.frames[
CUR_FRAME].uses_2pass;
382 return m - ((v + 1) >> 1);
389 static const uint8_t inv_map_table[255] = {
390 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
391 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
392 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
393 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
394 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
395 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
396 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
397 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
398 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
399 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
400 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
401 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
402 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
403 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
404 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
405 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
406 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
407 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
455 s->s.h.bpp = 8 +
bits * 2;
456 s->bytesperpixel = (7 +
s->s.h.bpp) >> 3;
462 s->ss_h =
s->ss_v = 0;
476 static const enum AVPixelFormat pix_fmt_for_ss[3][2 ][2 ] = {
488 s->pix_fmt = pix_fmt_for_ss[
bits][
s->ss_v][
s->ss_h];
499 s->ss_h =
s->ss_v = 1;
500 s->pix_fmt = pix_fmt_for_ss[
bits][1][1];
511 int c,
i, j, k, l, m, n,
w,
h,
max, size2,
ret, sharp;
513 const uint8_t *data2;
537 s->last_keyframe =
s->s.h.keyframe;
540 last_invisible =
s->s.h.invisible;
543 s->s.h.use_last_frame_mvs = !
s->s.h.errorres && !last_invisible;
545 if (
s->s.h.keyframe) {
553 s->s.h.refreshrefmask = 0xff;
559 s->s.h.intraonly =
s->s.h.invisible ?
get_bits1(&
s->gb) : 0;
560 s->s.h.resetctx =
s->s.h.errorres ? 0 :
get_bits(&
s->gb, 2);
561 if (
s->s.h.intraonly) {
570 s->ss_h =
s->ss_v = 1;
573 s->bytesperpixel = 1;
586 s->s.h.signbias[0] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
588 s->s.h.signbias[1] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
590 s->s.h.signbias[2] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
591 if (!
s->s.refs[
s->s.h.refidx[0]].f->buf[0] ||
592 !
s->s.refs[
s->s.h.refidx[1]].f->buf[0] ||
593 !
s->s.refs[
s->s.h.refidx[2]].f->buf[0]) {
598 w =
s->s.refs[
s->s.h.refidx[0]].f->width;
599 h =
s->s.refs[
s->s.h.refidx[0]].f->height;
601 w =
s->s.refs[
s->s.h.refidx[1]].f->width;
602 h =
s->s.refs[
s->s.h.refidx[1]].f->height;
604 w =
s->s.refs[
s->s.h.refidx[2]].f->width;
605 h =
s->s.refs[
s->s.h.refidx[2]].f->height;
613 s->s.h.use_last_frame_mvs &=
s->s.frames[
CUR_FRAME].tf.f->width ==
w &&
620 s->s.h.allowcompinter =
s->s.h.signbias[0] !=
s->s.h.signbias[1] ||
621 s->s.h.signbias[0] !=
s->s.h.signbias[2];
622 if (
s->s.h.allowcompinter) {
623 if (
s->s.h.signbias[0] ==
s->s.h.signbias[1]) {
624 s->s.h.fixcompref = 2;
625 s->s.h.varcompref[0] = 0;
626 s->s.h.varcompref[1] = 1;
627 }
else if (
s->s.h.signbias[0] ==
s->s.h.signbias[2]) {
628 s->s.h.fixcompref = 1;
629 s->s.h.varcompref[0] = 0;
630 s->s.h.varcompref[1] = 2;
632 s->s.h.fixcompref = 0;
633 s->s.h.varcompref[0] = 1;
634 s->s.h.varcompref[1] = 2;
639 s->s.h.refreshctx =
s->s.h.errorres ? 0 :
get_bits1(&
s->gb);
640 s->s.h.parallelmode =
s->s.h.errorres ? 1 :
get_bits1(&
s->gb);
642 if (
s->s.h.keyframe ||
s->s.h.intraonly)
643 s->s.h.framectxid = 0;
646 if (
s->s.h.keyframe ||
s->s.h.errorres ||
s->s.h.intraonly) {
648 s->s.h.lf_delta.ref[0] = 1;
649 s->s.h.lf_delta.ref[1] = 0;
650 s->s.h.lf_delta.ref[2] = -1;
651 s->s.h.lf_delta.ref[3] = -1;
652 s->s.h.lf_delta.mode[0] = 0;
653 s->s.h.lf_delta.mode[1] = 0;
654 memset(
s->s.h.segmentation.feat, 0,
sizeof(
s->s.h.segmentation.feat));
660 if (
s->s.h.filter.sharpness != sharp) {
661 for (
i = 1;
i <= 63;
i++) {
665 limit >>= (sharp + 3) >> 2;
670 s->filter_lut.lim_lut[
i] =
limit;
671 s->filter_lut.mblim_lut[
i] = 2 * (
i + 2) +
limit;
674 s->s.h.filter.sharpness = sharp;
675 if ((
s->s.h.lf_delta.enabled =
get_bits1(&
s->gb))) {
676 if ((
s->s.h.lf_delta.updated =
get_bits1(&
s->gb))) {
677 for (
i = 0;
i < 4;
i++)
680 for (
i = 0;
i < 2;
i++)
691 s->s.h.lossless =
s->s.h.yac_qi == 0 &&
s->s.h.ydc_qdelta == 0 &&
692 s->s.h.uvdc_qdelta == 0 &&
s->s.h.uvac_qdelta == 0;
697 if ((
s->s.h.segmentation.enabled =
get_bits1(&
s->gb))) {
698 if ((
s->s.h.segmentation.update_map =
get_bits1(&
s->gb))) {
699 for (
i = 0;
i < 7;
i++)
702 if ((
s->s.h.segmentation.temporal =
get_bits1(&
s->gb)))
703 for (
i = 0;
i < 3;
i++)
709 s->s.h.segmentation.absolute_vals =
get_bits1(&
s->gb);
710 for (
i = 0;
i < 8;
i++) {
711 if ((
s->s.h.segmentation.feat[
i].q_enabled =
get_bits1(&
s->gb)))
713 if ((
s->s.h.segmentation.feat[
i].lf_enabled =
get_bits1(&
s->gb)))
715 if ((
s->s.h.segmentation.feat[
i].ref_enabled =
get_bits1(&
s->gb)))
716 s->s.h.segmentation.feat[
i].ref_val =
get_bits(&
s->gb, 2);
717 s->s.h.segmentation.feat[
i].skip_enabled =
get_bits1(&
s->gb);
724 s->s.h.segmentation.temporal = 0;
725 s->s.h.segmentation.update_map = 0;
729 for (
i = 0;
i < (
s->s.h.segmentation.enabled ? 8 : 1);
i++) {
730 int qyac, qydc, quvac, quvdc, lflvl, sh;
732 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].q_enabled) {
733 if (
s->s.h.segmentation.absolute_vals)
738 qyac =
s->s.h.yac_qi;
750 sh =
s->s.h.filter.level >= 32;
751 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].lf_enabled) {
752 if (
s->s.h.segmentation.absolute_vals)
755 lflvl =
av_clip_uintp2(
s->s.h.filter.level +
s->s.h.segmentation.feat[
i].lf_val, 6);
757 lflvl =
s->s.h.filter.level;
759 if (
s->s.h.lf_delta.enabled) {
760 s->s.h.segmentation.feat[
i].lflvl[0][0] =
761 s->s.h.segmentation.feat[
i].lflvl[0][1] =
763 for (j = 1; j < 4; j++) {
764 s->s.h.segmentation.feat[
i].lflvl[j][0] =
766 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
767 s->s.h.segmentation.feat[
i].lflvl[j][1] =
769 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
772 memset(
s->s.h.segmentation.feat[
i].lflvl, lflvl,
773 sizeof(
s->s.h.segmentation.feat[
i].lflvl));
783 for (
s->s.h.tiling.log2_tile_cols = 0;
784 s->sb_cols > (64 <<
s->s.h.tiling.log2_tile_cols);
785 s->s.h.tiling.log2_tile_cols++) ;
786 for (
max = 0; (
s->sb_cols >>
max) >= 4;
max++) ;
788 while (
max >
s->s.h.tiling.log2_tile_cols) {
790 s->s.h.tiling.log2_tile_cols++;
795 s->s.h.tiling.tile_rows = 1 <<
s->s.h.tiling.log2_tile_rows;
796 if (
s->s.h.tiling.tile_cols != (1 <<
s->s.h.tiling.log2_tile_cols)) {
801 for (
i = 0;
i <
s->active_tile_cols;
i++)
806 s->s.h.tiling.tile_cols = 1 <<
s->s.h.tiling.log2_tile_cols;
808 s->s.h.tiling.tile_cols : 1;
813 n_range_coders =
s->s.h.tiling.tile_cols;
820 for (
i = 0;
i <
s->active_tile_cols;
i++) {
823 rc += n_range_coders;
828 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
829 int valid_ref_frame = 0;
830 for (
i = 0;
i < 3;
i++) {
832 int refw =
ref->width, refh =
ref->height;
836 "Ref pixfmt (%s) did not match current frame (%s)",
840 }
else if (refw ==
w && refh ==
h) {
841 s->mvscale[
i][0] =
s->mvscale[
i][1] = 0;
845 if (
w * 2 < refw ||
h * 2 < refh ||
w > 16 * refw ||
h > 16 * refh) {
847 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
852 s->mvscale[
i][0] = (refw << 14) /
w;
853 s->mvscale[
i][1] = (refh << 14) /
h;
854 s->mvstep[
i][0] = 16 *
s->mvscale[
i][0] >> 14;
855 s->mvstep[
i][1] = 16 *
s->mvscale[
i][1] >> 14;
859 if (!valid_ref_frame) {
860 av_log(avctx,
AV_LOG_ERROR,
"No valid reference frame is found, bitstream not supported\n");
865 if (
s->s.h.keyframe ||
s->s.h.errorres || (
s->s.h.intraonly &&
s->s.h.resetctx == 3)) {
866 s->prob_ctx[0].p =
s->prob_ctx[1].p =
s->prob_ctx[2].p =
876 }
else if (
s->s.h.intraonly &&
s->s.h.resetctx == 2) {
883 s->s.h.compressed_header_size = size2 =
get_bits(&
s->gb, 16);
887 if (size2 >
size - (data2 -
data)) {
900 for (
i = 0;
i <
s->active_tile_cols;
i++) {
901 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
902 memset(
s->td[
i].counts.coef, 0,
sizeof(
s->td[0].counts.coef));
903 memset(
s->td[
i].counts.eob, 0,
sizeof(
s->td[0].counts.eob));
905 memset(&
s->td[
i].counts, 0,
sizeof(
s->td[0].counts));
907 s->td[
i].nb_block_structure = 0;
913 s->prob.p =
s->prob_ctx[
c].p;
916 if (
s->s.h.lossless) {
920 if (
s->s.h.txfmmode == 3)
924 for (
i = 0;
i < 2;
i++)
927 for (
i = 0;
i < 2;
i++)
928 for (j = 0; j < 2; j++)
930 s->prob.p.tx16p[
i][j] =
932 for (
i = 0;
i < 2;
i++)
933 for (j = 0; j < 3; j++)
935 s->prob.p.tx32p[
i][j] =
941 for (
i = 0;
i < 4;
i++) {
942 uint8_t (*
ref)[2][6][6][3] =
s->prob_ctx[
c].coef[
i];
944 for (j = 0; j < 2; j++)
945 for (k = 0; k < 2; k++)
946 for (l = 0; l < 6; l++)
947 for (m = 0; m < 6; m++) {
948 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
949 uint8_t *
r =
ref[j][k][l][m];
950 if (m >= 3 && l == 0)
952 for (n = 0; n < 3; n++) {
961 for (j = 0; j < 2; j++)
962 for (k = 0; k < 2; k++)
963 for (l = 0; l < 6; l++)
964 for (m = 0; m < 6; m++) {
965 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
966 uint8_t *
r =
ref[j][k][l][m];
973 if (
s->s.h.txfmmode ==
i)
978 for (
i = 0;
i < 3;
i++)
981 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
982 for (
i = 0;
i < 7;
i++)
983 for (j = 0; j < 3; j++)
985 s->prob.p.mv_mode[
i][j] =
989 for (
i = 0;
i < 4;
i++)
990 for (j = 0; j < 2; j++)
992 s->prob.p.filter[
i][j] =
995 for (
i = 0;
i < 4;
i++)
999 if (
s->s.h.allowcompinter) {
1001 if (
s->s.h.comppredmode)
1004 for (
i = 0;
i < 5;
i++)
1013 for (
i = 0;
i < 5;
i++) {
1015 s->prob.p.single_ref[
i][0] =
1018 s->prob.p.single_ref[
i][1] =
1024 for (
i = 0;
i < 5;
i++)
1026 s->prob.p.comp_ref[
i] =
1030 for (
i = 0;
i < 4;
i++)
1031 for (j = 0; j < 9; j++)
1033 s->prob.p.y_mode[
i][j] =
1036 for (
i = 0;
i < 4;
i++)
1037 for (j = 0; j < 4; j++)
1038 for (k = 0; k < 3; k++)
1040 s->prob.p.partition[3 -
i][j][k] =
1042 s->prob.p.partition[3 -
i][j][k]);
1045 for (
i = 0;
i < 3;
i++)
1049 for (
i = 0;
i < 2;
i++) {
1051 s->prob.p.mv_comp[
i].sign =
1054 for (j = 0; j < 10; j++)
1056 s->prob.p.mv_comp[
i].classes[j] =
1060 s->prob.p.mv_comp[
i].class0 =
1063 for (j = 0; j < 10; j++)
1065 s->prob.p.mv_comp[
i].bits[j] =
1069 for (
i = 0;
i < 2;
i++) {
1070 for (j = 0; j < 2; j++)
1071 for (k = 0; k < 3; k++)
1073 s->prob.p.mv_comp[
i].class0_fp[j][k] =
1076 for (j = 0; j < 3; j++)
1078 s->prob.p.mv_comp[
i].fp[j] =
1082 if (
s->s.h.highprecisionmvs) {
1083 for (
i = 0;
i < 2;
i++) {
1085 s->prob.p.mv_comp[
i].class0_hp =
1089 s->prob.p.mv_comp[
i].hp =
1095 return (data2 -
data) + size2;
1099 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1102 int c = ((
s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1103 (((
td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1105 s->prob.p.partition[bl][
c];
1107 ptrdiff_t hbs = 4 >> bl;
1109 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1110 int bytesperpixel =
s->bytesperpixel;
1115 }
else if (col + hbs < s->cols) {
1116 if (row + hbs < s->rows) {
1124 yoff += hbs * 8 * y_stride;
1125 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1130 yoff += hbs * 8 * bytesperpixel;
1131 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1135 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1137 yoff + 8 * hbs * bytesperpixel,
1138 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1139 yoff += hbs * 8 * y_stride;
1140 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1141 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1143 yoff + 8 * hbs * bytesperpixel,
1144 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1151 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1153 yoff + 8 * hbs * bytesperpixel,
1154 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1159 }
else if (row + hbs < s->rows) {
1162 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1163 yoff += hbs * 8 * y_stride;
1164 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1165 decode_sb(
td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1172 decode_sb(
td, row, col, lflvl, yoff, uvoff, bl + 1);
1174 td->counts.partition[bl][
c][bp]++;
1178 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1182 ptrdiff_t hbs = 4 >> bl;
1184 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1185 int bytesperpixel =
s->bytesperpixel;
1190 }
else if (
td->b->bl == bl) {
1193 yoff += hbs * 8 * y_stride;
1194 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1196 }
else if (
b->bp ==
PARTITION_V && col + hbs < s->cols) {
1197 yoff += hbs * 8 * bytesperpixel;
1198 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1203 if (col + hbs < s->cols) {
1204 if (row + hbs < s->rows) {
1205 decode_sb_mem(
td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1206 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1207 yoff += hbs * 8 * y_stride;
1208 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1211 yoff + 8 * hbs * bytesperpixel,
1212 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1214 yoff += hbs * 8 * bytesperpixel;
1215 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1218 }
else if (row + hbs < s->rows) {
1219 yoff += hbs * 8 * y_stride;
1220 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1228 int sb_start = ( idx * n) >> log2_n;
1229 int sb_end = ((idx + 1) * n) >> log2_n;
1230 *start =
FFMIN(sb_start, n) << 3;
1231 *end =
FFMIN(sb_end, n) << 3;
1239 for (
i = 0;
i <
s->active_tile_cols;
i++)
1248 for (
i = 0;
i < 3;
i++) {
1253 for (
i = 0;
i < 8;
i++) {
1274 int row, col, tile_row, tile_col,
ret;
1276 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1278 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1281 ls_y =
f->linesize[0];
1282 ls_uv =
f->linesize[1];
1283 bytesperpixel =
s->bytesperpixel;
1286 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1288 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1290 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1293 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1294 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1301 if (tile_size >
size)
1312 for (row = tile_row_start; row < tile_row_end;
1313 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1315 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1317 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1319 tile_col,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1320 td->tile_col_start = tile_col_start;
1322 memset(
td->left_partition_ctx, 0, 8);
1323 memset(
td->left_skip_ctx, 0, 8);
1324 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1329 memset(
td->left_y_nnz_ctx, 0, 16);
1330 memset(
td->left_uv_nnz_ctx, 0, 32);
1331 memset(
td->left_segpred_ctx, 0, 8);
1333 td->c = &
td->c_b[tile_col];
1336 for (col = tile_col_start;
1338 col += 8, yoff2 += 64 * bytesperpixel,
1339 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1343 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1364 if (row + 8 <
s->rows) {
1365 memcpy(
s->intra_pred_data[0],
1366 f->data[0] + yoff + 63 * ls_y,
1367 8 *
s->cols * bytesperpixel);
1368 memcpy(
s->intra_pred_data[1],
1369 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1370 8 *
s->cols * bytesperpixel >>
s->ss_h);
1371 memcpy(
s->intra_pred_data[2],
1372 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1373 8 *
s->cols * bytesperpixel >>
s->ss_h);
1377 if (
s->s.h.filter.level) {
1380 lflvl_ptr =
s->lflvl;
1381 for (col = 0; col <
s->cols;
1382 col += 8, yoff2 += 64 * bytesperpixel,
1383 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1400 int decode_tiles_mt(
AVCodecContext *avctx,
void *tdata,
int jobnr,
1405 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1406 int bytesperpixel =
s->bytesperpixel, row, col, tile_row;
1407 unsigned tile_cols_len;
1408 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1413 ls_y =
f->linesize[0];
1414 ls_uv =
f->linesize[1];
1417 jobnr,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1418 td->tile_col_start = tile_col_start;
1419 uvoff = (64 * bytesperpixel >>
s->ss_h)*(tile_col_start >> 3);
1420 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1421 lflvl_ptr_base =
s->lflvl+(tile_col_start >> 3);
1423 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1425 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1427 td->c = &
td->c_b[tile_row];
1428 for (row = tile_row_start; row < tile_row_end;
1429 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1430 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1431 VP9Filter *lflvl_ptr = lflvl_ptr_base+
s->sb_cols*(row >> 3);
1433 memset(
td->left_partition_ctx, 0, 8);
1434 memset(
td->left_skip_ctx, 0, 8);
1435 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1440 memset(
td->left_y_nnz_ctx, 0, 16);
1441 memset(
td->left_uv_nnz_ctx, 0, 32);
1442 memset(
td->left_segpred_ctx, 0, 8);
1444 for (col = tile_col_start;
1446 col += 8, yoff2 += 64 * bytesperpixel,
1447 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1450 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1457 tile_cols_len = tile_col_end - tile_col_start;
1458 if (row + 8 <
s->rows) {
1459 memcpy(
s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1460 f->data[0] + yoff + 63 * ls_y,
1461 8 * tile_cols_len * bytesperpixel);
1462 memcpy(
s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1463 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1464 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1465 memcpy(
s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1466 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1467 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1470 vp9_report_tile_progress(
s, row >> 3, 1);
1480 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1482 int bytesperpixel =
s->bytesperpixel, col,
i;
1486 ls_y =
f->linesize[0];
1487 ls_uv =
f->linesize[1];
1489 for (
i = 0;
i <
s->sb_rows;
i++) {
1490 vp9_await_tile_progress(
s,
i,
s->s.h.tiling.tile_cols);
1492 if (
s->s.h.filter.level) {
1493 yoff = (ls_y * 64)*
i;
1494 uvoff = (ls_uv * 64 >>
s->ss_v)*
i;
1495 lflvl_ptr =
s->lflvl+
s->sb_cols*
i;
1496 for (col = 0; col <
s->cols;
1497 col += 8, yoff += 64 * bytesperpixel,
1498 uvoff += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1511 unsigned int tile, nb_blocks = 0;
1513 if (
s->s.h.segmentation.enabled) {
1514 for (tile = 0; tile <
s->active_tile_cols; tile++)
1515 nb_blocks +=
s->td[tile].nb_block_structure;
1523 par->
qp =
s->s.h.yac_qi;
1524 par->
delta_qp[0][0] =
s->s.h.ydc_qdelta;
1525 par->
delta_qp[1][0] =
s->s.h.uvdc_qdelta;
1526 par->
delta_qp[2][0] =
s->s.h.uvdc_qdelta;
1527 par->
delta_qp[1][1] =
s->s.h.uvac_qdelta;
1528 par->
delta_qp[2][1] =
s->s.h.uvac_qdelta;
1531 unsigned int block = 0;
1532 unsigned int tile, block_tile;
1534 for (tile = 0; tile <
s->active_tile_cols; tile++) {
1537 for (block_tile = 0; block_tile <
td->nb_block_structure; block_tile++) {
1539 unsigned int row =
td->block_structure[block_tile].row;
1540 unsigned int col =
td->block_structure[block_tile].col;
1541 uint8_t seg_id =
frame->segmentation_map[row * 8 *
s->sb_cols + col];
1545 b->w = 1 << (3 +
td->block_structure[block_tile].block_size_idx_x);
1546 b->h = 1 << (3 +
td->block_structure[block_tile].block_size_idx_y);
1548 if (
s->s.h.segmentation.feat[seg_id].q_enabled) {
1549 b->delta_qp =
s->s.h.segmentation.feat[seg_id].q_val;
1550 if (
s->s.h.segmentation.absolute_vals)
1551 b->delta_qp -= par->
qp;
1568 (!
s->s.h.segmentation.enabled || !
s->s.h.segmentation.update_map);
1573 }
else if (
ret == 0) {
1574 if (!
s->s.refs[
ref].f->buf[0]) {
1582 for (
i = 0;
i < 8;
i++) {
1583 if (
s->next_refs[
i].f->buf[0])
1585 if (
s->s.refs[
i].f->buf[0] &&
1595 if (!retain_segmap_ref ||
s->s.h.keyframe ||
s->s.h.intraonly) {
1598 if (!
s->s.h.keyframe && !
s->s.h.intraonly && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1604 if (!
s->s.h.intraonly && !
s->s.h.keyframe && !
s->s.h.errorres &&
s->s.frames[
CUR_FRAME].tf.f->buf[0] &&
1612 if (
s->s.h.keyframe)
1625 for (
i = 0;
i < 8;
i++) {
1626 if (
s->next_refs[
i].f->buf[0])
1628 if (
s->s.h.refreshrefmask & (1 <<
i)) {
1630 }
else if (
s->s.refs[
i].f->buf[0]) {
1652 memset(
s->above_partition_ctx, 0,
s->cols);
1653 memset(
s->above_skip_ctx, 0,
s->cols);
1654 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1655 memset(
s->above_mode_ctx,
DC_PRED,
s->cols * 2);
1659 memset(
s->above_y_nnz_ctx, 0,
s->sb_cols * 16);
1660 memset(
s->above_uv_nnz_ctx[0], 0,
s->sb_cols * 16 >>
s->ss_h);
1661 memset(
s->above_uv_nnz_ctx[1], 0,
s->sb_cols * 16 >>
s->ss_h);
1662 memset(
s->above_segpred_ctx, 0,
s->cols);
1667 "Failed to allocate block buffers\n");
1670 if (
s->s.h.refreshctx &&
s->s.h.parallelmode) {
1673 for (
i = 0;
i < 4;
i++) {
1674 for (j = 0; j < 2; j++)
1675 for (k = 0; k < 2; k++)
1676 for (l = 0; l < 6; l++)
1677 for (m = 0; m < 6; m++)
1678 memcpy(
s->prob_ctx[
s->s.h.framectxid].coef[
i][j][k][l][m],
1679 s->prob.coef[
i][j][k][l][m], 3);
1680 if (
s->s.h.txfmmode ==
i)
1683 s->prob_ctx[
s->s.h.framectxid].p =
s->prob.p;
1685 }
else if (!
s->s.h.refreshctx) {
1691 for (
i = 0;
i <
s->sb_rows;
i++)
1697 for (
i = 0;
i <
s->active_tile_cols;
i++) {
1698 s->td[
i].b =
s->td[
i].b_base;
1699 s->td[
i].block =
s->td[
i].block_base;
1700 s->td[
i].uvblock[0] =
s->td[
i].uvblock_base[0];
1701 s->td[
i].uvblock[1] =
s->td[
i].uvblock_base[1];
1702 s->td[
i].eob =
s->td[
i].eob_base;
1703 s->td[
i].uveob[0] =
s->td[
i].uveob_base[0];
1704 s->td[
i].uveob[1] =
s->td[
i].uveob_base[1];
1705 s->td[
i].error_info = 0;
1710 int tile_row, tile_col;
1714 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1715 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1718 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1719 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1726 if (tile_size >
size)
1751 for (
i = 1;
i <
s->s.h.tiling.tile_cols;
i++)
1752 for (j = 0; j <
sizeof(
s->td[
i].counts) /
sizeof(
unsigned); j++)
1753 ((
unsigned *)&
s->td[0].counts)[j] += ((
unsigned *)&
s->td[
i].counts)[j];
1755 if (
s->pass < 2 &&
s->s.h.refreshctx && !
s->s.h.parallelmode) {
1759 }
while (
s->pass++ == 1);
1762 if (
s->td->error_info < 0) {
1764 s->td->error_info = 0;
1775 for (
i = 0;
i < 8;
i++) {
1776 if (
s->s.refs[
i].f->buf[0])
1778 if (
s->next_refs[
i].f->buf[0] &&
1783 if (!
s->s.h.invisible) {
1797 for (
i = 0;
i < 3;
i++)
1799 for (
i = 0;
i < 8;
i++)
1812 s->s.h.filter.sharpness = -1;
1822 for (
int i = 0;
i < 3;
i++) {
1824 if (!
s->s.frames[
i].tf.f)
1827 for (
int i = 0;
i < 8;
i++) {
1830 if (!
s->s.refs[
i].f || !
s->next_refs[
i].f)
1842 for (
i = 0;
i < 3;
i++) {
1843 if (
s->s.frames[
i].tf.f->buf[0])
1845 if (ssrc->s.frames[
i].tf.f->buf[0]) {
1850 for (
i = 0;
i < 8;
i++) {
1851 if (
s->s.refs[
i].f->buf[0])
1853 if (ssrc->next_refs[
i].f->buf[0]) {
1859 s->frame_extradata_pool_size = ssrc->frame_extradata_pool_size;
1861 s->s.h.invisible = ssrc->s.h.invisible;
1862 s->s.h.keyframe = ssrc->s.h.keyframe;
1863 s->s.h.intraonly = ssrc->s.h.intraonly;
1864 s->ss_v = ssrc->ss_v;
1865 s->ss_h = ssrc->ss_h;
1866 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1867 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1868 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1869 s->bytesperpixel = ssrc->bytesperpixel;
1870 s->gf_fmt = ssrc->gf_fmt;
1873 s->s.h.bpp = ssrc->s.h.bpp;
1874 s->bpp_index = ssrc->bpp_index;
1875 s->pix_fmt = ssrc->pix_fmt;
1876 memcpy(&
s->prob_ctx, &ssrc->prob_ctx,
sizeof(
s->prob_ctx));
1877 memcpy(&
s->s.h.lf_delta, &ssrc->s.h.lf_delta,
sizeof(
s->s.h.lf_delta));
1878 memcpy(&
s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1879 sizeof(
s->s.h.segmentation.feat));
1901 .bsfs =
"vp9_superframe_split",
1903 #if CONFIG_VP9_DXVA2_HWACCEL
1906 #if CONFIG_VP9_D3D11VA_HWACCEL
1909 #if CONFIG_VP9_D3D11VA2_HWACCEL
1912 #if CONFIG_VP9_D3D12VA_HWACCEL
1915 #if CONFIG_VP9_NVDEC_HWACCEL
1918 #if CONFIG_VP9_VAAPI_HWACCEL
1921 #if CONFIG_VP9_VDPAU_HWACCEL
1924 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL