24 #include "config_components.h"
/* 24-bit magic constant (bytes 0x49, 0x83, 0x42).
 * NOTE(review): presumably the frame sync code that the frame header parser
 * compares against the bitstream; the comparison itself is not visible in
 * this excerpt -- confirm at the use site. */
50 #define VP9_SYNCCODE 0x498342
104 f->segmentation_map =
NULL;
116 sz = 64 *
s->sb_cols *
s->sb_rows;
117 if (sz !=
s->frame_extradata_pool_size) {
121 if (!
s->frame_extradata_pool) {
122 s->frame_extradata_pool_size = 0;
126 s->frame_extradata_pool_size = sz;
134 f->segmentation_map =
f->extradata;
151 dst->frame_header =
src->frame_header;
157 dst->segmentation_map =
src->segmentation_map;
159 dst->uses_2pass =
src->uses_2pass;
162 src->hwaccel_picture_private);
/* Sum of the build-time CONFIG_VP9_*_HWACCEL switches, with the D3D11VA
 * switch weighted by two. The same switches are tested with #if elsewhere in
 * this file, so each is expected to expand to an integer constant.
 * NOTE(review): presumably an upper bound on the number of hardware pixel
 * formats that can be offered during format negotiation (D3D11VA appears to
 * contribute two entries) -- confirm against the array this macro sizes,
 * which lies outside this excerpt. Keep this sum in step with the #if-guarded
 * format lists whenever a hwaccel is added or removed. */
167 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
168 CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
169 CONFIG_VP9_D3D12VA_HWACCEL + \
170 CONFIG_VP9_NVDEC_HWACCEL + \
171 CONFIG_VP9_VAAPI_HWACCEL + \
172 CONFIG_VP9_VDPAU_HWACCEL + \
173 CONFIG_VP9_VIDEOTOOLBOX_HWACCEL + \
174 CONFIG_VP9_VULKAN_HWACCEL)
178 int bytesperpixel =
s->bytesperpixel,
ret, cols, rows;
184 if (!(
s->pix_fmt ==
s->gf_fmt &&
w ==
s->w &&
h ==
s->h)) {
189 switch (
s->pix_fmt) {
192 #if CONFIG_VP9_DXVA2_HWACCEL
195 #if CONFIG_VP9_D3D11VA_HWACCEL
199 #if CONFIG_VP9_D3D12VA_HWACCEL
202 #if CONFIG_VP9_NVDEC_HWACCEL
205 #if CONFIG_VP9_VAAPI_HWACCEL
208 #if CONFIG_VP9_VDPAU_HWACCEL
211 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
214 #if CONFIG_VP9_VULKAN_HWACCEL
219 #if CONFIG_VP9_NVDEC_HWACCEL
222 #if CONFIG_VP9_VAAPI_HWACCEL
225 #if CONFIG_VP9_VDPAU_HWACCEL
228 #if CONFIG_VP9_VULKAN_HWACCEL
235 #if CONFIG_VP9_VAAPI_HWACCEL
238 #if CONFIG_VP9_VULKAN_HWACCEL
245 #if CONFIG_VP9_VAAPI_HWACCEL
248 #if CONFIG_VP9_VULKAN_HWACCEL
254 *fmtp++ =
s->pix_fmt;
262 s->gf_fmt =
s->pix_fmt;
270 if (
s->intra_pred_data[0] && cols ==
s->cols && rows ==
s->rows &&
s->pix_fmt ==
s->last_fmt)
273 s->last_fmt =
s->pix_fmt;
274 s->sb_cols = (
w + 63) >> 6;
275 s->sb_rows = (
h + 63) >> 6;
276 s->cols = (
w + 7) >> 3;
277 s->rows = (
h + 7) >> 3;
/* Carve the next sub-buffer out of one shared allocation.
 *   var  - pointer lvalue that receives the current cursor position
 *   type - pointer type the cursor is cast to before being stored in var
 *   n    - number of *var elements reserved per superblock column
 * Points var at the cursor `p`, then advances `p` by
 * s->sb_cols * n * sizeof(*var) bytes. Relies on `p` and `s` being in scope
 * at the expansion site. The macro expands to two bare statements (it is not
 * wrapped in do { } while (0)), so it must only be used as a full statement
 * inside a braced block, never as the unbraced body of an if/for/while. */
280 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
284 p =
av_malloc(
s->sb_cols * (128 + 192 * bytesperpixel +
285 lflvl_len *
sizeof(*
s->lflvl) + 16 *
sizeof(*
s->above_mv_ctx)));
288 assign(
s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
289 assign(
s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
290 assign(
s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
291 assign(
s->above_y_nnz_ctx, uint8_t *, 16);
292 assign(
s->above_mode_ctx, uint8_t *, 16);
294 assign(
s->above_uv_nnz_ctx[0], uint8_t *, 16);
295 assign(
s->above_uv_nnz_ctx[1], uint8_t *, 16);
296 assign(
s->above_partition_ctx, uint8_t *, 8);
297 assign(
s->above_skip_ctx, uint8_t *, 8);
298 assign(
s->above_txfm_ctx, uint8_t *, 8);
299 assign(
s->above_segpred_ctx, uint8_t *, 8);
300 assign(
s->above_intra_ctx, uint8_t *, 8);
301 assign(
s->above_comp_ctx, uint8_t *, 8);
302 assign(
s->above_ref_ctx, uint8_t *, 8);
303 assign(
s->above_filter_ctx, uint8_t *, 8);
308 for (
i = 0;
i <
s->active_tile_cols;
i++)
312 if (
s->s.h.bpp !=
s->last_bpp) {
315 s->last_bpp =
s->s.h.bpp;
326 int chroma_blocks, chroma_eobs, bytesperpixel =
s->bytesperpixel;
333 chroma_blocks = 64 * 64 >> (
s->ss_h +
s->ss_v);
334 chroma_eobs = 16 * 16 >> (
s->ss_h +
s->ss_v);
336 int sbs =
s->sb_cols *
s->sb_rows;
340 16 * 16 + 2 * chroma_eobs) * sbs);
355 for (
i = 1;
i <
s->active_tile_cols;
i++)
358 for (
i = 0;
i <
s->active_tile_cols;
i++) {
360 s->td[
i].block_base =
av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
361 16 * 16 + 2 * chroma_eobs);
362 if (!
s->td[
i].b_base || !
s->td[
i].block_base)
364 s->td[
i].uvblock_base[0] =
s->td[
i].block_base + 64 * 64 * bytesperpixel;
365 s->td[
i].uvblock_base[1] =
s->td[
i].uvblock_base[0] + chroma_blocks * bytesperpixel;
366 s->td[
i].eob_base = (uint8_t *) (
s->td[
i].uvblock_base[1] + chroma_blocks * bytesperpixel);
367 s->td[
i].uveob_base[0] =
s->td[
i].eob_base + 16 * 16;
368 s->td[
i].uveob_base[1] =
s->td[
i].uveob_base[0] + chroma_eobs;
372 if (!
s->td[
i].block_structure)
377 s->block_alloc_using_2pass =
s->s.frames[
CUR_FRAME].uses_2pass;
394 return m - ((v + 1) >> 1);
401 static const uint8_t inv_map_table[255] = {
402 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
403 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
404 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
405 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
406 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
407 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
408 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
409 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
410 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
411 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
412 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
413 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
414 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
415 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
416 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
417 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
418 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
419 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
467 s->s.h.bpp = 8 +
bits * 2;
468 s->bytesperpixel = (7 +
s->s.h.bpp) >> 3;
474 s->ss_h =
s->ss_v = 0;
488 static const enum AVPixelFormat pix_fmt_for_ss[3][2 ][2 ] = {
500 s->pix_fmt = pix_fmt_for_ss[
bits][
s->ss_v][
s->ss_h];
511 s->ss_h =
s->ss_v = 1;
512 s->pix_fmt = pix_fmt_for_ss[
bits][1][1];
523 int c,
i, j, k, l, m, n,
w,
h,
max, size2,
ret, sharp;
525 const uint8_t *data2;
550 s->last_keyframe =
s->s.h.keyframe;
553 last_invisible =
s->s.h.invisible;
556 s->s.h.use_last_frame_mvs = !
s->s.h.errorres && !last_invisible;
558 if (
s->s.h.keyframe) {
566 s->s.h.refreshrefmask = 0xff;
572 s->s.h.intraonly =
s->s.h.invisible ?
get_bits1(&
s->gb) : 0;
573 s->s.h.resetctx =
s->s.h.errorres ? 0 :
get_bits(&
s->gb, 2);
574 if (
s->s.h.intraonly) {
583 s->ss_h =
s->ss_v = 1;
586 s->bytesperpixel = 1;
599 s->s.h.signbias[0] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
601 s->s.h.signbias[1] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
603 s->s.h.signbias[2] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
604 if (!
s->s.refs[
s->s.h.refidx[0]].f ||
605 !
s->s.refs[
s->s.h.refidx[1]].f ||
606 !
s->s.refs[
s->s.h.refidx[2]].f) {
611 w =
s->s.refs[
s->s.h.refidx[0]].f->width;
612 h =
s->s.refs[
s->s.h.refidx[0]].f->height;
614 w =
s->s.refs[
s->s.h.refidx[1]].f->width;
615 h =
s->s.refs[
s->s.h.refidx[1]].f->height;
617 w =
s->s.refs[
s->s.h.refidx[2]].f->width;
618 h =
s->s.refs[
s->s.h.refidx[2]].f->height;
626 s->s.h.use_last_frame_mvs &=
s->s.frames[
CUR_FRAME].tf.f &&
634 s->s.h.allowcompinter =
s->s.h.signbias[0] !=
s->s.h.signbias[1] ||
635 s->s.h.signbias[0] !=
s->s.h.signbias[2];
636 if (
s->s.h.allowcompinter) {
637 if (
s->s.h.signbias[0] ==
s->s.h.signbias[1]) {
638 s->s.h.fixcompref = 2;
639 s->s.h.varcompref[0] = 0;
640 s->s.h.varcompref[1] = 1;
641 }
else if (
s->s.h.signbias[0] ==
s->s.h.signbias[2]) {
642 s->s.h.fixcompref = 1;
643 s->s.h.varcompref[0] = 0;
644 s->s.h.varcompref[1] = 2;
646 s->s.h.fixcompref = 0;
647 s->s.h.varcompref[0] = 1;
648 s->s.h.varcompref[1] = 2;
653 s->s.h.refreshctx =
s->s.h.errorres ? 0 :
get_bits1(&
s->gb);
654 s->s.h.parallelmode =
s->s.h.errorres ? 1 :
get_bits1(&
s->gb);
656 if (
s->s.h.keyframe ||
s->s.h.intraonly)
657 s->s.h.framectxid = 0;
660 if (
s->s.h.keyframe ||
s->s.h.errorres ||
s->s.h.intraonly) {
662 s->s.h.lf_delta.ref[0] = 1;
663 s->s.h.lf_delta.ref[1] = 0;
664 s->s.h.lf_delta.ref[2] = -1;
665 s->s.h.lf_delta.ref[3] = -1;
666 s->s.h.lf_delta.mode[0] = 0;
667 s->s.h.lf_delta.mode[1] = 0;
668 memset(
s->s.h.segmentation.feat, 0,
sizeof(
s->s.h.segmentation.feat));
674 if (
s->s.h.filter.sharpness != sharp) {
675 for (
i = 1;
i <= 63;
i++) {
679 limit >>= (sharp + 3) >> 2;
684 s->filter_lut.lim_lut[
i] =
limit;
685 s->filter_lut.mblim_lut[
i] = 2 * (
i + 2) +
limit;
688 s->s.h.filter.sharpness = sharp;
689 if ((
s->s.h.lf_delta.enabled =
get_bits1(&
s->gb))) {
690 if ((
s->s.h.lf_delta.updated =
get_bits1(&
s->gb))) {
691 for (
i = 0;
i < 4;
i++)
694 for (
i = 0;
i < 2;
i++)
705 s->s.h.lossless =
s->s.h.yac_qi == 0 &&
s->s.h.ydc_qdelta == 0 &&
706 s->s.h.uvdc_qdelta == 0 &&
s->s.h.uvac_qdelta == 0;
707 #if FF_API_CODEC_PROPS
715 if ((
s->s.h.segmentation.enabled =
get_bits1(&
s->gb))) {
716 if ((
s->s.h.segmentation.update_map =
get_bits1(&
s->gb))) {
717 for (
i = 0;
i < 7;
i++)
720 if ((
s->s.h.segmentation.temporal =
get_bits1(&
s->gb)))
721 for (
i = 0;
i < 3;
i++)
727 s->s.h.segmentation.absolute_vals =
get_bits1(&
s->gb);
728 for (
i = 0;
i < 8;
i++) {
729 if ((
s->s.h.segmentation.feat[
i].q_enabled =
get_bits1(&
s->gb)))
731 if ((
s->s.h.segmentation.feat[
i].lf_enabled =
get_bits1(&
s->gb)))
733 if ((
s->s.h.segmentation.feat[
i].ref_enabled =
get_bits1(&
s->gb)))
734 s->s.h.segmentation.feat[
i].ref_val =
get_bits(&
s->gb, 2);
735 s->s.h.segmentation.feat[
i].skip_enabled =
get_bits1(&
s->gb);
742 s->s.h.segmentation.temporal = 0;
743 s->s.h.segmentation.update_map = 0;
747 for (
i = 0;
i < (
s->s.h.segmentation.enabled ? 8 : 1);
i++) {
748 int qyac, qydc, quvac, quvdc, lflvl, sh;
750 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].q_enabled) {
751 if (
s->s.h.segmentation.absolute_vals)
756 qyac =
s->s.h.yac_qi;
768 sh =
s->s.h.filter.level >= 32;
769 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].lf_enabled) {
770 if (
s->s.h.segmentation.absolute_vals)
773 lflvl =
av_clip_uintp2(
s->s.h.filter.level +
s->s.h.segmentation.feat[
i].lf_val, 6);
775 lflvl =
s->s.h.filter.level;
777 if (
s->s.h.lf_delta.enabled) {
778 s->s.h.segmentation.feat[
i].lflvl[0][0] =
779 s->s.h.segmentation.feat[
i].lflvl[0][1] =
781 for (j = 1; j < 4; j++) {
782 s->s.h.segmentation.feat[
i].lflvl[j][0] =
784 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
785 s->s.h.segmentation.feat[
i].lflvl[j][1] =
787 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
790 memset(
s->s.h.segmentation.feat[
i].lflvl, lflvl,
791 sizeof(
s->s.h.segmentation.feat[
i].lflvl));
801 for (
s->s.h.tiling.log2_tile_cols = 0;
802 s->sb_cols > (64 <<
s->s.h.tiling.log2_tile_cols);
803 s->s.h.tiling.log2_tile_cols++) ;
804 for (
max = 0; (
s->sb_cols >>
max) >= 4;
max++) ;
806 while (
max >
s->s.h.tiling.log2_tile_cols) {
808 s->s.h.tiling.log2_tile_cols++;
813 s->s.h.tiling.tile_rows = 1 <<
s->s.h.tiling.log2_tile_rows;
814 if (
s->s.h.tiling.tile_cols != (1 <<
s->s.h.tiling.log2_tile_cols) || changed) {
819 for (
i = 0;
i <
s->active_tile_cols;
i++)
824 s->s.h.tiling.tile_cols = 1 <<
s->s.h.tiling.log2_tile_cols;
826 s->s.h.tiling.tile_cols : 1;
831 n_range_coders =
s->s.h.tiling.tile_cols;
838 for (
i = 0;
i <
s->active_tile_cols;
i++) {
841 rc += n_range_coders;
846 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
847 int valid_ref_frame = 0;
848 for (
i = 0;
i < 3;
i++) {
850 int refw =
ref->width, refh =
ref->height;
854 "Ref pixfmt (%s) did not match current frame (%s)",
858 }
else if (refw ==
w && refh ==
h) {
859 s->mvscale[
i][0] =
s->mvscale[
i][1] = 0;
863 if (
w * 2 < refw ||
h * 2 < refh ||
w > 16 * refw ||
h > 16 * refh) {
865 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
870 s->mvscale[
i][0] = (refw << 14) /
w;
871 s->mvscale[
i][1] = (refh << 14) /
h;
872 s->mvstep[
i][0] = 16 *
s->mvscale[
i][0] >> 14;
873 s->mvstep[
i][1] = 16 *
s->mvscale[
i][1] >> 14;
877 if (!valid_ref_frame) {
878 av_log(avctx,
AV_LOG_ERROR,
"No valid reference frame is found, bitstream not supported\n");
883 if (
s->s.h.keyframe ||
s->s.h.errorres || (
s->s.h.intraonly &&
s->s.h.resetctx == 3)) {
884 s->prob_ctx[0].p =
s->prob_ctx[1].p =
s->prob_ctx[2].p =
894 }
else if (
s->s.h.intraonly &&
s->s.h.resetctx == 2) {
901 s->s.h.compressed_header_size = size2 =
get_bits(&
s->gb, 16);
905 if (size2 >
size - (data2 -
data)) {
918 for (
i = 0;
i <
s->active_tile_cols;
i++) {
919 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
920 memset(
s->td[
i].counts.coef, 0,
sizeof(
s->td[0].counts.coef));
921 memset(
s->td[
i].counts.eob, 0,
sizeof(
s->td[0].counts.eob));
923 memset(&
s->td[
i].counts, 0,
sizeof(
s->td[0].counts));
925 s->td[
i].nb_block_structure = 0;
931 s->prob.p =
s->prob_ctx[
c].p;
934 if (
s->s.h.lossless) {
938 if (
s->s.h.txfmmode == 3)
942 for (
i = 0;
i < 2;
i++)
945 for (
i = 0;
i < 2;
i++)
946 for (j = 0; j < 2; j++)
948 s->prob.p.tx16p[
i][j] =
950 for (
i = 0;
i < 2;
i++)
951 for (j = 0; j < 3; j++)
953 s->prob.p.tx32p[
i][j] =
959 for (
i = 0;
i < 4;
i++) {
960 uint8_t (*
ref)[2][6][6][3] =
s->prob_ctx[
c].coef[
i];
962 for (j = 0; j < 2; j++)
963 for (k = 0; k < 2; k++)
964 for (l = 0; l < 6; l++)
965 for (m = 0; m < 6; m++) {
966 uint8_t *
p =
s->prob.coef[
i][j][k][l][m];
967 uint8_t *
r =
ref[j][k][l][m];
968 if (m >= 3 && l == 0)
970 for (n = 0; n < 3; n++) {
979 for (j = 0; j < 2; j++)
980 for (k = 0; k < 2; k++)
981 for (l = 0; l < 6; l++)
982 for (m = 0; m < 6; m++) {
983 uint8_t *
p =
s->prob.coef[
i][j][k][l][m];
984 uint8_t *
r =
ref[j][k][l][m];
991 if (
s->s.h.txfmmode ==
i)
996 for (
i = 0;
i < 3;
i++)
999 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
1000 for (
i = 0;
i < 7;
i++)
1001 for (j = 0; j < 3; j++)
1003 s->prob.p.mv_mode[
i][j] =
1007 for (
i = 0;
i < 4;
i++)
1008 for (j = 0; j < 2; j++)
1010 s->prob.p.filter[
i][j] =
1013 for (
i = 0;
i < 4;
i++)
1017 if (
s->s.h.allowcompinter) {
1019 if (
s->s.h.comppredmode)
1022 for (
i = 0;
i < 5;
i++)
1031 for (
i = 0;
i < 5;
i++) {
1033 s->prob.p.single_ref[
i][0] =
1036 s->prob.p.single_ref[
i][1] =
1042 for (
i = 0;
i < 5;
i++)
1044 s->prob.p.comp_ref[
i] =
1048 for (
i = 0;
i < 4;
i++)
1049 for (j = 0; j < 9; j++)
1051 s->prob.p.y_mode[
i][j] =
1054 for (
i = 0;
i < 4;
i++)
1055 for (j = 0; j < 4; j++)
1056 for (k = 0; k < 3; k++)
1058 s->prob.p.partition[3 -
i][j][k] =
1060 s->prob.p.partition[3 -
i][j][k]);
1063 for (
i = 0;
i < 3;
i++)
1067 for (
i = 0;
i < 2;
i++) {
1069 s->prob.p.mv_comp[
i].sign =
1072 for (j = 0; j < 10; j++)
1074 s->prob.p.mv_comp[
i].classes[j] =
1078 s->prob.p.mv_comp[
i].class0 =
1081 for (j = 0; j < 10; j++)
1083 s->prob.p.mv_comp[
i].bits[j] =
1087 for (
i = 0;
i < 2;
i++) {
1088 for (j = 0; j < 2; j++)
1089 for (k = 0; k < 3; k++)
1091 s->prob.p.mv_comp[
i].class0_fp[j][k] =
1094 for (j = 0; j < 3; j++)
1096 s->prob.p.mv_comp[
i].fp[j] =
1100 if (
s->s.h.highprecisionmvs) {
1101 for (
i = 0;
i < 2;
i++) {
1103 s->prob.p.mv_comp[
i].class0_hp =
1107 s->prob.p.mv_comp[
i].hp =
1113 return (data2 -
data) + size2;
1117 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1120 int c = ((
s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1123 s->prob.p.partition[bl][
c];
1125 ptrdiff_t hbs = 4 >> bl;
1127 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1128 int bytesperpixel =
s->bytesperpixel;
1133 }
else if (col + hbs < s->cols) {
1134 if (row + hbs < s->rows) {
1142 yoff += hbs * 8 * y_stride;
1143 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1148 yoff += hbs * 8 * bytesperpixel;
1149 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1153 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1155 yoff + 8 * hbs * bytesperpixel,
1156 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1157 yoff += hbs * 8 * y_stride;
1158 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1159 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1160 decode_sb(td, row + hbs, col + hbs, lflvl,
1161 yoff + 8 * hbs * bytesperpixel,
1162 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1166 "the four PARTITION_* terminal codes");
1170 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1172 yoff + 8 * hbs * bytesperpixel,
1173 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1178 }
else if (row + hbs < s->rows) {
1181 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1182 yoff += hbs * 8 * y_stride;
1183 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1184 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1191 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1197 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1201 ptrdiff_t hbs = 4 >> bl;
1203 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1204 int bytesperpixel =
s->bytesperpixel;
1209 }
else if (td->
b->
bl == bl) {
1212 yoff += hbs * 8 * y_stride;
1213 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1215 }
else if (
b->bp ==
PARTITION_V && col + hbs < s->cols) {
1216 yoff += hbs * 8 * bytesperpixel;
1217 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1222 if (col + hbs < s->cols) {
1223 if (row + hbs < s->rows) {
1224 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1225 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1226 yoff += hbs * 8 * y_stride;
1227 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1228 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1230 yoff + 8 * hbs * bytesperpixel,
1231 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1233 yoff += hbs * 8 * bytesperpixel;
1234 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1235 decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
1237 }
else if (row + hbs < s->rows) {
1238 yoff += hbs * 8 * y_stride;
1239 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1240 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1247 int sb_start = ( idx * n) >> log2_n;
1248 int sb_end = ((idx + 1) * n) >> log2_n;
1249 *start =
FFMIN(sb_start, n) << 3;
1250 *end =
FFMIN(sb_end, n) << 3;
1258 for (
i = 0;
i <
s->active_tile_cols;
i++)
1267 for (
int i = 0;
i < 3;
i++)
1270 for (
i = 0;
i < 8;
i++) {
1283 ff_cbs_fragment_free(&
s->current_frag);
1284 ff_cbs_close(&
s->cbc);
1295 int row, col, tile_row, tile_col,
ret;
1297 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1299 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1302 ls_y =
f->linesize[0];
1303 ls_uv =
f->linesize[1];
1304 bytesperpixel =
s->bytesperpixel;
1307 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1309 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1311 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1314 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1315 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1322 if (tile_size >
size)
1333 for (row = tile_row_start; row < tile_row_end;
1334 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1336 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1338 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1340 tile_col,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1345 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1354 td->
c = &td->
c_b[tile_col];
1357 for (col = tile_col_start;
1359 col += 8, yoff2 += 64 * bytesperpixel,
1360 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1364 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1385 if (row + 8 <
s->rows) {
1386 memcpy(
s->intra_pred_data[0],
1387 f->data[0] + yoff + 63 * ls_y,
1388 8 *
s->cols * bytesperpixel);
1389 memcpy(
s->intra_pred_data[1],
1390 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1391 8 *
s->cols * bytesperpixel >>
s->ss_h);
1392 memcpy(
s->intra_pred_data[2],
1393 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1394 8 *
s->cols * bytesperpixel >>
s->ss_h);
1398 if (
s->s.h.filter.level) {
1401 lflvl_ptr =
s->lflvl;
1402 for (col = 0; col <
s->cols;
1403 col += 8, yoff2 += 64 * bytesperpixel,
1404 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1421 int decode_tiles_mt(
AVCodecContext *avctx,
void *tdata,
int jobnr,
1426 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1427 int bytesperpixel =
s->bytesperpixel, row, col, tile_row;
1428 unsigned tile_cols_len;
1429 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1434 ls_y =
f->linesize[0];
1435 ls_uv =
f->linesize[1];
1438 jobnr,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1440 uvoff = (64 * bytesperpixel >>
s->ss_h)*(tile_col_start >> 3);
1441 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1442 lflvl_ptr_base =
s->lflvl+(tile_col_start >> 3);
1444 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1446 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1448 td->
c = &td->
c_b[tile_row];
1449 for (row = tile_row_start; row < tile_row_end;
1450 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1451 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1452 VP9Filter *lflvl_ptr = lflvl_ptr_base+
s->sb_cols*(row >> 3);
1456 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1465 for (col = tile_col_start;
1467 col += 8, yoff2 += 64 * bytesperpixel,
1468 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1471 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1478 tile_cols_len = tile_col_end - tile_col_start;
1479 if (row + 8 <
s->rows) {
1480 memcpy(
s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1481 f->data[0] + yoff + 63 * ls_y,
1482 8 * tile_cols_len * bytesperpixel);
1483 memcpy(
s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1484 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1485 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1486 memcpy(
s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1487 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1488 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1491 vp9_report_tile_progress(
s, row >> 3, 1);
1501 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1503 int bytesperpixel =
s->bytesperpixel, col,
i;
1507 ls_y =
f->linesize[0];
1508 ls_uv =
f->linesize[1];
1510 for (
i = 0;
i <
s->sb_rows;
i++) {
1511 vp9_await_tile_progress(
s,
i,
s->s.h.tiling.tile_cols);
1513 if (
s->s.h.filter.level) {
1514 yoff = (ls_y * 64)*
i;
1515 uvoff = (ls_uv * 64 >>
s->ss_v)*
i;
1516 lflvl_ptr =
s->lflvl+
s->sb_cols*
i;
1517 for (col = 0; col <
s->cols;
1518 col += 8, yoff += 64 * bytesperpixel,
1519 uvoff += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1532 unsigned int tile, nb_blocks = 0;
1534 if (
s->s.h.segmentation.enabled) {
1536 nb_blocks +=
s->td[
tile].nb_block_structure;
1544 par->
qp =
s->s.h.yac_qi;
1545 par->
delta_qp[0][0] =
s->s.h.ydc_qdelta;
1546 par->
delta_qp[1][0] =
s->s.h.uvdc_qdelta;
1547 par->
delta_qp[2][0] =
s->s.h.uvdc_qdelta;
1548 par->
delta_qp[1][1] =
s->s.h.uvac_qdelta;
1549 par->
delta_qp[2][1] =
s->s.h.uvac_qdelta;
1552 unsigned int block = 0;
1553 unsigned int tile, block_tile;
1562 uint8_t seg_id =
frame->segmentation_map[row * 8 *
s->sb_cols + col];
1569 if (
s->s.h.segmentation.feat[seg_id].q_enabled) {
1570 b->delta_qp =
s->s.h.segmentation.feat[seg_id].q_val;
1571 if (
s->s.h.segmentation.absolute_vals)
1572 b->delta_qp -= par->
qp;
1592 (!
s->s.h.segmentation.enabled || !
s->s.h.segmentation.update_map);
1596 ret = ff_cbs_read_packet(
s->cbc, &
s->current_frag,
pkt);
1598 ff_cbs_fragment_reset(&
s->current_frag);
1603 unit = &
s->current_frag.units[0];
1607 s->frame_header = &rf->
header;
1611 }
else if (
ret == 0) {
1612 if (!
s->s.refs[
ref].f) {
1616 for (
int i = 0;
i < 8;
i++)
1620 ff_cbs_fragment_reset(&
s->current_frag);
1632 src = !
s->s.h.keyframe && !
s->s.h.intraonly && !
s->s.h.errorres ?
1634 if (!retain_segmap_ref ||
s->s.h.keyframe ||
s->s.h.intraonly)
1642 s->s.frames[
CUR_FRAME].frame_header =
s->frame_header;
1645 if (
s->s.h.keyframe)
1649 if (
s->s.h.lossless)
1663 for (
i = 0;
i < 8;
i++) {
1665 s->s.h.refreshrefmask & (1 <<
i) ?
1681 for (
i = 0;
i < 8;
i++) {
1683 s->s.h.refreshrefmask & (1 <<
i) ?
1691 memset(
s->above_partition_ctx, 0,
s->cols);
1692 memset(
s->above_skip_ctx, 0,
s->cols);
1693 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1694 memset(
s->above_mode_ctx,
DC_PRED,
s->cols * 2);
1698 memset(
s->above_y_nnz_ctx, 0,
s->sb_cols * 16);
1699 memset(
s->above_uv_nnz_ctx[0], 0,
s->sb_cols * 16 >>
s->ss_h);
1700 memset(
s->above_uv_nnz_ctx[1], 0,
s->sb_cols * 16 >>
s->ss_h);
1701 memset(
s->above_segpred_ctx, 0,
s->cols);
1706 "Failed to allocate block buffers\n");
1709 if (
s->s.h.refreshctx &&
s->s.h.parallelmode) {
1712 for (
i = 0;
i < 4;
i++) {
1713 for (j = 0; j < 2; j++)
1714 for (k = 0; k < 2; k++)
1715 for (l = 0; l < 6; l++)
1716 for (m = 0; m < 6; m++)
1717 memcpy(
s->prob_ctx[
s->s.h.framectxid].coef[
i][j][k][l][m],
1718 s->prob.coef[
i][j][k][l][m], 3);
1719 if (
s->s.h.txfmmode ==
i)
1722 s->prob_ctx[
s->s.h.framectxid].p =
s->prob.p;
1724 }
else if (!
s->s.h.refreshctx) {
1730 for (
i = 0;
i <
s->sb_rows;
i++)
1736 for (
i = 0;
i <
s->active_tile_cols;
i++) {
1737 s->td[
i].b =
s->td[
i].b_base;
1738 s->td[
i].block =
s->td[
i].block_base;
1739 s->td[
i].uvblock[0] =
s->td[
i].uvblock_base[0];
1740 s->td[
i].uvblock[1] =
s->td[
i].uvblock_base[1];
1741 s->td[
i].eob =
s->td[
i].eob_base;
1742 s->td[
i].uveob[0] =
s->td[
i].uveob_base[0];
1743 s->td[
i].uveob[1] =
s->td[
i].uveob_base[1];
1744 s->td[
i].error_info = 0;
1749 int tile_row, tile_col;
1753 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1754 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1757 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1758 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1765 if (tile_size >
size)
1788 for (
i = 1;
i <
s->s.h.tiling.tile_cols;
i++)
1789 for (j = 0; j <
sizeof(
s->td[
i].counts) /
sizeof(
unsigned); j++)
1790 ((
unsigned *)&
s->td[0].counts)[j] += ((
unsigned *)&
s->td[
i].counts)[j];
1792 if (
s->pass < 2 &&
s->s.h.refreshctx && !
s->s.h.parallelmode) {
1796 }
while (
s->pass++ == 1);
1798 if (
s->td->error_info < 0) {
1800 s->td->error_info = 0;
1811 ff_cbs_fragment_reset(&
s->current_frag);
1815 for (
int i = 0;
i < 8;
i++)
1818 if (!
s->s.h.invisible) {
1835 for (
i = 0;
i < 3;
i++)
1838 for (
i = 0;
i < 8;
i++) {
1843 ff_cbs_fragment_reset(&
s->current_frag);
1844 ff_cbs_flush(
s->cbc);
1856 s->s.h.filter.sharpness = -1;
1878 for (
int i = 0;
i < 3;
i++)
1880 for (
int i = 0;
i < 8;
i++)
1883 s->frame_extradata_pool_size = ssrc->frame_extradata_pool_size;
1886 for (
int i = 0;
i < 8;
i++)
1889 s->frame_header = ssrc->frame_header;
1892 s->s.h.invisible = ssrc->s.h.invisible;
1893 s->s.h.keyframe = ssrc->s.h.keyframe;
1894 s->s.h.intraonly = ssrc->s.h.intraonly;
1895 s->ss_v = ssrc->ss_v;
1896 s->ss_h = ssrc->ss_h;
1897 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1898 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1899 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1900 s->bytesperpixel = ssrc->bytesperpixel;
1901 s->gf_fmt = ssrc->gf_fmt;
1904 s->s.h.bpp = ssrc->s.h.bpp;
1905 s->bpp_index = ssrc->bpp_index;
1906 s->pix_fmt = ssrc->pix_fmt;
1907 memcpy(&
s->prob_ctx, &ssrc->prob_ctx,
sizeof(
s->prob_ctx));
1908 memcpy(&
s->s.h.lf_delta, &ssrc->s.h.lf_delta,
sizeof(
s->s.h.lf_delta));
1909 memcpy(&
s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1910 sizeof(
s->s.h.segmentation.feat));
1932 .bsfs =
"vp9_superframe_split",
1934 #if CONFIG_VP9_DXVA2_HWACCEL
1937 #if CONFIG_VP9_D3D11VA_HWACCEL
1940 #if CONFIG_VP9_D3D11VA2_HWACCEL
1943 #if CONFIG_VP9_D3D12VA_HWACCEL
1946 #if CONFIG_VP9_NVDEC_HWACCEL
1949 #if CONFIG_VP9_VAAPI_HWACCEL
1952 #if CONFIG_VP9_VDPAU_HWACCEL
1955 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
1958 #if CONFIG_VP9_VULKAN_HWACCEL