27 const pixel *lut = _lut;
29 dst_stride /=
sizeof(
pixel);
31 for (
int y = 0; y <
height; y++) {
32 for (
int x = 0; x <
width; x++)
43 static void FUNC(
alf_filter_luma)(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *_src, ptrdiff_t src_stride,
49 const int vb_above = vb_pos - 4;
50 const int vb_below = vb_pos + 3;
52 dst_stride /=
sizeof(
pixel);
53 src_stride /=
sizeof(
pixel);
66 pixel *dst = (
pixel *)_dst + (y +
i) * dst_stride + x;
68 const pixel *p0 =
s0 +
i * src_stride;
69 const pixel *p1 =
s1 +
i * src_stride;
70 const pixel *p2 =
s2 +
i * src_stride;
71 const pixel *p3 =
s3 +
i * src_stride;
72 const pixel *p4 =
s4 +
i * src_stride;
73 const pixel *p5 =
s5 +
i * src_stride;
74 const pixel *p6 =
s6 +
i * src_stride;
76 const int is_near_vb_above = (y +
i < vb_pos) && (y +
i >= vb_pos - 1);
77 const int is_near_vb_below = (y +
i >= vb_pos) && (y +
i <= vb_pos);
78 const int is_near_vb = is_near_vb_above || is_near_vb_below;
80 if ((y +
i < vb_pos) && ((y +
i) >= vb_above)) {
81 p1 = (y +
i == vb_pos - 1) ? p0 : p1;
82 p3 = (y +
i >= vb_pos - 2) ? p1 : p3;
83 p5 = (y +
i >= vb_pos - 3) ? p3 : p5;
85 p2 = (y +
i == vb_pos - 1) ? p0 : p2;
86 p4 = (y +
i >= vb_pos - 2) ? p2 : p4;
87 p6 = (y +
i >= vb_pos - 3) ? p4 : p6;
88 }
else if ((y +
i >= vb_pos) && ((y +
i) <= vb_below)) {
89 p2 = (y +
i == vb_pos ) ? p0 : p2;
90 p4 = (y +
i <= vb_pos + 1) ? p2 : p4;
91 p6 = (y +
i <= vb_pos + 2) ? p4 : p6;
93 p1 = (y +
i == vb_pos ) ? p0 : p1;
94 p3 = (y +
i <= vb_pos + 1) ? p1 : p3;
95 p5 = (y +
i <= vb_pos + 2) ? p3 : p5;
100 const pixel curr = *p0;
118 sum = (sum + (1 << ((
shift + 3) - 1))) >> (
shift + 3);
143 const int vb_above = vb_pos - 2;
144 const int vb_below = vb_pos + 1;
146 dst_stride /=
sizeof(
pixel);
147 src_stride /=
sizeof(
pixel);
160 pixel *dst = (
pixel *)_dst + (y +
i) * dst_stride + x;
162 const pixel *p0 =
s0 +
i * src_stride;
163 const pixel *p1 =
s1 +
i * src_stride;
164 const pixel *p2 =
s2 +
i * src_stride;
165 const pixel *p3 =
s3 +
i * src_stride;
166 const pixel *p4 =
s4 +
i * src_stride;
167 const pixel *p5 =
s5 +
i * src_stride;
168 const pixel *p6 =
s6 +
i * src_stride;
170 const int is_near_vb_above = (y +
i < vb_pos) && (y +
i >= vb_pos - 1);
171 const int is_near_vb_below = (y +
i >= vb_pos) && (y +
i <= vb_pos);
172 const int is_near_vb = is_near_vb_above || is_near_vb_below;
174 if ((y +
i < vb_pos) && ((y +
i) >= vb_above)) {
175 p1 = (y +
i == vb_pos - 1) ? p0 : p1;
176 p3 = (y +
i >= vb_pos - 2) ? p1 : p3;
177 p5 = (y +
i >= vb_pos - 3) ? p3 : p5;
179 p2 = (y +
i == vb_pos - 1) ? p0 : p2;
180 p4 = (y +
i >= vb_pos - 2) ? p2 : p4;
181 p6 = (y +
i >= vb_pos - 3) ? p4 : p6;
182 }
else if ((y +
i >= vb_pos) && ((y +
i) <= vb_below)) {
183 p2 = (y +
i == vb_pos ) ? p0 : p2;
184 p4 = (y +
i <= vb_pos + 1) ? p2 : p4;
185 p6 = (y +
i <= vb_pos + 2) ? p4 : p6;
187 p1 = (y +
i == vb_pos ) ? p0 : p1;
188 p3 = (y +
i <= vb_pos + 1) ? p1 : p3;
189 p5 = (y +
i <= vb_pos + 2) ? p3 : p5;
194 const pixel curr = *p0;
206 sum = (sum + (1 << ((
shift + 3) - 1))) >> (
shift + 3);
223 static void FUNC(
alf_filter_cc)(uint8_t *_dst, ptrdiff_t dst_stride,
const uint8_t *_luma,
const ptrdiff_t luma_stride,
224 const int width,
const int height,
const int hs,
const int vs,
const int16_t *
filter,
const int vb_pos)
226 const ptrdiff_t
stride = luma_stride /
sizeof(
pixel);
228 dst_stride /=
sizeof(
pixel);
230 for (
int y = 0; y <
height; y++) {
231 for (
int x = 0; x <
width; x++) {
233 pixel *dst = (
pixel *)_dst + y * dst_stride + x;
241 const int pos = y << vs;
242 if (!vs && (
pos == vb_pos ||
pos == vb_pos + 1))
245 if (
pos == (vb_pos - 2) ||
pos == (vb_pos + 1))
247 else if (
pos == (vb_pos - 1) ||
pos == vb_pos)
/*
 * Integer tags 0..3 identifying four ALF directions.
 * NOTE(review): the names suggest vertical, horizontal and two diagonals;
 * the code that consumes these tags is not visible in this excerpt -- confirm
 * against the gradient/classification code before relying on the ordering.
 */
265 #define ALF_DIR_VERT 0
266 #define ALF_DIR_HORZ 1
267 #define ALF_DIR_DIGA0 2
268 #define ALF_DIR_DIGA1 3
270 static void FUNC(
alf_get_idx)(
int *class_idx,
int *transpose_idx,
const int *sum,
const int ac)
272 static const int arg_var[] = {0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 };
274 int hv0, hv1, dir_hv, d0, d1, dir_d, hvd1, hvd0, sum_hv, dir1;
285 dir1 = (uint64_t)d1 * hv0 <= (uint64_t)hv1 * d0;
286 hvd1 = dir1 ? hv1 : d1;
287 hvd0 = dir1 ? hv0 : d0;
291 if (hvd1 * 2 > 9 * hvd0)
292 *class_idx += ((dir1 << 1) + 2) * 5;
293 else if (hvd1 > 2 * hvd0)
294 *class_idx += ((dir1 << 1) + 1) * 5;
296 *transpose_idx = dir_d * 2 + dir_hv;
300 const uint8_t *_src,
const ptrdiff_t _src_stride,
const int width,
const int height,
301 const int vb_pos,
int *gradient_tmp)
311 const ptrdiff_t src_stride = _src_stride /
sizeof(
pixel);
331 const int val0 = (*p0) << 1;
336 const int val1 = (*p1) << 1;
353 }
else if (y == vb_pos) {
362 grad = gradient_tmp + (yg + start) * gstride + xg *
ALF_NUM_DIR;
364 for (
int i = start;
i < end;
i++) {
365 for (
int j = 0; j <
size; j++) {
384 const int *class_idx,
const int *transpose_idx,
const int size,
385 const int16_t *coeff_set,
const uint8_t *clip_idx_set,
const uint8_t *class_to_filt)
388 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
389 { 9, 4, 10, 8, 1, 5, 11, 7, 3, 0, 2, 6 },
390 { 0, 3, 2, 1, 8, 7, 6, 5, 4, 9, 10, 11 },
391 { 9, 8, 10, 4, 3, 7, 11, 5, 1, 0, 2, 6 },
394 const int16_t clip_set[] = {
398 for (
int i = 0;
i <
size;
i++) {
403 const int idx =
index[transpose_idx[
i]][j];
404 *
coeff++ = src_coeff[idx];
405 *
clip++ = clip_set[clip_idx[idx]];
/*
 * Shorthand accessors for samples on either side of `pix`, stepping by
 * `xstride`. Both `pix` and `xstride` must be in scope where a macro is used.
 *
 *   Pn   -> pix[-(n + 1) * xstride]   (P0 is the sample just before pix)
 *   Qn   -> pix[  n      * xstride]   (Q0 is pix[0] itself)
 *
 * The expansions are lvalues, so they can be both read and assigned.
 */
416 #define P7 pix[-8 * xstride]
417 #define P6 pix[-7 * xstride]
418 #define P5 pix[-6 * xstride]
419 #define P4 pix[-5 * xstride]
420 #define P3 pix[-4 * xstride]
421 #define P2 pix[-3 * xstride]
422 #define P1 pix[-2 * xstride]
423 #define P0 pix[-1 * xstride]
424 #define Q0 pix[0 * xstride]
425 #define Q1 pix[1 * xstride]
426 #define Q2 pix[2 * xstride]
427 #define Q3 pix[3 * xstride]
428 #define Q4 pix[4 * xstride]
429 #define Q5 pix[5 * xstride]
430 #define Q6 pix[6 * xstride]
431 #define Q7 pix[7 * xstride]
/* Parameterized forms: P(x) matches Px and Q(x) matches Qx for a runtime x. */
432 #define P(x) pix[(-(x)-1) * xstride]
433 #define Q(x) pix[(x) * xstride]
/*
 * Same addressing as the P and Q accessors, but displaced by 3 * ystride:
 *
 *   TPn  -> pix[-(n + 1) * xstride + 3 * ystride]
 *   TQn  -> pix[  n      * xstride + 3 * ystride]
 *
 * `pix`, `xstride` and `ystride` must be in scope where a macro is used.
 */
436 #define TP7 pix[-8 * xstride + 3 * ystride]
437 #define TP6 pix[-7 * xstride + 3 * ystride]
438 #define TP5 pix[-6 * xstride + 3 * ystride]
439 #define TP4 pix[-5 * xstride + 3 * ystride]
440 #define TP3 pix[-4 * xstride + 3 * ystride]
441 #define TP2 pix[-3 * xstride + 3 * ystride]
442 #define TP1 pix[-2 * xstride + 3 * ystride]
443 #define TP0 pix[-1 * xstride + 3 * ystride]
444 #define TQ0 pix[0 * xstride + 3 * ystride]
445 #define TQ1 pix[1 * xstride + 3 * ystride]
446 #define TQ2 pix[2 * xstride + 3 * ystride]
447 #define TQ3 pix[3 * xstride + 3 * ystride]
448 #define TQ4 pix[4 * xstride + 3 * ystride]
449 #define TQ5 pix[5 * xstride + 3 * ystride]
450 #define TQ6 pix[6 * xstride + 3 * ystride]
451 #define TQ7 pix[7 * xstride + 3 * ystride]
/* Parameterized forms of TPx / TQx for a runtime x. */
452 #define TP(x) pix[(-(x)-1) * xstride + 3 * ystride]
453 #define TQ(x) pix[(x) * xstride + 3 * ystride]
/*
 * Same addressing as the P and Q accessors, but displaced by 1 * ystride and
 * limited to indices 0..3:
 *
 *   FPn  -> pix[-(n + 1) * xstride + 1 * ystride]
 *   FQn  -> pix[  n      * xstride + 1 * ystride]
 *
 * `pix`, `xstride` and `ystride` must be in scope where a macro is used.
 */
455 #define FP3 pix[-4 * xstride + 1 * ystride]
456 #define FP2 pix[-3 * xstride + 1 * ystride]
457 #define FP1 pix[-2 * xstride + 1 * ystride]
458 #define FP0 pix[-1 * xstride + 1 * ystride]
459 #define FQ0 pix[0 * xstride + 1 * ystride]
460 #define FQ1 pix[1 * xstride + 1 * ystride]
461 #define FQ2 pix[2 * xstride + 1 * ystride]
462 #define FQ3 pix[3 * xstride + 1 * ystride]
467 const uint8_t no_p,
const uint8_t no_q,
const uint8_t max_len_p,
const uint8_t max_len_q)
469 for (
int d = 0;
d < 4;
d++) {
485 if (max_len_p == 5 && max_len_q == 5)
486 m = (p4 + p3 + 2 * (p2 + p1 + p0 +
q0 +
q1 + q2) + q3 + q4 + 8) >> 4;
487 else if (max_len_p == max_len_q)
488 m = (p6 + p5 + p4 + p3 + p2 + p1 + 2 * (p0 +
q0) +
q1 + q2 + q3 + q4 + q5 + q6 + 8) >> 4;
489 else if (max_len_p + max_len_q == 12)
490 m = (p5 + p4 + p3 + p2 + 2 * (p1 + p0 +
q0 +
q1) + q2 + q3 + q4 + q5 + 8) >> 4;
491 else if (max_len_p + max_len_q == 8)
492 m = (p3 + p2 + p1 + p0 +
q0 +
q1 + q2 + q3 + 4) >> 3;
493 else if (max_len_q == 7)
494 m = (2 * (p2 + p1 + p0 +
q0) + p0 + p1 +
q1 + q2 + q3 + q4 + q5 + q6 + 8) >> 4;
496 m = (p6 + p5 + p4 + p3 + p2 + p1 + 2 * (q2 +
q1 +
q0 + p0) +
q0 +
q1 + 8) >> 4;
498 const int refp = (
P(max_len_p) +
P(max_len_p - 1) + 1) >> 1;
499 if (max_len_p == 3) {
500 P0 = p0 +
av_clip(((m * 53 + refp * 11 + 32) >> 6) - p0, -(
tc * 6 >> 1), (
tc * 6 >> 1));
501 P1 = p1 +
av_clip(((m * 32 + refp * 32 + 32) >> 6) - p1, -(
tc * 4 >> 1), (
tc * 4 >> 1));
502 P2 = p2 +
av_clip(((m * 11 + refp * 53 + 32) >> 6) - p2, -(
tc * 2 >> 1), (
tc * 2 >> 1));
503 }
else if (max_len_p == 5) {
504 P0 = p0 +
av_clip(((m * 58 + refp * 6 + 32) >> 6) - p0, -(
tc * 6 >> 1), (
tc * 6 >> 1));
505 P1 = p1 +
av_clip(((m * 45 + refp * 19 + 32) >> 6) - p1, -(
tc * 5 >> 1), (
tc * 5 >> 1));
506 P2 = p2 +
av_clip(((m * 32 + refp * 32 + 32) >> 6) - p2, -(
tc * 4 >> 1), (
tc * 4 >> 1));
507 P3 = p3 +
av_clip(((m * 19 + refp * 45 + 32) >> 6) - p3, -(
tc * 3 >> 1), (
tc * 3 >> 1));
508 P4 = p4 +
av_clip(((m * 6 + refp * 58 + 32) >> 6) - p4, -(
tc * 2 >> 1), (
tc * 2 >> 1));
510 P0 = p0 +
av_clip(((m * 59 + refp * 5 + 32) >> 6) - p0, -(
tc * 6 >> 1), (
tc * 6 >> 1));
511 P1 = p1 +
av_clip(((m * 50 + refp * 14 + 32) >> 6) - p1, -(
tc * 5 >> 1), (
tc * 5 >> 1));
512 P2 = p2 +
av_clip(((m * 41 + refp * 23 + 32) >> 6) - p2, -(
tc * 4 >> 1), (
tc * 4 >> 1));
513 P3 = p3 +
av_clip(((m * 32 + refp * 32 + 32) >> 6) - p3, -(
tc * 3 >> 1), (
tc * 3 >> 1));
514 P4 = p4 +
av_clip(((m * 23 + refp * 41 + 32) >> 6) - p4, -(
tc * 2 >> 1), (
tc * 2 >> 1));
515 P5 = p5 +
av_clip(((m * 14 + refp * 50 + 32) >> 6) - p5, -(
tc * 1 >> 1), (
tc * 1 >> 1));
516 P6 = p6 +
av_clip(((m * 5 + refp * 59 + 32) >> 6) - p6, -(
tc * 1 >> 1), (
tc * 1 >> 1));
520 const int refq = (
Q(max_len_q) +
Q(max_len_q - 1) + 1) >> 1;
521 if (max_len_q == 3) {
522 Q0 =
q0 +
av_clip(((m * 53 + refq * 11 + 32) >> 6) -
q0, -(
tc * 6 >> 1), (
tc * 6 >> 1));
523 Q1 =
q1 +
av_clip(((m * 32 + refq * 32 + 32) >> 6) -
q1, -(
tc * 4 >> 1), (
tc * 4 >> 1));
524 Q2 = q2 +
av_clip(((m * 11 + refq * 53 + 32) >> 6) - q2, -(
tc * 2 >> 1), (
tc * 2 >> 1));
525 }
else if (max_len_q == 5) {
526 Q0 =
q0 +
av_clip(((m * 58 + refq * 6 + 32) >> 6) -
q0, -(
tc * 6 >> 1), (
tc * 6 >> 1));
527 Q1 =
q1 +
av_clip(((m * 45 + refq * 19 + 32) >> 6) -
q1, -(
tc * 5 >> 1), (
tc * 5 >> 1));
528 Q2 = q2 +
av_clip(((m * 32 + refq * 32 + 32) >> 6) - q2, -(
tc * 4 >> 1), (
tc * 4 >> 1));
529 Q3 = q3 +
av_clip(((m * 19 + refq * 45 + 32) >> 6) - q3, -(
tc * 3 >> 1), (
tc * 3 >> 1));
530 Q4 = q4 +
av_clip(((m * 6 + refq * 58 + 32) >> 6) - q4, -(
tc * 2 >> 1), (
tc * 2 >> 1));
532 Q0 =
q0 +
av_clip(((m * 59 + refq * 5 + 32) >> 6) -
q0, -(
tc * 6 >> 1), (
tc * 6 >> 1));
533 Q1 =
q1 +
av_clip(((m * 50 + refq * 14 + 32) >> 6) -
q1, -(
tc * 5 >> 1), (
tc * 5 >> 1));
534 Q2 = q2 +
av_clip(((m * 41 + refq * 23 + 32) >> 6) - q2, -(
tc * 4 >> 1), (
tc * 4 >> 1));
535 Q3 = q3 +
av_clip(((m * 32 + refq * 32 + 32) >> 6) - q3, -(
tc * 3 >> 1), (
tc * 3 >> 1));
536 Q4 = q4 +
av_clip(((m * 23 + refq * 41 + 32) >> 6) - q4, -(
tc * 2 >> 1), (
tc * 2 >> 1));
537 Q5 = q5 +
av_clip(((m * 14 + refq * 50 + 32) >> 6) - q5, -(
tc * 1 >> 1), (
tc * 1 >> 1));
538 Q6 = q6 +
av_clip(((m * 5 + refq * 59 + 32) >> 6) - q6, -(
tc * 1 >> 1), (
tc * 1 >> 1));
547 const int32_t *_beta,
const int32_t *_tc,
const uint8_t *_no_p,
const uint8_t *_no_q,
548 const uint8_t *_max_len_p,
const uint8_t *_max_len_q,
const int hor_ctu_edge)
550 const ptrdiff_t xstride = _xstride /
sizeof(
pixel);
551 const ptrdiff_t ystride = _ystride /
sizeof(
pixel);
553 for (
int i = 0;
i < 2;
i++) {
565 const int d0 = dp0 + dq0;
566 const int d3 = dp3 + dq3;
567 const int tc25 = ((
tc * 5 + 1) >> 1);
569 const int no_p = _no_p[
i];
570 const int no_q = _no_q[
i];
572 int max_len_p = _max_len_p[
i];
573 int max_len_q = _max_len_q[
i];
575 const int large_p = (max_len_p > 3 && !hor_ctu_edge);
576 const int large_q = max_len_q > 3;
579 const int beta_3 = beta >> 3;
580 const int beta_2 = beta >> 2;
582 if (large_p || large_q) {
583 const int dp0l = large_p ? ((dp0 +
abs(
P5 - 2 *
P4 +
P3) + 1) >> 1) : dp0;
584 const int dq0l = large_q ? ((dq0 +
abs(
Q5 - 2 *
Q4 +
Q3) + 1) >> 1) : dq0;
585 const int dp3l = large_p ? ((dp3 +
abs(
TP5 - 2 *
TP4 +
TP3) + 1) >> 1) : dp3;
586 const int dq3l = large_q ? ((dq3 +
abs(
TQ5 - 2 *
TQ4 +
TQ3) + 1) >> 1) : dq3;
587 const int d0l = dp0l + dq0l;
588 const int d3l = dp3l + dq3l;
589 const int beta53 = beta * 3 >> 5;
590 const int beta_4 = beta >> 4;
591 max_len_p = large_p ? max_len_p : 3;
592 max_len_q = large_q ? max_len_q : 3;
594 if (d0l + d3l < beta) {
599 const int sp0 = large_p ? ((sp0l +
abs(
P3 -
P(max_len_p)) + 1) >> 1) : sp0l;
600 const int sp3 = large_p ? ((sp3l +
abs(
TP3 -
TP(max_len_p)) + 1) >> 1) : sp3l;
601 const int sq0 = large_q ? ((sq0l +
abs(
Q3 -
Q(max_len_q)) + 1) >> 1) : sq0l;
602 const int sq3 = large_q ? ((sq3l +
abs(
TQ3 -
TQ(max_len_q)) + 1) >> 1) : sq3l;
603 if (sp0 + sq0 < beta53 &&
abs(
P0 -
Q0) < tc25 &&
604 sp3 + sq3 < beta53 &&
abs(
TP0 -
TQ0) < tc25 &&
605 (d0l << 1) < beta_4 && (d3l << 1) < beta_4) {
611 if (d0 + d3 < beta) {
612 if (max_len_p > 2 && max_len_q > 2 &&
615 (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
620 if (max_len_p > 1 && max_len_q > 1) {
621 if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
623 if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
634 const int size,
const int32_t tc,
const uint8_t no_p,
const uint8_t no_q)
636 for (
int d = 0;
d <
size;
d++) {
648 P2 =
av_clip((3 * p3 + 2 * p2 + p1 + p0 +
q0 + 4) >> 3, p2 -
tc, p2 +
tc );
660 const int size,
const int32_t tc,
const uint8_t no_p,
const uint8_t no_q)
662 for (
int d = 0;
d <
size;
d++) {
682 const int32_t *_beta,
const int32_t *_tc,
const uint8_t *_no_p,
const uint8_t *_no_q,
683 const uint8_t *_max_len_p,
const uint8_t *_max_len_q,
const int shift)
685 const ptrdiff_t xstride = _xstride /
sizeof(
pixel);
686 const ptrdiff_t ystride = _ystride /
sizeof(
pixel);
688 const int end = 8 /
size;
690 for (
int i = 0;
i < end;
i++) {
698 const uint8_t no_p = _no_p[
i];
699 const uint8_t no_q = _no_q[
i];
702 const int beta_3 = beta >> 3;
703 const int beta_2 = beta >> 2;
705 const int tc25 = ((
tc * 5 + 1) >> 1);
707 uint8_t max_len_p = _max_len_p[
i];
708 uint8_t max_len_q = _max_len_q[
i];
710 if (!max_len_p || !max_len_q)
715 const int p2n = max_len_p == 1 ? p1n : (
shift ?
FP2 :
TP2);
720 const int p3 = max_len_p == 1 ?
P1 :
P3;
721 const int p2 = max_len_p == 1 ?
P1 :
P2;
724 const int dp0 =
abs(p2 - 2 * p1 + p0);
727 const int dp1 =
abs(p2n - 2 * p1n + p0n);
728 const int dq1 =
abs(q2n - 2 * q1n + q0n);
729 const int d0 = dp0 + dq0;
730 const int d1 = dp1 + dq1;
732 if (d0 + d1 < beta) {
733 const int p3n = max_len_p == 1 ? p1n : (
shift ?
FP3 :
TP3);
735 const int dsam0 = (d0 << 1) < beta_2 && (
abs(p3 - p0) +
abs(
Q0 -
Q3) < beta_3) &&
737 const int dsam1 = (d1 << 1) < beta_2 && (
abs(p3n - p0n) +
abs(q0n - q3n) < beta_3) &&
738 abs(p0n - q0n) < tc25;
739 if (!dsam0 || !dsam1)
740 max_len_p = max_len_q = 1;
742 max_len_p = max_len_q = 1;
746 if (max_len_p == 3 && max_len_q == 3)
748 else if (max_len_q == 3)
757 const int32_t *beta,
const int32_t *
tc,
const uint8_t *no_p,
const uint8_t *no_q,
758 const uint8_t *max_len_p,
const uint8_t *max_len_q,
int shift)
761 no_p, no_q, max_len_p, max_len_q,
shift);
765 const int32_t *beta,
const int32_t *
tc,
const uint8_t *no_p,
const uint8_t *no_q,
766 const uint8_t *max_len_p,
const uint8_t *max_len_q,
int shift)
769 no_p, no_q, max_len_p, max_len_q,
shift);
773 const int32_t *beta,
const int32_t *
tc,
const uint8_t *no_p,
const uint8_t *no_q,
774 const uint8_t *max_len_p,
const uint8_t *max_len_q,
const int hor_ctu_edge)
777 no_p, no_q, max_len_p, max_len_q, hor_ctu_edge);
781 const int32_t *beta,
const int32_t *
tc,
const uint8_t *no_p,
const uint8_t *no_q,
782 const uint8_t *max_len_p,
const uint8_t *max_len_q,
const int hor_ctu_edge)
785 no_p, no_q, max_len_p, max_len_q, hor_ctu_edge);
791 const ptrdiff_t xstride = _xstride /
sizeof(
pixel);
792 const ptrdiff_t ystride = _ystride /
sizeof(
pixel);