FFmpeg
dsp.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2024 Zhao Zhili
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef AVCODEC_AARCH64_H26X_DSP_H
22 #define AVCODEC_AARCH64_H26X_DSP_H
23 
24 #include <stddef.h>
25 #include <stdint.h>
26 
/*
 * Prototypes for the NEON sao band filters, in an 8x8 and a 16x16 variant.
 * Both share one signature: destination and source pointers, a separate
 * stride for each, the offset table, the left band class, and the block
 * width/height. The h26x prefix (rather than hevc/vvc) suggests they are
 * shared between codecs -- NOTE(review): confirm against the callers.
 */
27 void ff_h26x_sao_band_filter_8x8_8_neon(uint8_t *_dst, const uint8_t *_src,
28  ptrdiff_t stride_dst, ptrdiff_t stride_src,
29  const int16_t *sao_offset_val, int sao_left_class,
30  int width, int height);
31 void ff_h26x_sao_band_filter_16x16_8_neon(uint8_t *_dst, const uint8_t *_src,
32  ptrdiff_t stride_dst, ptrdiff_t stride_src,
33  const int16_t *sao_offset_val, int sao_left_class,
34  int width, int height);
/*
 * Prototypes for the NEON sao edge filters. Unlike the band filters, they
 * are declared separately for hevc and vvc, each in a 16x16 and an 8x8
 * variant, and take only a destination stride (no source stride parameter).
 * All four share the same parameter list.
 */
35 void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
36  const int16_t *sao_offset_val, int eo, int width, int height);
37 void ff_hevc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
38  const int16_t *sao_offset_val, int eo, int width, int height);
39 
40 void ff_vvc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
41  const int16_t *sao_offset_val, int eo, int width, int height);
42 void ff_vvc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
43  const int16_t *sao_offset_val, int eo, int width, int height);
44 
/*
 * Declares six prototypes named
 *   ff_hevc_put_hevc_<fn>_h{4,6,8,12,16,32}_8_neon<ext>
 * each taking the parenthesised parameter list `args`. `ext` is pasted onto
 * the end of the name and may be empty. The expansion already ends with a
 * semicolon, so invocations of this definition are written without one.
 */
45 #define NEON8_FNPROTO_PARTIAL_6(fn, args, ext) \
46  void ff_hevc_put_hevc_##fn##_h4_8_neon##ext args; \
47  void ff_hevc_put_hevc_##fn##_h6_8_neon##ext args; \
48  void ff_hevc_put_hevc_##fn##_h8_8_neon##ext args; \
49  void ff_hevc_put_hevc_##fn##_h12_8_neon##ext args; \
50  void ff_hevc_put_hevc_##fn##_h16_8_neon##ext args; \
51  void ff_hevc_put_hevc_##fn##_h32_8_neon##ext args;
52 
/*
 * HEVC qpel, qpel_uni and qpel_bi prototypes with the _h<width> naming,
 * for widths 4, 6, 8, 12, 16 and 32 and an empty name suffix.
 * qpel writes to an int16_t destination; qpel_uni and qpel_bi write to a
 * uint8_t destination with its own stride, and qpel_bi additionally takes
 * an int16_t src2 input.
 */
53 NEON8_FNPROTO_PARTIAL_6(qpel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height,
54  intptr_t mx, intptr_t my, int width),)
55 
56 NEON8_FNPROTO_PARTIAL_6(qpel_uni, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
57  ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width),)
58 
59 NEON8_FNPROTO_PARTIAL_6(qpel_bi, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
60  ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t
61  mx, intptr_t my, int width),)
62 
/*
 * Declares nine prototypes named
 *   ff_hevc_put_hevc_<fn>{4,6,8,12,16,24,32,48,64}_8_neon<ext>
 * each taking the parenthesised parameter list `args`. `ext` is pasted onto
 * the end of the name and may be empty. The last declaration has no
 * terminating semicolon; the invocation must supply it.
 */
63 #define NEON8_FNPROTO(fn, args, ext) \
64  void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \
65  void ff_hevc_put_hevc_##fn##6_8_neon##ext args; \
66  void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \
67  void ff_hevc_put_hevc_##fn##12_8_neon##ext args; \
68  void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \
69  void ff_hevc_put_hevc_##fn##24_8_neon##ext args; \
70  void ff_hevc_put_hevc_##fn##32_8_neon##ext args; \
71  void ff_hevc_put_hevc_##fn##48_8_neon##ext args; \
72  void ff_hevc_put_hevc_##fn##64_8_neon##ext args
73 
/*
 * Reduced form of NEON8_FNPROTO that declares only widths 4, 8, 16 and 64:
 *   ff_hevc_put_hevc_<fn>{4,8,16,64}_8_neon<ext>
 * The invocation supplies the terminating semicolon.
 */
74 #define NEON8_FNPROTO_PARTIAL_4(fn, args, ext) \
75  void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \
76  void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \
77  void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \
78  void ff_hevc_put_hevc_##fn##64_8_neon##ext args
79 
/*
 * Reduced form of NEON8_FNPROTO that declares only widths 4, 8, 16, 32
 * and 64:
 *   ff_hevc_put_hevc_<fn>{4,8,16,32,64}_8_neon<ext>
 * The invocation supplies the terminating semicolon.
 */
80 #define NEON8_FNPROTO_PARTIAL_5(fn, args, ext) \
81  void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \
82  void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \
83  void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \
84  void ff_hevc_put_hevc_##fn##32_8_neon##ext args; \
85  void ff_hevc_put_hevc_##fn##64_8_neon##ext args
86 
/*
 * HEVC pel_pixels, pel_bi_pixels and pel_bi_w_pixels prototypes for all nine
 * NEON8_FNPROTO widths, with an empty name suffix.
 * pel_pixels writes to an int16_t destination; the bi variants write to a
 * uint8_t destination and take an extra int16_t src2 input. The bi_w variant
 * further takes denom and two weight/offset pairs (wx0/wx1, ox0/ox1).
 */
87 NEON8_FNPROTO(pel_pixels, (int16_t *dst,
88  const uint8_t *src, ptrdiff_t srcstride,
89  int height, intptr_t mx, intptr_t my, int width),);
90 
91 NEON8_FNPROTO(pel_bi_pixels, (uint8_t *dst, ptrdiff_t dststride,
92  const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
93  int height, intptr_t mx, intptr_t my, int width),);
94 
95 NEON8_FNPROTO(pel_bi_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
96  const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
97  int height, int denom, int wx0, int wx1,
98  int ox0, int ox1, intptr_t mx, intptr_t my, int width),);
99 
/*
 * HEVC epel_bi prototypes (h, v and hv) for all nine NEON8_FNPROTO widths.
 * All share one parameter list. epel_bi_hv is declared twice: once with an
 * empty suffix and once with the _i8mm suffix.
 */
100 NEON8_FNPROTO(epel_bi_h, (uint8_t *dst, ptrdiff_t dststride,
101  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
102  int height, intptr_t mx, intptr_t my, int width),);
103 
104 NEON8_FNPROTO(epel_bi_v, (uint8_t *dst, ptrdiff_t dststride,
105  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
106  int height, intptr_t mx, intptr_t my, int width),);
107 
108 NEON8_FNPROTO(epel_bi_hv, (uint8_t *dst, ptrdiff_t dststride,
109  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
110  int height, intptr_t mx, intptr_t my, int width),);
111 
112 NEON8_FNPROTO(epel_bi_hv, (uint8_t *dst, ptrdiff_t dststride,
113  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
114  int height, intptr_t mx, intptr_t my, int width), _i8mm);
115 
/*
 * HEVC prototypes for epel_v, the pel_uni pixel copies, the epel_uni
 * v/hv filters and the weighted (uni_w) vertical filters.
 * Weighted variants add denom, wx and ox before mx/my.
 * epel_uni_hv is declared with both an empty and an _i8mm suffix.
 * qpel_uni_w_v uses NEON8_FNPROTO_PARTIAL_4, so only widths 4, 8, 16 and 64
 * are declared for it.
 */
116 NEON8_FNPROTO(epel_v, (int16_t *dst,
117  const uint8_t *src, ptrdiff_t srcstride,
118  int height, intptr_t mx, intptr_t my, int width),);
119 
120 NEON8_FNPROTO(pel_uni_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
121  const uint8_t *_src, ptrdiff_t _srcstride,
122  int height, intptr_t mx, intptr_t my, int width),);
123 
124 NEON8_FNPROTO(pel_uni_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
125  const uint8_t *_src, ptrdiff_t _srcstride,
126  int height, int denom, int wx, int ox,
127  intptr_t mx, intptr_t my, int width),);
128 
129 NEON8_FNPROTO(epel_uni_v, (uint8_t *dst, ptrdiff_t dststride,
130  const uint8_t *src, ptrdiff_t srcstride,
131  int height, intptr_t mx, intptr_t my, int width),);
132 
133 NEON8_FNPROTO(epel_uni_hv, (uint8_t *dst, ptrdiff_t _dststride,
134  const uint8_t *src, ptrdiff_t srcstride,
135  int height, intptr_t mx, intptr_t my, int width),);
136 
137 NEON8_FNPROTO(epel_uni_hv, (uint8_t *dst, ptrdiff_t _dststride,
138  const uint8_t *src, ptrdiff_t srcstride,
139  int height, intptr_t mx, intptr_t my, int width), _i8mm);
140 
141 NEON8_FNPROTO(epel_uni_w_v, (uint8_t *_dst, ptrdiff_t _dststride,
142  const uint8_t *_src, ptrdiff_t _srcstride,
143  int height, int denom, int wx, int ox,
144  intptr_t mx, intptr_t my, int width),);
145 
146 NEON8_FNPROTO_PARTIAL_4(qpel_uni_w_v, (uint8_t *_dst, ptrdiff_t _dststride,
147  const uint8_t *_src, ptrdiff_t _srcstride,
148  int height, int denom, int wx, int ox,
149  intptr_t mx, intptr_t my, int width),);
150 
/*
 * HEVC epel_h, epel_hv and epel_uni_w_h prototypes for all nine
 * NEON8_FNPROTO widths. Each function family is declared twice: once with an
 * empty suffix and once with the _i8mm suffix, with identical parameters.
 */
151 NEON8_FNPROTO(epel_h, (int16_t *dst,
152  const uint8_t *_src, ptrdiff_t _srcstride,
153  int height, intptr_t mx, intptr_t my, int width),);
154 
155 NEON8_FNPROTO(epel_hv, (int16_t *dst,
156  const uint8_t *src, ptrdiff_t srcstride,
157  int height, intptr_t mx, intptr_t my, int width), );
158 
159 NEON8_FNPROTO(epel_h, (int16_t *dst,
160  const uint8_t *_src, ptrdiff_t _srcstride,
161  int height, intptr_t mx, intptr_t my, int width), _i8mm);
162 
163 NEON8_FNPROTO(epel_hv, (int16_t *dst,
164  const uint8_t *src, ptrdiff_t srcstride,
165  int height, intptr_t mx, intptr_t my, int width), _i8mm);
166 
167 NEON8_FNPROTO(epel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
168  const uint8_t *_src, ptrdiff_t _srcstride,
169  int height, int denom, int wx, int ox,
170  intptr_t mx, intptr_t my, int width),);
171 
172 NEON8_FNPROTO(epel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
173  const uint8_t *_src, ptrdiff_t _srcstride,
174  int height, int denom, int wx, int ox,
175  intptr_t mx, intptr_t my, int width), _i8mm);
176 
/*
 * HEVC qpel prototypes for all nine NEON8_FNPROTO widths.
 * qpel_h is declared here only with the _i8mm suffix; the un-suffixed
 * horizontal qpel prototypes in this header use the _h<width> naming
 * generated by NEON8_FNPROTO_PARTIAL_6.
 * qpel_hv, qpel_uni_hv and qpel_uni_w_h each get both an un-suffixed and an
 * _i8mm declaration; qpel_v and qpel_uni_v are un-suffixed only.
 */
177 NEON8_FNPROTO(qpel_h, (int16_t *dst,
178  const uint8_t *_src, ptrdiff_t _srcstride,
179  int height, intptr_t mx, intptr_t my, int width), _i8mm);
180 
181 NEON8_FNPROTO(qpel_v, (int16_t *dst,
182  const uint8_t *src, ptrdiff_t srcstride,
183  int height, intptr_t mx, intptr_t my, int width),);
184 
185 NEON8_FNPROTO(qpel_hv, (int16_t *dst,
186  const uint8_t *src, ptrdiff_t srcstride,
187  int height, intptr_t mx, intptr_t my, int width),);
188 
189 NEON8_FNPROTO(qpel_hv, (int16_t *dst,
190  const uint8_t *src, ptrdiff_t srcstride,
191  int height, intptr_t mx, intptr_t my, int width), _i8mm);
192 
193 NEON8_FNPROTO(qpel_uni_v, (uint8_t *dst, ptrdiff_t dststride,
194  const uint8_t *src, ptrdiff_t srcstride,
195  int height, intptr_t mx, intptr_t my, int width),);
196 
197 NEON8_FNPROTO(qpel_uni_hv, (uint8_t *dst, ptrdiff_t dststride,
198  const uint8_t *src, ptrdiff_t srcstride,
199  int height, intptr_t mx, intptr_t my, int width),);
200 
201 NEON8_FNPROTO(qpel_uni_hv, (uint8_t *dst, ptrdiff_t dststride,
202  const uint8_t *src, ptrdiff_t srcstride,
203  int height, intptr_t mx, intptr_t my, int width), _i8mm);
204 
205 NEON8_FNPROTO(qpel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
206  const uint8_t *_src, ptrdiff_t _srcstride,
207  int height, int denom, int wx, int ox,
208  intptr_t mx, intptr_t my, int width),);
209 
210 NEON8_FNPROTO(qpel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
211  const uint8_t *_src, ptrdiff_t _srcstride,
212  int height, int denom, int wx, int ox,
213  intptr_t mx, intptr_t my, int width), _i8mm);
214 
/*
 * HEVC weighted hv prototypes and qpel_bi prototypes.
 * epel_uni_w_hv covers all nine NEON8_FNPROTO widths, whereas qpel_uni_w_hv
 * uses NEON8_FNPROTO_PARTIAL_5 and so covers only widths 4, 8, 16, 32, 64.
 * Both are declared with an empty suffix and with the _i8mm suffix.
 * qpel_bi_v is un-suffixed only; qpel_bi_hv has both forms.
 */
215 NEON8_FNPROTO(epel_uni_w_hv, (uint8_t *_dst, ptrdiff_t _dststride,
216  const uint8_t *_src, ptrdiff_t _srcstride,
217  int height, int denom, int wx, int ox,
218  intptr_t mx, intptr_t my, int width),);
219 
220 NEON8_FNPROTO(epel_uni_w_hv, (uint8_t *_dst, ptrdiff_t _dststride,
221  const uint8_t *_src, ptrdiff_t _srcstride,
222  int height, int denom, int wx, int ox,
223  intptr_t mx, intptr_t my, int width), _i8mm);
224 
225 NEON8_FNPROTO_PARTIAL_5(qpel_uni_w_hv, (uint8_t *_dst, ptrdiff_t _dststride,
226  const uint8_t *_src, ptrdiff_t _srcstride,
227  int height, int denom, int wx, int ox,
228  intptr_t mx, intptr_t my, int width),);
229 
230 NEON8_FNPROTO_PARTIAL_5(qpel_uni_w_hv, (uint8_t *_dst, ptrdiff_t _dststride,
231  const uint8_t *_src, ptrdiff_t _srcstride,
232  int height, int denom, int wx, int ox,
233  intptr_t mx, intptr_t my, int width), _i8mm);
234 
235 NEON8_FNPROTO(qpel_bi_v, (uint8_t *dst, ptrdiff_t dststride,
236  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
237  int height, intptr_t mx, intptr_t my, int width),);
238 
239 NEON8_FNPROTO(qpel_bi_hv, (uint8_t *dst, ptrdiff_t dststride,
240  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
241  int height, intptr_t mx, intptr_t my, int width),);
242 
243 NEON8_FNPROTO(qpel_bi_hv, (uint8_t *dst, ptrdiff_t dststride,
244  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
245  int height, intptr_t mx, intptr_t my, int width), _i8mm);
246 
/*
 * From here on NEON8_FNPROTO_PARTIAL_4 is redefined for VVC. It declares
 *   ff_vvc_put_<fn>_h{4,8,16,32}_8_neon<ext>
 * Note the differences from the previous definition: the ff_vvc_put_ prefix,
 * the _h<width> naming, width 32 instead of 64, and an expansion that
 * already ends with a semicolon (so invocations omit it).
 */
247 #undef NEON8_FNPROTO_PARTIAL_4
248 #define NEON8_FNPROTO_PARTIAL_4(fn, args, ext) \
249  void ff_vvc_put_##fn##_h4_8_neon##ext args; \
250  void ff_vvc_put_##fn##_h8_8_neon##ext args; \
251  void ff_vvc_put_##fn##_h16_8_neon##ext args; \
252  void ff_vvc_put_##fn##_h32_8_neon##ext args;
253 
/*
 * VVC qpel, qpel_uni and epel prototypes with the _h<width> naming, for
 * widths 4, 8, 16 and 32. Where the HEVC prototypes take intptr_t mx/my,
 * these take const int8_t pointers hf and vf.
 */
254 NEON8_FNPROTO_PARTIAL_4(qpel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height,
255  const int8_t *hf, const int8_t *vf, int width),)
256 
257 NEON8_FNPROTO_PARTIAL_4(qpel_uni, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
258  ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width),)
259 
260 NEON8_FNPROTO_PARTIAL_4(epel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height,
261  const int8_t *hf, const int8_t *vf, int width),)
262 
/*
 * From here on NEON8_FNPROTO_PARTIAL_6 is redefined for VVC. It declares
 *   ff_vvc_put_<fn>{4,8,16,32,64,128}_8_neon<ext>
 * Unlike the previous definition, the width is pasted directly after <fn>
 * (no _h infix) and the expansion has no terminating semicolon, so the
 * invocation must supply it.
 */
263 #undef NEON8_FNPROTO_PARTIAL_6
264 #define NEON8_FNPROTO_PARTIAL_6(fn, args, ext) \
265  void ff_vvc_put_##fn##4_8_neon##ext args; \
266  void ff_vvc_put_##fn##8_8_neon##ext args; \
267  void ff_vvc_put_##fn##16_8_neon##ext args; \
268  void ff_vvc_put_##fn##32_8_neon##ext args; \
269  void ff_vvc_put_##fn##64_8_neon##ext args; \
270  void ff_vvc_put_##fn##128_8_neon##ext args
271 
/*
 * VVC prototypes for widths 4, 8, 16, 32, 64 and 128: the pel pixel copies
 * (plain, uni and weighted uni) with an empty suffix, and qpel_h / epel_h,
 * which are declared here only with the _i8mm suffix.
 */
272 NEON8_FNPROTO_PARTIAL_6(pel_pixels, (int16_t *dst,
273  const uint8_t *src, ptrdiff_t srcstride, int height,
274  const int8_t *hf, const int8_t *vf, int width),);
275 
276 NEON8_FNPROTO_PARTIAL_6(pel_uni_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
277  const uint8_t *_src, ptrdiff_t _srcstride, int height,
278  const int8_t *hf, const int8_t *vf, int width),);
279 
280 NEON8_FNPROTO_PARTIAL_6(pel_uni_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
281  const uint8_t *_src, ptrdiff_t _srcstride,
282  int height, int denom, int wx, int ox,
283  const int8_t *hf, const int8_t *vf, int width),);
284 
285 NEON8_FNPROTO_PARTIAL_6(qpel_h, (int16_t *dst,
286  const uint8_t *_src, ptrdiff_t _srcstride, int height,
287  const int8_t *hf, const int8_t *vf, int width), _i8mm);
288 
289 NEON8_FNPROTO_PARTIAL_6(epel_h, (int16_t *dst,
290  const uint8_t *_src, ptrdiff_t _srcstride, int height,
291  const int8_t *hf, const int8_t *vf, int width), _i8mm);
292 
/*
 * VVC vertical qpel prototypes, written out by hand rather than generated
 * by a macro. Only widths 4 and 8 are declared in this part of the header.
 * The parameter list matches the other VVC put prototypes.
 */
293 void ff_vvc_put_qpel_v4_8_neon(int16_t *dst, const uint8_t *_src,
294  ptrdiff_t _srcstride, int height,
295  const int8_t *hf, const int8_t *vf, int width);
296 
297 void ff_vvc_put_qpel_v8_8_neon(int16_t *dst, const uint8_t *_src,
298  ptrdiff_t _srcstride, int height,
299  const int8_t *hf, const int8_t *vf, int width);
300 
/*
 * VVC qpel_hv and epel_hv prototypes for widths 4, 8, 16, 32, 64 and 128.
 * Each is declared with an empty suffix and again with the _i8mm suffix,
 * using identical parameters.
 */
301 NEON8_FNPROTO_PARTIAL_6(qpel_hv, (int16_t *dst,
302  const uint8_t *src, ptrdiff_t srcstride, int height,
303  const int8_t *hf, const int8_t *vf, int width),);
304 
305 NEON8_FNPROTO_PARTIAL_6(qpel_hv, (int16_t *dst,
306  const uint8_t *src, ptrdiff_t srcstride, int height,
307  const int8_t *hf, const int8_t *vf, int width), _i8mm);
308 
309 NEON8_FNPROTO_PARTIAL_6(epel_hv, (int16_t *dst,
310  const uint8_t *src, ptrdiff_t srcstride, int height,
311  const int8_t *hf, const int8_t *vf, int width),);
312 
313 NEON8_FNPROTO_PARTIAL_6(epel_hv, (int16_t *dst,
314  const uint8_t *src, ptrdiff_t srcstride, int height,
315  const int8_t *hf, const int8_t *vf, int width), _i8mm);
316 
317 #endif /* AVCODEC_AARCH64_H26X_DSP_H */
_dst
uint8_t * _dst
Definition: dsp.h:56
ff_hevc_sao_edge_filter_16x16_8_neon
void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
src
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t const uint8_t * src
Definition: dsp.h:88
ff_vvc_put_qpel_v8_8_neon
void ff_vvc_put_qpel_v8_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width)
height
uint8_t ptrdiff_t const uint8_t ptrdiff_t int height
Definition: dsp.h:57
ff_hevc_sao_edge_filter_8x8_8_neon
void ff_hevc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
_src
uint8_t ptrdiff_t const uint8_t * _src
Definition: dsp.h:56
mx
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t mx
Definition: dsp.h:57
_srcstride
uint8_t ptrdiff_t const uint8_t ptrdiff_t _srcstride
Definition: dsp.h:57
ff_vvc_put_qpel_v4_8_neon
void ff_vvc_put_qpel_v4_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width)
ff_h26x_sao_band_filter_16x16_8_neon
void ff_h26x_sao_band_filter_16x16_8_neon(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, const int16_t *sao_offset_val, int sao_left_class, int width, int height)
my
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t my
Definition: dsp.h:57
srcstride
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t const uint8_t ptrdiff_t srcstride
Definition: dsp.h:88
NEON8_FNPROTO
#define NEON8_FNPROTO(fn, args, ext)
width
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int width
Definition: dsp.h:57
_dststride
uint8_t ptrdiff_t _dststride
Definition: dsp.h:56
ff_vvc_sao_edge_filter_8x8_8_neon
void ff_vvc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
ff_h26x_sao_band_filter_8x8_8_neon
void ff_h26x_sao_band_filter_8x8_8_neon(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, const int16_t *sao_offset_val, int sao_left_class, int width, int height)
NEON8_FNPROTO_PARTIAL_5
#define NEON8_FNPROTO_PARTIAL_5(fn, args, ext)
NEON8_FNPROTO_PARTIAL_4
#define NEON8_FNPROTO_PARTIAL_4(fn, args, ext)
Definition: dsp.h:248
src2
const pixel * src2
Definition: h264pred_template.c:421
hf
uint8_t ptrdiff_t const uint8_t ptrdiff_t int const int8_t * hf
Definition: dsp.h:258
NEON8_FNPROTO_PARTIAL_6
#define NEON8_FNPROTO_PARTIAL_6(fn, args, ext)
Definition: dsp.h:45
ff_vvc_sao_edge_filter_16x16_8_neon
void ff_vvc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
vf
uint8_t ptrdiff_t const uint8_t ptrdiff_t int const int8_t const int8_t * vf
Definition: dsp.h:258