FFmpeg
swscale.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "config.h"
20 #include "libavutil/attributes.h"
21 #include "libswscale/swscale.h"
23 #include "libavutil/aarch64/cpu.h"
24 
/*
 * Prototypes of the external horizontal scaling routines (names suggest
 * hand-written NEON assembly; their definitions are not in this file).
 *
 * All six share one signature: the first argument is the shift that the C
 * wrappers in this file derive from the source pixel format, and the
 * remaining arguments are handed over unchanged from the wrapper's own
 * parameter list.
 */
void ff_hscale16to15_4_neon_asm(int shift,
                                int16_t *_dst,
                                int dstW,
                                const uint8_t *_src,
                                const int16_t *filter,
                                const int32_t *filterPos,
                                int filterSize);

void ff_hscale16to15_X8_neon_asm(int shift,
                                 int16_t *_dst,
                                 int dstW,
                                 const uint8_t *_src,
                                 const int16_t *filter,
                                 const int32_t *filterPos,
                                 int filterSize);

void ff_hscale16to15_X4_neon_asm(int shift,
                                 int16_t *_dst,
                                 int dstW,
                                 const uint8_t *_src,
                                 const int16_t *filter,
                                 const int32_t *filterPos,
                                 int filterSize);

void ff_hscale16to19_4_neon_asm(int shift,
                                int16_t *_dst,
                                int dstW,
                                const uint8_t *_src,
                                const int16_t *filter,
                                const int32_t *filterPos,
                                int filterSize);

void ff_hscale16to19_X8_neon_asm(int shift,
                                 int16_t *_dst,
                                 int dstW,
                                 const uint8_t *_src,
                                 const int16_t *filter,
                                 const int32_t *filterPos,
                                 int filterSize);

void ff_hscale16to19_X4_neon_asm(int shift,
                                 int16_t *_dst,
                                 int dstW,
                                 const uint8_t *_src,
                                 const int16_t *filter,
                                 const int32_t *filterPos,
                                 int filterSize);
43 
44 static void ff_hscale16to15_4_neon(SwsContext *c, int16_t *_dst, int dstW,
45  const uint8_t *_src, const int16_t *filter,
46  const int32_t *filterPos, int filterSize)
47 {
48  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
49  int sh = desc->comp[0].depth - 1;
50 
51  if (sh<15) {
52  sh = isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : (desc->comp[0].depth - 1);
53  } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
54  sh = 16 - 1;
55  }
56  ff_hscale16to15_4_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
57 
58 }
59 
60 static void ff_hscale16to15_X8_neon(SwsContext *c, int16_t *_dst, int dstW,
61  const uint8_t *_src, const int16_t *filter,
62  const int32_t *filterPos, int filterSize)
63 {
64  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
65  int sh = desc->comp[0].depth - 1;
66 
67  if (sh<15) {
68  sh = isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : (desc->comp[0].depth - 1);
69  } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
70  sh = 16 - 1;
71  }
72  ff_hscale16to15_X8_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
73 
74 }
75 
76 static void ff_hscale16to15_X4_neon(SwsContext *c, int16_t *_dst, int dstW,
77  const uint8_t *_src, const int16_t *filter,
78  const int32_t *filterPos, int filterSize)
79 {
80  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
81  int sh = desc->comp[0].depth - 1;
82 
83  if (sh<15) {
84  sh = isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : (desc->comp[0].depth - 1);
85  } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
86  sh = 16 - 1;
87  }
88  ff_hscale16to15_X4_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
89 }
90 
91 static void ff_hscale16to19_4_neon(SwsContext *c, int16_t *_dst, int dstW,
92  const uint8_t *_src, const int16_t *filter,
93  const int32_t *filterPos, int filterSize)
94 {
95  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
96  int bits = desc->comp[0].depth - 1;
97  int sh = bits - 4;
98 
99  if ((isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && desc->comp[0].depth<16) {
100  sh = 9;
101  } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
102  sh = 16 - 1 - 4;
103  }
104 
105  ff_hscale16to19_4_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
106 
107 }
108 
109 static void ff_hscale16to19_X8_neon(SwsContext *c, int16_t *_dst, int dstW,
110  const uint8_t *_src, const int16_t *filter,
111  const int32_t *filterPos, int filterSize)
112 {
113  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
114  int bits = desc->comp[0].depth - 1;
115  int sh = bits - 4;
116 
117  if ((isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && desc->comp[0].depth<16) {
118  sh = 9;
119  } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
120  sh = 16 - 1 - 4;
121  }
122 
123  ff_hscale16to19_X8_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
124 
125 }
126 
127 static void ff_hscale16to19_X4_neon(SwsContext *c, int16_t *_dst, int dstW,
128  const uint8_t *_src, const int16_t *filter,
129  const int32_t *filterPos, int filterSize)
130 {
131  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
132  int bits = desc->comp[0].depth - 1;
133  int sh = bits - 4;
134 
135  if ((isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && desc->comp[0].depth<16) {
136  sh = 9;
137  } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
138  sh = 16 - 1 - 4;
139  }
140 
141  ff_hscale16to19_X4_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
142 
143 }
144 
145 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
146 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
147  SwsContext *c, int16_t *data, \
148  int dstW, const uint8_t *src, \
149  const int16_t *filter, \
150  const int32_t *filterPos, int filterSize)
151 #define SCALE_FUNCS(filter_n, opt) \
152  SCALE_FUNC(filter_n, 8, 15, opt); \
153  SCALE_FUNC(filter_n, 8, 19, opt);
154 #define ALL_SCALE_FUNCS(opt) \
155  SCALE_FUNCS(4, opt); \
156  SCALE_FUNCS(X8, opt); \
157  SCALE_FUNCS(X4, opt)
158 
159 ALL_SCALE_FUNCS(neon);
160 
/*
 * Prototypes of the external NEON vertical output routines installed by the
 * init function in this file for 8-bit destinations.
 */

/* Multi-input variant: takes a filter of filterSize taps and an array of
 * source line pointers. */
void ff_yuv2planeX_8_neon(const int16_t *filter, int filterSize,
                          const int16_t **src, uint8_t *dest, int dstW,
                          const uint8_t *dither, int offset);

/* Single-input variant: takes one source line and no filter. */
void ff_yuv2plane1_8_neon(
        const int16_t *src,
        uint8_t *dest,
        int dstW,
        const uint8_t *dither,
        int offset);
170 
/*
 * Store into hscalefn the horizontal scaler matching the context's bit
 * depths, for the filter-size suffix `filtersize` (pasted into the function
 * name, e.g. 4, X8, X4) and the CPU suffix `opt`.
 *
 *   c->srcBpc == 8 : ff_hscale8to15_*  if c->dstBpc <= 14, else ff_hscale8to19_*
 *   otherwise      : ff_hscale16to15_* if c->dstBpc <= 14, else ff_hscale16to19_*
 *
 * Relies on a variable named `c` being in scope at the expansion site.
 */
#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt) do {              \
    if (c->srcBpc != 8) {                                               \
        if (c->dstBpc > 14) {                                           \
            hscalefn = ff_hscale16to19_ ## filtersize ## _ ## opt;      \
        } else {                                                        \
            hscalefn = ff_hscale16to15_ ## filtersize ## _ ## opt;      \
        }                                                               \
    } else {                                                            \
        if (c->dstBpc > 14) {                                           \
            hscalefn = ff_hscale8to19_ ## filtersize ## _ ## opt;       \
        } else {                                                        \
            hscalefn = ff_hscale8to15_ ## filtersize ## _ ## opt;       \
        }                                                               \
    }                                                                   \
} while (0)
188 
/*
 * Choose the horizontal scaler family from the run-time filter size and
 * delegate the bit-depth selection to ASSIGN_SCALE_FUNC2:
 *
 *   filtersize == 4            -> "4"  variants
 *   filtersize multiple of 8   -> "X8" variants
 *   other multiples of 4       -> "X4" variants
 *   anything else              -> hscalefn is left untouched
 *
 * `filtersize` is parenthesized at every use so that an expression argument
 * (e.g. `n + 4`) is tested as a whole. It is evaluated more than once, so it
 * must not have side effects. The X4 branch needs no explicit "% 8 != 0"
 * test: multiples of 8 were already taken by the preceding branch.
 */
#define ASSIGN_SCALE_FUNC(hscalefn, filtersize, opt) do {       \
    if ((filtersize) == 4)                                      \
        ASSIGN_SCALE_FUNC2(hscalefn, 4, opt);                   \
    else if ((filtersize) % 8 == 0)                             \
        ASSIGN_SCALE_FUNC2(hscalefn, X8, opt);                  \
    else if ((filtersize) % 4 == 0)                             \
        ASSIGN_SCALE_FUNC2(hscalefn, X4, opt);                  \
} while (0)
197 
/*
 * Store into vscalefn the ff_yuv2plane1_8_<opt> routine when the destination
 * is 8 bits per component; for any other c->dstBpc vscalefn is left untouched.
 *
 * Wrapped in do { } while (0) so that the expansion plus the caller's ';'
 * forms exactly one statement and can be used safely inside an unbraced
 * if/else. Relies on a variable named `c` being in scope.
 */
#define ASSIGN_VSCALE_FUNC(vscalefn, opt) do {                  \
    switch (c->dstBpc) {                                        \
    case 8: vscalefn = ff_yuv2plane1_8_ ## opt; break;          \
    default: break;                                             \
    }                                                           \
} while (0)
203 
205 {
206  int cpu_flags = av_get_cpu_flags();
207 
208  if (have_neon(cpu_flags)) {
209  ASSIGN_SCALE_FUNC(c->hyScale, c->hLumFilterSize, neon);
210  ASSIGN_SCALE_FUNC(c->hcScale, c->hChrFilterSize, neon);
211  ASSIGN_VSCALE_FUNC(c->yuv2plane1, neon);
212  if (c->dstBpc == 8) {
213  c->yuv2planeX = ff_yuv2planeX_8_neon;
214  }
215  }
216 }
ff_hscale16to19_X4_neon_asm
void ff_hscale16to19_X4_neon_asm(int shift, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2964
AV_PIX_FMT_FLAG_FLOAT
#define AV_PIX_FMT_FLAG_FLOAT
The pixel format contains IEEE-754 floating point values.
Definition: pixdesc.h:158
ff_yuv2planeX_8_neon
void ff_yuv2planeX_8_neon(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)
ff_yuv2plane1_8_neon
void ff_yuv2plane1_8_neon(const int16_t *src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)
ff_hscale16to15_X4_neon
static void ff_hscale16to15_X4_neon(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Definition: swscale.c:76
filter
filter_frame: For filters that do not use the activate() callback, this method is called when a frame is pushed to the filter's input. It can be called at any time except in a reentrant way. If the input frame is enough to produce output, then the filter should push the output frames on the output link immediately. As an exception to the previous rule, if the input frame is enough to produce several output frames, then the filter needs output only at least one per link. The additional frames can be left buffered in the filter.
Definition: filter_design.txt:228
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:103
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:52
ff_hscale16to15_X4_neon_asm
void ff_hscale16to15_X4_neon_asm(int shift, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
ff_sws_init_swscale_aarch64
av_cold void ff_sws_init_swscale_aarch64(SwsContext *c)
Definition: swscale.c:204
ff_hscale16to15_4_neon
static void ff_hscale16to15_4_neon(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Definition: swscale.c:44
ff_hscale16to19_X4_neon
static void ff_hscale16to19_X4_neon(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Definition: swscale.c:127
ff_hscale16to15_X8_neon_asm
void ff_hscale16to15_X8_neon_asm(int shift, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
av_cold
#define av_cold
Definition: attributes.h:90
ff_hscale16to19_X8_neon
static void ff_hscale16to19_X8_neon(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Definition: swscale.c:109
bits
uint8_t bits
Definition: vp3data.h:128
ff_hscale16to19_X8_neon_asm
void ff_hscale16to19_X8_neon_asm(int shift, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
ff_hscale16to19_4_neon
static void ff_hscale16to19_4_neon(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Definition: swscale.c:91
shift
static int shift(int a, int b)
Definition: bonk.c:262
isAnyRGB
static av_always_inline int isAnyRGB(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:832
have_neon
#define have_neon(flags)
Definition: cpu.h:26
offset
it's the only field you need to keep assuming you have a context. There is some magic you don't need to care about around this, just let it vf offset
Definition: writing_filters.txt:86
attributes.h
ff_hscale16to19_4_neon_asm
void ff_hscale16to19_4_neon_asm(int shift, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
swscale_internal.h
ASSIGN_VSCALE_FUNC
#define ASSIGN_VSCALE_FUNC(vscalefn, opt)
Definition: swscale.c:198
AV_PIX_FMT_PAL8
@ AV_PIX_FMT_PAL8
8 bits with AV_PIX_FMT_RGB32 palette
Definition: pixfmt.h:77
ASSIGN_SCALE_FUNC
#define ASSIGN_SCALE_FUNC(hscalefn, filtersize, opt)
Definition: swscale.c:189
desc
const char * desc
Definition: libsvtav1.c:83
ff_hscale16to15_4_neon_asm
void ff_hscale16to15_4_neon_asm(int shift, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
ff_hscale16to15_X8_neon
static void ff_hscale16to15_X8_neon(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Definition: swscale.c:60
int32_t
int32_t
Definition: audioconvert.c:56
cpu.h
ALL_SCALE_FUNCS
#define ALL_SCALE_FUNCS(opt)
Definition: swscale.c:154
SwsContext
Definition: swscale_internal.h:299
swscale.h
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:60