FFmpeg
mpegvideo_msa.c
/*
 * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/avassert.h"
#include "libavutil/mips/generic_macros_msa.h"
#include "h263dsp_mips.h"

static void h263_dct_unquantize_msa(int16_t *block, int16_t qmul,
                                    int16_t qadd, int8_t n_coeffs,
                                    uint8_t loop_start)
{
    int16_t *block_dup = block;
    int32_t level, cnt;
    v8i16 block_vec, qmul_vec, qadd_vec, sub;
    v8i16 add, mask, mul, zero_mask;

    qmul_vec = __msa_fill_h(qmul);
    qadd_vec = __msa_fill_h(qadd);
    /* Vector loop: 8 coefficients per iteration, starting at loop_start. */
    for (cnt = 0; cnt < (n_coeffs >> 3); cnt++) {
        block_vec = LD_SH(block_dup + loop_start);
        mask = __msa_clti_s_h(block_vec, 0);      /* lanes with level < 0  */
        zero_mask = __msa_ceqi_h(block_vec, 0);   /* lanes with level == 0 */
        mul = block_vec * qmul_vec;
        sub = mul - qadd_vec;
        add = mul + qadd_vec;
        /* level * qmul - qadd for negative levels, + qadd otherwise */
        add = (v8i16) __msa_bmnz_v((v16u8) add, (v16u8) sub, (v16u8) mask);
        /* zero coefficients are left untouched */
        block_vec = (v8i16) __msa_bmnz_v((v16u8) add, (v16u8) block_vec,
                                         (v16u8) zero_mask);
        ST_SH(block_vec, block_dup + loop_start);
        block_dup += 8;
    }

    cnt = ((n_coeffs >> 3) * 8) + loop_start;

    /* Scalar tail for the remaining coefficients up to n_coeffs. */
    for (; cnt <= n_coeffs; cnt++) {
        level = block[cnt];
        if (level) {
            if (level < 0) {
                level = level * qmul - qadd;
            } else {
                level = level * qmul + qadd;
            }
            block[cnt] = level;
        }
    }
}
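
/*
 * Scalar reference for the routine above (an illustrative sketch only, not
 * a function used by the MSA path): every non-zero coefficient in the range
 * [loop_start, n_coeffs] is rescaled to level * qmul + qadd for positive
 * levels and level * qmul - qadd for negative ones, which is exactly what
 * the vector loop and its scalar tail compute together.
 */
static void h263_dct_unquantize_ref(int16_t *block, int16_t qmul,
                                    int16_t qadd, int8_t n_coeffs,
                                    uint8_t loop_start)
{
    int32_t cnt, level;

    for (cnt = loop_start; cnt <= n_coeffs; cnt++) {
        level = block[cnt];
        if (level) {
            level = (level < 0) ? level * qmul - qadd
                                : level * qmul + qadd;
            block[cnt] = level;
        }
    }
}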

static int32_t mpeg2_dct_unquantize_inter_msa(int16_t *block,
                                              int32_t qscale,
                                              const int16_t *quant_matrix)
{
    int32_t cnt, sum_res = -1;
    v8i16 block_vec, block_neg, qscale_vec, mask;
    v8i16 block_org0, block_org1, block_org2, block_org3;
    v8i16 quant_m0, quant_m1, quant_m2, quant_m3;
    v8i16 sum, mul, zero_mask;
    v4i32 mul_vec, qscale_l, qscale_r, quant_m_r, quant_m_l;
    v4i32 block_l, block_r, sad;

    qscale_vec = __msa_fill_h(qscale);
    /* Two outer iterations of four 8-coefficient groups cover all 64
       coefficients; each group below repeats the same pattern. */
    for (cnt = 0; cnt < 2; cnt++) {
        LD_SH4(block, 8, block_org0, block_org1, block_org2, block_org3);
        LD_SH4(quant_matrix, 8, quant_m0, quant_m1, quant_m2, quant_m3);
        mask = __msa_clti_s_h(block_org0, 0);      /* lanes with level < 0  */
        zero_mask = __msa_ceqi_h(block_org0, 0);   /* lanes with level == 0 */
        block_neg = -block_org0;
        block_vec = (v8i16) __msa_bmnz_v((v16u8) block_org0, (v16u8) block_neg,
                                         (v16u8) mask);
        block_vec <<= 1;
        block_vec += 1;                            /* now 2 * |level| + 1   */
        UNPCK_SH_SW(block_vec, block_r, block_l);
        UNPCK_SH_SW(qscale_vec, qscale_r, qscale_l);
        UNPCK_SH_SW(quant_m0, quant_m_r, quant_m_l);
        mul_vec = block_l * qscale_l;
        mul_vec *= quant_m_l;
        block_l = mul_vec >> 4;   /* (2 * |level| + 1) * qscale * matrix >> 4 */
        mul_vec = block_r * qscale_r;
        mul_vec *= quant_m_r;
        block_r = mul_vec >> 4;
        mul = (v8i16) __msa_pckev_h((v8i16) block_l, (v8i16) block_r);
        block_neg = -mul;
        /* restore the sign, then keep zero coefficients unchanged */
        sum = (v8i16) __msa_bmnz_v((v16u8) mul, (v16u8) block_neg,
                                   (v16u8) mask);
        sum = (v8i16) __msa_bmnz_v((v16u8) sum, (v16u8) block_org0,
                                   (v16u8) zero_mask);
        ST_SH(sum, block);
        block += 8;
        quant_matrix += 8;
        sad = __msa_hadd_s_w(sum, sum);
        sum_res += HADD_SW_S32(sad);   /* accumulate for mismatch control */
        mask = __msa_clti_s_h(block_org1, 0);
        zero_mask = __msa_ceqi_h(block_org1, 0);
        block_neg = -block_org1;
        block_vec = (v8i16) __msa_bmnz_v((v16u8) block_org1, (v16u8) block_neg,
                                         (v16u8) mask);
        block_vec <<= 1;
        block_vec += 1;
        UNPCK_SH_SW(block_vec, block_r, block_l);
        UNPCK_SH_SW(qscale_vec, qscale_r, qscale_l);
        UNPCK_SH_SW(quant_m1, quant_m_r, quant_m_l);
        mul_vec = block_l * qscale_l;
        mul_vec *= quant_m_l;
        block_l = mul_vec >> 4;
        mul_vec = block_r * qscale_r;
        mul_vec *= quant_m_r;
        block_r = mul_vec >> 4;
        mul = __msa_pckev_h((v8i16) block_l, (v8i16) block_r);
        block_neg = -mul;
        sum = (v8i16) __msa_bmnz_v((v16u8) mul, (v16u8) block_neg,
                                   (v16u8) mask);
        sum = (v8i16) __msa_bmnz_v((v16u8) sum, (v16u8) block_org1,
                                   (v16u8) zero_mask);
        ST_SH(sum, block);

        block += 8;
        quant_matrix += 8;
        sad = __msa_hadd_s_w(sum, sum);
        sum_res += HADD_SW_S32(sad);
        mask = __msa_clti_s_h(block_org2, 0);
        zero_mask = __msa_ceqi_h(block_org2, 0);
        block_neg = -block_org2;
        block_vec = (v8i16) __msa_bmnz_v((v16u8) block_org2, (v16u8) block_neg,
                                         (v16u8) mask);
        block_vec <<= 1;
        block_vec += 1;
        UNPCK_SH_SW(block_vec, block_r, block_l);
        UNPCK_SH_SW(qscale_vec, qscale_r, qscale_l);
        UNPCK_SH_SW(quant_m2, quant_m_r, quant_m_l);
        mul_vec = block_l * qscale_l;
        mul_vec *= quant_m_l;
        block_l = mul_vec >> 4;
        mul_vec = block_r * qscale_r;
        mul_vec *= quant_m_r;
        block_r = mul_vec >> 4;
        mul = __msa_pckev_h((v8i16) block_l, (v8i16) block_r);
        block_neg = -mul;
        sum = (v8i16) __msa_bmnz_v((v16u8) mul, (v16u8) block_neg,
                                   (v16u8) mask);
        sum = (v8i16) __msa_bmnz_v((v16u8) sum, (v16u8) block_org2,
                                   (v16u8) zero_mask);
        ST_SH(sum, block);

        block += 8;
        quant_matrix += 8;
        sad = __msa_hadd_s_w(sum, sum);
        sum_res += HADD_SW_S32(sad);
        mask = __msa_clti_s_h(block_org3, 0);
        zero_mask = __msa_ceqi_h(block_org3, 0);
        block_neg = -block_org3;
        block_vec = (v8i16) __msa_bmnz_v((v16u8) block_org3, (v16u8) block_neg,
                                         (v16u8) mask);
        block_vec <<= 1;
        block_vec += 1;
        UNPCK_SH_SW(block_vec, block_r, block_l);
        UNPCK_SH_SW(qscale_vec, qscale_r, qscale_l);
        UNPCK_SH_SW(quant_m3, quant_m_r, quant_m_l);
        mul_vec = block_l * qscale_l;
        mul_vec *= quant_m_l;
        block_l = mul_vec >> 4;
        mul_vec = block_r * qscale_r;
        mul_vec *= quant_m_r;
        block_r = mul_vec >> 4;
        mul = __msa_pckev_h((v8i16) block_l, (v8i16) block_r);
        block_neg = -mul;
        sum = (v8i16) __msa_bmnz_v((v16u8) mul, (v16u8) block_neg,
                                   (v16u8) mask);
        sum = (v8i16) __msa_bmnz_v((v16u8) sum, (v16u8) block_org3,
                                   (v16u8) zero_mask);
        ST_SH(sum, block);

        block += 8;
        quant_matrix += 8;
        sad = __msa_hadd_s_w(sum, sum);
        sum_res += HADD_SW_S32(sad);
    }

    return sum_res;
}
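
/*
 * Scalar sketch of the dequantization performed by the routine above (a
 * reference-only helper for illustration, not part of the MSA path): each
 * non-zero level is rescaled to ((2 * |level| + 1) * qscale *
 * quant_matrix[i]) >> 4 with its sign restored, zeros stay zero, and the
 * running sum (seeded with -1, as above) feeds the mismatch control in the
 * caller.
 */
static int32_t mpeg2_dct_unquantize_inter_ref(int16_t *block, int32_t qscale,
                                              const int16_t *quant_matrix)
{
    int32_t i, level, sum = -1;

    for (i = 0; i < 64; i++) {
        level = block[i];
        if (level) {
            int32_t negative = level < 0;

            if (negative)
                level = -level;
            level = ((2 * level + 1) * qscale * quant_matrix[i]) >> 4;
            block[i] = negative ? -level : level;
        }
        sum += block[i];
    }

    return sum;
}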

void ff_dct_unquantize_h263_intra_msa(MpegEncContext *s,
                                      int16_t *block, int32_t index,
                                      int32_t qscale)
{
    int32_t qmul, qadd;
    int32_t nCoeffs;

    av_assert2(s->block_last_index[index] >= 0 || s->h263_aic);

    qmul = qscale << 1;

    if (!s->h263_aic) {
        /* rescale the DC coefficient with the luma or chroma DC scale */
        block[0] *= index < 4 ? s->y_dc_scale : s->c_dc_scale;
        qadd = (qscale - 1) | 1;   /* qscale rounded down to an odd value */
    } else {
        qadd = 0;
    }
    if (s->ac_pred)
        nCoeffs = 63;              /* AC prediction: all AC coefficients may be set */
    else
        nCoeffs = s->inter_scantable.raster_end[s->block_last_index[index]];

    h263_dct_unquantize_msa(block, qmul, qadd, nCoeffs, 1);   /* skip the DC coefficient */
}

void ff_dct_unquantize_h263_inter_msa(MpegEncContext *s,
                                      int16_t *block, int32_t index,
                                      int32_t qscale)
{
    int32_t qmul, qadd;
    int32_t nCoeffs;

    av_assert2(s->block_last_index[index] >= 0);

    qadd = (qscale - 1) | 1;
    qmul = qscale << 1;

    nCoeffs = s->inter_scantable.raster_end[s->block_last_index[index]];

    h263_dct_unquantize_msa(block, qmul, qadd, nCoeffs, 0);   /* process from coefficient 0 */
}
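
/*
 * Worked example of the parameter derivation used by the two H.263 wrappers
 * above (a hypothetical helper kept only for illustration): qmul is
 * 2 * qscale and qadd is qscale rounded down to an odd value, so qscale = 5
 * gives qmul = 10, qadd = 5, and a stored level of -3 reconstructs to
 * -3 * 10 - 5 = -35.
 */
static void h263_unquantize_params_example(int32_t qscale,
                                           int32_t *qmul, int32_t *qadd)
{
    *qmul = qscale << 1;       /* 2 * qscale                  */
    *qadd = (qscale - 1) | 1;  /* largest odd value <= qscale */
}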

void ff_dct_unquantize_mpeg2_inter_msa(MpegEncContext *s,
                                       int16_t *block, int32_t index,
                                       int32_t qscale)
{
    const uint16_t *quant_matrix;
    int32_t sum = -1;

    quant_matrix = s->inter_matrix;

    sum = mpeg2_dct_unquantize_inter_msa(block, qscale, quant_matrix);

    /* MPEG-2 mismatch control: toggle the last coefficient so that the sum
       of all dequantized coefficients ends up odd. */
    block[63] ^= sum & 1;
}
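
/*
 * Sketch of how these entry points are typically installed (illustrative
 * only: the function below is a hypothetical stand-in, the real wiring
 * lives in the MIPS init code, e.g. libavcodec/mips/mpegvideo_init_mips.c,
 * which may apply additional run-time checks before enabling each path).
 */
static void dct_unquantize_init_msa_example(MpegEncContext *s)
{
    s->dct_unquantize_h263_intra  = ff_dct_unquantize_h263_intra_msa;
    s->dct_unquantize_h263_inter  = ff_dct_unquantize_h263_inter_msa;
    s->dct_unquantize_mpeg2_inter = ff_dct_unquantize_mpeg2_inter_msa;
}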