00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 #include "libswresample/swresample_internal.h"
00022 #include "libswresample/audioconvert.h"
00023 
/*
 * Prototype generators for the assembly conversion routines.
 *
 * Every generated symbol is named
 *     ff<prefix>_<src_fmt>_to_<dst_fmt>_a_<isa>
 * and has the signature (uint8_t **dst, const uint8_t **src, int len).
 *
 * PROTO  declares one routine,
 * PROTO2 declares it for each source type (int16, int32, float),
 * PROTO3 additionally for each destination type,
 * PROTO4 additionally for each instruction set (mmx ... avx).
 */
#define PROTO(prefix, src_fmt, dst_fmt, isa) \
    void ff ## prefix ## _ ## src_fmt ## _to_ ## dst_fmt ## _a_ ## isa(uint8_t **dst, const uint8_t **src, int len);

#define PROTO2(prefix, dst_fmt, isa) \
    PROTO(prefix, int16, dst_fmt, isa) \
    PROTO(prefix, int32, dst_fmt, isa) \
    PROTO(prefix, float, dst_fmt, isa)

#define PROTO3(prefix, isa) \
    PROTO2(prefix, int16, isa) \
    PROTO2(prefix, int32, isa) \
    PROTO2(prefix, float, isa)

#define PROTO4(prefix) \
    PROTO3(prefix, mmx)   \
    PROTO3(prefix, sse)   \
    PROTO3(prefix, sse2)  \
    PROTO3(prefix, ssse3) \
    PROTO3(prefix, sse4)  \
    PROTO3(prefix, avx)

/* An empty prefix yields the plain ff_<src>_to_<dst>_a_<isa> names. */
PROTO4()
PROTO4(_pack_2ch)
PROTO4(_pack_6ch)
PROTO4(_unpack_2ch)
00032 
00033 av_cold void swri_audio_convert_init_x86(struct AudioConvert *ac,
00034                                  enum AVSampleFormat out_fmt,
00035                                  enum AVSampleFormat in_fmt,
00036                                  int channels){
00037     int mm_flags = av_get_cpu_flags();
00038 
00039     ac->simd_f= NULL;
00040 
00041 
00042 
00043 #define MULTI_CAPS_FUNC(flag, cap) \
00044     if (mm_flags & flag) {\
00045         if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16P)\
00046             ac->simd_f =  ff_int16_to_int32_a_ ## cap;\
00047         if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S32P)\
00048             ac->simd_f =  ff_int32_to_int16_a_ ## cap;\
00049     }
00050 
00051 MULTI_CAPS_FUNC(AV_CPU_FLAG_MMX, mmx)
00052 MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE2, sse2)
00053 
00054     if(mm_flags & AV_CPU_FLAG_MMX) {
00055         if(channels == 6) {
00056             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
00057                 ac->simd_f =  ff_pack_6ch_float_to_float_a_mmx;
00058         }
00059     }
00060 
00061     if(mm_flags & AV_CPU_FLAG_SSE2) {
00062         if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
00063             ac->simd_f =  ff_int32_to_float_a_sse2;
00064         if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S16P)
00065             ac->simd_f =  ff_int16_to_float_a_sse2;
00066         if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP)
00067             ac->simd_f =  ff_float_to_int32_a_sse2;
00068         if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP)
00069             ac->simd_f =  ff_float_to_int16_a_sse2;
00070 
00071         if(channels == 2) {
00072             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
00073                 ac->simd_f =  ff_pack_2ch_int32_to_int32_a_sse2;
00074             if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S16P)
00075                 ac->simd_f =  ff_pack_2ch_int16_to_int16_a_sse2;
00076             if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16P)
00077                 ac->simd_f =  ff_pack_2ch_int16_to_int32_a_sse2;
00078             if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32P)
00079                 ac->simd_f =  ff_pack_2ch_int32_to_int16_a_sse2;
00080 
00081             if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32)
00082                 ac->simd_f =  ff_unpack_2ch_int32_to_int32_a_sse2;
00083             if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
00084                 ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_sse2;
00085             if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
00086                 ac->simd_f =  ff_unpack_2ch_int16_to_int32_a_sse2;
00087             if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S32)
00088                 ac->simd_f =  ff_unpack_2ch_int32_to_int16_a_sse2;
00089 
00090             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
00091                 ac->simd_f =  ff_pack_2ch_int32_to_float_a_sse2;
00092             if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
00093                 ac->simd_f =  ff_pack_2ch_float_to_int32_a_sse2;
00094             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S16P)
00095                 ac->simd_f =  ff_pack_2ch_int16_to_float_a_sse2;
00096             if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLTP)
00097                 ac->simd_f =  ff_pack_2ch_float_to_int16_a_sse2;
00098             if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S32)
00099                 ac->simd_f =  ff_unpack_2ch_int32_to_float_a_sse2;
00100             if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_FLT)
00101                 ac->simd_f =  ff_unpack_2ch_float_to_int32_a_sse2;
00102             if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S16)
00103                 ac->simd_f =  ff_unpack_2ch_int16_to_float_a_sse2;
00104             if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_FLT)
00105                 ac->simd_f =  ff_unpack_2ch_float_to_int16_a_sse2;
00106         }
00107     }
00108     if(mm_flags & AV_CPU_FLAG_SSSE3) {
00109         if(channels == 2) {
00110             if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
00111                 ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_ssse3;
00112             if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
00113                 ac->simd_f =  ff_unpack_2ch_int16_to_int32_a_ssse3;
00114             if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S16)
00115                 ac->simd_f =  ff_unpack_2ch_int16_to_float_a_ssse3;
00116         }
00117     }
00118     if(mm_flags & AV_CPU_FLAG_SSE4) {
00119         if(channels == 6) {
00120             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
00121                 ac->simd_f =  ff_pack_6ch_float_to_float_a_sse4;
00122             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
00123                 ac->simd_f =  ff_pack_6ch_int32_to_float_a_sse4;
00124             if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
00125                 ac->simd_f =  ff_pack_6ch_float_to_int32_a_sse4;
00126         }
00127     }
00128     if(HAVE_AVX_EXTERNAL && mm_flags & AV_CPU_FLAG_AVX) {
00129         if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
00130             ac->simd_f =  ff_int32_to_float_a_avx;
00131         if(channels == 6) {
00132             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
00133                 ac->simd_f =  ff_pack_6ch_float_to_float_a_avx;
00134             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
00135                 ac->simd_f =  ff_pack_6ch_int32_to_float_a_avx;
00136             if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
00137                 ac->simd_f =  ff_pack_6ch_float_to_int32_a_avx;
00138         }
00139     }
00140 }
00141 
00142 #define D(type, simd) \
00143 mix_1_1_func_type ff_mix_1_1_a_## type ## _ ## simd;\
00144 mix_2_1_func_type ff_mix_2_1_a_## type ## _ ## simd;
00145 
00146 D(float, sse)
00147 D(float, avx)
00148 D(int16, mmx)
00149 D(int16, sse2)
00150 
00151 
00152 av_cold void swri_rematrix_init_x86(struct SwrContext *s){
00153     int mm_flags = av_get_cpu_flags();
00154     int nb_in  = av_get_channel_layout_nb_channels(s->in_ch_layout);
00155     int nb_out = av_get_channel_layout_nb_channels(s->out_ch_layout);
00156     int num    = nb_in * nb_out;
00157     int i,j;
00158 
00159     s->mix_1_1_simd = NULL;
00160     s->mix_2_1_simd = NULL;
00161 
00162     if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){
00163         if(mm_flags & AV_CPU_FLAG_MMX) {
00164             s->mix_1_1_simd = ff_mix_1_1_a_int16_mmx;
00165             s->mix_2_1_simd = ff_mix_2_1_a_int16_mmx;
00166         }
00167         if(mm_flags & AV_CPU_FLAG_SSE2) {
00168             s->mix_1_1_simd = ff_mix_1_1_a_int16_sse2;
00169             s->mix_2_1_simd = ff_mix_2_1_a_int16_sse2;
00170         }
00171         s->native_simd_matrix = av_mallocz(2 * num * sizeof(int16_t));
00172         for(i=0; i<nb_out; i++){
00173             int sh = 0;
00174             for(j=0; j<nb_in; j++)
00175                 sh = FFMAX(sh, FFABS(((int*)s->native_matrix)[i * nb_in + j]));
00176             sh = FFMAX(av_log2(sh) - 14, 0);
00177             for(j=0; j<nb_in; j++) {
00178                 ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)+1] = 15 - sh;
00179                 ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)] =
00180                     ((((int*)s->native_matrix)[i * nb_in + j]) + (1<<sh>>1)) >> sh;
00181             }
00182         }
00183     } else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){
00184         if(mm_flags & AV_CPU_FLAG_SSE) {
00185             s->mix_1_1_simd = ff_mix_1_1_a_float_sse;
00186             s->mix_2_1_simd = ff_mix_2_1_a_float_sse;
00187         }
00188         if(HAVE_AVX_EXTERNAL && mm_flags & AV_CPU_FLAG_AVX) {
00189             s->mix_1_1_simd = ff_mix_1_1_a_float_avx;
00190             s->mix_2_1_simd = ff_mix_2_1_a_float_avx;
00191         }
00192         s->native_simd_matrix = av_mallocz(num * sizeof(float));
00193         memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float));
00194     }
00195 }