doxygen/3.1/ppc_2h264chroma__template_8c_source.html

 /*

  * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>

  *

  * This file is part of FFmpeg.

  *

  * FFmpeg is free software; you can redistribute it and/or

  * modify it under the terms of the GNU Lesser General Public

  * License as published by the Free Software Foundation; either

  * version 2.1 of the License, or (at your option) any later version.

  *

  * FFmpeg is distributed in the hope that it will be useful,

  * but WITHOUT ANY WARRANTY; without even the implied warranty of

  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

  * Lesser General Public License for more details.

  *

  * You should have received a copy of the GNU Lesser General Public

  * License along with FFmpeg; if not, write to the Free Software

  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

  */


 #include "libavutil/mem.h"

 #include "libavutil/ppc/types_altivec.h"

 #include "libavutil/ppc/util_altivec.h"


 /* This code assumes that stride % 16 == 0. */


 /*
  * Produce one output row of the general (four-weight) 8-pixel chroma filter.
  *
  * The macro works on names captured from the expanding scope:
  *   inputs : vsrc0ssH/vsrc1ssH (current row, already widened to 16 bit),
  *            vsrc2uc/vsrc3uc (next row, still 8 bit), vA..vD, v6us,
  *            zero_u8v, fperm, stride
  *   scratch: vsrc2ssH, vsrc3ssH, psum, vdst, ppsum, vfdst, fsum
  *   updated: dst and src (advanced by stride), vsrc0ssH/vsrc1ssH (set to the
  *            widened next row so the following call can reuse them)
  *
  * BIAS1 is the initial accumulator handed to the first vec_mladd, BIAS2 is a
  * function-like macro applied to the finished sum before the shift by v6us.
  * The packed result is merged with the vector loaded from dst through fperm,
  * passed through OP_U8_ALTIVEC (supplied by the including file) and stored
  * back to dst.
  *
  * The body is wrapped in do { } while (0) so that an invocation followed by
  * ';' is exactly one statement, and every continuation line is joined
  * directly to the next one (a blank line after '\' ends the definition).
  */
 #define CHROMA_MC8_ALTIVEC_CORE(BIAS1, BIAS2) \
     do {\
         vsrc2ssH = (vec_s16)VEC_MERGEH(zero_u8v,(vec_u8)vsrc2uc);\
         vsrc3ssH = (vec_s16)VEC_MERGEH(zero_u8v,(vec_u8)vsrc3uc);\
 \
         psum = vec_mladd(vA, vsrc0ssH, BIAS1);\
         psum = vec_mladd(vB, vsrc1ssH, psum);\
         psum = vec_mladd(vC, vsrc2ssH, psum);\
         psum = vec_mladd(vD, vsrc3ssH, psum);\
         psum = BIAS2(psum);\
         psum = vec_sr(psum, v6us);\
 \
         vdst = vec_ld(0, dst);\
         ppsum = (vec_u8)vec_pack(psum, psum);\
         vfdst = vec_perm(vdst, ppsum, fperm);\
 \
         OP_U8_ALTIVEC(fsum, vfdst, vdst);\
 \
         vec_st(fsum, 0, dst);\
 \
         vsrc0ssH = vsrc2ssH;\
         vsrc1ssH = vsrc3ssH;\
 \
         dst += stride;\
         src += stride;\
     } while (0)


 /*
  * Produce one output row of the two-weight 8-pixel chroma filter.
  *
  * Names captured from the expanding scope:
  *   inputs : vsrc0uc/vsrc1uc (the two 8-bit source vectors to blend), vA, vE,
  *            v32ss (initial accumulator), v6us, zero_u8v, fperm, stride
  *   scratch: vsrc0ssH, vsrc1ssH, psum, vdst, ppsum, vfdst, fsum
  *   updated: dst and src (advanced by stride)
  *
  * The sum vA * vsrc0 + vE * vsrc1 + v32ss is shifted right by v6us, packed,
  * merged with the vector loaded from dst through fperm, passed through
  * OP_U8_ALTIVEC (supplied by the including file) and stored back to dst.
  *
  * The body is wrapped in do { } while (0) so that an invocation followed by
  * ';' is exactly one statement, and every continuation line is joined
  * directly to the next one (a blank line after '\' ends the definition).
  */
 #define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \
     do {\
         vsrc0ssH = (vec_s16)VEC_MERGEH(zero_u8v,(vec_u8)vsrc0uc);\
         vsrc1ssH = (vec_s16)VEC_MERGEH(zero_u8v,(vec_u8)vsrc1uc);\
 \
         psum = vec_mladd(vA, vsrc0ssH, v32ss);\
         psum = vec_mladd(vE, vsrc1ssH, psum);\
         psum = vec_sr(psum, v6us);\
 \
         vdst = vec_ld(0, dst);\
         ppsum = (vec_u8)vec_pack(psum, psum);\
         vfdst = vec_perm(vdst, ppsum, fperm);\
 \
         OP_U8_ALTIVEC(fsum, vfdst, vdst);\
 \
         vec_st(fsum, 0, dst);\
 \
         dst += stride;\
         src += stride;\
     } while (0)


 /*
  * Post-accumulation bias hooks, passed as the BIAS2 argument of
  * CHROMA_MC8_ALTIVEC_CORE:
  *   noop  - leaves the sum unchanged
  *   add28 - adds v28ss (captured from the expanding scope) to the sum
  * The argument is parenthesized so that any expression can be passed safely.
  */
 #define noop(a) (a)
 #define add28(a) vec_add(v28ss, (a))


 /*
  * Source-row loaders.
  *
  *   GET_VSRC1(vs0, off, b, perm0, s)           - vs0 = 16 bytes at s + off
  *   GET_VSRC (vs0, vs1, off, b, perm0, perm1, s) - additionally
  *                                                 vs1 = 16 bytes at s + off + 1
  *
  * Big-endian build: the data is assembled from vec_ld loads. A second load
  * at off + b is only issued when loadSecond is set; otherwise the first
  * vector is reused. perm0/perm1 are the permute masks the callers obtain
  * from vec_lvsl(0, src) / vec_lvsl(1, src). When reallyBadAlign is set
  * (callers set it for src % 16 == 15) vs1 is taken straight from the second
  * load instead of being permuted. loadSecond and reallyBadAlign are captured
  * from the expanding scope.
  *
  * Other builds: vec_vsx_ld is used directly; b and the permute masks are
  * ignored.
  *
  * Each macro is wrapped in do { } while (0) so that an invocation followed
  * by ';' is exactly one statement, offsets are parenthesized before being
  * added to, and every continuation line is joined directly to the next one
  * (a blank line after '\' ends the definition).
  */
 #if HAVE_BIGENDIAN
 #define GET_VSRC1(vs0, off, b, perm0, s) \
     do { \
         vec_u8 vsrcCuc, vsrcDuc; \
         vsrcCuc = vec_ld(off, s); \
         if (loadSecond) { \
             vsrcDuc = vec_ld((off) + (b), s); \
         } else { \
             vsrcDuc = vsrcCuc; \
         } \
         vs0 = vec_perm(vsrcCuc, vsrcDuc, perm0); \
     } while (0)
 #define GET_VSRC(vs0, vs1, off, b, perm0, perm1, s) \
     do { \
         vec_u8 vsrcCuc, vsrcDuc; \
         vsrcCuc = vec_ld(off, s); \
         if (loadSecond) { \
             vsrcDuc = vec_ld((off) + (b), s); \
         } else { \
             vsrcDuc = vsrcCuc; \
         } \
         vs0 = vec_perm(vsrcCuc, vsrcDuc, perm0); \
         if (reallyBadAlign) { \
             vs1 = vsrcDuc; \
         } else { \
             vs1 = vec_perm(vsrcCuc, vsrcDuc, perm1); \
         } \
     } while (0)

 #else

 #define GET_VSRC1(vs0, off, b, perm0, s) \
     do { \
         vs0 = vec_vsx_ld(off, s); \
     } while (0)
 #define GET_VSRC(vs0, vs1, off, b, perm0, perm1, s) \
     do { \
         vs0 = vec_vsx_ld(off, s); \
         vs1 = vec_vsx_ld((off) + 1, s); \
     } while (0)
 #endif /* HAVE_BIGENDIAN */


 #ifdef PREFIX_h264_chroma_mc8_altivec
 /*
  * AltiVec 8-pixel-wide H.264 chroma motion compensation.
  *
  * Every output row is a blend of the current and the next source row with
  * the weights A = (8-x)(8-y), B = x(8-y), C = (8-x)y and D = xy; 32 is added
  * and the sum is shifted right by 6. The result is combined with the
  * destination through OP_U8_ALTIVEC, which the including file defines.
  *
  * dst    - destination. Only two cases are distinguished: dst % 16 == 0
  *          (the first 8 bytes of the 16-byte block are replaced) and anything
  *          else (the last 8 bytes are replaced).
  * src    - source pixels, any alignment.
  * stride - byte distance between rows, applied to both dst and src.
  * h      - number of rows to produce.
  * x, y   - weight inputs used above (presumably the 1/8-pel fraction, 0..7 -
  *          confirm against callers).
  */
 static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
                                     int stride, int h, int x, int y) {
     /* A, B, C, D in that order; aligned so vec_ld can fetch all four at once. */
     DECLARE_ALIGNED(16, signed int, ABCD)[4] = {
         (8 - x) * (8 - y),
         (    x) * (8 - y),
         (8 - x) * (    y),
         (    x) * (    y),
     };
     int row;
     vec_u8 fperm;
     LOAD_ZERO;
     const vec_s32 vABCD = vec_ld(0, ABCD);
     /* Splat one 16-bit half of each 32-bit weight across a vector. */
     const vec_s16 vA = VEC_SPLAT16(vABCD, 1);
     const vec_s16 vB = VEC_SPLAT16(vABCD, 3);
     const vec_s16 vC = VEC_SPLAT16(vABCD, 5);
     const vec_s16 vD = VEC_SPLAT16(vABCD, 7);
     /* 1 << 5 == 32: the rounding term. */
     const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
     const vec_u16 v6us = vec_splat_u16(6);

     /* Working set shared with the GET_VSRC* and CHROMA_MC8_* macros. */
     vec_u8 vsrcperm0, vsrcperm1;
     vec_u8 vsrc0uc, vsrc1uc;
     vec_u8 vsrc2uc, vsrc3uc;
     vec_s16 vsrc0ssH, vsrc1ssH;
     vec_s16 vsrc2ssH, vsrc3ssH, psum;
     vec_u8 vdst, ppsum, vfdst, fsum;
 #if HAVE_BIGENDIAN
     const unsigned long src_misalign = ((unsigned long)src) % 16;
     /* A second aligned load is only requested when src sits in the upper half. */
     int loadSecond     = src_misalign > 7;
     /* src + 1 starts a new 16-byte block. */
     int reallyBadAlign = src_misalign == 15;
     vsrcperm0 = vec_lvsl(0, src);
     vsrcperm1 = vec_lvsl(1, src);
 #endif

     /*
      * fperm is used as vec_perm(vdst, ppsum, fperm): indices 0x00-0x0F pick
      * bytes of the old destination, 0x10-0x1F pick bytes of the new result.
      */
     if (((unsigned long)dst) % 16 != 0) {
         /* keep the first 8 destination bytes, replace the last 8 */
         fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
                          0x04, 0x05, 0x06, 0x07,
                          0x18, 0x19, 0x1A, 0x1B,
                          0x1C, 0x1D, 0x1E, 0x1F};
     } else {
         /* replace the first 8 destination bytes, keep the last 8 */
         fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
                          0x14, 0x15, 0x16, 0x17,
                          0x08, 0x09, 0x0A, 0x0B,
                          0x0C, 0x0D, 0x0E, 0x0F};
     }

     /* Prime the pipeline with the first source row (src and src + 1). */
     GET_VSRC(vsrc0uc, vsrc1uc, 0, 16, vsrcperm0, vsrcperm1, src);

     vsrc0ssH = (vec_s16)VEC_MERGEH(zero_u8v,(vec_u8)vsrc0uc);
     vsrc1ssH = (vec_s16)VEC_MERGEH(zero_u8v,(vec_u8)vsrc1uc);

     if (ABCD[3]) {
         /* D != 0: all four weights take part. */
         for (row = 0; row < h; row++) {
             GET_VSRC(vsrc2uc, vsrc3uc, stride, 16, vsrcperm0, vsrcperm1, src);
             CHROMA_MC8_ALTIVEC_CORE(v32ss, noop);
         }
     } else {
         /* D == 0: at most one of B and C is non-zero, so two weights suffice. */
         const vec_s16 vE = vec_add(vB, vC);

         if (!ABCD[2]) {
             /* y == 0, C == 0: blend src with src + 1 on the same row. */
             for (row = 0; row < h; row++) {
                 GET_VSRC(vsrc0uc, vsrc1uc, 0, 15, vsrcperm0, vsrcperm1, src);
                 CHROMA_MC8_ALTIVEC_CORE_SIMPLE;
             }
         } else {
             /* x == 0, B == 0: blend the current row with the next one. */
             for (row = 0; row < h; row++) {
                 GET_VSRC1(vsrc1uc, stride, 15, vsrcperm0, src);
                 CHROMA_MC8_ALTIVEC_CORE_SIMPLE;
                 /* the row just loaded becomes the current row */
                 vsrc0uc = vsrc1uc;
             }
         }
     }
 }
 #endif


 /* This code assumes that stride % 16 == 0. */

 #ifdef PREFIX_no_rnd_vc1_chroma_mc8_altivec
 /*
  * AltiVec 8-pixel-wide VC-1 chroma motion compensation, "no rounding" form.
  *
  * Every output row is a blend of the current and the next source row with
  * the weights A = (8-x)(8-y), B = x(8-y), C = (8-x)y and D = xy. The
  * accumulation starts from zero, 28 is added afterwards and the sum is
  * shifted right by 6. The result is combined with the destination through
  * OP_U8_ALTIVEC, which the including file defines.
  *
  * dst    - destination. Only two cases are distinguished: dst % 16 == 0
  *          (the first 8 bytes of the 16-byte block are replaced) and anything
  *          else (the last 8 bytes are replaced).
  * src    - source pixels, any alignment.
  * stride - byte distance between rows, applied to both dst and src.
  * h      - number of rows to produce.
  * x, y   - weight inputs used above (presumably the 1/8-pel fraction, 0..7 -
  *          confirm against callers).
  */
 static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
     /* A, B, C, D in that order; aligned so vec_ld can fetch all four at once. */
     DECLARE_ALIGNED(16, signed int, ABCD)[4] = {
         (8 - x) * (8 - y),
         (    x) * (8 - y),
         (8 - x) * (    y),
         (    x) * (    y),
     };
     int row;
     vec_u8 fperm;
     LOAD_ZERO;
     const vec_s32 vABCD = vec_ld(0, ABCD);
     /* Splat one 16-bit half of each 32-bit weight across a vector. */
     const vec_s16 vA = VEC_SPLAT16(vABCD, 1);
     const vec_s16 vB = VEC_SPLAT16(vABCD, 3);
     const vec_s16 vC = VEC_SPLAT16(vABCD, 5);
     const vec_s16 vD = VEC_SPLAT16(vABCD, 7);
     /* (1 << 5) - 4 == 28: the bias consumed by add28. */
     const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
     const vec_u16 v6us  = vec_splat_u16(6);

     /* Working set shared with the GET_VSRC and CHROMA_MC8_ALTIVEC_CORE macros. */
     vec_u8 vsrcperm0, vsrcperm1;
     vec_u8 vsrc0uc, vsrc1uc;
     vec_u8 vsrc2uc, vsrc3uc;
     vec_s16 vsrc0ssH, vsrc1ssH;
     vec_s16 vsrc2ssH, vsrc3ssH, psum;
     vec_u8 vdst, ppsum, vfdst, fsum;
 #if HAVE_BIGENDIAN
     const unsigned long src_misalign = ((unsigned long)src) % 16;
     /* A second aligned load is only requested when src sits in the upper half. */
     int loadSecond     = src_misalign > 7;
     /* src + 1 starts a new 16-byte block. */
     int reallyBadAlign = src_misalign == 15;
     vsrcperm0 = vec_lvsl(0, src);
     vsrcperm1 = vec_lvsl(1, src);
 #endif

     /*
      * fperm is used as vec_perm(vdst, ppsum, fperm): indices 0x00-0x0F pick
      * bytes of the old destination, 0x10-0x1F pick bytes of the new result.
      */
     if (((unsigned long)dst) % 16 != 0) {
         /* keep the first 8 destination bytes, replace the last 8 */
         fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
                          0x04, 0x05, 0x06, 0x07,
                          0x18, 0x19, 0x1A, 0x1B,
                          0x1C, 0x1D, 0x1E, 0x1F};
     } else {
         /* replace the first 8 destination bytes, keep the last 8 */
         fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
                          0x14, 0x15, 0x16, 0x17,
                          0x08, 0x09, 0x0A, 0x0B,
                          0x0C, 0x0D, 0x0E, 0x0F};
     }

     /* Prime the pipeline with the first source row (src and src + 1). */
     GET_VSRC(vsrc0uc, vsrc1uc, 0, 16, vsrcperm0, vsrcperm1, src);

     vsrc0ssH = (vec_s16)VEC_MERGEH(zero_u8v, (vec_u8)vsrc0uc);
     vsrc1ssH = (vec_s16)VEC_MERGEH(zero_u8v, (vec_u8)vsrc1uc);

     /* The four-weight path is used unconditionally here. */
     for (row = 0; row < h; row++) {
         GET_VSRC(vsrc2uc, vsrc3uc, stride, 16, vsrcperm0, vsrcperm1, src);
         CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28);
     }
 }
 #endif


 /*
  * Drop every helper macro defined by this template so that including it
  * again starts from a clean slate and nothing leaks into the including file.
  */
 #undef noop
 #undef add28
 #undef CHROMA_MC8_ALTIVEC_CORE
 #undef CHROMA_MC8_ALTIVEC_CORE_SIMPLE
 #undef GET_VSRC1
 #undef GET_VSRC

h
h
Definition: vp9dsp_template.c:2093

mem.h
memory handling functions

vec_s32
#define vec_s32
Definition: types_altivec.h:32

DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:53

uint8_t
uint8_t
Definition: audio_convert.c:194

vec_s16
#define vec_s16
Definition: types_altivec.h:30

GET_VSRC1
#define GET_VSRC1(vs0, off, b, perm0, s)
Definition: h264chroma_template.c:103

vec_u16
#define vec_u16
Definition: types_altivec.h:29

LOAD_ZERO
#define LOAD_ZERO
Definition: types_altivec.h:38

noop
#define noop(a)
Definition: h264chroma_template.c:72

src
#define src
Definition: vp9dsp.c:530

vec_u8
#define vec_u8
Definition: types_altivec.h:27

types_altivec.h

util_altivec.h
Contains misc utility macros and inline functions.

GET_VSRC
#define GET_VSRC(vs0, vs1, off, b, perm0, perm1, s)
Definition: h264chroma_template.c:106

stride
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:105

CHROMA_MC8_ALTIVEC_CORE_SIMPLE
#define CHROMA_MC8_ALTIVEC_CORE_SIMPLE
Definition: h264chroma_template.c:52

CHROMA_MC8_ALTIVEC_CORE
#define CHROMA_MC8_ALTIVEC_CORE(BIAS1, BIAS2)
Definition: h264chroma_template.c:27

add28
#define add28(a)
Definition: h264chroma_template.c:73

zero_u8v
#define zero_u8v
Definition: types_altivec.h:40