shithub: openh264

ref: 1d357ef65360f0ea72a3d3ddd9f1af3cc6373511
dir: /codec/common/inc/asmdefs_mmi.h/

View raw version
/*!
 * \copy
 *     Copyright (c)  2013, Loongson Technology Co.,Ltd.
 *     All rights reserved.
 *
 *     Redistribution and use in source and binary forms, with or without
 *     modification, are permitted provided that the following conditions
 *     are met:
 *
 *        * Redistributions of source code must retain the above copyright
 *          notice, this list of conditions and the following disclaimer.
 *
 *        * Redistributions in binary form must reproduce the above copyright
 *          notice, this list of conditions and the following disclaimer in
 *          the documentation and/or other materials provided with the
 *          distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *     POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifndef ASMDEFS_MMI_H_
#define ASMDEFS_MMI_H_

#define CACHE_LINE_SIZE 32

#if defined(_ABI64) && _MIPS_SIM == _ABI64
# define mips_reg       int64_t
# define PTRSIZE        " 8 "
# define PTRLOG         " 3 "
# define PTR_ADDU       "daddu "
# define PTR_ADDIU      "daddiu "
# define PTR_ADDI       "daddi "
# define PTR_SUBU       "dsubu "
# define PTR_L          "ld "
# define PTR_S          "sd "
# define PTR_SRA        "dsra "
# define PTR_SRL        "dsrl "
# define PTR_SLL        "dsll "
#else
# define mips_reg       int32_t
# define PTRSIZE        " 4 "
# define PTRLOG         " 2 "
# define PTR_ADDU       "addu "
# define PTR_ADDIU      "addiu "
# define PTR_ADDI       "addi "
# define PTR_SUBU       "subu "
# define PTR_L          "lw "
# define PTR_S          "sw "
# define PTR_SRA        "sra "
# define PTR_SRL        "srl "
# define PTR_SLL        "sll "
#endif

#define MMI_XSawp_BH(f0, f2, f4, f6, f8, f10) \
  "mov.d      "#f8", "#f2"                \n\t" \
  "punpckhbh  "#f2", "#f0", "#f4"         \n\t" \
  "punpcklbh  "#f0", "#f0", "#f4"         \n\t" \
  "punpckhbh  "#f10", "#f8", "#f6"        \n\t" \
  "punpcklbh  "#f8", "#f8", "#f6"         \n\t"

#define MMI_XSawp_HW(f0, f2, f4, f6, f8, f10) \
  "mov.d      "#f8", "#f2"                \n\t" \
  "punpckhhw  "#f2", "#f0", "#f4"         \n\t" \
  "punpcklhw  "#f0", "#f0", "#f4"         \n\t" \
  "punpckhhw  "#f10", "#f8", "#f6"        \n\t" \
  "punpcklhw  "#f8", "#f8", "#f6"         \n\t"

#define MMI_XSawp_WD(f0, f2, f4, f6, f8, f10) \
  "mov.d      "#f8", "#f2"                \n\t" \
  "punpckhwd  "#f2", "#f0", "#f4"         \n\t" \
  "punpcklwd  "#f0", "#f0", "#f4"         \n\t" \
  "punpckhwd  "#f10", "#f8", "#f6"        \n\t" \
  "punpcklwd  "#f8", "#f8", "#f6"         \n\t"

#define MMI_XSawp_DQ(f0, f2, f4, f6, f8, f10) \
  "mov.d      "#f8", "#f2"                \n\t" \
  "mov.d      "#f2", "#f4"                \n\t" \
  "mov.d      "#f10", "#f6"               \n\t"

#define WELS_AbsH(f0, f2, f4, f6, f8, f10) \
  "xor        "#f8", "#f8", "#f8"         \n\t" \
  "psubh      "#f10", "#f8", "#f6"        \n\t" \
  "psubh      "#f8", "#f8", "#f4"         \n\t" \
  "pmaxsh     "#f0", "#f4", "#f8"         \n\t" \
  "pmaxsh     "#f2", "#f6", "#f10"        \n\t"

#define MMI_SumSub(f0, f2, f4, f6, f8, f10) \
  "mov.d      "#f8", "#f4"                    \n\t" \
  "mov.d      "#f10", "#f6"                   \n\t" \
  "paddh      "#f4", "#f4", "#f0"             \n\t" \
  "paddh      "#f6", "#f6", "#f2"             \n\t" \
  "psubh      "#f0", "#f0", "#f8"             \n\t" \
  "psubh      "#f2", "#f2", "#f10"            \n\t"

#define MMI_LoadDiff8P(f0, f2, f4, f6, f8, r0, r1) \
  "gsldlc1    "#f0", 0x7("#r0")               \n\t" \
  "gsldlc1    "#f4", 0x7("#r1")               \n\t" \
  "gsldrc1    "#f0", 0x0("#r0")               \n\t" \
  "gsldrc1    "#f4", 0x0("#r1")               \n\t" \
  "punpckhbh  "#f2", "#f0", "#f8"             \n\t" \
  "punpcklbh  "#f0", "#f0", "#f8"             \n\t" \
  "punpckhbh  "#f6", "#f4", "#f8"             \n\t" \
  "punpcklbh  "#f4", "#f4", "#f8"             \n\t" \
  "psubh      "#f0", "#f0", "#f4"             \n\t" \
  "psubh      "#f2", "#f2", "#f6"             \n\t"

#define MMI_TransTwo4x4H(f0, f2, f4, f6, f8, f10, f12, f14, f16, f18) \
  MMI_XSawp_HW(f0, f2, f4, f6, f16, f18)  \
  MMI_XSawp_HW(f8, f10, f12, f14, f4, f6) \
  MMI_XSawp_WD(f0, f2, f8, f10, f12, f14) \
  MMI_XSawp_WD(f16, f18, f4, f6, f8, f10) \
  MMI_XSawp_DQ(f0, f2, f16, f18, f4, f6)  \
  MMI_XSawp_DQ(f12, f14, f8, f10, f16, f18)

#define MMI_TransTwo8x8B(f0, f2, f4, f6, f8, f10, f12, f14, f16, f18, f20, f22, f24, f26, f28, f30, r0, r1) \
  "dmfc1      "#r0", "#f28"                   \n\t" \
  "dmfc1      "#r1", "#f30"                   \n\t" \
  MMI_XSawp_BH(f0, f2, f4, f6, f28, f30)            \
  MMI_XSawp_BH(f8, f10, f12, f14, f4, f6)           \
  MMI_XSawp_BH(f16, f18, f20, f22, f12, f14)        \
  "dmtc1      "#r0", "#f20"                   \n\t" \
  "dmtc1      "#r1", "#f22"                   \n\t" \
  "dmfc1      "#r0", "#f12"                   \n\t" \
  "dmfc1      "#r1", "#f14"                   \n\t" \
  MMI_XSawp_BH(f24, f26, f20, f22, f12, f14)        \
  MMI_XSawp_HW(f0, f2, f8, f10, f20, f22)           \
  MMI_XSawp_HW(f28, f30, f4, f6, f8, f10)           \
  MMI_XSawp_HW(f16, f18, f24, f26, f4, f6)          \
  "dmtc1      "#r0", "#f24"                   \n\t" \
  "dmtc1      "#r1", "#f26"                   \n\t" \
  "dmfc1      "#r0", "#f8"                    \n\t" \
  "dmfc1      "#r1", "#f10"                   \n\t" \
  MMI_XSawp_HW(f24, f26, f12, f14, f8, f10)         \
  MMI_XSawp_WD(f0, f2, f16, f18, f12, f14)          \
  MMI_XSawp_WD(f20, f22, f4, f6, f16, f18)          \
  MMI_XSawp_WD(f28, f30, f24, f26, f4, f6)          \
  "dmtc1      "#r0", "#f24"                   \n\t" \
  "dmtc1      "#r1", "#f26"                   \n\t" \
  "dmfc1      "#r0", "#f16"                   \n\t" \
  "dmfc1      "#r1", "#f18"                   \n\t" \
  MMI_XSawp_WD(f24, f26, f8, f10, f16, f18)         \
  MMI_XSawp_DQ(f0, f2, f28, f30, f8, f10)           \
  MMI_XSawp_DQ(f12, f14, f4, f6, f28, f30)          \
  MMI_XSawp_DQ(f20, f22, f24, f26, f4, f6)          \
  "dmtc1      "#r0", "#f24"                   \n\t" \
  "dmtc1      "#r1", "#f26"                   \n\t" \
  "dmfc1      "#r0", "#f0"                    \n\t" \
  "dmfc1      "#r1", "#f2"                    \n\t" \
  MMI_XSawp_DQ(f24, f26, f16, f18, f0, f2)          \
  "dmtc1      "#r0", "#f16"                   \n\t" \
  "dmtc1      "#r1", "#f18"                   \n\t"

#define MMI_XSwap_HW_SINGLE(f0, f2, f4) \
  "punpckhhw  "#f4", "#f0", "#f2"             \n\t" \
  "punpcklhw  "#f0", "#f0", "#f2"             \n\t"

#define MMI_XSwap_WD_SINGLE(f0, f2, f4) \
  "punpckhwd  "#f4", "#f0", "#f2"             \n\t" \
  "punpcklwd  "#f0", "#f0", "#f2"             \n\t"

#define MMI_Trans4x4H_SINGLE(f0, f2, f4, f6, f8) \
  MMI_XSwap_HW_SINGLE(f0, f2, f8)              \
  MMI_XSwap_HW_SINGLE(f4, f6, f2)              \
  MMI_XSwap_WD_SINGLE(f0, f4, f6)              \
  MMI_XSwap_WD_SINGLE(f8, f2, f4)

#define MMI_SumSub_SINGLE(f0, f2, f4) \
  "mov.d      "#f4", "#f2"                    \n\t" \
  "psubh      "#f2", "#f2", "#f0"             \n\t" \
  "paddh      "#f0", "#f0", "#f4"             \n\t"

#define MMI_SumSubMul2_SINGLE(f0, f2, f4, f6) \
  "mov.d      "#f4", "#f0"                    \n\t" \
  "psllh      "#f0", "#f0", "#f6"             \n\t" \
  "paddh      "#f0", "#f0", "#f2"             \n\t" \
  "psllh      "#f2", "#f2", "#f6"             \n\t" \
  "psubh      "#f4", "#f4", "#f2"             \n\t"

//f4 should be 0x0
#define MMI_Copy8Times(f0, f2, f4, r0) \
  "dmtc1      "#r0", "#f0"                    \n\t" \
  "pshufh     "#f0", "#f0", "#f4"             \n\t" \
  "mov.d      "#f2", "#f0"                    \n\t"

//f4 should be 0x0
#define MMI_Copy16Times(f0, f2, f4, r0) \
  "dmtc1      "#r0", "#f0"                    \n\t" \
  "punpcklbh  "#f0", "#f0", "#f0"             \n\t" \
  "pshufh     "#f0", "#f0", "#f4"             \n\t" \
  "mov.d      "#f2", "#f0"                    \n\t"

#define MMI_SumSubDiv2_SINGLE(f0, f2, f4, f6) \
  "psrah      "#f4", "#f2", "#f6"             \n\t" \
  "paddh      "#f4", "#f4", "#f0"             \n\t" \
  "psrah      "#f0", "#f0", "#f6"             \n\t" \
  "psubh      "#f0", "#f0", "#f2"             \n\t"

#define MMI_IDCT_SINGLE(f0, f2, f4, f6, f8, f10, f12) \
  MMI_SumSub_SINGLE(f6, f8, f10)             \
  MMI_SumSubDiv2_SINGLE(f4, f2, f0, f12)     \
  MMI_SumSub_SINGLE(f0, f6, f10)             \
  MMI_SumSub_SINGLE(f4, f8, f10)

#define MMI_StoreDiff4P_SINGLE(f0, f2, f4, f6, r0, r1, f8) \
  "gsldlc1    "#f2", 0x7("#r1")               \n\t" \
  "gsldrc1    "#f2", 0x0("#r1")               \n\t" \
  "punpcklbh  "#f2", "#f2", "#f6"             \n\t" \
  "paddh      "#f0", "#f0", "#f4"             \n\t" \
  "psrah      "#f0", "#f0", "#f8"             \n\t" \
  "paddsh     "#f0", "#f0", "#f2"             \n\t" \
  "packushb   "#f0", "#f0", "#f2"             \n\t" \
  "gsswlc1    "#f0", 0x3("#r0")               \n\t" \
  "gsswrc1    "#f0", 0x0("#r0")               \n\t"

#define SUMH_HORIZON(f0, f2, f4, f6, f8) \
  "paddh      "#f0", "#f0", "#f2"                       \n\t" \
  "punpckhhw  "#f2", "#f0", "#f8"                       \n\t" \
  "punpcklhw  "#f0", "#f0", "#f8"                       \n\t" \
  "paddw      "#f0", "#f0", "#f2"                       \n\t" \
  "punpckhwd  "#f2", "#f0", "#f0"                       \n\t" \
  "paddw      "#f0", "#f0", "#f2"                       \n\t"

#define LOAD_COLUMN(f0, f2, f4, f6, f8, f10, f12, f14, r0, r1, r2) \
  "daddu      "#r2", "#r0", "#r1"                       \n\t" \
  "gsldlc1    "#f0", 0x7("#r0")                         \n\t" \
  "gsldlc1    "#f4", 0x7("#r2")                         \n\t" \
  "gsldrc1    "#f0", 0x0("#r0")                         \n\t" \
  "gsldrc1    "#f4", 0x0("#r2")                         \n\t" \
  "punpcklbh  "#f0", "#f0", "#f4"                       \n\t" \
  "daddu      "#r0", "#r2", "#r1"                       \n\t" \
  "daddu      "#r2", "#r0", "#r1"                       \n\t" \
  "gsldlc1    "#f8", 0x7("#r0")                         \n\t" \
  "gsldlc1    "#f4", 0x7("#r2")                         \n\t" \
  "gsldrc1    "#f8", 0x0("#r0")                         \n\t" \
  "gsldrc1    "#f4", 0x0("#r2")                         \n\t" \
  "punpcklbh  "#f8", "#f8", "#f4"                       \n\t" \
  "punpckhhw  "#f2", "#f0", "#f8"                       \n\t" \
  "punpcklhw  "#f0", "#f0", "#f8"                       \n\t" \
  "daddu      "#r0", "#r2", "#r1"                       \n\t" \
  "daddu      "#r2", "#r0", "#r1"                       \n\t" \
  "gsldlc1    "#f12", 0x7("#r0")                        \n\t" \
  "gsldlc1    "#f4", 0x7("#r2")                         \n\t" \
  "gsldrc1    "#f12", 0x0("#r0")                        \n\t" \
  "gsldrc1    "#f4", 0x0("#r2")                         \n\t" \
  "punpcklbh  "#f12", "#f12", "#f4"                     \n\t" \
  "daddu      "#r0", "#r2", "#r1"                       \n\t" \
  "daddu      "#r2", "#r0", "#r1"                       \n\t" \
  "gsldlc1    "#f8", 0x7("#r0")                         \n\t" \
  "gsldlc1    "#f4", 0x7("#r2")                         \n\t" \
  "gsldrc1    "#f8", 0x0("#r0")                         \n\t" \
  "gsldrc1    "#f4", 0x0("#r2")                         \n\t" \
  "punpcklbh  "#f8", "#f8", "#f4"                       \n\t" \
  "punpckhhw  "#f14", "#f12", "#f8"                     \n\t" \
  "punpcklhw  "#f12", "#f12", "#f8"                     \n\t" \
  "daddu      "#r0", "#r2", "#r1"                       \n\t" \
  "punpcklwd  "#f0", "#f2", "#f14"                      \n\t" \
  "punpckhwd  "#f2", "#f2", "#f14"                      \n\t"

#define LOAD_COLUMN_C(f0, f2, f4, f6, r0, r1, r2) \
  "daddu      "#r2", "#r0", "#r1"                       \n\t" \
  "gsldlc1    "#f0", 0x7("#r0")                         \n\t" \
  "gsldlc1    "#f2", 0x7("#r2")                         \n\t" \
  "gsldrc1    "#f0", 0x0("#r0")                         \n\t" \
  "gsldrc1    "#f2", 0x0("#r2")                         \n\t" \
  "punpcklbh  "#f0", "#f0", "#f2"                       \n\t" \
  "daddu      "#r0", "#r2", "#r1"                       \n\t" \
  "daddu      "#r2", "#r0", "#r1"                       \n\t" \
  "gsldlc1    "#f4", 0x7("#r0")                         \n\t" \
  "gsldlc1    "#f2", 0x7("#r2")                         \n\t" \
  "gsldrc1    "#f4", 0x0("#r0")                         \n\t" \
  "gsldrc1    "#f2", 0x0("#r2")                         \n\t" \
  "punpcklbh  "#f4", "#f4", "#f2"                       \n\t" \
  "punpckhhw  "#f0", "#f0", "#f4"                       \n\t" \
  "daddu      "#r0", "#r2", "#r1"                       \n\t"

/**
 * backup register
 */
#define BACKUP_REG \
   double __back_temp[8];                                      \
   if (_MIPS_SIM == _ABI64)                                    \
   __asm__ volatile (                                          \
     "gssqc1       $f25,      $f24,       0x00(%[temp])  \n\t" \
     "gssqc1       $f27,      $f26,       0x10(%[temp])  \n\t" \
     "gssqc1       $f29,      $f28,       0x20(%[temp])  \n\t" \
     "gssqc1       $f31,      $f30,       0x30(%[temp])  \n\t" \
     :                                                         \
     : [temp]"r"(__back_temp)                                  \
     : "memory"                                                \
   );                                                          \
  else                                                         \
   __asm__ volatile (                                          \
     "gssqc1       $f22,      $f20,       0x00(%[temp])  \n\t" \
     "gssqc1       $f26,      $f24,       0x10(%[temp])  \n\t" \
     "gssqc1       $f30,      $f28,       0x20(%[temp])  \n\t" \
     :                                                         \
     : [temp]"r"(__back_temp)                                  \
     : "memory"                                                \
   );

/**
 * recover register
 */
#define RECOVER_REG \
   if (_MIPS_SIM == _ABI64)                                    \
   __asm__ volatile (                                          \
     "gslqc1       $f25,      $f24,       0x00(%[temp])  \n\t" \
     "gslqc1       $f27,      $f26,       0x10(%[temp])  \n\t" \
     "gslqc1       $f29,      $f28,       0x20(%[temp])  \n\t" \
     "gslqc1       $f31,      $f30,       0x30(%[temp])  \n\t" \
     :                                                         \
     : [temp]"r"(__back_temp)                                  \
     : "memory"                                                \
   );                                                          \
   else                                                        \
   __asm__ volatile (                                          \
     "gslqc1       $f22,      $f20,       0x00(%[temp])  \n\t" \
     "gslqc1       $f26,      $f24,       0x10(%[temp])  \n\t" \
     "gslqc1       $f30,      $f28,       0x20(%[temp])  \n\t" \
     :                                                         \
     : [temp]"r"(__back_temp)                                  \
     : "memory"                                                \
   );

# define OK             1
# define NOTOK          0

#endif  /* ASMDEFS_MMI_H_ */