Automatically change MANGLE() into named inline asm operands when direct symbol references in inline asm are not supported.

This is part of the patch set adding support for Intel C compiler inline asm on Windows.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
pull/293/head
Matt Oliver 11 years ago committed by Michael Niedermayer
parent b2d3a45598
commit 8236747511
  1. 3
      configure
  2. 3
      libavcodec/x86/cabac.h
  3. 2
      libavcodec/x86/cavsdsp.c
  4. 1
      libavcodec/x86/dsputil_mmx.c
  5. 2
      libavcodec/x86/h264_i386.h
  6. 2
      libavcodec/x86/idct_sse2_xvid.c
  7. 3
      libavcodec/x86/lpc.c
  8. 3
      libavcodec/x86/motion_est.c
  9. 1
      libavcodec/x86/simple_idct.c
  10. 6
      libavcodec/x86/vc1dsp_mmx.c
  11. 34
      libavutil/x86/asm.h
  12. 7
      libpostproc/postprocess_template.c
  13. 2
      libswresample/x86/resample_mmx.h
  14. 11
      libswscale/x86/rgb2rgb_template.c
  15. 12
      libswscale/x86/swscale_template.c
  16. 9
      libswscale/x86/yuv2rgb_template.c

3
configure vendored

@ -1691,6 +1691,7 @@ TOOLCHAIN_FEATURES="
ibm_asm ibm_asm
inline_asm_labels inline_asm_labels
inline_asm_nonlocal_labels inline_asm_nonlocal_labels
inline_asm_direct_symbol_refs
pragma_deprecated pragma_deprecated
rsync_contimeout rsync_contimeout
symver_asm_label symver_asm_label
@ -4306,6 +4307,8 @@ EOF
# check whether xmm clobbers are supported # check whether xmm clobbers are supported
check_inline_asm xmm_clobbers '"":::"%xmm0"' check_inline_asm xmm_clobbers '"":::"%xmm0"'
check_inline_asm inline_asm_direct_symbol_refs '"movl test, %eax"'
# check whether binutils is new enough to compile SSSE3/MMXEXT # check whether binutils is new enough to compile SSSE3/MMXEXT
enabled ssse3 && check_inline_asm ssse3_inline '"pabsw %xmm0, %xmm0"' enabled ssse3 && check_inline_asm ssse3_inline '"pabsw %xmm0, %xmm0"'
enabled mmxext && check_inline_asm mmxext_inline '"pmaxub %mm0, %mm1"' enabled mmxext && check_inline_asm mmxext_inline '"pmaxub %mm0, %mm1"'

@ -110,7 +110,7 @@
"2: \n\t" "2: \n\t"
#else /* BROKEN_RELOCATIONS */ #else /* BROKEN_RELOCATIONS */
#define TABLES_ARG #define TABLES_ARG NAMED_CONSTRAINTS_ADD(ff_h264_cabac_tables)
#define RIP_ARG #define RIP_ARG
#if HAVE_FAST_CMOV #if HAVE_FAST_CMOV
@ -184,6 +184,7 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c,
__asm__ volatile( __asm__ volatile(
"lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t" "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
: "=&r"(tables) : "=&r"(tables)
: NAMED_CONSTRAINTS(ff_h264_cabac_tables)
); );
#endif #endif

@ -309,6 +309,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
\ \
: "+a"(src), "+c"(dst)\ : "+a"(src), "+c"(dst)\
: "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\ : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\
NAMED_CONSTRAINTS_ADD(MUL2)\
: "memory"\ : "memory"\
);\ );\
if(h==16){\ if(h==16){\
@ -324,6 +325,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
\ \
: "+a"(src), "+c"(dst)\ : "+a"(src), "+c"(dst)\
: "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\ : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\
NAMED_CONSTRAINTS_ADD(MUL2)\
: "memory"\ : "memory"\
);\ );\
}\ }\

@ -123,6 +123,7 @@ void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
put_signed_pixels_clamped_mmx_half(64) put_signed_pixels_clamped_mmx_half(64)
: "+&r" (pixels), "=&r" (line_skip3) : "+&r" (pixels), "=&r" (line_skip3)
: "r" (block), "r" (line_skip) : "r" (block), "r" (line_skip)
NAMED_CONSTRAINTS_ADD(ff_pb_80)
: "memory"); : "memory");
} }

@ -55,6 +55,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
__asm__ volatile( __asm__ volatile(
"lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t" "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
: "=&r"(tables) : "=&r"(tables)
: NAMED_CONSTRAINTS(ff_h264_cabac_tables)
); );
#endif #endif
@ -130,6 +131,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
__asm__ volatile( __asm__ volatile(
"lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t" "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
: "=&r"(tables) : "=&r"(tables)
: NAMED_CONSTRAINTS(ff_h264_cabac_tables)
); );
#endif #endif

@ -381,7 +381,7 @@ inline void ff_idct_xvid_sse2(short *block)
iLLM_PASS("%0") iLLM_PASS("%0")
"6: \n\t" "6: \n\t"
: "+r"(block) : "+r"(block)
: : NAMED_CONSTRAINTS(m127,iTab1,walkenIdctRounders,iTab2,iTab3,iTab4,tan3,tan1,tan2,sqrt2)
: XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" ,
"%xmm4" , "%xmm5" , "%xmm6" , "%xmm7" ,) "%xmm4" , "%xmm5" , "%xmm6" , "%xmm7" ,)
#if ARCH_X86_64 #if ARCH_X86_64

@ -72,6 +72,7 @@ static void lpc_apply_welch_window_sse2(const int32_t *data, int len,
"3: \n\t" "3: \n\t"
:"+&r"(i), "+&r"(j) :"+&r"(i), "+&r"(j)
:"r"(w_data+n2), "r"(data+n2), "m"(c), "r"(len) :"r"(w_data+n2), "r"(data+n2), "m"(c), "r"(len)
NAMED_CONSTRAINTS_ADD(pd_1,pd_2)
XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3", XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
"%xmm5", "%xmm6", "%xmm7") "%xmm5", "%xmm6", "%xmm7")
); );
@ -116,6 +117,7 @@ static void lpc_compute_autocorr_sse2(const double *data, int len, int lag,
"movsd %%xmm2, 16(%1) \n\t" "movsd %%xmm2, 16(%1) \n\t"
:"+&r"(i) :"+&r"(i)
:"r"(autoc+j), "r"(data+len), "r"(data+len-j) :"r"(autoc+j), "r"(data+len), "r"(data+len-j)
NAMED_CONSTRAINTS_ADD(pd_1)
:"memory" :"memory"
); );
} else { } else {
@ -139,6 +141,7 @@ static void lpc_compute_autocorr_sse2(const double *data, int len, int lag,
"movsd %%xmm1, %2 \n\t" "movsd %%xmm1, %2 \n\t"
:"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]) :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1])
:"r"(data+len), "r"(data+len-j) :"r"(data+len), "r"(data+len-j)
NAMED_CONSTRAINTS_ADD(pd_1)
); );
} }
} }

@ -193,7 +193,8 @@ static inline void sad8_4_mmxext(uint8_t *blk1, uint8_t *blk2,
"sub $2, %0 \n\t" "sub $2, %0 \n\t"
" jg 1b \n\t" " jg 1b \n\t"
: "+r" (h), "+r" (blk1), "+r" (blk2) : "+r" (h), "+r" (blk1), "+r" (blk2)
: "r" ((x86_reg) stride)); : "r" ((x86_reg) stride)
NAMED_CONSTRAINTS_ADD(bone));
} }
static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,

@ -1143,6 +1143,7 @@ Temp
"9: \n\t" "9: \n\t"
:: "r" (block), "r" (temp), "r" (coeffs) :: "r" (block), "r" (temp), "r" (coeffs)
NAMED_CONSTRAINTS_ADD(wm1010,d40000)
: "%eax" : "%eax"
); );
} }

@ -110,6 +110,7 @@ static void vc1_put_ver_16b_shift2_mmx(int16_t *dst,
: "+r"(src), "+r"(dst) : "+r"(src), "+r"(dst)
: "r"(stride), "r"(-2*stride), : "r"(stride), "r"(-2*stride),
"m"(shift), "m"(rnd), "r"(9*stride-4) "m"(shift), "m"(rnd), "r"(9*stride-4)
NAMED_CONSTRAINTS_ADD(ff_pw_9)
: "%"REG_c, "memory" : "%"REG_c, "memory"
); );
} }
@ -154,6 +155,7 @@ static void OPNAME ## vc1_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,\
"jnz 1b \n\t"\ "jnz 1b \n\t"\
: "+r"(h), "+r" (src), "+r" (dst)\ : "+r"(h), "+r" (src), "+r" (dst)\
: "r"(stride), "m"(rnd)\ : "r"(stride), "m"(rnd)\
NAMED_CONSTRAINTS_ADD(ff_pw_128,ff_pw_9)\
: "memory"\ : "memory"\
);\ );\
} }
@ -212,6 +214,7 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\
: "+r"(src), "+r"(dst)\ : "+r"(src), "+r"(dst)\
: "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),\ : "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),\
"g"(stride-offset)\ "g"(stride-offset)\
NAMED_CONSTRAINTS_ADD(ff_pw_9)\
: "%"REG_c, "memory"\ : "%"REG_c, "memory"\
);\ );\
} }
@ -314,6 +317,7 @@ vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src, \
: "+r"(h), "+r" (src), "+r" (dst) \ : "+r"(h), "+r" (src), "+r" (dst) \
: "r"(src_stride), "r"(3*src_stride), \ : "r"(src_stride), "r"(3*src_stride), \
"m"(rnd), "m"(shift) \ "m"(rnd), "m"(shift) \
NAMED_CONSTRAINTS_ADD(ff_pw_3,ff_pw_53,ff_pw_18) \
: "memory" \ : "memory" \
); \ ); \
} }
@ -351,6 +355,7 @@ OPNAME ## vc1_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride, \
"jnz 1b \n\t" \ "jnz 1b \n\t" \
: "+r"(h), "+r" (src), "+r" (dst) \ : "+r"(h), "+r" (src), "+r" (dst) \
: "r"(stride), "m"(rnd) \ : "r"(stride), "m"(rnd) \
NAMED_CONSTRAINTS_ADD(ff_pw_3,ff_pw_18,ff_pw_53,ff_pw_128) \
: "memory" \ : "memory" \
); \ ); \
} }
@ -386,6 +391,7 @@ OPNAME ## vc1_## NAME ## _mmx(uint8_t *dst, const uint8_t *src, \
"jnz 1b \n\t" \ "jnz 1b \n\t" \
: "+r"(h), "+r" (src), "+r" (dst) \ : "+r"(h), "+r" (src), "+r" (dst) \
: "r"(offset), "r"(3*offset), "g"(stride), "m"(rnd) \ : "r"(offset), "r"(3*offset), "g"(stride), "m"(rnd) \
NAMED_CONSTRAINTS_ADD(ff_pw_53,ff_pw_18,ff_pw_3) \
: "memory" \ : "memory" \
); \ ); \
} }

@ -107,6 +107,40 @@ typedef int x86_reg;
# define LOCAL_MANGLE(a) #a # define LOCAL_MANGLE(a) #a
#endif #endif
#if HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS
# define MANGLE(a) EXTERN_PREFIX LOCAL_MANGLE(a) # define MANGLE(a) EXTERN_PREFIX LOCAL_MANGLE(a)
# define NAMED_CONSTRAINTS_ADD(...)
# define NAMED_CONSTRAINTS(...)
#else
/* When direct symbol references are used in code passed to a compiler that does not support them
* then these references need to be converted to named asm constraints instead.
* Instead of returning a direct symbol MANGLE now returns a named constraint for that specific symbol.
* In order for this to work there must also be a corresponding entry in the asm-interface. To add this
* entry use the macro NAMED_CONSTRAINTS() and pass in a list of each symbol reference used in the
* corresponding block of code. (e.g. NAMED_CONSTRAINTS(var1,var2,var3) where var1 is the first symbol etc. ).
* If there are already existing constraints then use NAMED_CONSTRAINTS_ADD to add to the existing constraint list.
*/
# define MANGLE(a) "%["#a"]"
// Intel/MSVC does not correctly expand va-args so we need a rather ugly hack in order to get it to work
# define FE_0(P,X) P(X)
# define FE_1(P,X,X1) P(X), FE_0(P,X1)
# define FE_2(P,X,X1,X2) P(X), FE_1(P,X1,X2)
# define FE_3(P,X,X1,X2,X3) P(X), FE_2(P,X1,X2,X3)
# define FE_4(P,X,X1,X2,X3,X4) P(X), FE_3(P,X1,X2,X3,X4)
# define FE_5(P,X,X1,X2,X3,X4,X5) P(X), FE_4(P,X1,X2,X3,X4,X5)
# define FE_6(P,X,X1,X2,X3,X4,X5,X6) P(X), FE_5(P,X1,X2,X3,X4,X5,X6)
# define FE_7(P,X,X1,X2,X3,X4,X5,X6,X7) P(X), FE_6(P,X1,X2,X3,X4,X5,X6,X7)
# define FE_8(P,X,X1,X2,X3,X4,X5,X6,X7,X8) P(X), FE_7(P,X1,X2,X3,X4,X5,X6,X7,X8)
# define FE_9(P,X,X1,X2,X3,X4,X5,X6,X7,X8,X9) P(X), FE_8(P,X1,X2,X3,X4,X5,X6,X7,X8,X9)
# define GET_FE_IMPL(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,NAME,...) NAME
# define GET_FE(A) GET_FE_IMPL A
# define GET_FE_GLUE(x, y) x y
# define FOR_EACH_VA(P,...) GET_FE_GLUE(GET_FE((__VA_ARGS__,FE_9,FE_8,FE_7,FE_6,FE_5,FE_4,FE_3,FE_2,FE_1,FE_0)), (P,__VA_ARGS__))
# define NAME_CONSTRAINT(x) [x] "m"(x)
// Parameters are a list of each symbol reference required
# define NAMED_CONSTRAINTS_ADD(...) , FOR_EACH_VA(NAME_CONSTRAINT,__VA_ARGS__)
// Same but without comma for when there are no previously defined constraints
# define NAMED_CONSTRAINTS(...) FOR_EACH_VA(NAME_CONSTRAINT,__VA_ARGS__)
#endif
#endif /* AVUTIL_X86_ASM_H */ #endif /* AVUTIL_X86_ASM_H */

@ -490,6 +490,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
: :
: "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb) : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb)
NAMED_CONSTRAINTS_ADD(b01)
: "%"REG_a, "%"REG_c : "%"REG_a, "%"REG_c
); );
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
@ -755,6 +756,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
: :
: "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
NAMED_CONSTRAINTS_ADD(b80,b00,b01)
: "%"REG_a, "%"REG_c : "%"REG_a, "%"REG_c
); );
@ -1042,6 +1044,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
: "+r" (src) : "+r" (src)
: "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp) : "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp)
NAMED_CONSTRAINTS_ADD(w05,w20)
: "%"REG_a : "%"REG_a
); );
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
@ -1313,6 +1316,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
"1: \n\t" "1: \n\t"
: : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp) : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp)
NAMED_CONSTRAINTS_ADD(deringThreshold,b00,b02,b08)
: "%"REG_a, "%"REG_d, "%"REG_SP : "%"REG_a, "%"REG_d, "%"REG_SP
); );
#else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) #else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW)
@ -2446,6 +2450,7 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
"4: \n\t" "4: \n\t"
:: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast) :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast)
NAMED_CONSTRAINTS_ADD(b80)
: "%"REG_a, "%"REG_d, "%"REG_c, "memory" : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
); );
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
@ -2790,6 +2795,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
: "+&r"(src) : "+&r"(src)
: "r" ((x86_reg)step), "m" (c->pQPb), "r"(sums), "g"(src) : "r" ((x86_reg)step), "m" (c->pQPb), "r"(sums), "g"(src)
NAMED_CONSTRAINTS_ADD(w04)
); );
src+= step; // src points to begin of the 8x8 Block src+= step; // src points to begin of the 8x8 Block
@ -3061,6 +3067,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
: "+r" (temp_src) : "+r" (temp_src)
: "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask), "r"(tmp) : "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask), "r"(tmp)
NAMED_CONSTRAINTS_ADD(w05,w20)
: "%"REG_a : "%"REG_a
); );
} }

@ -46,6 +46,7 @@ __asm__ volatile(\
: "r" (((uint8_t*)(src+sample_index))-len),\ : "r" (((uint8_t*)(src+sample_index))-len),\
"r" (((uint8_t*)filter)-len),\ "r" (((uint8_t*)filter)-len),\
"r" (dst+dst_index)\ "r" (dst+dst_index)\
NAMED_CONSTRAINTS_ADD(ff_resample_int16_rounder)\
); );
#define COMMON_CORE_INT16_SSE2 \ #define COMMON_CORE_INT16_SSE2 \
@ -69,4 +70,5 @@ __asm__ volatile(\
: "r" (((uint8_t*)(src+sample_index))-len),\ : "r" (((uint8_t*)(src+sample_index))-len),\
"r" (((uint8_t*)filter)-len),\ "r" (((uint8_t*)filter)-len),\
"r" (dst+dst_index)\ "r" (dst+dst_index)\
NAMED_CONSTRAINTS_ADD(ff_resample_int16_rounder)\
); );

@ -163,6 +163,7 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"movq %%mm5, %%mm7 \n\t" "movq %%mm5, %%mm7 \n\t"
STORE_BGR24_MMX STORE_BGR24_MMX
:: "r"(dest), "r"(s) :: "r"(dest), "r"(s)
NAMED_CONSTRAINTS_ADD(mask24l,mask24h)
:"memory"); :"memory");
dest += 24; dest += 24;
s += 32; s += 32;
@ -785,6 +786,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
:"=m"(*d) :"=m"(*d)
:"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) :"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
NAMED_CONSTRAINTS_ADD(mul15_mid,mul15_hi)
:"memory"); :"memory");
/* borrowed 32 to 24 */ /* borrowed 32 to 24 */
__asm__ volatile( __asm__ volatile(
@ -801,6 +803,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
STORE_BGR24_MMX STORE_BGR24_MMX
:: "r"(d), "m"(*s) :: "r"(d), "m"(*s)
NAMED_CONSTRAINTS_ADD(mask24l,mask24h)
:"memory"); :"memory");
d += 24; d += 24;
s += 8; s += 8;
@ -890,6 +893,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t" "por %%mm5, %%mm3 \n\t"
:"=m"(*d) :"=m"(*d)
:"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) :"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
NAMED_CONSTRAINTS_ADD(mul15_mid,mul16_mid,mul15_hi)
:"memory"); :"memory");
/* borrowed 32 to 24 */ /* borrowed 32 to 24 */
__asm__ volatile( __asm__ volatile(
@ -906,6 +910,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
STORE_BGR24_MMX STORE_BGR24_MMX
:: "r"(d), "m"(*s) :: "r"(d), "m"(*s)
NAMED_CONSTRAINTS_ADD(mask24l,mask24h)
:"memory"); :"memory");
d += 24; d += 24;
s += 8; s += 8;
@ -966,6 +971,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
"pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
PACK_RGB32 PACK_RGB32
::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid) ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid)
NAMED_CONSTRAINTS_ADD(mul15_hi)
:"memory"); :"memory");
d += 16; d += 16;
s += 4; s += 4;
@ -1009,6 +1015,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
"pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
PACK_RGB32 PACK_RGB32
::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid) ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid)
NAMED_CONSTRAINTS_ADD(mul16_mid,mul15_hi)
:"memory"); :"memory");
d += 16; d += 16;
s += 4; s += 4;
@ -1133,6 +1140,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"2: \n\t" "2: \n\t"
: "+a" (mmx_size) : "+a" (mmx_size)
: "r" (src-mmx_size), "r"(dst-mmx_size) : "r" (src-mmx_size), "r"(dst-mmx_size)
NAMED_CONSTRAINTS_ADD(mask24r,mask24g,mask24b)
); );
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
@ -1468,6 +1476,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
:: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ), :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ),
"r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
"g" (-mmxSize) "g" (-mmxSize)
NAMED_CONSTRAINTS_ADD(mmx_ff)
: "%"REG_a : "%"REG_a
); );
@ -1689,6 +1698,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"add $8, %%"REG_a" \n\t" "add $8, %%"REG_a" \n\t"
" js 1b \n\t" " js 1b \n\t"
: : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv) : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv)
NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2YOffset)
: "%"REG_a, "%"REG_d : "%"REG_a, "%"REG_d
); );
ydst += lumStride; ydst += lumStride;
@ -1837,6 +1847,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"add $4, %%"REG_a" \n\t" "add $4, %%"REG_a" \n\t"
" js 1b \n\t" " js 1b \n\t"
: : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv) : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv)
NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2UVOffset)
: "%"REG_a, "%"REG_d : "%"REG_a, "%"REG_d
); );

@ -172,6 +172,7 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
:: "r" (&c->redDither), \ :: "r" (&c->redDither), \
"m" (dummy), "m" (dummy), "m" (dummy),\ "m" (dummy), "m" (dummy), "m" (dummy),\
"r" (dest), "m" (dstW_reg), "m"(uv_off) \ "r" (dest), "m" (dstW_reg), "m"(uv_off) \
NAMED_CONSTRAINTS_ADD(bF8,bFC) \
: "%"REG_a, "%"REG_d, "%"REG_S \ : "%"REG_a, "%"REG_d, "%"REG_S \
); );
@ -680,6 +681,7 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
:: "r" (&c->redDither), :: "r" (&c->redDither),
"m" (dummy), "m" (dummy), "m" (dummy), "m" (dummy), "m" (dummy), "m" (dummy),
"r" (dest), "m" (dstW_reg), "m"(uv_off) "r" (dest), "m" (dstW_reg), "m"(uv_off)
NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
); );
} }
@ -704,6 +706,7 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
:: "r" (&c->redDither), :: "r" (&c->redDither),
"m" (dummy), "m" (dummy), "m" (dummy), "m" (dummy), "m" (dummy), "m" (dummy),
"r" (dest), "m" (dstW_reg), "m"(uv_off) "r" (dest), "m" (dstW_reg), "m"(uv_off)
NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
); );
} }
@ -931,6 +934,7 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
); );
} }
@ -960,6 +964,7 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
NAMED_CONSTRAINTS_ADD(bF8)
); );
} }
@ -989,6 +994,7 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
NAMED_CONSTRAINTS_ADD(bF8,bFC)
); );
} }
@ -1262,6 +1268,7 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
); );
} else { } else {
const int16_t *ubuf1 = ubuf[1]; const int16_t *ubuf1 = ubuf[1];
@ -1276,6 +1283,7 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
); );
} }
} }
@ -1307,6 +1315,7 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
NAMED_CONSTRAINTS_ADD(bF8)
); );
} else { } else {
const int16_t *ubuf1 = ubuf[1]; const int16_t *ubuf1 = ubuf[1];
@ -1327,6 +1336,7 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
NAMED_CONSTRAINTS_ADD(bF8)
); );
} }
} }
@ -1358,6 +1368,7 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
NAMED_CONSTRAINTS_ADD(bF8,bFC)
); );
} else { } else {
const int16_t *ubuf1 = ubuf[1]; const int16_t *ubuf1 = ubuf[1];
@ -1378,6 +1389,7 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
NAMED_CONSTRAINTS_ADD(bF8,bFC)
); );
} }
} }

@ -134,10 +134,18 @@
"add $4, %0\n\t" \ "add $4, %0\n\t" \
"js 1b\n\t" \ "js 1b\n\t" \
#if COMPILE_TEMPLATE_MMXEXT
#define RGB_PACK24_B_OPERANDS NAMED_CONSTRAINTS_ADD(mask1101,mask0110,mask0100,mask0010,mask1001)
#else
#define RGB_PACK24_B_OPERANDS
#endif
#define YUV2RGB_OPERANDS \ #define YUV2RGB_OPERANDS \
: "+r" (index), "+r" (image) \ : "+r" (index), "+r" (image) \
: "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \
"r" (py - 2*index) \ "r" (py - 2*index) \
NAMED_CONSTRAINTS_ADD(mmx_00ffw,pb_03,pb_07,mmx_redmask,pb_e0) \
RGB_PACK24_B_OPERANDS \
: "memory" \ : "memory" \
); \ ); \
} \ } \
@ -146,6 +154,7 @@
: "+r" (index), "+r" (image) \ : "+r" (index), "+r" (image) \
: "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \
"r" (py - 2*index), "r" (pa - 2*index) \ "r" (py - 2*index), "r" (pa - 2*index) \
NAMED_CONSTRAINTS_ADD(mmx_00ffw) \
: "memory" \ : "memory" \
); \ ); \
} \ } \

Loading…
Cancel
Save