ref: f47d263f1bb01fe7701249fe5df4ac2e1534bd51
parent: 238bb38513dbc29081870757887c17c0c2c383e0
author: David Turner <[email protected]>
date: Mon Sep 1 22:21:58 EDT 2008
* include/freetype/config/ftoption.h, include/freetype/config/ftconfig.h, builds/unix/ftconfig.in, include/freetype/freetype.h, src/base/ftcalc.c: Make FT_MulFix an inlined function. Also provide an assembler implementation for ARM architectures. This is done to speed up FreeType a little (on x86, 3% when loading+hinting and 10% when rendering; the savings on ARM are more significant). Disable this by undefining FT_CONFIG_OPTION_INLINE_MULFIX in ftoption.h.
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
2008-09-01 david turner <[email protected]>
+ * include/freetype/config/ftoption.h,
+ include/freetype/config/ftconfig.h, builds/unix/ftconfig.in,
+ include/freetype/freetype.h, src/base/ftcalc.c:
+ Make FT_MulFix an inlined function. Also provide an assembler
+ implementation for ARM architectures. This is done to speed up
+ FreeType a little (on x86, 3% when loading+hinting and 10% when
+ rendering; the savings on ARM are more significant).
+ Disable this by undefining FT_CONFIG_OPTION_INLINE_MULFIX in
+ ftoption.h.
+
* include/freetype/ftadvanc.h, src/base/ftadvanc.c,
include/freetype/config/ftheader.h, include/freetype/freetype.h,
src/base/Jamfile, src/base/rules.mk, src/cff/cffdrivr.c,
--- a/builds/unix/ftconfig.in
+++ b/builds/unix/ftconfig.in
@@ -197,6 +197,67 @@
#endif /* FT_SIZEOF_LONG == 8 */
+#if !defined(FT_CONFIG_OPTION_NO_ASSEMBLER)
+/* Provide assembler fragments for performance-critical
+ * functions. These must be defined `static __inline__'
+ * with GCC.
+ */
+#if defined(__GNUC__)
+
+# if defined(__arm__) && !defined(__thumb__)
+# define FT_MULFIX_ASSEMBLER FT_MulFix_arm
+ static __inline__ FT_Int32
+ FT_MulFix_arm( FT_Int32 a, FT_Int32 b )
+ { /* return bits 16..47 of a*b after adding 0x8000 (0x7FFF if a*b < 0) */
+ register FT_Int32 t, t2; /* t = high word, t2 = low word of the product */
+ __asm__ __volatile__ ( /* `__asm__': GCC disables plain `asm' with -ansi/-std=c99 */
+ "smull %1, %2, %4, %3\n\t" /* (lo=%1,hi=%2) = a*b */
+ "mov %0, %2, asr #31\n\t" /* %0 = (hi >> 31), i.e. 0 or -1 */
+ "add %0, %0, #0x8000\n\t" /* %0 += 0x8000 (rounding term) */
+ "adds %1, %1, %0\n\t" /* %1 += %0, sets carry flag */
+ "adc %2, %2, #0\n\t" /* %2 += carry */
+ "mov %0, %1, lsr #16\n\t" /* %0 = %1 >> 16 */
+ "orr %0, %0, %2, lsl #16\n\t" /* %0 |= %2 << 16 (explicit 3-operand form) */
+ : "=r"(a), "=&r"(t2), "=&r"(t)
+ : "r"(a), "r"(b)
+ : "cc" ); /* `adds' modifies the condition flags */
+ return a;
+ }
+# endif /* __arm__ */
+
+# if defined(__i386__) || defined(i386) /* GCC does not predefine `i386' with -ansi/-std=c99 */
+# define FT_MULFIX_ASSEMBLER FT_MulFix_i386
+ static __inline__ FT_Int32
+ FT_MulFix_i386( FT_Int32 a, FT_Int32 b )
+ {
+ register FT_Int32 result;
+ /* return bits 16..47 of a*b after adding 0x8000 (0x7FFF if a*b < 0) */
+ __asm__ __volatile__ (
+ "imul %%edx\n" /* edx:eax = eax * edx (signed) */
+ "movl %%edx, %%ecx\n" /* ecx = high word of the product */
+ "sarl $31, %%ecx\n" /* ecx = 0, or -1 if product < 0 */
+ "addl $0x8000, %%ecx\n" /* ecx = 0x8000 or 0x7FFF */
+ "addl %%ecx, %%eax\n" /* add rounding term to low word */
+ "adcl $0, %%edx\n" /* propagate carry into high word */
+ "shrl $16, %%eax\n" /* eax = low word >> 16 */
+ "shll $16, %%edx\n" /* edx = high word << 16 */
+ "addl %%edx, %%eax\n" /* eax = combined 32-bit result */
+ : "=a"(result), "+d"(b) /* result leaves in eax; edx holds b and is overwritten */
+ : "a"(a) /* a enters in eax */
+ : "%ecx" /* scratch register for the rounding term */
+ );
+ return result;
+ }
+# endif /* __i386__ || i386 */
+#endif /* __GNUC__ */
+#endif /* !NO_ASSEMBLER */
+
+#ifdef FT_CONFIG_OPTION_INLINE_MULFIX /* inlining requested and ... */
+# ifdef FT_MULFIX_ASSEMBLER /* ... an assembler version was selected above */
+# define FT_MULFIX_INLINED FT_MULFIX_ASSEMBLER
+# endif /* FT_MULFIX_ASSEMBLER */
+#endif /* FT_CONFIG_OPTION_INLINE_MULFIX */
+
#define FT_BEGIN_STMNT do {
#define FT_END_STMNT } while ( 0 )
--- a/include/freetype/config/ftconfig.h
+++ b/include/freetype/config/ftconfig.h
@@ -225,6 +225,67 @@
#endif
+#if !defined(FT_CONFIG_OPTION_NO_ASSEMBLER)
+/* Provide assembler fragments for performance-critical
+ * functions. These must be defined `static __inline__'
+ * with GCC.
+ */
+#if defined(__GNUC__)
+
+# if defined(__arm__) && !defined(__thumb__)
+# define FT_MULFIX_ASSEMBLER FT_MulFix_arm
+ static __inline__ FT_Int32
+ FT_MulFix_arm( FT_Int32 a, FT_Int32 b )
+ { /* return bits 16..47 of a*b after adding 0x8000 (0x7FFF if a*b < 0) */
+ register FT_Int32 t, t2; /* t = high word, t2 = low word of the product */
+ __asm__ __volatile__ ( /* `__asm__': GCC disables plain `asm' with -ansi/-std=c99 */
+ "smull %1, %2, %4, %3\n\t" /* (lo=%1,hi=%2) = a*b */
+ "mov %0, %2, asr #31\n\t" /* %0 = (hi >> 31), i.e. 0 or -1 */
+ "add %0, %0, #0x8000\n\t" /* %0 += 0x8000 (rounding term) */
+ "adds %1, %1, %0\n\t" /* %1 += %0, sets carry flag */
+ "adc %2, %2, #0\n\t" /* %2 += carry */
+ "mov %0, %1, lsr #16\n\t" /* %0 = %1 >> 16 */
+ "orr %0, %0, %2, lsl #16\n\t" /* %0 |= %2 << 16 (explicit 3-operand form) */
+ : "=r"(a), "=&r"(t2), "=&r"(t)
+ : "r"(a), "r"(b)
+ : "cc" ); /* `adds' modifies the condition flags */
+ return a;
+ }
+# endif /* __arm__ */
+
+# if defined(__i386__) || defined(i386) /* GCC does not predefine `i386' with -ansi/-std=c99 */
+# define FT_MULFIX_ASSEMBLER FT_MulFix_i386
+ static __inline__ FT_Int32
+ FT_MulFix_i386( FT_Int32 a, FT_Int32 b )
+ {
+ register FT_Int32 result;
+ /* return bits 16..47 of a*b after adding 0x8000 (0x7FFF if a*b < 0) */
+ __asm__ __volatile__ (
+ "imul %%edx\n" /* edx:eax = eax * edx (signed) */
+ "movl %%edx, %%ecx\n" /* ecx = high word of the product */
+ "sarl $31, %%ecx\n" /* ecx = 0, or -1 if product < 0 */
+ "addl $0x8000, %%ecx\n" /* ecx = 0x8000 or 0x7FFF */
+ "addl %%ecx, %%eax\n" /* add rounding term to low word */
+ "adcl $0, %%edx\n" /* propagate carry into high word */
+ "shrl $16, %%eax\n" /* eax = low word >> 16 */
+ "shll $16, %%edx\n" /* edx = high word << 16 */
+ "addl %%edx, %%eax\n" /* eax = combined 32-bit result */
+ : "=a"(result), "+d"(b) /* result leaves in eax; edx holds b and is overwritten */
+ : "a"(a) /* a enters in eax */
+ : "%ecx" /* scratch register for the rounding term */
+ );
+ return result;
+ }
+# endif /* __i386__ || i386 */
+#endif /* __GNUC__ */
+#endif /* !NO_ASSEMBLER */
+
+#ifdef FT_CONFIG_OPTION_INLINE_MULFIX /* inlining requested and ... */
+# ifdef FT_MULFIX_ASSEMBLER /* ... an assembler version was selected above */
+# define FT_MULFIX_INLINED FT_MULFIX_ASSEMBLER
+# endif /* FT_MULFIX_ASSEMBLER */
+#endif /* FT_CONFIG_OPTION_INLINE_MULFIX */
+
/* determine whether we have a 64-bit int type for platforms without */
/* Autoconf */
--- a/include/freetype/config/ftoption.h
+++ b/include/freetype/config/ftoption.h
@@ -117,6 +117,26 @@
/*************************************************************************/
/* */
+ /* When this macro is defined, do not try to use an assembler version */
+ /* of performance-critical functions (e.g. FT_MulFix). You should only */
+ /* do that to verify that the assembler function works properly, or to */
+ /* benchmark the various implementations. */
+/* #define FT_CONFIG_OPTION_NO_ASSEMBLER */
+
+ /*************************************************************************/
+ /* */
+ /* When this macro is defined, try to use an inlined assembler version */
+ /* of the FT_MulFix function, which appears to be a hotspot when loading */
+ /* and hinting glyphs. */
+ /* */
+ /* Note that if your compiler or CPU isn't supported, this will default */
+ /* to the standard and portable implementation in src/base/ftcalc.c. */
+ /* */
+#define FT_CONFIG_OPTION_INLINE_MULFIX
+
+
+ /*************************************************************************/
+ /* */
/* LZW-compressed file support. */
/* */
/* FreeType now handles font files that have been compressed with the */
--- a/include/freetype/freetype.h
+++ b/include/freetype/freetype.h
@@ -3468,10 +3468,13 @@
/* _second_ argument of this function; this can make a great */
/* difference. */
/* */
+#ifdef FT_MULFIX_INLINED /* an inline version is available: map calls to it */
+# define FT_MulFix(a,b) FT_MULFIX_INLINED(a,b)
+#else /* otherwise declare the exported function */
FT_EXPORT( FT_Long )
FT_MulFix( FT_Long a,
FT_Long b );
-
+#endif /* FT_MULFIX_INLINED */
/*************************************************************************/
/* */
--- a/src/base/ftcalc.c
+++ b/src/base/ftcalc.c
@@ -38,6 +38,9 @@
#include FT_INTERNAL_DEBUG_H
#include FT_INTERNAL_OBJECTS_H
+#ifdef FT_MULFIX_INLINED
+#undef FT_MulFix
+#endif
/* we need to define a 64-bits data type here */
@@ -193,6 +196,9 @@
FT_MulFix( FT_Long a,
FT_Long b )
{
+#ifdef FT_MULFIX_ASSEMBLER
+ return FT_MULFIX_ASSEMBLER(a,b);
+#else
FT_Int s = 1;
FT_Long c;
@@ -202,6 +208,7 @@
c = (FT_Long)( ( (FT_Int64)a * b + 0x8000L ) >> 16 );
return ( s > 0 ) ? c : -c ;
+#endif
}
@@ -413,30 +420,8 @@
FT_MulFix( FT_Long a,
FT_Long b )
{
- /* use inline assembly to speed up things a bit */
-
-#if defined( __GNUC__ ) && defined( i386 )
-
- FT_Long result;
-
-
- __asm__ __volatile__ (
- "imul %%edx\n"
- "movl %%edx, %%ecx\n"
- "sarl $31, %%ecx\n"
- "addl $0x8000, %%ecx\n"
- "addl %%ecx, %%eax\n"
- "adcl $0, %%edx\n"
- "shrl $16, %%eax\n"
- "shll $16, %%edx\n"
- "addl %%edx, %%eax\n"
- "mov %%eax, %0\n"
- : "=a"(result), "+d"(b)
- : "a"(a)
- : "%ecx"
- );
- return result;
-
+#ifdef FT_MULFIX_ASSEMBLER
+ return FT_MULFIX_ASSEMBLER(a,b);
#elif 0
/*