shithub: freetype+ttf2subf

Download patch

ref: f47d263f1bb01fe7701249fe5df4ac2e1534bd51
parent: 238bb38513dbc29081870757887c17c0c2c383e0
author: David Turner <[email protected]>
date: Mon Sep 1 22:21:58 EDT 2008

* include/freetype/config/ftoption.h,
    include/freetype/config/ftconfig.h, builds/unix/ftconfig.in,
    include/freetype/freetype.h, src/base/ftcalc.c:
    Make FT_MulFix an inlined function. Also provide an assembler
    implementation for ARM architectures. This is done to speed up
    FreeType a little (on x86, 3% when loading+hinting and 10% when
    rendering; the savings on ARM are larger).
    Disable this by undefining FT_CONFIG_OPTION_INLINE_MULFIX in
    ftoption.h.

git/fs: mount .git/fs: mount/attach disallowed
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
 2008-09-01  david turner <[email protected]>
 
+	* include/freetype/config/ftoption.h,
+	include/freetype/config/ftconfig.h, builds/unix/ftconfig.in,
+	include/freetype/freetype.h, src/base/ftcalc.c:
+	Make FT_MulFix an inlined function. Also provide an assembler
+	implementation for ARM architectures. This is done to speed up
+	FreeType a little (on x86, 3% when loading+hinting and 10% when
+	rendering; the savings on ARM are larger).
+	Disable this by undefining FT_CONFIG_OPTION_INLINE_MULFIX in
+	ftoption.h.
+
 	* include/freetype/ftadvanc.h, src/base/ftadvanc.c,
 	include/freetype/config/ftheader.h, include/freetype/freetype.h,
 	src/base/Jamfile, src/base/rules.mk, src/cff/cffdrivr.c,
--- a/builds/unix/ftconfig.in
+++ b/builds/unix/ftconfig.in
@@ -197,6 +197,67 @@
 
 #endif /* FT_SIZEOF_LONG == 8 */
 
+#if !defined(FT_CONFIG_OPTION_NO_ASSEMBLER)
+/* Provide assembler fragments for performance-critical
+ * functions.  These must be defined `static __inline__'
+ * with GCC.
+ */
+#if defined(__GNUC__)
+
+#  if defined(__arm__) && !defined(__thumb__)
+#    define FT_MULFIX_ASSEMBLER   FT_MulFix_arm
+    static __inline__ FT_Int32
+    FT_MulFix_arm( FT_Int32  a, FT_Int32  b )
+    {   /* return the 16.16 fixed-point product of a and b, rounded */
+        register FT_Int32  t, t2;  /* t: high word, t2: low word of a*b */
+        __asm__ __volatile__ (     /* `__asm__' also works in strict ISO modes */
+            "smull  %1, %2, %4, %3\n\t"       /* (lo=%1,hi=%2) = a*b */
+            "mov    %0, %2, asr #31\n\t"      /* %0  = (hi >> 31), 0 or -1 */
+            "add    %0, %0, #0x8000\n\t"      /* %0 += 0x8000 (rounding bias) */
+            "adds   %1, %1, %0\n\t"           /* %1 += %0, sets carry */
+            "adc    %2, %2, #0\n\t"           /* %2 += carry */
+            "mov    %0, %1, lsr #16\n\t"      /* %0  = %1 >> 16 */
+            "orr    %0, %0, %2, lsl #16\n\t"  /* %0 |= %2 << 16 (explicit dest) */
+            : "=r"(a), "=&r"(t2), "=&r"(t)
+            : "r"(a), "r"(b)
+            : "cc" );  /* `adds'/`adc' modify the condition flags */
+        return a;
+    }
+#  endif /* __arm__ */
+
+#  if defined(i386)
+#    define FT_MULFIX_ASSEMBLER  FT_MulFix_i386
+    static __inline__ FT_Int32
+    FT_MulFix_i386( FT_Int32  a, FT_Int32  b )
+    {
+        register FT_Int32  result;
+        /* computes (a*b + 0x8000 - (a*b < 0)) >> 16, kept to 32 bits */
+        __asm__ __volatile__ (
+          "imul  %%edx\n"            /* edx:eax = eax * edx (signed 64-bit) */
+          "movl  %%edx, %%ecx\n"     /* ecx = high word of the product      */
+          "sarl  $31, %%ecx\n"       /* ecx = 0 or -1 (sign of the product) */
+          "addl  $0x8000, %%ecx\n"   /* ecx = 0x8000 or 0x7FFF (rounding)   */
+          "addl  %%ecx, %%eax\n"     /* low word += rounding bias           */
+          "adcl  $0, %%edx\n"        /* propagate the carry into high word  */
+          "shrl  $16, %%eax\n"       /* eax = low word >> 16 (logical)      */
+          "shll  $16, %%edx\n"       /* edx = high word << 16               */
+          "addl  %%edx, %%eax\n"     /* eax = combined 32-bit result        */
+          : "=a"(result), "+d"(b)    /* result leaves in eax; edx holds b   */
+          : "a"(a)                   /* a enters in eax                     */
+          : "%ecx"                   /* ecx is used as scratch              */
+        );
+        return result;
+    }
+#  endif /* i386 */
+#endif /* __GNUC__ */
+#endif /* !NO_ASSEMBLER */
+
+#ifdef FT_CONFIG_OPTION_INLINE_MULFIX
+#  ifdef FT_MULFIX_ASSEMBLER
+#    define FT_MULFIX_INLINED   FT_MULFIX_ASSEMBLER
+#  endif
+#endif
+
 
 #define FT_BEGIN_STMNT  do {
 #define FT_END_STMNT    } while ( 0 )
--- a/include/freetype/config/ftconfig.h
+++ b/include/freetype/config/ftconfig.h
@@ -225,6 +225,67 @@
 
 #endif
 
+#if !defined(FT_CONFIG_OPTION_NO_ASSEMBLER)
+/* Provide assembler fragments for performance-critical
+ * functions.  These must be defined `static __inline__'
+ * with GCC.
+ */
+#if defined(__GNUC__)
+
+#  if defined(__arm__) && !defined(__thumb__)
+#    define FT_MULFIX_ASSEMBLER   FT_MulFix_arm
+    static __inline__ FT_Int32
+    FT_MulFix_arm( FT_Int32  a, FT_Int32  b )
+    {   /* return the 16.16 fixed-point product of a and b, rounded */
+        register FT_Int32  t, t2;  /* t: high word, t2: low word of a*b */
+        __asm__ __volatile__ (     /* `__asm__' also works in strict ISO modes */
+            "smull  %1, %2, %4, %3\n\t"       /* (lo=%1,hi=%2) = a*b */
+            "mov    %0, %2, asr #31\n\t"      /* %0  = (hi >> 31), 0 or -1 */
+            "add    %0, %0, #0x8000\n\t"      /* %0 += 0x8000 (rounding bias) */
+            "adds   %1, %1, %0\n\t"           /* %1 += %0, sets carry */
+            "adc    %2, %2, #0\n\t"           /* %2 += carry */
+            "mov    %0, %1, lsr #16\n\t"      /* %0  = %1 >> 16 */
+            "orr    %0, %0, %2, lsl #16\n\t"  /* %0 |= %2 << 16 (explicit dest) */
+            : "=r"(a), "=&r"(t2), "=&r"(t)
+            : "r"(a), "r"(b)
+            : "cc" );  /* `adds'/`adc' modify the condition flags */
+        return a;
+    }
+#  endif /* __arm__ */
+
+#  if defined(i386)
+#    define FT_MULFIX_ASSEMBLER  FT_MulFix_i386
+    static __inline__ FT_Int32
+    FT_MulFix_i386( FT_Int32  a, FT_Int32  b )
+    {
+        register FT_Int32  result;
+        /* computes (a*b + 0x8000 - (a*b < 0)) >> 16, kept to 32 bits */
+        __asm__ __volatile__ (
+          "imul  %%edx\n"            /* edx:eax = eax * edx (signed 64-bit) */
+          "movl  %%edx, %%ecx\n"     /* ecx = high word of the product      */
+          "sarl  $31, %%ecx\n"       /* ecx = 0 or -1 (sign of the product) */
+          "addl  $0x8000, %%ecx\n"   /* ecx = 0x8000 or 0x7FFF (rounding)   */
+          "addl  %%ecx, %%eax\n"     /* low word += rounding bias           */
+          "adcl  $0, %%edx\n"        /* propagate the carry into high word  */
+          "shrl  $16, %%eax\n"       /* eax = low word >> 16 (logical)      */
+          "shll  $16, %%edx\n"       /* edx = high word << 16               */
+          "addl  %%edx, %%eax\n"     /* eax = combined 32-bit result        */
+          : "=a"(result), "+d"(b)    /* result leaves in eax; edx holds b   */
+          : "a"(a)                   /* a enters in eax                     */
+          : "%ecx"                   /* ecx is used as scratch              */
+        );
+        return result;
+    }
+#  endif /* i386 */
+#endif /* __GNUC__ */
+#endif /* !NO_ASSEMBLER */
+
+#ifdef FT_CONFIG_OPTION_INLINE_MULFIX
+#  ifdef FT_MULFIX_ASSEMBLER
+#    define FT_MULFIX_INLINED   FT_MULFIX_ASSEMBLER
+#  endif
+#endif
+
 
   /* determine whether we have a 64-bit int type for platforms without */
   /* Autoconf                                                          */
--- a/include/freetype/config/ftoption.h
+++ b/include/freetype/config/ftoption.h
@@ -117,6 +117,26 @@
 
   /*************************************************************************/
   /*                                                                       */
+  /* When this macro is defined, do not try to use an assembler version    */
+  /* of performance-critical functions (e.g. FT_MulFix). You should only   */
+  /* do that to verify that the assembler function works properly, or even */
+  /* to benchmark the various implementations.                             */
+/* #define FT_CONFIG_OPTION_NO_ASSEMBLER */
+
+  /*************************************************************************/
+  /*                                                                       */
+  /* When this macro is defined, try to use an inlined assembler version   */
+  /* of the FT_MulFix function, which appears to be a hotspot when loading */
+  /* and hinting glyphs.                                                   */
+  /*                                                                       */
+  /* Note that if your compiler/CPU isn't supported, this will default to  */
+  /* the standard and portable implementation found in src/base/ftcalc.c.  */
+  /*                                                                       */
+#define FT_CONFIG_OPTION_INLINE_MULFIX
+
+
+  /*************************************************************************/
+  /*                                                                       */
   /* LZW-compressed file support.                                          */
   /*                                                                       */
   /*   FreeType now handles font files that have been compressed with the  */
--- a/include/freetype/freetype.h
+++ b/include/freetype/freetype.h
@@ -3468,10 +3468,13 @@
   /*    _second_ argument of this function; this can make a great          */
   /*    difference.                                                        */
   /*                                                                       */
+#ifdef FT_MULFIX_INLINED
+#  define  FT_MulFix(a,b)  FT_MULFIX_INLINED(a,b)
+#else
   FT_EXPORT( FT_Long )
   FT_MulFix( FT_Long  a,
              FT_Long  b );
-
+#endif
 
   /*************************************************************************/
   /*                                                                       */
--- a/src/base/ftcalc.c
+++ b/src/base/ftcalc.c
@@ -38,6 +38,9 @@
 #include FT_INTERNAL_DEBUG_H
 #include FT_INTERNAL_OBJECTS_H
 
+#ifdef  FT_MULFIX_INLINED
+#undef  FT_MulFix
+#endif
 
 /* we need to define a 64-bits data type here */
 
@@ -193,6 +196,9 @@
   FT_MulFix( FT_Long  a,
              FT_Long  b )
   {
+#ifdef FT_MULFIX_ASSEMBLER
+    return FT_MULFIX_ASSEMBLER(a,b);
+#else
     FT_Int   s = 1;
     FT_Long  c;
 
@@ -202,6 +208,7 @@
 
     c = (FT_Long)( ( (FT_Int64)a * b + 0x8000L ) >> 16 );
     return ( s > 0 ) ? c : -c ;
+#endif
   }
 
 
@@ -413,30 +420,8 @@
   FT_MulFix( FT_Long  a,
              FT_Long  b )
   {
-    /* use inline assembly to speed up things a bit */
-
-#if defined( __GNUC__ ) && defined( i386 )
-
-    FT_Long  result;
-
-
-    __asm__ __volatile__ (
-      "imul  %%edx\n"
-      "movl  %%edx, %%ecx\n"
-      "sarl  $31, %%ecx\n"
-      "addl  $0x8000, %%ecx\n"
-      "addl  %%ecx, %%eax\n"
-      "adcl  $0, %%edx\n"
-      "shrl  $16, %%eax\n"
-      "shll  $16, %%edx\n"
-      "addl  %%edx, %%eax\n"
-      "mov   %%eax, %0\n"
-      : "=a"(result), "+d"(b)
-      : "a"(a)
-      : "%ecx"
-    );
-    return result;
-
+#ifdef FT_MULFIX_ASSEMBLER
+    return FT_MULFIX_ASSEMBLER(a,b);
 #elif 0
 
     /*