shithub: freetype+ttf2subf

Download patch

ref: 281679de8591f6884ea904c9c812e520ba3cc5dc
parent: dc3ff31c3fd00946917b2f07d2f6a70da36afe35
author: David Turner <david@freetype.org>
date: Wed May 17 09:34:21 EDT 2006

* include/freetype/internal/tttypes.h, src/autofit/afangles.c,
        src/autofit/afcjk.c, src/autofit/afhints.c, src/autofit/aflatin.c,
        src/autofit/aftypes.h, src/base/ftcalc.c, src/base/ftoutln.c,
        src/gzip/ftgzip.c, src/psaux/psconv.c, src/truetype/ttgload.c,
        src/type1/t1gload.c:

          this is a major patch used to drastically improve the performance
          of loading glyphs. This speeds up both the loading of the glyph
          vectors themselves and the auto-fitter.

          note that we've started using inline assembler with GCC to
          implement FT_MulFix, given that this function is so damn
          important for the engine's performance.

          the resulting speed-up is about 25%.

git/fs: mount .git/fs: mount/attach disallowed
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,22 @@
 2006-05-17  David Turner  <david@freetype.org>
 
+        * include/freetype/internal/tttypes.h, src/autofit/afangles.c,
+        src/autofit/afcjk.c, src/autofit/afhints.c, src/autofit/aflatin.c,
+        src/autofit/aftypes.h, src/base/ftcalc.c, src/base/ftoutln.c,
+        src/gzip/ftgzip.c, src/psaux/psconv.c, src/truetype/ttgload.c,
+        src/type1/t1gload.c:
+
+          this is a major patch used to drastically improve the performance
+          of loading glyphs. This speeds up both the loading of the glyph
+          vectors themselves and the auto-fitter.
+
+          note that we've started using inline assembler with GCC to
+          implement FT_MulFix, given that this function is so damn
+          important for the engine's performance.
+
+          the resulting speed-up is about 25%.
+
+
         * src/ftccmap.c (FTC_CMapCache_Lookup): changed the threshold
         used to detect rogue clients from 4 to 16. This is to prevent
         some segmentation faults with fonts like KozMinProVI-Regular.otf
@@ -8,7 +25,7 @@
 2007-05-17  Werner Lemberg  <wl@gnu.org>
 
 	* src/cff/cffload.c (cff_font_done): Deallocate subfont array.  This
-	fixes the first part of Savannah bug #16590. 
+	fixes the first part of Savannah bug #16590.
 
 2006-05-16  Werner Lemberg  <wl@gnu.org>
 
@@ -52,8 +69,8 @@
 	Update handling of re_italic and re_bold.
 
 2006-05-11  Masatake YAMATO  <jet@gyve.org>
- 
-	* builds/unix/ftsystem.c (FT_Stream_Open): Check errno only if 
+
+	* builds/unix/ftsystem.c (FT_Stream_Open): Check errno only if
 	read system call returns -1.
 	Remove a redundant parenthesis.
 
--- a/include/freetype/internal/tttypes.h
+++ b/include/freetype/internal/tttypes.h
@@ -1521,6 +1521,10 @@
     FT_Vector        pp3;
     FT_Vector        pp4;
 
+    /* since version 2.2.1 */
+    FT_Byte*         cursor;
+    FT_Byte*         limit;
+
   } TT_LoaderRec;
 
 
--- a/src/autofit/afangles.c
+++ b/src/autofit/afangles.c
@@ -20,8 +20,95 @@
 #include "aftypes.h"
 
 
-#if 1
+  /* Return 1 if the corner formed by `in' and `out' is (nearly) flat, else 0. */
+  FT_LOCAL_DEF( FT_Int )
+  af_corner_is_flat( FT_Pos   x_in,
+                     FT_Pos   y_in,
+                     FT_Pos   x_out,
+                     FT_Pos   y_out )
+  {
+    FT_Pos  len_in, len_out, len_sum;
 
+    FT_Pos  sum_x = x_out + x_in;
+    FT_Pos  sum_y = y_out + y_in;
+
+    /* Manhattan length (|x| + |y|) of `in', `out', and `in + out' */
+    len_in  = ( x_in  < 0 ? -x_in  : x_in  ) + ( y_in  < 0 ? -y_in  : y_in  );
+    len_out = ( x_out < 0 ? -x_out : x_out ) + ( y_out < 0 ? -y_out : y_out );
+    len_sum = ( sum_x < 0 ? -sum_x : sum_x ) + ( sum_y < 0 ? -sum_y : sum_y );
+
+    /* the corner counts as flat when the two arm lengths together   */
+    /* exceed the length of their sum by less than 1/16th of the     */
+    /* latter                                                        */
+    if ( ( len_in + len_out - len_sum ) < ( len_sum >> 4 ) )
+      return 1;
+
+    return 0;
+  }
+
+
+  /* Sign of `x_in*y_out - y_in*x_out': +1, -1, or 0 when it vanishes. */
+  FT_LOCAL_DEF( FT_Int )
+  af_corner_orientation( FT_Pos  x_in,
+                         FT_Pos  y_in,
+                         FT_Pos  x_out,
+                         FT_Pos  y_out )
+  {
+    FT_Pos  cross = x_in * y_out - y_in * x_out;
+
+
+    if ( cross > 0 )
+      return 1;
+
+    return ( cross < 0 ) ? -1 : 0;
+  }
+
+
+  /* we're not using af_angle_atan anymore, but we keep the source
+   * code below just in case :-)
+   */
+#if 0
+
+ /* the trick here is to realize that we don't need a very accurate
+  * angle approximation. We're going to use the result of af_angle_atan
+  * only to compare the sign of angle differences, or to see if their
+  * magnitude is very small.
+  *
+  * the approximation (dy*AF_ANGLE_PI2/(|dx|+|dy|)), mirrored around the
+  * Y axis when dx < 0, should be enough, and much faster to compute.
+  */
+  FT_LOCAL_DEF( AF_Angle )
+  af_angle_atan( FT_Fixed  dx,
+                 FT_Fixed  dy )
+  {
+    AF_Angle  angle;
+    FT_Fixed  ax = dx;
+    FT_Fixed  ay = dy;
+
+    if ( ax < 0 ) ax = -ax;
+    if ( ay < 0 ) ay = -ay;
+
+    ax += ay;  /* ax is now |dx| + |dy| */
+
+    if ( ax == 0 )
+      angle = 0;
+    else
+    {
+      angle = (AF_ANGLE_PI2*dy)/ax;  /* not `ax+ay': ay is already in ax */
+      if ( dx < 0 )
+      {
+        if ( angle >= 0 )
+          angle = AF_ANGLE_PI - angle;
+        else
+          angle = -AF_ANGLE_PI - angle;
+      }
+    }
+
+    return angle;
+  }
+
+#elif 0
+
   /* the following table has been automatically generated with */
   /* the `mather.py' Python script                             */
 
@@ -124,225 +211,9 @@
   }
 
 
-#else /* 0 */
-
-/*
- * a python script used to generate the following table
- *
-
-import sys, math
-
-units = 256
-scale = units/math.pi
-comma = ""
-
-print ""
-print "table of arctan( 1/2^n ) for PI = " + repr( units / 65536.0 ) + " units"
-
-r = [-1] + range( 32 )
-
-for n in r:
-    if n >= 0:
-        x = 1.0 / ( 2.0 ** n )   # tangent value
-    else:
-        x = 2.0 ** ( -n )
-
-    angle  = math.atan( x )      # arctangent
-    angle2 = angle * scale       # arctangent in FT_Angle units
-
-    # determine which integer value for angle gives the best tangent
-    lo  = int( angle2 )
-    hi  = lo + 1
-    tlo = math.tan( lo / scale )
-    thi = math.tan( hi / scale )
-
-    errlo = abs( tlo - x )
-    errhi = abs( thi - x )
-
-    angle2 = hi
-    if errlo < errhi:
-        angle2 = lo
-
-    if angle2 <= 0:
-        break
-
-    sys.stdout.write( comma + repr( int( angle2 ) ) )
-    comma = ", "
-
-*
-* end of python script
-*/
-
-
-  /* this table was generated for AF_ANGLE_PI = 256 */
-#define AF_ANGLE_MAX_ITERS  8
-#define AF_TRIG_MAX_ITERS   8
-
-  static const FT_Fixed
-  af_angle_arctan_table[9] =
-  {
-    90, 64, 38, 20, 10, 5, 3, 1, 1
-  };
-
-
-  static FT_Int
-  af_angle_prenorm( FT_Vector*  vec )
-  {
-    FT_Fixed  x, y, z;
-    FT_Int    shift;
-
-
-    x = vec->x;
-    y = vec->y;
-
-    z     = ( ( x >= 0 ) ? x : - x ) | ( (y >= 0) ? y : -y );
-    shift = 0;
-
-    if ( z < ( 1L << 27 ) )
-    {
-      do
-      {
-        shift++;
-        z <<= 1;
-      } while ( z < ( 1L << 27 ) );
-
-      vec->x = x << shift;
-      vec->y = y << shift;
-    }
-    else if ( z > ( 1L << 28 ) )
-    {
-      do
-      {
-        shift++;
-        z >>= 1;
-      } while ( z > ( 1L << 28 ) );
-
-      vec->x = x >> shift;
-      vec->y = y >> shift;
-      shift  = -shift;
-    }
-    return shift;
-  }
-
-
-  static void
-  af_angle_pseudo_polarize( FT_Vector*  vec )
-  {
-    FT_Fixed         theta;
-    FT_Fixed         yi, i;
-    FT_Fixed         x, y;
-    const FT_Fixed  *arctanptr;
-
-
-    x = vec->x;
-    y = vec->y;
-
-    /* Get the vector into the right half plane */
-    theta = 0;
-    if ( x < 0 )
-    {
-      x = -x;
-      y = -y;
-      theta = AF_ANGLE_PI;
-    }
-
-    if ( y > 0 )
-      theta = -theta;
-
-    arctanptr = af_angle_arctan_table;
-
-    if ( y < 0 )
-    {
-      /* Rotate positive */
-      yi     = y + ( x << 1 );
-      x      = x - ( y << 1 );
-      y      = yi;
-      theta -= *arctanptr++;  /* Subtract angle */
-    }
-    else
-    {
-      /* Rotate negative */
-      yi     = y - ( x << 1 );
-      x      = x + ( y << 1 );
-      y      = yi;
-      theta += *arctanptr++;  /* Add angle */
-    }
-
-    i = 0;
-    do
-    {
-      if ( y < 0 )
-      {
-        /* Rotate positive */
-        yi     = y + ( x >> i );
-        x      = x - ( y >> i );
-        y      = yi;
-        theta -= *arctanptr++;
-      }
-      else
-      {
-        /* Rotate negative */
-        yi     = y - ( x >> i );
-        x      = x + ( y >> i );
-        y      = yi;
-        theta += *arctanptr++;
-      }
-    } while ( ++i < AF_TRIG_MAX_ITERS );
-
-#if 0
-    /* round theta */
-    if ( theta >= 0 )
-      theta =  FT_PAD_ROUND( theta, 2 );
-    else
-      theta = -FT_PAD_ROUND( -theta, 2 );
 #endif
 
-    vec->x = x;
-    vec->y = theta;
-  }
 
-
-  /* cf. documentation in fttrigon.h */
-
-  FT_LOCAL_DEF( AF_Angle )
-  af_angle_atan( FT_Fixed  dx,
-                 FT_Fixed  dy )
-  {
-    FT_Vector  v;
-
-
-    if ( dx == 0 && dy == 0 )
-      return 0;
-
-    v.x = dx;
-    v.y = dy;
-    af_angle_prenorm( &v );
-    af_angle_pseudo_polarize( &v );
-
-    return v.y;
-  }
-
-
-  FT_LOCAL_DEF( AF_Angle )
-  af_angle_diff( AF_Angle  angle1,
-                 AF_Angle  angle2 )
-  {
-    AF_Angle  delta = angle2 - angle1;
-
-
-    delta %= AF_ANGLE_2PI;
-    if ( delta < 0 )
-      delta += AF_ANGLE_2PI;
-
-    if ( delta > AF_ANGLE_PI )
-      delta -= AF_ANGLE_2PI;
-
-    return delta;
-  }
-
-#endif /* 0 */
-
-
   FT_LOCAL_DEF( void )
   af_sort_pos( FT_UInt  count,
                FT_Pos*  table )
@@ -387,47 +258,6 @@
       }
     }
   }
-
-
-#ifdef TEST
-
-#include <stdio.h>
-#include <math.h>
-
-int main( void )
-{
-  int  angle;
-  int  dist;
-
-
-  for ( dist = 100; dist < 1000; dist++ )
-  {
-    for ( angle = AF_ANGLE_PI; angle < AF_ANGLE_2PI * 4; angle++ )
-    {
-      double  a = ( angle * 3.1415926535 ) / ( 1.0 * AF_ANGLE_PI );
-      int     dx, dy, angle1, angle2, delta;
-
-
-      dx = dist * cos( a );
-      dy = dist * sin( a );
-
-      angle1 = ( ( atan2( dy, dx ) * AF_ANGLE_PI ) / 3.1415926535 );
-      angle2 = af_angle_atan( dx, dy );
-      delta  = ( angle2 - angle1 ) % AF_ANGLE_2PI;
-      if ( delta < 0 )
-        delta = -delta;
-
-      if ( delta >= 2 )
-      {
-        printf( "dist:%4d angle:%4d => (%4d,%4d) angle1:%4d angle2:%4d\n",
-                dist, angle, dx, dy, angle1, angle2 );
-      }
-    }
-  }
-  return 0;
-}
-
-#endif /* TEST */
 
 
 /* END */
--- a/src/autofit/afcjk.c
+++ b/src/autofit/afcjk.c
@@ -1437,7 +1437,9 @@
 
   static const AF_Script_UniRangeRec  af_cjk_uniranges[] =
   {
-    { 0x0100,  0xFFFF },
+#if 0
+    { 0x0100,  0xFFFF },  /* why ?? */
+#endif
     { 0x2E80,  0x2EFF },  /* CJK Radicals Supplement */
     { 0x2F00,  0x2FDF },  /* Kangxi Radicals */
     { 0x3000,  0x303F },  /* CJK Symbols and Punctuation */
--- a/src/autofit/afhints.c
+++ b/src/autofit/afhints.c
@@ -53,7 +53,9 @@
     }
 
     segment = axis->segments + axis->num_segments++;
+#if 0
     FT_ZERO( segment );
+#endif
 
   Exit:
     *asegment = segment;
@@ -272,54 +274,46 @@
                         FT_Pos  dy )
   {
 #if 1
-    AF_Direction  dir = AF_DIR_NONE;
+    FT_Pos        ll, ss;  /* long and short arm lengths */
+    AF_Direction  dir;     /* candidate direction        */
 
-
-    /* atan(1/12) == 4.7 degrees */
-
-    if ( dx < 0 )
+    if ( dy >= dx )
     {
-      if ( dy < 0 )
+      if ( dy >= -dx )
       {
-        if ( -dx * 12 < -dy )
-          dir = AF_DIR_DOWN;
-
-        else if ( -dy * 12 < -dx )
-          dir = AF_DIR_LEFT;
+        dir = AF_DIR_UP;
+        ll  = dy;
+        ss  = dx;
       }
-      else /* dy >= 0 */
+      else
       {
-        if ( -dx * 12 < dy )
-          dir = AF_DIR_UP;
-
-        else if ( dy * 12 < -dx )
-          dir = AF_DIR_LEFT;
+        dir = AF_DIR_LEFT;
+        ll  = -dx;
+        ss  = dy;
       }
     }
-    else /* dx >= 0 */
+    else /* dy < dx */
     {
-      if ( dy < 0 )
+      if ( dy >= -dx )
       {
-        if ( dx * 12 < -dy )
-          dir = AF_DIR_DOWN;
-
-        else if ( -dy * 12 < dx )
-          dir = AF_DIR_RIGHT;
+        dir = AF_DIR_RIGHT;
+        ll  = dx;
+        ss  = dy;
       }
-      else  /* dy >= 0 */
+      else
       {
-        if ( dx * 12 < dy )
-          dir = AF_DIR_UP;
-
-        else if ( dy * 12 < dx )
-          dir = AF_DIR_RIGHT;
+        dir = AF_DIR_DOWN;
+        ll  = -dy;  /* dy < 0 in this branch; the long arm must be positive */
+        ss  = dx;
       }
     }
 
-    return dir;
+    ss *= 12;
+    if ( ll <= FT_ABS(ss) )
+      dir = AF_DIR_NONE;
 
+    return dir;
 #else /* 0 */
-
     AF_Direction  dir;
     FT_Pos        ax = FT_ABS( dx );
     FT_Pos        ay = FT_ABS( dy );
@@ -341,7 +335,6 @@
     }
 
     return dir;
-
 #endif /* 0 */
 
   }
@@ -348,6 +341,7 @@
 
 
   /* compute all inflex points in a given glyph */
+#if 1
   static void
   af_glyph_hints_compute_inflections( AF_GlyphHints  hints )
   {
@@ -364,6 +358,118 @@
       AF_Point  end   = point;
       AF_Point  before;
       AF_Point  after;
+      FT_Pos    in_x, in_y, out_x, out_y;
+      AF_Angle  orient_prev, orient_cur;
+      FT_Int    finished = 0;
+
+
+      /* compute first segment in contour */
+      first = point;
+
+      start = end = first;
+      do
+      {
+        end = end->next;
+        if ( end == first )
+          goto Skip;
+
+        in_x = end->fx - start->fx;
+        in_y = end->fy - start->fy;
+
+      } while ( in_x == 0 && in_y == 0 );
+
+      /* extend the segment start whenever possible */
+      before = start;
+      do
+      {
+        do
+        {
+          start  = before;
+          before = before->prev;
+          if ( before == first )
+            goto Skip;
+
+          out_x = start->fx - before->fx;
+          out_y = start->fy - before->fy;
+
+        } while ( out_x == 0 && out_y == 0 );
+
+        orient_prev = af_corner_orientation( in_x, in_y, out_x, out_y );
+
+      } while ( orient_prev == 0 );
+
+      first = start;
+
+      in_x = out_x;
+      in_y = out_y;
+
+      /* now, process all segments in the contour */
+      do
+      {
+        /* first, extend current segment's end whenever possible */
+        after = end;
+        do
+        {
+          do
+          {
+            end   = after;
+            after = after->next;
+            if ( after == first )
+              finished = 1;
+
+            out_x = after->fx - end->fx;
+            out_y = after->fy - end->fy;
+
+          } while ( out_x == 0 && out_y == 0 );
+
+          orient_cur = af_corner_orientation( in_x, in_y, out_x, out_y );
+
+        } while ( orient_cur == 0 );
+
+        if ( ( orient_prev + orient_cur ) == 0 )
+        {
+          /* we have an inflection point here */
+          do
+          {
+            start->flags |= AF_FLAG_INFLECTION;
+            start = start->next;
+
+          } while ( start != end );
+
+          start->flags |= AF_FLAG_INFLECTION;
+        }
+
+        start     = end;
+        end       = after;
+
+        orient_prev = orient_cur;
+        in_x        = out_x;
+        in_y        = out_y;
+
+      } while ( !finished );
+
+    Skip:
+      ;
+    }
+  }
+
+#else /* old code */
+  static void
+  af_glyph_hints_compute_inflections( AF_GlyphHints  hints )
+  {
+    AF_Point*  contour       = hints->contours;
+    AF_Point*  contour_limit = contour + hints->num_contours;
+
+
+    /* do each contour separately */
+    for ( ; contour < contour_limit; contour++ )
+    {
+      AF_Point  point = contour[0];
+      AF_Point  first = point;
+      AF_Point  start = point;
+      AF_Point  end   = point;
+      AF_Point  before;
+      AF_Point  after;
       AF_Angle  angle_in, angle_seg, angle_out;
       AF_Angle  diff_in, diff_out;
       FT_Int    finished = 0;
@@ -454,6 +560,7 @@
       ;
     }
   }
+#endif /* old code */
 
 
   FT_LOCAL_DEF( void )
@@ -702,6 +809,14 @@
           }
           else if ( point->out_dir == point->in_dir )
           {
+#if 1
+            if ( point->out_dir != AF_DIR_NONE )
+              goto Is_Weak_Point;
+
+            if ( af_corner_is_flat( in_x, in_y, out_x, out_y ) )
+              goto Is_Weak_Point;
+
+#else /* old code */
             AF_Angle  angle_in, angle_out, delta;
 
 
@@ -715,6 +830,7 @@
 
             if ( delta < 2 && delta > -2 )
               goto Is_Weak_Point;
+#endif /* old code */
           }
           else if ( point->in_dir == -point->out_dir )
             goto Is_Weak_Point;
@@ -1152,7 +1268,7 @@
     AF_Point  points       = hints->points;
     AF_Point  points_limit = points + hints->num_points;
     AF_Point  point;
-    
+
 
     if ( dim == AF_DIMENSION_HORZ )
     {
--- a/src/autofit/aflatin.c
+++ b/src/autofit/aflatin.c
@@ -590,6 +590,7 @@
     FT_Memory     memory        = hints->memory;
     FT_Error      error         = AF_Err_Ok;
     AF_Segment    segment       = NULL;
+    AF_SegmentRec seg0;
     AF_Point*     contour       = hints->contours;
     AF_Point*     contour_limit = contour + hints->num_contours;
     AF_Direction  major_dir, segment_dir;
@@ -601,6 +602,10 @@
     FT_Pos    max_coord = -32000;
 #endif
 
+    FT_ZERO( &seg0 );
+    seg0.score = 32000;
+    seg0.flags = AF_EDGE_NORMAL;
+
     major_dir   = (AF_Direction)FT_ABS( axis->major_dir );
     segment_dir = major_dir;
 
@@ -742,15 +747,12 @@
           if ( error )
             goto Exit;
 
+          segment[0]        = seg0;
           segment->dir      = (FT_Char)segment_dir;
-          segment->flags    = AF_EDGE_NORMAL;
           min_pos = max_pos = point->u;
           segment->first    = point;
           segment->last     = point;
           segment->contour  = contour;
-          segment->score    = 32000;
-          segment->len      = 0;
-          segment->link     = NULL;
           on_edge           = 1;
 
 #ifdef AF_HINT_METRICS
@@ -810,14 +812,11 @@
         if ( error )
           goto Exit;
 
+        segment[0]     = seg0;
         segment->dir   = segment_dir;
-        segment->flags = AF_EDGE_NORMAL;
         segment->first = min_point;
         segment->last  = min_point;
         segment->pos   = min_pos;
-        segment->score = 32000;
-        segment->len   = 0;
-        segment->link  = NULL;
 
         segment = NULL;
       }
@@ -830,14 +829,11 @@
         if ( error )
           goto Exit;
 
+        segment[0]     = seg0;
         segment->dir   = segment_dir;
-        segment->flags = AF_EDGE_NORMAL;
         segment->first = max_point;
         segment->last  = max_point;
         segment->pos   = max_pos;
-        segment->score = 32000;
-        segment->len   = 0;
-        segment->link  = NULL;
 
         segment = NULL;
       }
@@ -926,7 +922,6 @@
 
       if ( seg2 )
       {
-        seg2->num_linked++;
         if ( seg2->link != seg1 )
         {
           seg1->link  = 0;
--- a/src/autofit/aftypes.h
+++ b/src/autofit/aftypes.h
@@ -118,6 +118,7 @@
 #define AF_ANGLE_PI4  ( AF_ANGLE_PI / 4 )
 
 
+#if 0
   /*
    *  compute the angle of a given 2-D vector
    */
@@ -126,7 +127,6 @@
                  FT_Pos  dy );
 
 
-#if 0
   /*
    *  compute `angle2 - angle1'; the result is always within
    *  the range [-AF_ANGLE_PI .. AF_ANGLE_PI - 1]
@@ -136,6 +136,27 @@
                  AF_Angle  angle2 );
 #endif /* 0 */
 
+
+ /* return TRUE if a corner is flat or nearly flat; this is equivalent
+  * to saying that the angle difference between the 'in' and 'out'
+  * vectors is very small
+  */
+  FT_LOCAL( FT_Int )
+  af_corner_is_flat( FT_Pos   x_in,
+                     FT_Pos   y_in,
+                     FT_Pos   x_out,
+                     FT_Pos   y_out );
+
+ /* return a value that can be -1, 0 or +1 depending on the orientation
+  * of a given corner. We're using the Cartesian coordinate system,
+  * with positive Ys going upwards. The function returns +1 when
+  * the corner turns to the left, -1 to the right, and 0 for undecided
+  */
+  FT_LOCAL( FT_Int )
+  af_corner_orientation( FT_Pos  x_in,
+                         FT_Pos  y_in,
+                         FT_Pos  x_out,
+                         FT_Pos  y_out );
 
 #define AF_ANGLE_DIFF( result, angle1, angle2 ) \
   FT_BEGIN_STMNT                                \
--- a/src/base/ftcalc.c
+++ b/src/base/ftcalc.c
@@ -396,7 +396,29 @@
   FT_MulFix( FT_Long  a,
              FT_Long  b )
   {
-#if 1
+    /* let's use inline assembly to speed things a bit */
+#if defined(__GNUC__) && ( defined(__i386__) || defined(i386) )
+
+    FT_Long  result;
+
+    __asm__ __volatile__ (
+        "imul  %%edx\n"            /* edx:eax = a * b (64-bit product)   */
+        "movl  %%edx, %%ecx\n"
+        "sarl  $31, %%ecx\n"       /* ecx = 0, or -1 if product < 0      */
+        "addl  $0x8000, %%ecx\n"   /* rounding bias: 0x8000 or 0x7FFF    */
+        "addl  %%ecx, %%eax\n"     /* add the bias to the 64-bit product */
+        "adcl  $0, %%edx\n"
+        "shrl  $16, %%eax\n"       /* keep bits 16..47 of the product    */
+        "shll  $16, %%edx\n"
+        "addl  %%edx, %%eax\n"
+        /* the result is left in %eax; %edx and the flags are modified too */
+       : "=a"(result), "+d"(b)
+       : "a"(a)
+       : "%ecx", "cc"
+     );
+     return result;
+
+#elif 1
     FT_Long   sa, sb;
     FT_ULong  ua, ub;
 
--- a/src/base/ftoutln.c
+++ b/src/base/ftoutln.c
@@ -732,7 +732,7 @@
     return ( n % 2 );
   }
 
-  
+
   static FT_Bool
   ft_contour_enclosed( FT_Outline*  outline,
                        FT_UShort    c )
@@ -1012,11 +1012,25 @@
         ++next;
     }
 
+#if 1
+    {
+      FT_Pos  dx1 = prev->x - xmin_point->x;
+      FT_Pos  dy1 = prev->y - xmin_point->y;
+      FT_Pos  dx2 = next->x - xmin_point->x;
+      FT_Pos  dy2 = next->y - xmin_point->y;
+
+      if ( dy1*dx2 > dy2*dx1 )
+        return FT_ORIENTATION_POSTSCRIPT;
+      else
+        return FT_ORIENTATION_TRUETYPE;
+    }
+#else
     if ( FT_Atan2( prev->x - xmin_point->x, prev->y - xmin_point->y ) >
          FT_Atan2( next->x - xmin_point->x, next->y - xmin_point->y ) )
       return FT_ORIENTATION_POSTSCRIPT;
     else
       return FT_ORIENTATION_TRUETYPE;
+#endif
   }
 
 
--- a/src/gzip/ftgzip.c
+++ b/src/gzip/ftgzip.c
@@ -554,6 +554,27 @@
   }
 
 
+  static FT_ULong
+  ft_gzip_get_uncompressed_size( FT_Stream  stream )
+  {
+    FT_Error  error;
+    FT_ULong  old_pos;
+    FT_ULong  result = 0;  /* 0 is returned when the size can't be read */
+
+    old_pos = stream->pos;
+    if ( !FT_Stream_Seek( stream, stream->size - 4 ) )
+    {
+      /* the gzip trailer ends with ISIZE, stored least significant byte first */
+      result = (FT_ULong)FT_Stream_ReadLongLE( stream, &error );
+      if ( error )
+        result = 0;
+      FT_Stream_Seek( stream, old_pos );  /* restore the caller's position */
+    }
+
+    return result;
+  }
+
+
   FT_EXPORT_DEF( FT_Error )
   FT_Stream_OpenGzip( FT_Stream  stream,
                       FT_Stream  source )
@@ -584,6 +605,47 @@
       }
 
       stream->descriptor.pointer = zip;
+    }
+
+   /* ok, here's a trick to try to dramatically improve the performance
+    * of dealing with small files. If the original stream size is less
+    * than a certain threshold, we try to load the whole font file in
+    * memory. this saves us from the 32KB buffer needed to inflate the
+    * file anyway, plus the two 4KB intermediate input/output buffers
+    * used in the FT_GZipFile structure.
+    */
+    {
+      FT_ULong    zip_size = ft_gzip_get_uncompressed_size( source );
+
+      if ( zip_size != 0 && zip_size < 40*1024 )
+      {
+        FT_Byte*  zip_buff;
+
+        if ( !FT_ALLOC( zip_buff, zip_size ) )
+        {
+          FT_ULong  count;
+
+          count = ft_gzip_file_io( zip, 0, zip_buff, zip_size );
+          if ( count == zip_size )
+          {
+            ft_gzip_file_done( zip );
+            FT_FREE( zip );
+
+            stream->descriptor.pointer = NULL;
+
+            stream->size  = zip_size;
+            stream->pos   = 0;
+            stream->base  = zip_buff;
+            stream->read  = NULL;
+            stream->close = ft_gzip_stream_close;
+
+            goto Exit;
+          }
+          ft_gzip_file_io( zip, 0, NULL, 0 );
+          FT_FREE( zip_buff );
+        }
+        error = 0;
+      }
     }
 
     stream->size  = 0x7FFFFFFFL;  /* don't know the real size! */
--- a/src/psaux/psconv.c
+++ b/src/psaux/psconv.c
@@ -331,12 +331,49 @@
                           FT_UInt    n )
   {
     FT_Byte*  p;
-    FT_UInt   r = 0;
+    FT_UInt   r   = 0;
+    FT_UInt   w   = 0;
+    FT_UInt   pad = 0x01;
 
 
     n *= 2;
-    for ( p = *cursor; r < n && p < limit; p++ )
+#if 1
+    p  = *cursor;
+    if ( n > (FT_UInt)(limit-p) )
+      n = (FT_UInt)(limit - p);
+
+   /* nibbles are accumulated in `pad'; a byte is written every two nibbles
+    */
+    for ( ; r < n; r++ )
     {
+      FT_UInt  c = p[r];
+
+      if ( IS_PS_SPACE(c) )
+        continue;
+
+      if ( c OP 0x80 )
+        break;
+
+      c = ft_char_table[ c & 0x7F ];
+      if ( (unsigned)c >= 16 )
+        break;
+
+      pad = (pad << 4) | c;
+      if ( pad & 0x100 )
+      {
+        buffer[w++] = (FT_Byte)pad;
+        pad         = 0x01;
+      }
+    }
+
+    if ( pad != 0x01 )
+      buffer[w++] = (FT_Byte)(pad << 4);
+
+    *cursor = p+r;
+    return w;
+#else
+    for ( r = 0; r < n; r++ )
+    {
       FT_Char  c;
 
 
@@ -348,10 +385,10 @@
 
       c = ft_char_table[*p & 0x7f];
 
-      if ( c < 0 || c >= 16 )
+      if ( (unsigned)c >= 16 )
         break;
 
-      if ( r % 2 )
+      if ( r & 1 )
       {
         *buffer = (FT_Byte)(*buffer + c);
         buffer++;
@@ -365,6 +402,7 @@
     *cursor = p;
 
     return ( r + 1 ) / 2;
+#endif
   }
 
 
@@ -377,9 +415,25 @@
   {
     FT_Byte*   p;
     FT_UInt    r;
-    FT_UShort  s = *seed;
+    FT_UInt    s = *seed;
 
+#if 1
+    p = *cursor;
+    if ( n > (FT_UInt)(limit - p) )
+      n = (FT_UInt)(limit - p);
 
+    for ( r = 0; r < n; r++ )
+    {
+      FT_UInt  val = p[r];
+      FT_UInt  b   = ( val ^ (s >> 8) );
+
+      s         = ( (val + s)*52845U + 22719 ) & 0xFFFFU;
+      buffer[r] = (FT_Byte) b;
+    }
+
+    *cursor = p + n;
+    *seed   = (FT_UShort)s;
+#else
     for ( r = 0, p = *cursor; r < n && p < limit; r++, p++ )
     {
       FT_Byte  b = (FT_Byte)( *p ^ ( s >> 8 ) );
@@ -388,9 +442,9 @@
       s = (FT_UShort)( ( *p + s ) * 52845U + 22719 );
       *buffer++ = b;
     }
-
     *cursor = p;
     *seed   = s;
+#endif
 
     return r;
   }
--- a/src/truetype/ttgload.c
+++ b/src/truetype/ttgload.c
@@ -188,6 +188,9 @@
     if ( FT_STREAM_SEEK( offset ) || FT_FRAME_ENTER( byte_count ) )
       return error;
 
+    loader->cursor = stream->cursor;
+    loader->limit  = stream->limit;
+
     return TT_Err_Ok;
   }
 
@@ -205,19 +208,19 @@
   FT_CALLBACK_DEF( FT_Error )
   TT_Load_Glyph_Header( TT_Loader  loader )
   {
-    FT_Stream  stream   = loader->stream;
-    FT_Int     byte_len = loader->byte_len - 10;
+    FT_Byte*   p     = loader->cursor;
+    FT_Byte*   limit = loader->limit;
 
 
-    if ( byte_len < 0 )
+    if ( p + 10 > limit )
       return TT_Err_Invalid_Outline;
 
-    loader->n_contours = FT_GET_SHORT();
+    loader->n_contours = FT_NEXT_SHORT(p);
 
-    loader->bbox.xMin = FT_GET_SHORT();
-    loader->bbox.yMin = FT_GET_SHORT();
-    loader->bbox.xMax = FT_GET_SHORT();
-    loader->bbox.yMax = FT_GET_SHORT();
+    loader->bbox.xMin = FT_NEXT_SHORT(p);
+    loader->bbox.yMin = FT_NEXT_SHORT(p);
+    loader->bbox.xMax = FT_NEXT_SHORT(p);
+    loader->bbox.yMax = FT_NEXT_SHORT(p);
 
     FT_TRACE5(( "  # of contours: %d\n", loader->n_contours ));
     FT_TRACE5(( "  xMin: %4d  xMax: %4d\n", loader->bbox.xMin,
@@ -224,7 +227,7 @@
                                             loader->bbox.xMax ));
     FT_TRACE5(( "  yMin: %4d  yMax: %4d\n", loader->bbox.yMin,
                                             loader->bbox.yMax ));
-    loader->byte_len = byte_len;
+    loader->cursor = p;
 
     return TT_Err_Ok;
   }
@@ -234,7 +237,8 @@
   TT_Load_Simple_Glyph( TT_Loader  load )
   {
     FT_Error        error;
-    FT_Stream       stream     = load->stream;
+    FT_Byte*        p          = load->cursor;
+    FT_Byte*        limit      = load->limit;
     FT_GlyphLoader  gloader    = load->gloader;
     FT_Int          n_contours = load->n_contours;
     FT_Outline*     outline;
@@ -241,7 +245,6 @@
     TT_Face         face       = (TT_Face)load->face;
     FT_UShort       n_ins;
     FT_Int          n, n_points;
-    FT_Int          byte_len   = load->byte_len;
 
     FT_Byte         *flag, *flag_limit;
     FT_Byte         c, count;
@@ -260,12 +263,11 @@
     cont_limit = cont + n_contours;
 
     /* check space for contours array + instructions count */
-    byte_len -= 2 * ( n_contours + 1 );
-    if ( byte_len < 0 )
+    if ( n_contours >= 0xFFF || p + (n_contours+1)*2 > limit )
       goto Invalid_Outline;
 
     for ( ; cont < cont_limit; cont++ )
-      cont[0] = FT_GET_USHORT();
+      cont[0] = FT_NEXT_USHORT(p);
 
     n_points = 0;
     if ( n_contours > 0 )
@@ -287,8 +289,11 @@
     load->glyph->control_len  = 0;
     load->glyph->control_data = 0;
 
-    n_ins = FT_GET_USHORT();
+    if ( p+2 > limit )
+      goto Invalid_Outline;
 
+    n_ins = FT_NEXT_USHORT(p);
+
     FT_TRACE5(( "  Instructions size: %u\n", n_ins ));
 
     if ( n_ins > face->max_profile.maxSizeOfInstructions )
@@ -298,8 +303,7 @@
       goto Fail;
     }
 
-    byte_len -= (FT_Int)n_ins;
-    if ( byte_len < 0 )
+    if ( (limit - p) < n_ins )
     {
       FT_TRACE0(( "TT_Load_Simple_Glyph: Instruction count mismatch!\n" ));
       error = TT_Err_Too_Many_Hints;
@@ -313,12 +317,12 @@
       load->glyph->control_len  = n_ins;
       load->glyph->control_data = load->exec->glyphIns;
 
-      FT_MEM_COPY( load->exec->glyphIns, stream->cursor, (FT_Long)n_ins );
+      FT_MEM_COPY( load->exec->glyphIns, p, (FT_Long)n_ins );
     }
 
 #endif /* TT_CONFIG_OPTION_BYTECODE_INTERPRETER */
 
-    stream->cursor += (FT_Int)n_ins;
+    p += n_ins;
 
     /* reading the point tags */
     flag       = (FT_Byte*)outline->tags;
@@ -328,16 +332,16 @@
 
     while ( flag < flag_limit )
     {
-      if ( --byte_len < 0 )
+      if ( p+1 > limit )
         goto Invalid_Outline;
 
-      *flag++ = c = FT_GET_BYTE();
+      *flag++ = c = FT_NEXT_BYTE(p);
       if ( c & 8 )
       {
-        if ( --byte_len < 0 )
+        if ( p+1 > limit )
           goto Invalid_Outline;
 
-        count = FT_GET_BYTE();
+        count = FT_NEXT_BYTE(p);
         if ( flag + (FT_Int)count > flag_limit )
           goto Invalid_Outline;
 
@@ -346,23 +350,6 @@
       }
     }
 
-    /* check that there is enough room to load the coordinates */
-    for ( flag = (FT_Byte*)outline->tags; flag < flag_limit; flag++ )
-    {
-      if ( *flag & 2 )
-        byte_len -= 1;
-      else if ( ( *flag & 16 ) == 0 )
-        byte_len -= 2;
-
-      if ( *flag & 4 )
-        byte_len -= 1;
-      else if ( ( *flag & 32 ) == 0 )
-        byte_len -= 2;
-    }
-
-    if ( byte_len < 0 )
-      goto Invalid_Outline;
-
     /* reading the X coordinates */
 
     vec       = outline->points;
@@ -377,13 +364,21 @@
 
       if ( *flag & 2 )
       {
-        y = (FT_Pos)FT_GET_BYTE();
+        if ( p+1 > limit )
+          goto Invalid_Outline;
+
+        y = (FT_Pos)FT_NEXT_BYTE(p);
         if ( ( *flag & 16 ) == 0 )
           y = -y;
       }
       else if ( ( *flag & 16 ) == 0 )
-        y = (FT_Pos)FT_GET_SHORT();
+      {
+        if ( p+2 > limit )
+          goto Invalid_Outline;
 
+        y = (FT_Pos)FT_NEXT_SHORT(p);
+      }
+
       x     += y;
       vec->x = x;
     }
@@ -402,13 +397,21 @@
 
       if ( *flag & 4 )
       {
-        y = (FT_Pos)FT_GET_BYTE();
+        if ( p+1 > limit )
+          goto Invalid_Outline;
+
+        y = (FT_Pos)FT_NEXT_BYTE(p);
         if ( ( *flag & 32 ) == 0 )
           y = -y;
       }
       else if ( ( *flag & 32 ) == 0 )
-        y = (FT_Pos)FT_GET_SHORT();
+      {
+        if ( p+2 > limit )
+          goto Invalid_Outline;
 
+        y = (FT_Pos)FT_NEXT_SHORT(p);
+      }
+
       x     += y;
       vec->y = x;
     }
@@ -420,7 +423,7 @@
     outline->n_points   = (FT_UShort)n_points;
     outline->n_contours = (FT_Short) n_contours;
 
-    load->byte_len = byte_len;
+    load->cursor = p;
 
   Fail:
     return error;
@@ -435,11 +438,11 @@
   TT_Load_Composite_Glyph( TT_Loader  loader )
   {
     FT_Error        error;
-    FT_Stream       stream  = loader->stream;
+    FT_Byte*        p       = loader->cursor;
+    FT_Byte*        limit   = loader->limit;
     FT_GlyphLoader  gloader = loader->gloader;
     FT_SubGlyph     subglyph;
     FT_UInt         num_subglyphs;
-    FT_Int          byte_len = loader->byte_len;
 
 
     num_subglyphs = 0;
@@ -447,6 +450,7 @@
     do
     {
       FT_Fixed  xx, xy, yy, yx;
+      FT_UInt   count;
 
 
       /* check that we can load a new subglyph */
@@ -455,8 +459,7 @@
         goto Fail;
 
       /* check space */
-      byte_len -= 4;
-      if ( byte_len < 0 )
+      if ( p+4 > limit )
         goto Invalid_Composite;
 
       subglyph = gloader->current.subglyphs + num_subglyphs;
@@ -463,33 +466,33 @@
 
       subglyph->arg1 = subglyph->arg2 = 0;
 
-      subglyph->flags = FT_GET_USHORT();
-      subglyph->index = FT_GET_USHORT();
+      subglyph->flags = FT_NEXT_USHORT(p);
+      subglyph->index = FT_NEXT_USHORT(p);
 
       /* check space */
-      byte_len -= 2;
+      count = 2;
       if ( subglyph->flags & ARGS_ARE_WORDS )
-        byte_len -= 2;
+        count += 2;
       if ( subglyph->flags & WE_HAVE_A_SCALE )
-        byte_len -= 2;
+        count += 2;
       else if ( subglyph->flags & WE_HAVE_AN_XY_SCALE )
-        byte_len -= 4;
+        count += 4;
       else if ( subglyph->flags & WE_HAVE_A_2X2 )
-        byte_len -= 8;
+        count += 8;
 
-      if ( byte_len < 0 )
+      if ( p + count > limit )
         goto Invalid_Composite;
 
       /* read arguments */
       if ( subglyph->flags & ARGS_ARE_WORDS )
       {
-        subglyph->arg1 = FT_GET_SHORT();
-        subglyph->arg2 = FT_GET_SHORT();
+        subglyph->arg1 = FT_NEXT_SHORT(p);
+        subglyph->arg2 = FT_NEXT_SHORT(p);
       }
       else
       {
-        subglyph->arg1 = FT_GET_CHAR();
-        subglyph->arg2 = FT_GET_CHAR();
+        subglyph->arg1 = FT_NEXT_CHAR(p);
+        subglyph->arg2 = FT_NEXT_CHAR(p);
       }
 
       /* read transform */
@@ -498,20 +501,20 @@
 
       if ( subglyph->flags & WE_HAVE_A_SCALE )
       {
-        xx = (FT_Fixed)FT_GET_SHORT() << 2;
+        xx = (FT_Fixed)FT_NEXT_SHORT(p) << 2;
         yy = xx;
       }
       else if ( subglyph->flags & WE_HAVE_AN_XY_SCALE )
       {
-        xx = (FT_Fixed)FT_GET_SHORT() << 2;
-        yy = (FT_Fixed)FT_GET_SHORT() << 2;
+        xx = (FT_Fixed)FT_NEXT_SHORT(p) << 2;
+        yy = (FT_Fixed)FT_NEXT_SHORT(p) << 2;
       }
       else if ( subglyph->flags & WE_HAVE_A_2X2 )
       {
-        xx = (FT_Fixed)FT_GET_SHORT() << 2;
-        yx = (FT_Fixed)FT_GET_SHORT() << 2;
-        xy = (FT_Fixed)FT_GET_SHORT() << 2;
-        yy = (FT_Fixed)FT_GET_SHORT() << 2;
+        xx = (FT_Fixed)FT_NEXT_SHORT(p) << 2;
+        yx = (FT_Fixed)FT_NEXT_SHORT(p) << 2;
+        xy = (FT_Fixed)FT_NEXT_SHORT(p) << 2;
+        yy = (FT_Fixed)FT_NEXT_SHORT(p) << 2;
       }
 
       subglyph->transform.xx = xx;
@@ -533,12 +536,12 @@
       /* we will process them later...                          */
       /*                                                        */
       loader->ins_pos = (FT_ULong)( FT_STREAM_POS() +
-                                    stream->cursor - stream->limit );
+                                    p - limit );
     }
 
 #endif
 
-    loader->byte_len = byte_len;
+    loader->cursor = p;
 
   Fail:
     return error;
--- a/src/type1/t1gload.c
+++ b/src/type1/t1gload.c
@@ -310,11 +310,14 @@
 
 #if 1
         /* apply the font matrix, if any */
-        FT_Outline_Transform( &glyph->root.outline, &font_matrix );
+        if ( font_matrix.xx != 0x10000L || font_matrix.yy != font_matrix.xx ||
+             font_matrix.xy != 0        || font_matrix.yx != 0              )
+          FT_Outline_Transform( &glyph->root.outline, &font_matrix );
 
-        FT_Outline_Translate( &glyph->root.outline,
-                              font_offset.x,
-                              font_offset.y );
+        if ( font_offset.x || font_offset.y )
+          FT_Outline_Translate( &glyph->root.outline,
+                                font_offset.x,
+                                font_offset.y );
 
         advance.x = metrics->horiAdvance;
         advance.y = 0;