ref: 89929ec6b961456dba8fe3126815a0d49e2e9cb8
parent: f66d48e923c077807f301a46532390478b256eee
author: David Turner <[email protected]>
date: Tue Jul 16 09:36:07 EDT 2013
[smooth] Improve performance. Provide a work-around for an ARM-specific performance bug in GCC. This speeds up the rasterizer by more than 5%. Also slightly optimize `gray_set_cell' and `gray_record_cell' (which also improves performance on other platforms by a tiny bit (<1%)). * src/smooth/ftgrays.c (FT_DIV_MOD): New macro. Use it where appropriate. (gray_record_cell, gray_set_cell, gray_move_to, gray_convert_glyph_inner): Streamline condition handling.
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,21 @@
2013-07-16 David Turner <[email protected]>
+ [smooth] Improve performance.
+
+ Provide a work-around for an ARM-specific performance bug in GCC.
+ This speeds up the rasterizer by more than 5%.
+
+ Also slightly optimize `gray_set_cell' and `gray_record_cell' (which
+ also improves performance on other platforms by a tiny bit (<1%)).
+
+ * src/smooth/ftgrays.c (FT_DIV_MOD): New macro.
+ Use it where appropriate.
+
+ (gray_record_cell, gray_set_cell, gray_move_to,
+ gray_convert_glyph_inner): Streamline condition handling.
+
+2013-07-16 David Turner <[email protected]>
+
[truetype] Add assembler code for TT_MulFix14 and TT_DotFix14.
This patch provides slightly optimized versions for ARM, x86, and
--- a/src/smooth/ftgrays.c
+++ b/src/smooth/ftgrays.c
@@ -310,6 +310,40 @@
#endif
+ /* Compute `dividend / divisor' and return both its quotient and */
+ /* remainder, cast to a specific type. This macro also ensures that */
+ /* the remainder is never negative (for a positive divisor). */
+#define FT_DIV_MOD( type, dividend, divisor, quotient, remainder ) \
+ FT_BEGIN_STMNT \
+ (quotient) = (type)( (dividend) / (divisor) ); \
+ (remainder) = (type)( (dividend) % (divisor) ); \
+ if ( (remainder) < 0 ) \
+ { \
+ (quotient)--; \
+ (remainder) += (type)(divisor); \
+ } \
+ FT_END_STMNT
+
+#ifdef __arm__
+ /* Work around a bug specific to GCC which makes the compiler fail to */
+ /* optimize a division and modulo operation on the same parameters */
+ /* into a single call to `__aeabi_idivmod'. See */
+ /* */
+ /* http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43721 */
+#undef FT_DIV_MOD
+#define FT_DIV_MOD( type, dividend, divisor, quotient, remainder ) \
+ FT_BEGIN_STMNT \
+ (quotient) = (type)( (dividend) / (divisor) ); \
+ (remainder) = (type)( (dividend) - (quotient) * (divisor) ); \
+ if ( (remainder) < 0 ) \
+ { \
+ (quotient)--; \
+ (remainder) += (type)(divisor); \
+ } \
+ FT_END_STMNT
+#endif /* __arm__ */
+
+
/*************************************************************************/
/* */
/* TYPE DEFINITIONS */
@@ -548,7 +582,7 @@
static void
gray_record_cell( RAS_ARG )
{
- if ( !ras.invalid && ( ras.area | ras.cover ) )
+ if ( ras.area | ras.cover )
{
PCell cell = gray_find_cell( RAS_VAR );
@@ -597,12 +631,12 @@
ras.area = 0;
ras.cover = 0;
- }
+ ras.ex = ex;
+ ras.ey = ey;
- ras.ex = ex;
- ras.ey = ey;
- ras.invalid = ( (unsigned)ey >= (unsigned)ras.count_ey ||
- ex >= ras.count_ex );
+ ras.invalid = ( (unsigned)ey >= (unsigned)ras.count_ey ||
+ ex >= ras.count_ex );
+ }
}
@@ -686,13 +720,7 @@
dx = -dx;
}
- delta = (TCoord)( p / dx );
- mod = (TCoord)( p % dx );
- if ( mod < 0 )
- {
- delta--;
- mod += (TCoord)dx;
- }
+ FT_DIV_MOD( TCoord, p, dx, delta, mod );
ras.area += (TArea)(( fx1 + first ) * delta);
ras.cover += delta;
@@ -706,14 +734,8 @@
TCoord lift, rem;
- p = ONE_PIXEL * ( y2 - y1 + delta );
- lift = (TCoord)( p / dx );
- rem = (TCoord)( p % dx );
- if ( rem < 0 )
- {
- lift--;
- rem += (TCoord)dx;
- }
+ p = ONE_PIXEL * ( y2 - y1 + delta );
+ FT_DIV_MOD( TCoord, p, dx, lift, rem );
mod -= (int)dx;
@@ -763,9 +785,6 @@
dx = to_x - ras.x;
dy = to_y - ras.y;
- /* XXX: we should do something about the trivial case where dx == 0, */
- /* as it happens very often! */
-
/* perform vertical clipping */
{
TCoord min, max;
@@ -844,13 +863,7 @@
dy = -dy;
}
- delta = (int)( p / dy );
- mod = (int)( p % dy );
- if ( mod < 0 )
- {
- delta--;
- mod += (TCoord)dy;
- }
+ FT_DIV_MOD( int, p, dy, delta, mod );
x = ras.x + delta;
gray_render_scanline( RAS_VAR_ ey1, ras.x, fy1, x, (TCoord)first );
@@ -861,13 +874,7 @@
if ( ey1 != ey2 )
{
p = ONE_PIXEL * dx;
- lift = (int)( p / dy );
- rem = (int)( p % dy );
- if ( rem < 0 )
- {
- lift--;
- rem += (int)dy;
- }
+ FT_DIV_MOD( int, p, dy, lift, rem );
mod -= (int)dy;
while ( ey1 != ey2 )
@@ -1171,7 +1178,8 @@
/* record current cell, if any */
- gray_record_cell( RAS_VAR );
+ if ( !ras.invalid )
+ gray_record_cell( RAS_VAR );
/* start to a new position */
x = UPSCALE( to->x );
@@ -1781,7 +1789,8 @@
if ( ft_setjmp( ras.jump_buffer ) == 0 )
{
error = FT_Outline_Decompose( &ras.outline, &func_interface, &ras );
- gray_record_cell( RAS_VAR );
+ if ( !ras.invalid )
+ gray_record_cell( RAS_VAR );
}
else
error = FT_THROW( Memory_Overflow );