shithub: freetype+ttf2subf

Download patch

ref: 23ce5d4575c4802ea05d9eaee4da526f802ab3b8
parent: 2c2b3734e7295c31572cd272d23498f3d50f580c
author: David Turner <[email protected]>
date: Fri Feb 22 13:28:11 EST 2002

adding new & experimental charmap processor

git/fs: mount .git/fs: mount/attach disallowed
--- /dev/null
+++ b/src/sfnt/ttcmap0.c
@@ -1,0 +1,1248 @@
+/***************************************************************************/
+/*                                                                         */
+/*  ttcmap0.c                                                              */
+/*                                                                         */
+/*    TrueType character mapping table (cmap) support (body).              */
+/*                                                                         */
+/*  Copyright 1996-2001 by                                                 */
+/*  David Turner, Robert Wilhelm, and Werner Lemberg.                      */
+/*                                                                         */
+/*  This file is part of the FreeType project, and may only be used,       */
+/*  modified, and distributed under the terms of the FreeType project      */
+/*  license, LICENSE.TXT.  By continuing to use, modify, or distribute     */
+/*  this file you indicate that you have read the license and              */
+/*  understand and accept it fully.                                        */
+/*                                                                         */
+/***************************************************************************/
+
+
+#include <ft2build.h>
+#include FT_INTERNAL_DEBUG_H
+#include "ttload.h"
+#include "ttcmap.h"
+
+#include "sferrors.h"
+
+  /*************************************************************************/
+  /*                                                                       */
+  /* The macro FT_COMPONENT is used in trace mode.  It is an implicit      */
+  /* parameter of the FT_TRACE() and FT_ERROR() macros, used to print/log  */
+  /* messages during execution.                                            */
+  /*                                                                       */
+#undef  FT_COMPONENT
+#define FT_COMPONENT  trace_ttcmap
+
+
+
+  /* file-local shorthands for the FT_ big-endian integer readers;       */
+  /* presumably the PEEK forms read at the cursor without moving it and  */
+  /* the NEXT forms read and then advance it -- TODO confirm against the */
+  /* FT_ stream macro definitions                                        */
+#define  TT_PEEK_Short   FT_PEEK_SHORT
+#define  TT_PEEK_UShort  FT_PEEK16_UBE
+#define  TT_PEEK_Long    FT_PEEK32_BE
+#define  TT_PEEK_ULong   FT_PEEK32_UBE
+
+#define  TT_NEXT_Short   FT_NEXT_SHORT_BE
+#define  TT_NEXT_UShort  FT_NEXT_USHORT_BE
+#define  TT_NEXT_Long    FT_NEXT_LONG_BE
+#define  TT_NEXT_ULong   FT_NEXT_ULONG_BE
+
+ /************************************************************************/
+ /************************************************************************/
+ /*****                                                              *****/
+ /*****                          FORMAT 0                            *****/
+ /*****                                                              *****/
+ /************************************************************************/
+ /************************************************************************/
+
+ /*************************************************************************
+  *
+  *  TABLE OVERVIEW:
+  *  ---------------
+  *
+  *    NAME        OFFSET         TYPE          DESCRIPTION
+  *
+  *    format      0              USHORT        must be 0
+  *    length      2              USHORT        table length in bytes
+  *    language    4              USHORT        Mac language code
+  *    glyph_ids   6              BYTE[256]     array of glyph indices
+  *                262
+  */
+
+#ifdef TT_CONFIG_CMAP_FORMAT_0
+
+  /*
+   *  Validate a format 0 sub-table.
+   *
+   *    table :: start of the sub-table (the 'format' field)
+   *    valid :: validator; supplies `limit', `level' and `num_glyphs'
+   *
+   *  Signals TOO_SHORT when the declared length is below 262 bytes or
+   *  runs past `valid->limit'.  From FT_VALIDATE_TIGHT upwards, each of
+   *  the 256 glyph ids must be smaller than `valid->num_glyphs'.
+   */
+  static void
+  tt_cmap0_validate( FT_Byte*      table,
+                     FT_Validator  valid )
+  {
+    FT_Byte*  p;
+    FT_UInt   length;
+
+
+    /* the 'length' field itself must be readable */
+    if ( table + 4 > valid->limit )
+      TOO_SHORT;
+
+    p      = table + 2;  /* skip format */
+    length = TT_NEXT_UShort(p);
+
+    if ( table + length > valid->limit || length < 262 )
+      TOO_SHORT;
+
+    /* check glyph indices whenever necessary */
+    if ( valid->level >= FT_VALIDATE_TIGHT )
+    {
+      FT_UInt  n, idx;
+
+
+      /* the glyph id array starts at offset 6, after the 'language' */
+      /* field; `p' still points at 'language' here                  */
+      p = table + 6;
+
+      for ( n = 0; n < 256; n++ )
+      {
+        idx = *p++;
+        if ( idx >= valid->num_glyphs )
+          INVALID_DATA;
+      }
+    }
+  }
+
+
+  /*
+   *  Look up `char_code' in a format 0 sub-table.
+   *
+   *  Codes 0..255 index the byte array found at offset 6 of `table';
+   *  any larger code yields 0.
+   */
+  static FT_UInt
+  tt_cmap0_char_index( FT_Byte*   table,
+                       FT_ULong   char_code )
+  {
+    if ( char_code >= 256 )
+      return 0;
+
+    return table[6 + char_code];
+  }
+
+
+  /*
+   *  Find the first char code strictly greater than `char_code' whose
+   *  entry in the format 0 glyph id array is not 0.
+   *
+   *  Returns that char code and stores its glyph id in `*agindex' (when
+   *  `agindex' is not NULL).  If there is none below 256, returns 0 and
+   *  stores 0.
+   */
+  static FT_ULong
+  tt_cmap0_char_next( FT_Byte*  table,
+                      FT_ULong  char_code,
+                      FT_UInt   *agindex )
+  {
+    const FT_Byte*  glyph_ids  = table + 6;  /* go to glyph ids */
+    FT_ULong        found_code = 0;
+    FT_UInt         found_gid  = 0;
+    FT_ULong        code;
+
+
+    for ( code = char_code + 1; code < 256; code++ )
+    {
+      if ( glyph_ids[code] != 0 )
+      {
+        found_gid  = glyph_ids[code];
+        found_code = code;
+        break;
+      }
+    }
+
+    if ( agindex )
+      *agindex = found_gid;
+
+    return found_code;
+  }
+
+  /* method table for format 0; the initializers are, in order, the */
+  /* validator, the char-code lookup and the next-char-code iterator */
+  static const TT_Cmap_ClassRec  tt_cmap0_class_rec =
+  {
+    (TT_CMap_ValidateFunc)  tt_cmap0_validate,
+    (TT_CMap_CharIndexFunc) tt_cmap0_char_index,
+    (TT_CMap_CharNextFunc)  tt_cmap0_char_next
+  };
+
+#endif /* TT_CONFIG_CMAP_FORMAT_0 */
+
+
+ /************************************************************************/
+ /************************************************************************/
+ /*****                                                              *****/
+ /*****                          FORMAT 2                            *****/
+ /*****                                                              *****/
+ /*****  This is used for certain CJK encodings that encode text     *****/
+ /*****  in a mixed 8/16 bits along the following lines:             *****/
+ /*****                                                              *****/
+ /*****  * certain byte values correspond to an 8-bit character code *****/
+ /*****    (typically in the range 0..127 for ASCII compatibility)   *****/
+ /*****                                                              *****/
+ /*****  * certain byte values signal the first byte of a 2-byte     *****/
+ /*****    character code (but these values are also valid as the    *****/
+ /*****    second byte of a 2-byte character)                        *****/
+ /*****                                                              *****/
+ /*****  the following charmap lookup and iteration functions all    *****/
+ /*****  assume that "charcode" corresponds to the following:        *****/
+ /*****                                                              *****/
+ /*****     - for one byte characters, "charcode" is simply the      *****/
+ /*****       character code                                         *****/
+ /*****                                                              *****/
+ /*****     - for two byte characters, "charcode" is the 2-byte      *****/
+ /*****       character code in big endian format. More exactly:     *****/
+ /*****                                                              *****/
+ /*****           (charcode >> 8)    is the first byte value         *****/
+ /*****           (charcode & 0xFF)  is the second byte value        *****/
+ /*****                                                              *****/
+ /*****   note that not all values of "charcode" are valid           *****/
+ /*****   according to these rules, and the functions moderately     *****/
+ /*****   check their arguments.                                     *****/
+ /*****                                                              *****/
+ /************************************************************************/
+ /************************************************************************/
+
+ /*************************************************************************
+  *
+  *  TABLE OVERVIEW:
+  *  ---------------
+  *
+  *    NAME        OFFSET         TYPE            DESCRIPTION
+  *
+  *    format      0              USHORT          must be 2
+  *    length      2              USHORT          table length in bytes
+  *    language    4              USHORT          Mac language code
+  *    keys        6              USHORT[256]     sub-header keys
+  *    subs        518            SUBHEAD[NSUBS]  sub-headers array
+  *    glyph_ids   518+NSUB*8     USHORT[]        glyph id array
+  *    
+  *   the 'keys' table is used to map charcode high-bytes to sub-headers.
+  *   the value of 'NSUBS' is the number of sub-headers defined in the
+  *   table and is computed by finding the maximum of the 'keys' table.
+  *
+  *   note that for any N, keys[n] is a byte offset within the subs table,
+  *   i.e. it is the corresponding sub-header index multiplied by 8.
+  *
+  *   each sub-header has the following format:
+  *
+  *    NAME        OFFSET         TYPE            DESCRIPTION
+  *
+  *    first       0              USHORT          first valid low-byte
+  *    count       2              USHORT          number of valid low-bytes
+  *    delta       4              SHORT           see below
+  *    offset      6              USHORT          see below
+  *
+  *  a sub-header defines, for each high-byte, the range of valid low-bytes
+  *  within the charmap. note that the range defined by 'first' and 'count'
+  *  must be completely included in the interval [0..255] according to the
+  *  specification
+  *
+  *  if a character code is contained within a given sub-header, then mapping
+  *  it to a glyph index is done as follows:
+  *
+  *  * the value of 'offset' is read. this is a _byte_ distance from the
+  *    location of the 'offset' field itself into a slice of the 'glyph_ids'
+  *    table. Let's call it 'slice' (it's a USHORT[] too)
+  *
+  *  * the value 'slice[ char.lo - first ]' is read. If it is 0, there is
+  *    no glyph for the charcode. Otherwise, the value of 'delta' is added
+  *    to it (modulo 65536) to form a new glyph index
+  *
+  *  it is up to the validation routine to check that all offsets fall within
+  *  the glyph ids table (and not within the 'subs' table itself or outside
+  *  of the CMap).
+  */
+
+#ifdef TT_CONFIG_CMAP_FORMAT_2
+
+  /*
+   *  Validate a format 2 sub-table.
+   *
+   *    table :: start of the sub-table (the 'format' field)
+   *    valid :: validator; supplies `limit', `level' and `num_glyphs'
+   *
+   *  Checks, in order: the declared length, the 256 sub-header keys
+   *  (multiples of 8 at PARANOID level), that all sub-headers fit in the
+   *  table, and for every sub-header that its [first, first+count) range
+   *  stays within 0..255 (PARANOID), that its glyph id slice lies inside
+   *  the glyph id array, and that each mapped glyph id is below
+   *  `valid->num_glyphs' (TIGHT).
+   */
+  static void
+  tt_cmap2_validate( FT_Byte*      table,
+                     FT_Validator  valid )
+  {
+    FT_Byte*  p;
+    FT_UInt   length;
+    FT_UInt   n, max_subs = 0;
+    FT_Byte*  keys;        /* keys table */
+    FT_Byte*  subs;        /* sub-headers */
+    FT_Byte*  glyph_ids;   /* glyph id array */
+
+
+    /* the 'length' field itself must be readable */
+    if ( table + 4 > valid->limit )
+      TOO_SHORT;
+
+    p      = table + 2;  /* skip format */
+    length = TT_PEEK_UShort(p);
+
+    if ( table + length > valid->limit || length < 6+512 )
+      TOO_SHORT;
+
+    keys = table + 6;
+
+    /* parse keys to compute sub-headers count */
+    p = keys;
+    for ( n = 0; n < 256; n++ )
+    {
+      FT_UInt  idx = TT_NEXT_UShort(p);
+
+
+      /* value must be multiple of 8 */
+      if ( valid->level >= FT_VALIDATE_PARANOID && ( idx & 7 ) != 0 )
+        INVALID_DATA;
+
+      idx >>= 3;
+
+      if ( idx > max_subs )
+        max_subs = idx;
+    }
+
+    subs      = p;
+    glyph_ids = subs + (max_subs + 1)*8;
+
+    /* all sub-headers must lie within the declared table length */
+    if ( glyph_ids > table + length )
+      TOO_SHORT;
+
+    /* parse sub-headers */
+    for ( n = 0; n <= max_subs; n++ )
+    {
+      FT_UInt   first_code, code_count, offset;
+      FT_Int    delta;
+      FT_Byte*  ids;
+
+
+      first_code = TT_NEXT_UShort(p);
+      code_count = TT_NEXT_UShort(p);
+      delta      = TT_NEXT_Short(p);
+      offset     = TT_NEXT_UShort(p);
+
+      /* check range within 0..255 */
+      if ( valid->level >= FT_VALIDATE_PARANOID )
+      {
+        if ( first_code >= 256 || first_code + code_count > 256 )
+          INVALID_DATA;
+      }
+
+      /* check offset */
+      if ( offset != 0 )
+      {
+        /* `offset' is relative to the 'offset' field, now at p - 2 */
+        ids = p - 2 + offset;
+        if ( ids < glyph_ids || ids + code_count*2 > table + length )
+          INVALID_DATA;
+
+        /* check glyph ids; walk the slice with `ids' so that `p' */
+        /* keeps pointing at the next sub-header                  */
+        if ( valid->level >= FT_VALIDATE_TIGHT )
+        {
+          FT_Byte*  limit = ids + code_count*2;
+          FT_UInt   idx;
+
+
+          while ( ids < limit )
+          {
+            idx = TT_NEXT_UShort(ids);
+            if ( idx != 0 )
+            {
+              idx = (idx + delta) & 0xFFFFU;
+              if ( idx >= valid->num_glyphs )
+                INVALID_GLYPH_ID;
+            }
+          }
+        }
+      }
+    }
+  }
+
+
+  /*
+   *  Return the sub-header that applies to `char_code' in a format 2
+   *  table, or NULL when the char code is not valid for this table:
+   *
+   *  - codes above 0xFFFF are rejected;
+   *  - a one-byte code (high byte 0) is accepted only if keys[low byte]
+   *    is 0, and sub-header 0 is returned;
+   *  - a two-byte code is accepted only if keys[high byte] is not 0,
+   *    i.e. the high byte does not select sub-header 0.
+   */
+  static FT_Byte*
+  tt_cmap2_get_subheader( FT_Byte*  table,
+                          FT_ULong  char_code )
+  {
+    FT_Byte*  result = NULL;
+
+
+    if ( char_code < 0x10000 )
+    {
+      FT_UInt   char_lo = (FT_UInt)( char_code & 0xFF );
+      FT_UInt   char_hi = (FT_UInt)( char_code >> 8 );
+      FT_Byte*  p       = table + 6;  /* keys table */
+      FT_Byte*  subs    = p + 512;    /* subheaders table */
+      FT_Byte*  sub;
+
+
+      if ( char_hi == 0 )
+      {
+        /* an 8-bit character code -- we use subHeader 0 in this case */
+        /* to test whether the character code is in the charmap       */
+        sub = subs;  /* jump to first sub-header */
+
+        /* the key for this byte must be 0, which indicates that it */
+        /* is really a valid one-byte value; otherwise return NULL  */
+        p += char_lo*2;
+        if ( TT_PEEK_UShort(p) != 0 )
+          goto Exit;
+      }
+      else
+      {
+        /* a 16-bit character code */
+        p  += char_hi*2;                     /* jump to key entry  */
+        sub = subs + TT_PEEK_UShort(p);      /* jump to sub-header */
+
+        /* check that the hi byte isn't a valid one-byte value */
+        if ( sub == subs )
+          goto Exit;
+      }
+      result = sub;
+    }
+
+  Exit:
+    return result;
+  }
+
+
+  /*
+   *  Look up `char_code' in a format 2 sub-table.
+   *
+   *  Returns 0 when the code has no sub-header, lies outside the
+   *  sub-header's [first, first+count) range, the sub-header's offset is
+   *  0, or the glyph id slice entry is 0.  Otherwise returns the slice
+   *  entry plus `delta', modulo 65536.
+   */
+  static FT_UInt
+  tt_cmap2_char_index( FT_Byte*   table,
+                       FT_ULong   char_code )
+  {
+    FT_UInt   result = 0;
+    FT_Byte*  subheader;
+
+
+    subheader = tt_cmap2_get_subheader( table, char_code );
+    if ( subheader )
+    {
+      FT_Byte*  p   = subheader;
+      FT_UInt   idx = (FT_UInt)( char_code & 0xFF );
+      FT_UInt   start, count, offset;
+      FT_Int    delta;
+
+
+      start  = TT_NEXT_UShort(p);
+      count  = TT_NEXT_UShort(p);
+      delta  = TT_NEXT_Short(p);
+      offset = TT_PEEK_UShort(p);   /* `p' stays on the 'offset' field */
+
+      /* unsigned wrap-around makes `idx < count' fail for idx < start */
+      idx -= start;
+      if ( idx < count && offset != 0 )
+      {
+        p  += offset + 2*idx;
+        idx = TT_PEEK_UShort(p);
+
+        if ( idx != 0 )
+          result = (FT_UInt)( idx + delta ) & 0xFFFFU;
+      }
+    }
+
+    return result;
+  }
+
+
+  /*
+   *  Return the first char code mapped by a format 2 sub-header.
+   *
+   *    subheader :: start of the 8-byte sub-header
+   *    char_hi   :: high byte the sub-header is used for
+   *    agindex   :: if not NULL, receives the glyph index (0 if none)
+   *
+   *  Returns `char_hi*256 + low byte' of the first slice entry that is
+   *  not 0, or 0 if there is none.
+   *
+   *  NOTE(review): for `offset == 0' this computes `start + delta'
+   *  directly, whereas tt_cmap2_char_index and tt_cmap2_char_next treat
+   *  an offset of 0 as "no mapping" -- confirm which one is intended.
+   */
+  static FT_ULong
+  tt_cmap2_subheader_first( FT_Byte*  subheader,
+                            FT_UInt   char_hi,
+                            FT_UInt  *agindex )
+  {
+    FT_ULong  result = 0;
+    FT_UInt   gindex = 0;
+    FT_Byte*  p      = subheader;
+
+    FT_UInt   start  = TT_NEXT_UShort(p);
+    FT_UInt   count  = TT_NEXT_UShort(p);
+
+
+    if ( count > 0 )
+    {
+      FT_Int    delta  = TT_NEXT_Short(p);
+      FT_UInt   offset = TT_NEXT_UShort(p);
+
+
+      if ( offset == 0 )
+      {
+        /* simple difference, compute directly */
+        result = char_hi*256 + start;
+        gindex = (FT_UInt)( start + delta ) & 0xFFFFU;
+      }
+      else
+      {
+        FT_UInt  i, idx;
+
+
+        /* `offset' is relative to the 'offset' field, now at p - 2; */
+        /* parse the glyph id slice for the first non-0 entry        */
+        p += offset - 2;
+        for ( i = 0; i < count; i++ )
+        {
+          idx = TT_NEXT_UShort(p);
+          if ( idx != 0 )
+          {
+            result = char_hi*256 + start + i;
+            gindex = (FT_UInt)( idx + delta ) & 0xFFFFU;
+            break;
+          }
+        }
+      }
+    }
+
+    if ( agindex )
+      *agindex = gindex;
+
+    return result;
+  }
+
+
+  /*
+   *  Find the first char code strictly greater than `char_code' that a
+   *  format 2 sub-table maps to a non-0 glyph index.
+   *
+   *  Returns that char code and stores its glyph index in `*agindex'
+   *  (when `agindex' is not NULL); returns 0 and stores 0 if there is
+   *  none below 0x10000.
+   *
+   *  The return type is FT_ULong like the other `char_next' functions in
+   *  this file, since all of them are installed through the same
+   *  TT_CMap_CharNextFunc cast.
+   *
+   *  NOTE(review): when tt_cmap2_get_subheader rejects a one-byte code
+   *  the scan resumes at 0x100, so later one-byte codes are not visited
+   *  -- confirm whether that is intended.
+   */
+  static FT_ULong
+  tt_cmap2_char_next( FT_Byte*  table,
+                      FT_ULong  char_code,
+                      FT_UInt   *agindex )
+  {
+    FT_ULong  result = 0;
+    FT_UInt   gindex = 0;
+
+
+    ++char_code;
+    while ( char_code < 0x10000UL )
+    {
+      /* remember the high byte now, so that moving to the next one */
+      /* does not depend on how far the low-byte scan went          */
+      FT_ULong  char_base = char_code & ~(FT_ULong)0xFF;
+      FT_Byte*  subheader = tt_cmap2_get_subheader( table, char_code );
+
+
+      if ( subheader )
+      {
+        FT_Byte*  p       = subheader;
+        FT_UInt   start   = TT_NEXT_UShort(p);
+        FT_UInt   count   = TT_NEXT_UShort(p);
+        FT_Int    delta   = TT_NEXT_Short(p);
+        FT_UInt   offset  = TT_PEEK_UShort(p);
+        FT_UInt   char_lo = (FT_UInt)( char_code & 0xFF );
+        FT_UInt   pos, idx, mapped;
+
+
+        /* an offset of 0 means this sub-header maps nothing */
+        if ( offset != 0 )
+        {
+          pos = ( char_lo < start ) ? 0 : (FT_UInt)( char_lo - start );
+
+          /* `p' is on the 'offset' field, which `offset' is relative to */
+          p += offset + pos*2;
+
+          for ( ; pos < count; pos++ )
+          {
+            idx = TT_NEXT_UShort(p);
+            if ( idx == 0 )
+              continue;
+
+            mapped = ( idx + delta ) & 0xFFFFU;
+            if ( mapped != 0 )
+            {
+              gindex = mapped;
+              result = char_base + start + pos;
+              goto Exit;
+            }
+          }
+        }
+      }
+
+      /* jump to next sub-header, i.e. higher byte value */
+      char_code = char_base + 256;
+    }
+
+  Exit:
+    if ( agindex )
+      *agindex = gindex;
+
+    return result;
+  }
+
+  /* method table for format 2; the initializers are, in order, the */
+  /* validator, the char-code lookup and the next-char-code iterator */
+  static const TT_Cmap_ClassRec  tt_cmap2_class_rec =
+  {
+    (TT_CMap_ValidateFunc)  tt_cmap2_validate,
+    (TT_CMap_CharIndexFunc) tt_cmap2_char_index,
+    (TT_CMap_CharNextFunc)  tt_cmap2_char_next
+  };
+
+#endif /* TT_CONFIG_CMAP_FORMAT_2 */
+
+
+ /************************************************************************/
+ /************************************************************************/
+ /*****                                                              *****/
+ /*****                          FORMAT 4                            *****/
+ /*****                                                              *****/
+ /************************************************************************/
+ /************************************************************************/
+
+#ifdef TT_CONFIG_CMAP_FORMAT_4
+
+  /*
+   *  Validate a format 4 sub-table.
+   *
+   *    table :: start of the sub-table (the 'format' field)
+   *    valid :: validator; supplies `limit' and `level'
+   *
+   *  Layout used below (byte offsets from `table'): segCountX2 at 6,
+   *  search parameters at 8..13, end codes at 14, one reserved USHORT,
+   *  then start codes, deltas, range offsets and the glyph id array.
+   *
+   *  Checks the declared length, that the four parallel arrays fit, that
+   *  segments are sorted and do not overlap, and that each non-0 range
+   *  offset points to a slice lying inside the glyph id array.  At
+   *  PARANOID level it also checks segCountX2 for evenness, the search
+   *  parameters, and that the last end code is 0xFFFF.
+   */
+  static void
+  tt_cmap4_validate( FT_Byte*      table,
+                     FT_Validator  valid )
+  {
+    FT_Byte*  p;
+    FT_UInt   length;
+    FT_Byte   *ends, *starts, *offsets, *glyph_ids;
+    FT_UInt   num_segs;
+
+
+    /* the 'length' field itself must be readable */
+    if ( table + 4 > valid->limit )
+      TOO_SHORT;
+
+    p      = table + 2;  /* skip format */
+    length = TT_NEXT_UShort(p);
+
+    if ( table + length > valid->limit || length < 16 )
+      TOO_SHORT;
+
+    p += 2;  /* skip language */
+
+    num_segs = TT_NEXT_UShort(p);   /* read segCountX2 */
+
+    if ( valid->level >= FT_VALIDATE_PARANOID )
+    {
+      /* check that we have an even value here */
+      if ( num_segs & 1 )
+        INVALID_DATA;
+    }
+
+    num_segs /= 2;
+
+    /* check the search parameters - even though we never use them */
+    if ( valid->level >= FT_VALIDATE_PARANOID )
+    {
+      /* check the values of 'searchRange', 'entrySelector', 'rangeShift' */
+      FT_UInt  search_range   = TT_NEXT_UShort(p);
+      FT_UInt  entry_selector = TT_NEXT_UShort(p);
+      FT_UInt  range_shift    = TT_NEXT_UShort(p);
+
+
+      if ( (search_range | range_shift) & 1 )  /* must be even values */
+        INVALID_DATA;
+
+      search_range /= 2;
+      range_shift  /= 2;
+
+      /* 'search range' is the greatest power of 2 that is <= num_segs; */
+      /* `entry_selector' is bounded first so the shift stays defined   */
+
+      if ( entry_selector > 15                     ||
+           search_range > num_segs                 ||
+           search_range*2 < num_segs               ||
+           search_range + range_shift != num_segs  ||
+           search_range != (1U << entry_selector)  )
+        INVALID_DATA;
+    }
+    else
+      p += 6;
+
+    ends      = p;
+    starts    = ends + num_segs*2 + 2;   /* + 2 for the reserved USHORT */
+    offsets   = starts + num_segs*4;     /* skip start codes and deltas */
+    glyph_ids = offsets + num_segs*2;
+
+    /* an empty glyph id array (glyph_ids == table + length) is fine */
+    if ( glyph_ids > table + length )
+      TOO_SHORT;
+
+    /* check last segment, its end count must be FFFF */
+    if ( valid->level >= FT_VALIDATE_PARANOID )
+    {
+      if ( num_segs == 0 )
+        INVALID_DATA;
+
+      p = ends + (num_segs-1)*2;
+      if ( TT_PEEK_UShort(p) != 0xFFFFU )
+        INVALID_DATA;
+    }
+
+    /* check that segments are sorted in increasing order and do not */
+    /* overlap; check also the offsets                               */
+    {
+      FT_UInt  start, end, last = 0, offset, n;
+
+
+      for ( n = 0; n < num_segs; n++ )
+      {
+        p = starts  + n*2;  start  = TT_PEEK_UShort(p);
+        p = ends    + n*2;  end    = TT_PEEK_UShort(p);
+        p = offsets + n*2;  offset = TT_PEEK_UShort(p);
+
+        if ( start > end )
+          INVALID_DATA;
+
+        if ( n > 0 && start <= last )
+          INVALID_DATA;
+
+        if ( offset )
+        {
+          /* the offset is relative to its own range-offset entry */
+          p += offset;  /* start of glyph id array slice */
+
+          /* check that we point within the glyph ids table only; */
+          /* the slice holds (end - start + 1) USHORT entries     */
+          if ( p < glyph_ids                            ||
+               p + (end - start + 1)*2 > table + length )
+            INVALID_DATA;
+
+          /* XXXX: check glyph ids !! */
+        }
+        last = end;
+      }
+    }
+  }
+
+
+
+  /*
+   *  Look up `char_code' in a format 4 sub-table.
+   *
+   *  Scans the segments in order and stops at the first one whose start
+   *  code is above the char code (no mapping) or whose [start, end]
+   *  range contains it.  With a range offset of 0 the glyph index is
+   *  `code + delta' modulo 65536; otherwise it is the glyph id slice
+   *  entry plus `delta' modulo 65536, or 0 if that entry is 0.
+   *
+   *  Returns 0 for codes above 0xFFFF or without a mapping.
+   */
+  static FT_UInt
+  tt_cmap4_char_index( FT_Byte*  table,
+                       FT_ULong  char_code )
+  {
+    FT_UInt  result = 0;
+
+
+    if ( char_code < 0x10000U )
+    {
+      FT_Byte  *p, *q;
+      FT_UInt   num_segs2, n;
+      FT_UInt   code = (FT_UInt)char_code;
+
+
+      p         = table + 6;
+      num_segs2 = (FT_UInt)TT_PEEK_UShort(p) & ~1U;  /* ensure even-ness */
+
+      p = table + 14;               /* ends table   */
+      q = table + 16 + num_segs2;   /* starts table */
+
+      for ( n = 0; n < num_segs2; n += 2 )
+      {
+        FT_UInt  end   = TT_NEXT_UShort(p);
+        FT_UInt  start = TT_NEXT_UShort(q);
+
+
+        if ( code < start )
+          break;
+
+        if ( code <= end )
+        {
+          FT_Int   delta;
+          FT_UInt  offset, idx;
+
+
+          /* `q' is one entry past this segment's start code, hence */
+          /* the - 2 to reach the same segment in the next array    */
+          p  = q + num_segs2 - 2;  delta  = TT_PEEK_Short(p);
+          p += num_segs2;          offset = TT_PEEK_UShort(p);
+
+          if ( offset == 0 )
+            result = (FT_UInt)( code + delta ) & 0xFFFFU;
+          else
+          {
+            /* the offset is relative to its own range-offset entry */
+            p  += offset + 2*( code - start );
+            idx = TT_PEEK_UShort(p);
+
+            if ( idx != 0 )
+              result = (FT_UInt)( idx + delta ) & 0xFFFFU;
+          }
+
+          break;
+        }
+      }
+    }
+
+    return result;
+  }
+
+
+
+  /*
+   *  Find the first char code strictly greater than `char_code' that a
+   *  format 4 sub-table maps to a non-0 glyph index.
+   *
+   *  Returns that char code and stores its glyph index in `*agindex'
+   *  (when `agindex' is not NULL); returns 0 and stores 0 if there is
+   *  none up to 0xFFFF.
+   *
+   *  The segments are walked once, in table order; this relies on them
+   *  being sorted by increasing char code, which tt_cmap4_validate
+   *  checks.
+   */
+  static FT_ULong
+  tt_cmap4_char_next( FT_Byte*  table,
+                      FT_ULong  char_code,
+                      FT_UInt  *agindex )
+  {
+    FT_ULong  result = 0;
+    FT_UInt   gindex = 0;
+    FT_Byte  *p, *q;
+    FT_UInt   code, num_segs2, n;
+
+
+    ++char_code;
+    if ( char_code >= 0x10000U )
+      goto Exit;
+
+    code      = (FT_UInt)char_code;
+    p         = table + 6;
+    num_segs2 = (FT_UInt)TT_PEEK_UShort(p) & ~1U;  /* ensure even-ness */
+
+    p = table + 14;              /* ends table   */
+    q = table + 16 + num_segs2;  /* starts table */
+
+    for ( n = 0; n < num_segs2; n += 2 )
+    {
+      FT_UInt   end   = TT_NEXT_UShort(p);
+      FT_UInt   start = TT_NEXT_UShort(q);
+      FT_Byte*  r;
+      FT_Int    delta;
+      FT_UInt   offset;
+
+
+      if ( code > end )
+        continue;
+
+      if ( code < start )
+        code = start;
+
+      /* `q' is one entry past this segment's start code, hence the */
+      /* - 2 to reach the same segment in the next array            */
+      r  = q + num_segs2 - 2;  delta  = TT_PEEK_Short(r);
+      r += num_segs2;          offset = TT_PEEK_UShort(r);
+
+      if ( offset != 0 )
+      {
+        /* parse the glyph ids array for non-0 index; the offset is */
+        /* relative to its own range-offset entry                   */
+        r += offset + ( code - start )*2;
+        for ( ; code <= end; code++ )
+        {
+          FT_UInt  raw = TT_NEXT_UShort(r);
+
+
+          if ( raw != 0 )
+          {
+            gindex = (FT_UInt)( raw + delta ) & 0xFFFFU;
+            if ( gindex != 0 )
+              goto Found;
+          }
+        }
+      }
+      else
+      {
+        for ( ; code <= end; code++ )
+        {
+          gindex = (FT_UInt)( code + delta ) & 0xFFFFU;
+          if ( gindex != 0 )
+            goto Found;
+        }
+      }
+
+      /* segment exhausted: `code' is now end + 1 and `gindex' is 0, */
+      /* so the next segment is tried from there                     */
+    }
+
+    goto Exit;
+
+  Found:
+    result = code;
+
+  Exit:
+    if ( agindex )
+      *agindex = gindex;
+
+    return result;
+  }
+
+  /* method table for format 4; the initializers are, in order, the */
+  /* validator, the char-code lookup and the next-char-code iterator */
+  static const TT_Cmap_ClassRec  tt_cmap4_class_rec =
+  {
+    (TT_CMap_ValidateFunc)  tt_cmap4_validate,
+    (TT_CMap_CharIndexFunc) tt_cmap4_char_index,
+    (TT_CMap_CharNextFunc)  tt_cmap4_char_next
+  };
+
+#endif /* TT_CONFIG_CMAP_FORMAT_4 */
+
+ /************************************************************************/
+ /************************************************************************/
+ /*****                                                              *****/
+ /*****                          FORMAT 6                            *****/
+ /*****                                                              *****/
+ /************************************************************************/
+ /************************************************************************/
+
+#ifdef TT_CONFIG_CMAP_FORMAT_6
+
+  /*
+   *  Validate a format 6 sub-table.
+   *
+   *    table :: start of the sub-table (the 'format' field)
+   *    valid :: validator; supplies `limit', `level' and `num_glyphs'
+   *
+   *  The 10-byte header (format, length, language, first code, entry
+   *  count) must be readable, and the declared length must cover
+   *  `count' USHORT glyph ids without running past `valid->limit'.
+   *  From FT_VALIDATE_TIGHT upwards every glyph id must be below
+   *  `valid->num_glyphs'.
+   */
+  static void
+  tt_cmap6_validate( FT_Byte*      table,
+                     FT_Validator  valid )
+  {
+    FT_Byte*  p = table + 2;  /* skip format */
+    FT_UInt   length, count;
+
+
+    if ( table + 10 > valid->limit )
+      INVALID_TOO_SHORT;
+
+    length = TT_NEXT_UShort(p);
+    p     += 4;  /* skip language and first code */
+    count  = TT_NEXT_UShort(p);
+
+    if ( table + length > valid->limit || length < 10 + count*2 )
+      INVALID_TOO_SHORT;
+
+    /* check glyph indices */
+    if ( valid->level >= FT_VALIDATE_TIGHT )
+    {
+      FT_UInt  gindex;
+
+
+      for ( ; count > 0; count-- )
+      {
+        gindex = TT_NEXT_UShort(p);
+        if ( gindex >= valid->num_glyphs )
+          INVALID_GLYPH_ID;
+      }
+    }
+  }
+
+
+  /*
+   *  Look up `char_code' in a format 6 sub-table.
+   *
+   *  The table maps the `count' consecutive codes starting at `start'
+   *  (both read from offset 6) to the USHORT glyph ids that follow.
+   *  Returns 0 for codes outside that range.
+   */
+  static FT_UInt
+  tt_cmap6_char_index( FT_Byte*   table,
+                       FT_ULong   char_code )
+  {
+    FT_UInt   result = 0;
+    FT_Byte*  p      = table + 6;
+    FT_UInt   start  = TT_NEXT_UShort(p);
+    FT_UInt   count  = TT_NEXT_UShort(p);
+
+
+    /* reject large codes before narrowing to FT_UInt, so that a wide */
+    /* FT_ULong value cannot alias a code inside the range            */
+    if ( char_code < 0x10000U )
+    {
+      /* unsigned wrap-around makes the test fail for codes < start */
+      FT_UInt  idx = (FT_UInt)char_code - start;
+
+
+      if ( idx < count )
+      {
+        p     += 2*idx;
+        result = TT_PEEK_UShort(p);
+      }
+    }
+
+    return result;
+  }
+
+
+  /*
+   *  Find the first char code strictly greater than `char_code' whose
+   *  format 6 glyph id entry is not 0.
+   *
+   *  Returns that char code and stores its glyph id in `*agindex' (when
+   *  `agindex' is not NULL); returns 0 and stores 0 if there is none.
+   */
+  static FT_ULong
+  tt_cmap6_char_next( FT_Byte*    table,
+                      FT_ULong    char_code,
+                      FT_UInt    *agindex )
+  {
+    FT_ULong  result = 0;
+    FT_UInt   gindex = 0;
+    FT_Byte*  p      = table + 6;
+    FT_UInt   start  = TT_NEXT_UShort(p);
+    FT_UInt   count  = TT_NEXT_UShort(p);
+    FT_UInt   idx;
+
+
+    char_code++;
+    if ( char_code >= 0x10000U )
+      goto Exit;
+
+    if ( char_code < start )
+      char_code = start;
+
+    idx = (FT_UInt)( char_code - start );
+
+    /* only move the cursor once `idx' is known to be inside the array */
+    if ( idx < count )
+    {
+      p += 2*idx;
+
+      for ( ; idx < count; idx++, char_code++ )
+      {
+        gindex = TT_NEXT_UShort(p);
+        if ( gindex != 0 )
+        {
+          result = char_code;
+          break;
+        }
+      }
+    }
+
+  Exit:
+    if ( agindex )
+      *agindex = gindex;
+
+    return result;
+  }
+
+  /* method table for format 6; the initializers are, in order, the */
+  /* validator, the char-code lookup and the next-char-code iterator */
+  static const TT_Cmap_ClassRec  tt_cmap6_class_rec =
+  {
+    (TT_CMap_ValidateFunc)  tt_cmap6_validate,
+    (TT_CMap_CharIndexFunc) tt_cmap6_char_index,
+    (TT_CMap_CharNextFunc)  tt_cmap6_char_next
+  };
+
+#endif /* TT_CONFIG_CMAP_FORMAT_6 */
+
+
+ /************************************************************************/
+ /************************************************************************/
+ /*****                                                              *****/
+ /*****                          FORMAT 8                            *****/
+ /*****                                                              *****/
+ /*****  It's hard to completely understand what the OpenType        *****/
+ /*****  spec says about this format, but here are my conclusion     *****/
+ /*****                                                              *****/
+ /*****  the purpose of this format is to easily map UTF-16 text     *****/
+ /*****  to glyph indices. Basically, the 'char_code' must be in     *****/
+ /*****  one of the following formats:                               *****/
+ /*****                                                              *****/
+ /*****    - a 16-bit value that isn't part of the Unicode           *****/
+ /*****      Surrogates Area (i.e. U+D800-U+DFFF)                    *****/
+ /*****                                                              *****/
+ /*****    - a 32-bit value, made of two surrogate values, i.e.      *****/
+ /*****      if "char_code = (char_hi << 16) | char_lo", then        *****/
+ /*****      both 'char_hi' and 'char_lo' must be in the Surrogates  *****/
+ /*****      Area.                                                   *****/
+ /*****                                                              *****/
+ /*****  The 'is32' table embedded in the charmap indicates          *****/
+ /*****  whether a given 16-bit value is in the surrogates area      *****/
+ /*****  or not..                                                    *****/
+ /*****                                                              *****/
+ /*****  so, for any given "char_code", we can assert the following  *****/
+ /*****                                                              *****/
+ /*****   if 'char_hi == 0' then we must have 'is32[char_lo] == 0'   *****/
+ /*****                                                              *****/
+ /*****   if 'char_hi != 0' then we must have both                   *****/
+ /*****   'is32[char_hi] != 0' and 'is32[char_lo] != 0'              *****/
+ /*****                                                              *****/
+ /*****                                                              *****/
+ /************************************************************************/
+ /************************************************************************/
+
+#ifdef TT_CONFIG_CMAP_FORMAT_8
+
+  /*
+   *  Validate a format 8 sub-table.
+   *
+   *    table :: start of the sub-table (the 'format' field)
+   *    valid :: validator; supplies `limit', `level' and `num_glyphs'
+   *
+   *  Layout used below (byte offsets from `table'): length at 4,
+   *  language at 8, the 8192-byte 'is32' bit array at 12, the group
+   *  count at 12 + 8192, then 12-byte groups (start, end, start glyph).
+   *
+   *  Checks the declared length, that all groups fit, and that groups
+   *  are well-formed and in increasing order.  From FT_VALIDATE_TIGHT
+   *  upwards it also checks the glyph range of each group and that the
+   *  'is32' bits agree with the char codes: both 16-bit halves flagged
+   *  for 32-bit codes, the code itself not flagged for 16-bit codes.
+   */
+  static void
+  tt_cmap8_validate( FT_Byte*      table,
+                     FT_Validator  valid )
+  {
+    FT_Byte*   p = table + 4;
+    FT_Byte*   is32;
+    FT_ULong   length;
+    FT_ULong   num_groups;
+
+
+    if ( table + 16 + 8192 > valid->limit )
+      INVALID_TOO_SHORT;
+
+    /* compare lengths rather than pointers, so that a huge `length' */
+    /* cannot wrap the pointer addition                              */
+    length = TT_NEXT_ULong(p);
+    if ( length > (FT_ULong)( valid->limit - table ) || length < 16 + 8192 )
+      INVALID_TOO_SHORT;
+
+    is32       = p + 4;        /* skip language     */
+    p          = is32 + 8192;  /* skip 'is32' array */
+    num_groups = TT_NEXT_ULong(p);
+
+    /* divide instead of multiplying, so `num_groups*12' cannot overflow */
+    if ( num_groups > (FT_ULong)( table + length - p ) / 12 )
+      INVALID_TOO_SHORT;
+
+    /* check groups, they must be in increasing order */
+    {
+      FT_ULong  n, start, end, start_id, count, code, last = 0;
+
+
+      for ( n = 0; n < num_groups; n++ )
+      {
+        FT_UInt  hi, lo;
+
+
+        start    = TT_NEXT_ULong(p);
+        end      = TT_NEXT_ULong(p);
+        start_id = TT_NEXT_ULong(p);
+
+        if ( start > end )
+          INVALID_DATA;
+
+        if ( n > 0 && start <= last )
+          INVALID_DATA;
+
+        if ( valid->level >= FT_VALIDATE_TIGHT )
+        {
+          if ( start_id + end - start >= valid->num_glyphs )
+            INVALID_GLYPH_ID;
+
+          count = (FT_ULong)( end - start + 1 );
+
+          if ( ( start >> 16 ) != 0 )
+          {
+            /* start_hi != 0, check that is32[i] is 1 for each i in */
+            /* the 'hi' and 'lo' of the range [start..end]          */
+            for ( code = start; count > 0; count--, code++ )
+            {
+              hi = (FT_UInt)( code >> 16 );
+              lo = (FT_UInt)( code & 0xFFFFU );
+
+              if ( ( is32[ hi >> 3 ] & ( 0x80 >> ( hi & 7 ) ) ) == 0 )
+                INVALID_DATA;
+
+              if ( ( is32[ lo >> 3 ] & ( 0x80 >> ( lo & 7 ) ) ) == 0 )
+                INVALID_DATA;
+            }
+          }
+          else
+          {
+            /* start_hi == 0, check that is32[i] is 0 for each i in */
+            /* the range [start..end]                               */
+
+            /* end_hi cannot be != 0 !! */
+            if ( ( end >> 16 ) != 0 )
+              INVALID_DATA;
+
+            for ( code = start; count > 0; count--, code++ )
+            {
+              lo = (FT_UInt)( code & 0xFFFFU );
+
+              if ( ( is32[ lo >> 3 ] & ( 0x80 >> ( lo & 7 ) ) ) != 0 )
+                INVALID_DATA;
+            }
+          }
+        }
+
+        /* remembered for the ordering check of the next group */
+        last = end;
+      }
+    }
+  }
+
+
+  /*
+   * Look up a character code in a format 8 cmap sub-table.
+   *
+   * The group count is read at offset 12 + 8192 and the groups (start
+   * code, end code, start glyph id) are scanned in order.  The scan
+   * stops at the first group that starts beyond `char_code' or that
+   * contains it.
+   *
+   * table     :: First byte of the sub-table.
+   * char_code :: Character code to look up.
+   *
+   * Returns the glyph index, or 0 if no group contains `char_code'.
+   */
+  static FT_UInt
+  tt_cmap8_char_index( FT_Byte*   table,
+                       FT_ULong   char_code )
+  {
+    FT_Byte*  cursor    = table + 12 + 8192;
+    FT_ULong  remaining = TT_NEXT_ULONG(cursor);
+
+    while ( remaining > 0 )
+    {
+      FT_ULong  first    = TT_NEXT_ULONG(cursor);
+      FT_ULong  final    = TT_NEXT_ULONG(cursor);
+      FT_ULong  first_id = TT_NEXT_ULONG(cursor);
+
+      /* groups are scanned in order; nothing to find past this point */
+      if ( char_code < first )
+        return 0;
+
+      if ( char_code <= final )
+        return (FT_UInt)( first_id + char_code - first );
+
+      remaining--;
+    }
+
+    return 0;
+  }
+
+
+  /*
+   * Find the first character code greater than `char_code' that a
+   * format 8 cmap sub-table maps to a non-zero glyph index.
+   *
+   * table     :: First byte of the sub-table.
+   * char_code :: Character code to start after.
+   * agindex   :: If non-NULL, receives the glyph index of the returned
+   *              character code (0 if nothing was found).
+   *
+   * Returns the next character code, or 0 if there is none.
+   */
+  static FT_ULong
+  tt_cmap8_char_next( FT_Byte*   table,
+                      FT_ULong   char_code,
+                      FT_UInt   *agindex )
+  {
+    FT_ULong   result     = 0;
+    FT_UInt    gindex     = 0;
+    FT_Byte*   p          = table + 12 + 8192;
+    /* the group count is a 32-bit field; after reading it, `p' */
+    /* points to the first group at offset 16 + 8192            */
+    FT_ULong   num_groups = TT_NEXT_ULONG(p);
+    FT_ULong   n, start, end, start_id;
+
+    /* group codes are 32-bit values, so nothing can follow       */
+    /* 0xFFFFFFFF; this also keeps the increment from wrapping to 0 */
+    if ( char_code >= 0xFFFFFFFFUL )
+      goto Exit;
+
+    ++char_code;
+
+    for ( n = 0; n < num_groups; n++ )
+    {
+      start    = TT_NEXT_ULONG(p);
+      end      = TT_NEXT_ULONG(p);
+      start_id = TT_NEXT_ULONG(p);
+
+      if ( char_code < start )
+        char_code = start;
+
+      if ( char_code <= end )
+      {
+        gindex = (FT_UInt)(char_code - start + start_id);
+
+        /* consecutive codes of a group map to consecutive glyph   */
+        /* indices, so if this code maps to glyph 0 the following  */
+        /* code of the same group (if any) maps to glyph 1         */
+        if ( gindex == 0 && char_code < end )
+        {
+          char_code++;
+          gindex++;
+        }
+
+        if ( gindex != 0 )
+        {
+          result = char_code;
+          goto Exit;
+        }
+      }
+    }
+
+  Exit:
+    if ( agindex )
+      *agindex = gindex;
+
+    return result;
+  }
+
+
+  /* Method table for format 8 sub-tables.  It bundles the validation, */
+  /* character lookup, and character iteration routines defined above, */
+  /* each cast to the matching TT_CMap_XXXFunc type.                   */
+  static const TT_Cmap_ClassRec  tt_cmap8_class_rec =
+  {
+    (TT_CMap_ValidateFunc)  tt_cmap8_validate,
+    (TT_CMap_CharIndexFunc) tt_cmap8_char_index,
+    (TT_CMap_CharNextFunc)  tt_cmap8_char_next
+  };
+
+#endif /* TT_CONFIG_CMAP_FORMAT_8 */
+
+ /************************************************************************/
+ /************************************************************************/
+ /*****                                                              *****/
+ /*****                          FORMAT 10                           *****/
+ /*****                                                              *****/
+ /************************************************************************/
+ /************************************************************************/
+
+#ifdef TT_CONFIG_CMAP_FORMAT_10
+
+  /*
+   * Validate a format 10 cmap sub-table.
+   *
+   * Fields read by this function (offsets relative to `table'); they
+   * match the offsets used by the format 10 lookup routines:
+   *
+   *    4   length of the sub-table in bytes   (32 bits)
+   *    8   language                           (32 bits, skipped)
+   *   12   first character code               (32 bits, skipped)
+   *   16   number of character codes          (32 bits)
+   *   20   glyph indices                      (16 bits each)
+   *
+   * table :: First byte of the sub-table.
+   *
+   * valid :: Validator object.  `limit' bounds the readable data,
+   *          `level' selects how strict the checks are, and
+   *          `num_glyphs' is used to check glyph indices.
+   *
+   * Problems are reported through the INVALID_XXX macros.
+   */
+  static void
+  tt_cmap10_validate( FT_Byte*      table,
+                      FT_Validator  valid )
+  {
+    FT_Byte*  p = table + 4;
+    FT_ULong  length, count;
+
+    if ( table + 20 > valid->limit )
+      INVALID_TOO_SHORT;
+
+    length = TT_NEXT_ULONG(p);
+    p     += 8;  /* skip language and first character code */
+    count  = TT_NEXT_ULONG(p);
+
+    /* equivalent to `table + length > limit || length < 20 + count*2' */
+    /* but written so that neither sum can overflow                    */
+    if ( length > (FT_ULong)( valid->limit - table ) ||
+         length < 20                                 ||
+         ( length - 20 ) / 2 < count                 )
+      INVALID_TOO_SHORT;
+
+    /* check glyph indices */
+    if ( valid->level >= FT_VALIDATE_TIGHT )
+    {
+      FT_UInt  gindex;
+
+      for ( ; count > 0; count-- )
+      {
+        gindex = TT_NEXT_USHORT(p);
+        if ( gindex >= valid->num_glyphs )
+          INVALID_GLYPH_ID;
+      }
+    }
+  }
+
+
+  /*
+   * Look up a character code in a format 10 cmap sub-table.
+   *
+   * The first character code and the entry count are read at offsets
+   * 12 and 16; the 16-bit entries follow directly.
+   *
+   * table     :: First byte of the sub-table.
+   * char_code :: Character code to look up.
+   *
+   * Returns the entry stored for `char_code', or 0 if the code lies
+   * outside the covered range.
+   */
+  static FT_UInt
+  tt_cmap10_char_index( FT_Byte*   table,
+                        FT_ULong   char_code )
+  {
+    FT_Byte*  entry  = table + 12;
+    FT_ULong  first  = TT_NEXT_ULONG(entry);
+    FT_ULong  total  = TT_NEXT_ULONG(entry);
+    FT_ULong  offset = (FT_ULong)( char_code - first );
+
+    /* codes below `first' wrap to large offsets and are rejected too */
+    if ( offset >= total )
+      return 0;
+
+    entry += 2*offset;
+
+    return PEEK_UShort(entry);
+  }
+
+
+  /*
+   * Find the first character code greater than `char_code' for which
+   * a format 10 cmap sub-table stores a non-zero glyph index.
+   *
+   * table     :: First byte of the sub-table.
+   * char_code :: Character code to start after.
+   * agindex   :: If non-NULL, receives the glyph index of the returned
+   *              character code (0 if nothing was found).
+   *
+   * Returns the next character code, or 0 if there is none.
+   */
+  static FT_ULong
+  tt_cmap10_char_next( FT_Byte*    table,
+                       FT_ULong    char_code,
+                       FT_UInt    *agindex )
+  {
+    FT_ULong  result = 0;
+    FT_UInt   gindex = 0;
+    FT_Byte*  p      = table + 12;
+    FT_ULong  start  = TT_NEXT_ULONG(p);
+    FT_ULong  count  = TT_NEXT_ULONG(p);
+    FT_ULong  idx;
+
+    /* the table stores a 32-bit start code, so codes above 0xFFFF */
+    /* must be accepted; only stop where the increment below could */
+    /* leave the 32-bit range                                      */
+    if ( char_code >= 0xFFFFFFFFUL )
+      goto Exit;
+
+    char_code++;
+
+    if ( char_code < start )
+      char_code = start;
+
+    /* check the range before moving `p' so that it never points */
+    /* outside the glyph index array                             */
+    idx = (FT_ULong)( char_code - start );
+    if ( idx >= count )
+      goto Exit;
+
+    p += 2*idx;
+
+    for ( ; idx < count; idx++ )
+    {
+      gindex = TT_NEXT_USHORT(p);
+      if ( gindex != 0 )
+      {
+        result = char_code;
+        break;
+      }
+      char_code++;
+    }
+
+  Exit:
+    if ( agindex )
+      *agindex = gindex;
+
+    return result;
+  }
+
+  /* Method table for format 10 sub-tables.  It bundles the validation, */
+  /* character lookup, and character iteration routines defined above,  */
+  /* each cast to the matching TT_CMap_XXXFunc type.                    */
+  static const TT_Cmap_ClassRec  tt_cmap10_class_rec =
+  {
+    (TT_CMap_ValidateFunc)  tt_cmap10_validate,
+    (TT_CMap_CharIndexFunc) tt_cmap10_char_index,
+    (TT_CMap_CharNextFunc)  tt_cmap10_char_next
+  };
+
+#endif /* TT_CONFIG_CMAP_FORMAT_10 */
+
+
+ /************************************************************************/
+ /************************************************************************/
+ /*****                                                              *****/
+ /*****                          FORMAT 12                           *****/
+ /*****                                                              *****/
+ /************************************************************************/
+ /************************************************************************/
+
+#ifdef TT_CONFIG_CMAP_FORMAT_12
+
+  /*
+   * Validate a format 12 cmap sub-table.
+   *
+   * Fields read by this function (offsets relative to `table'):
+   *
+   *    4   length of the sub-table in bytes   (32 bits)
+   *    8   language                           (32 bits, skipped)
+   *   12   number of groups                   (32 bits)
+   *   16   groups, 12 bytes each: start char code, end char code,
+   *        start glyph id                     (3 x 32 bits)
+   *
+   * table :: First byte of the sub-table.
+   *
+   * valid :: Validator object.  `limit' bounds the readable data,
+   *          `level' selects how strict the checks are, and
+   *          `num_glyphs' is used to check glyph indices.
+   *
+   * Problems are reported through the INVALID_XXX macros.
+   */
+  static void
+  tt_cmap12_validate( FT_Byte*      table,
+                      FT_Validator  valid )
+  {
+    FT_Byte*  p = table + 4;
+    FT_ULong  length;
+    FT_ULong  num_groups;
+
+    if ( table + 16 > valid->limit )
+      INVALID_TOO_SHORT;
+
+    length     = TT_NEXT_ULONG(p);
+    p         += 4;  /* skip language */
+    num_groups = TT_NEXT_ULONG(p);
+
+    /* the header plus all groups must fit into `length' bytes, and */
+    /* `length' bytes must be available; written without sums or    */
+    /* products that could overflow                                 */
+    if ( length > (FT_ULong)( valid->limit - table ) ||
+         length < 16                                 ||
+         ( length - 16 ) / 12 < num_groups           )
+      INVALID_TOO_SHORT;
+
+    /* check groups, they must be in increasing order */
+    {
+      FT_ULong  n, start, end, start_id, last = 0;
+
+      for ( n = 0; n < num_groups; n++ )
+      {
+        start    = TT_NEXT_ULONG(p);
+        end      = TT_NEXT_ULONG(p);
+        start_id = TT_NEXT_ULONG(p);
+
+        if ( start > end )
+          INVALID_DATA;
+
+        /* `last' holds the end code of the previous group */
+        if ( n > 0 && start <= last )
+          INVALID_DATA;
+
+        if ( valid->level >= FT_VALIDATE_TIGHT )
+        {
+          if ( start_id + end - start >= valid->num_glyphs )
+            INVALID_GLYPH_ID;
+        }
+
+        last = end;
+      }
+    }
+  }
+
+
+  /*
+   * Look up a character code in a format 12 cmap sub-table.
+   *
+   * The group count is read at offset 12 and the groups (start code,
+   * end code, start glyph id; 32 bits each) are scanned in order.  The
+   * scan stops at the first group that starts beyond `char_code' or
+   * that contains it.
+   *
+   * table     :: First byte of the sub-table.
+   * char_code :: Character code to look up.
+   *
+   * Returns the glyph index, or 0 if no group contains `char_code'.
+   */
+  static FT_UInt
+  tt_cmap12_char_index( FT_Byte*   table,
+                        FT_ULong   char_code )
+  {
+    FT_UInt   result     = 0;
+    FT_Byte*  p          = table + 12;
+    FT_ULong  num_groups = TT_NEXT_ULONG(p);
+    FT_ULong  start, end, start_id;
+
+    for ( ; num_groups > 0; num_groups-- )
+    {
+      start    = TT_NEXT_ULONG(p);
+      end      = TT_NEXT_ULONG(p);
+      start_id = TT_NEXT_ULONG(p);
+
+      /* groups are scanned in order; nothing to find past this point */
+      if ( char_code < start )
+        break;
+
+      if ( char_code <= end )
+      {
+        result = (FT_UInt)( start_id + char_code - start );
+        break;
+      }
+    }
+
+    return result;
+  }
+
+
+  /*
+   * Find the first character code greater than `char_code' that a
+   * format 12 cmap sub-table maps to a non-zero glyph index.
+   *
+   * table     :: First byte of the sub-table.
+   * char_code :: Character code to start after.
+   * agindex   :: If non-NULL, receives the glyph index of the returned
+   *              character code (0 if nothing was found).
+   *
+   * Returns the next character code, or 0 if there is none.
+   */
+  static FT_ULong
+  tt_cmap12_char_next( FT_Byte*   table,
+                       FT_ULong   char_code,
+                       FT_UInt   *agindex )
+  {
+    FT_ULong  result     = 0;
+    FT_UInt   gindex     = 0;
+    FT_Byte*  p          = table + 12;
+    FT_ULong  num_groups = TT_NEXT_ULONG(p);
+    FT_ULong  n, start, end, start_id;
+
+    /* group codes are 32-bit values, so nothing can follow       */
+    /* 0xFFFFFFFF; this also keeps the increment from wrapping to 0 */
+    if ( char_code >= 0xFFFFFFFFUL )
+      goto Exit;
+
+    char_code++;
+
+    for ( n = 0; n < num_groups; n++ )
+    {
+      start    = TT_NEXT_ULONG(p);
+      end      = TT_NEXT_ULONG(p);
+      start_id = TT_NEXT_ULONG(p);
+
+      if ( char_code < start )
+        char_code = start;
+
+      if ( char_code <= end )
+      {
+        gindex = (FT_UInt)( char_code - start + start_id );
+
+        /* consecutive codes of a group map to consecutive glyph   */
+        /* indices, so if this code maps to glyph 0 the following  */
+        /* code of the same group (if any) maps to glyph 1         */
+        if ( gindex == 0 && char_code < end )
+        {
+          char_code++;
+          gindex++;
+        }
+
+        if ( gindex != 0 )
+        {
+          result = char_code;
+          goto Exit;
+        }
+      }
+    }
+
+  Exit:
+    if ( agindex )
+      *agindex = gindex;
+
+    return result;
+  }
+
+
+  /* Method table for format 12 sub-tables.  It bundles the validation, */
+  /* character lookup, and character iteration routines defined above,  */
+  /* each cast to the matching TT_CMap_XXXFunc type.                    */
+  static const TT_Cmap_ClassRec  tt_cmap12_class_rec =
+  {
+    (TT_CMap_ValidateFunc)  tt_cmap12_validate,
+    (TT_CMap_CharIndexFunc) tt_cmap12_char_index,
+    (TT_CMap_CharNextFunc)  tt_cmap12_char_next
+  };
+
+#endif /* TT_CONFIG_CMAP_FORMAT_12 */
+
--- /dev/null
+++ b/src/sfnt/ttcmap0.h
@@ -1,0 +1,45 @@
+/***************************************************************************/
+/*                                                                         */
+/*  ttcmap0.h                                                              */
+/*                                                                         */
+/*    TrueType character mapping table (cmap) support (specification).     */
+/*                                                                         */
+/*  Copyright 1996-2001 by                                                 */
+/*  David Turner, Robert Wilhelm, and Werner Lemberg.                      */
+/*                                                                         */
+/*  This file is part of the FreeType project, and may only be used,       */
+/*  modified, and distributed under the terms of the FreeType project      */
+/*  license, LICENSE.TXT.  By continuing to use, modify, or distribute     */
+/*  this file you indicate that you have read the license and              */
+/*  understand and accept it fully.                                        */
+/*                                                                         */
+/***************************************************************************/
+
+
+#ifndef __TTCMAP_H__
+#define __TTCMAP_H__
+
+
+#include <ft2build.h>
+#include FT_INTERNAL_TRUETYPE_TYPES_H
+#include FT_INTERNAL_OBJECTS_H
+
+
+FT_BEGIN_HEADER
+
+  /* TT_CharMap_Load takes a face, a pointer to a cmap table record,  */
+  /* and an input stream, and returns an FT_Error code.               */
+  /* NOTE(review): presumably loads the charmap described by `cmap'   */
+  /* from `input'; the definition is not part of this header, so      */
+  /* confirm the exact contract there.                                */
+  FT_LOCAL FT_Error
+  TT_CharMap_Load( TT_Face        face,
+                   TT_CMapTable*  cmap,
+                   FT_Stream      input );
+
+  /* TT_CharMap_Free takes a face and a pointer to a cmap table       */
+  /* record, and returns an FT_Error code.                            */
+  /* NOTE(review): presumably releases what TT_CharMap_Load set up    */
+  /* for `cmap'; the definition is not part of this header, so        */
+  /* confirm the exact contract there.                                */
+  FT_LOCAL FT_Error
+  TT_CharMap_Free( TT_Face        face,
+                   TT_CMapTable*  cmap );
+
+
+FT_END_HEADER
+
+#endif /* __TTCMAP_H__ */
+
+
+/* END */