ref: 23ce5d4575c4802ea05d9eaee4da526f802ab3b8
parent: 2c2b3734e7295c31572cd272d23498f3d50f580c
author: David Turner <[email protected]>
date: Fri Feb 22 13:28:11 EST 2002
adding new & experimental charmap processor
--- /dev/null
+++ b/src/sfnt/ttcmap0.c
@@ -1,0 +1,1248 @@
+/***************************************************************************/
+/* */
+/* ttcmap.c */
+/* */
+/* TrueType character mapping table (cmap) support (body). */
+/* */
+/* Copyright 1996-2001 by */
+/* David Turner, Robert Wilhelm, and Werner Lemberg. */
+/* */
+/* This file is part of the FreeType project, and may only be used, */
+/* modified, and distributed under the terms of the FreeType project */
+/* license, LICENSE.TXT. By continuing to use, modify, or distribute */
+/* this file you indicate that you have read the license and */
+/* understand and accept it fully. */
+/* */
+/***************************************************************************/
+
+
+#include <ft2build.h>
+#include FT_INTERNAL_DEBUG_H
+#include "ttload.h"
+#include "ttcmap.h"
+
+#include "sferrors.h"
+
+ /*************************************************************************/
+ /* */
+ /* The macro FT_COMPONENT is used in trace mode. It is an implicit */
+ /* parameter of the FT_TRACE() and FT_ERROR() macros, used to print/log */
+ /* messages during execution. */
+ /* */
+#undef FT_COMPONENT
+#define FT_COMPONENT trace_ttcmap
+
+
+ /* Local aliases for the byte-buffer accessors used by the charmap code.
+  * The TT_PEEK_xxx names map onto FT_PEEK* macros and the TT_NEXT_xxx
+  * names onto FT_NEXT_*_BE macros; none of those targets is declared in
+  * this file.
+  *
+  * NOTE(review): macro names are case-sensitive, so callers must use
+  * exactly the spellings defined here (e.g. TT_NEXT_UShort).  The four
+  * PEEK targets do not follow one naming scheme (FT_PEEK_SHORT,
+  * FT_PEEK16_UBE, FT_PEEK32_BE, FT_PEEK32_UBE) -- confirm each of them
+  * against the stream header.
+  */
+#define TT_PEEK_Short FT_PEEK_SHORT
+#define TT_PEEK_UShort FT_PEEK16_UBE
+#define TT_PEEK_Long FT_PEEK32_BE
+#define TT_PEEK_ULong FT_PEEK32_UBE
+
+#define TT_NEXT_Short FT_NEXT_SHORT_BE
+#define TT_NEXT_UShort FT_NEXT_USHORT_BE
+#define TT_NEXT_Long FT_NEXT_LONG_BE
+#define TT_NEXT_ULong FT_NEXT_ULONG_BE
+
+ /************************************************************************/
+ /************************************************************************/
+ /***** *****/
+ /***** FORMAT 0 *****/
+ /***** *****/
+ /************************************************************************/
+ /************************************************************************/
+
+ /*************************************************************************
+ *
+ * TABLE OVERVIEW:
+ * ---------------
+ *
+ * NAME OFFSET TYPE DESCRIPTION
+ *
+ * format 0 USHORT must be 0
+ * length 2 USHORT table length in bytes
+ * language 4 USHORT Mac language code
+ * glyph_ids 6 BYTE[256] array of glyph indices
+ * 262
+ */
+
+#ifdef TT_CONFIG_CMAP_FORMAT_0
+
+ /*
+  * Validate a format 0 charmap.
+  *
+  *   table :: start of the sub-table (its `format' field)
+  *   valid :: validator; `limit', `level' and `num_glyphs' are read
+  *
+  * Reports TOO_SHORT when the declared length runs past `valid->limit'
+  * or is smaller than the fixed 262 bytes.  At FT_VALIDATE_TIGHT and
+  * above, every one of the 256 glyph ids must be below `num_glyphs',
+  * otherwise INVALID_DATA is reported.  (Both reporting macros are
+  * defined outside this file.)
+  */
+ static void
+ tt_cmap0_validate( FT_Byte*     table,
+                    FT_Validator valid )
+ {
+   FT_Byte* p      = table + 2;             /* skip format */
+   FT_UInt  length = TT_NEXT_UShort( p );
+
+
+   if ( table + length > valid->limit || length < 262 )
+     TOO_SHORT;
+
+   /* check glyph indices whenever necessary */
+   if ( valid->level >= FT_VALIDATE_TIGHT )
+   {
+     FT_UInt n, idx;
+
+
+     /* the glyph id array starts at offset 6; after reading `length' */
+     /* the cursor is still on the `language' field at offset 4       */
+     p = table + 6;
+     for ( n = 0; n < 256; n++ )
+     {
+       idx = *p++;
+       if ( idx >= valid->num_glyphs )
+         INVALID_DATA;
+     }
+   }
+ }
+
+
+ /* Format 0 lookup: character codes 0..255 select a byte of the glyph */
+ /* id array stored at offset 6; every other code yields glyph 0.      */
+ static FT_UInt
+ tt_cmap0_char_index( FT_Byte* table,
+                      FT_ULong char_code )
+ {
+   if ( char_code >= 256 )
+     return 0;
+
+   return table[6 + char_code];
+ }
+
+
+ /*
+  * Format 0 iteration: find the first character code strictly greater
+  * than `char_code' (and below 256) whose glyph id is not 0.
+  *
+  * Returns that code, or 0 when there is none.  When `agindex' is not
+  * NULL it receives the matching glyph id (0 when nothing was found).
+  */
+ static FT_ULong
+ tt_cmap0_char_next( FT_Byte* table,
+                     FT_ULong char_code,
+                     FT_UInt *agindex )
+ {
+   FT_Byte* glyph_ids = table + 6;     /* go to glyph ids */
+   FT_ULong found     = 0;
+   FT_UInt  glyph     = 0;
+   FT_ULong code;
+
+
+   for ( code = char_code + 1; code < 256; code++ )
+   {
+     if ( glyph_ids[code] != 0 )
+     {
+       glyph = glyph_ids[code];
+       found = code;
+       break;
+     }
+   }
+
+   if ( agindex )
+     *agindex = glyph;
+
+   return found;
+ }
+ /* Method table for format 0 charmaps: validator, single-code lookup
+  * and next-code iterator, in that order.
+  * NOTE(review): TT_Cmap_ClassRec and the TT_CMap_*Func pointer types
+  * are declared outside this file (note the Cmap/CMap spelling
+  * difference) -- confirm the field order against that declaration.
+  */
+ static const TT_Cmap_ClassRec tt_cmap0_class_rec =
+ {
+ (TT_CMap_ValidateFunc) tt_cmap0_validate,
+ (TT_CMap_CharIndexFunc) tt_cmap0_char_index,
+ (TT_CMap_CharNextFunc) tt_cmap0_char_next
+ };
+
+#endif /* TT_CONFIG_CMAP_FORMAT_0 */
+
+
+ /************************************************************************/
+ /************************************************************************/
+ /***** *****/
+ /***** FORMAT 2 *****/
+ /***** *****/
+ /***** This is used for certain CJK encodings that encode text *****/
+ /***** in a mixed 8/16 bits along the following lines: *****/
+ /***** *****/
+ /***** * certain byte values correspond to an 8-bit character code *****/
+ /***** (typically in the range 0..127 for ASCII compatibility) *****/
+ /***** *****/
+ /***** * certain byte values signal the first byte of a 2-byte *****/
+ /***** character code (but these values are also valid as the *****/
+ /***** second byte of a 2-byte character) *****/
+ /***** *****/
+ /***** the following charmap lookup and iteration functions all *****/
+ /***** assume that the value "charcode" corresponds to the following: *****/
+ /***** *****/
+ /***** - for one byte characters, "charcode" is simply the *****/
+ /***** character code *****/
+ /***** *****/
+ /***** - for two byte characters, "charcode" is the 2-byte *****/
+ /***** character code in big endian format. More exactly: *****/
+ /***** *****/
+ /***** (charcode >> 8) is the first byte value *****/
+ /***** (charcode & 0xFF) is the second byte value *****/
+ /***** *****/
+ /***** note that not all values of "charcode" are valid *****/
+ /***** according to these rules, and the functions only moderately *****/
+ /***** check their arguments. *****/
+ /***** *****/
+ /************************************************************************/
+ /************************************************************************/
+
+ /*************************************************************************
+ *
+ * TABLE OVERVIEW:
+ * ---------------
+ *
+ * NAME OFFSET TYPE DESCRIPTION
+ *
+ * format 0 USHORT must be 2
+ * length 2 USHORT table length in bytes
+ * language 4 USHORT Mac language code
+ * keys 6 USHORT[256] sub-header keys
+ * subs 518 SUBHEAD[NSUBS] sub-headers array
+ * glyph_ids 518+NSUB*8 USHORT[] glyph id array
+ *
+ * the 'keys' table is used to map charcode high-bytes to sub-headers.
+ * the value of 'NSUBS' is the number of sub-headers defined in the
+ * table and is computed by finding the maximum of the 'keys' table.
+ *
+ * note that for any N, keys[n] is a byte offset within the subs table,
+ * i.e. it is the corresponding sub-header index multiplied by 8.
+ *
+ * each sub-header has the following format:
+ *
+ * NAME OFFSET TYPE DESCRIPTION
+ *
+ * first 0 USHORT first valid low-byte
+ * count 2 USHORT number of valid low-bytes
+ * delta 4 SHORT see below
+ * offset 6 USHORT see below
+ *
+ * a sub-header defines, for each high-byte, the range of valid low-bytes
+ * within the charmap. note that the range defined by 'first' and 'count'
+ * must be completely included in the interval [0..255] according to the
+ * specification
+ *
+ * if a character code is contained within a given sub-header, then mapping
+ * it to a glyph index is done as follows:
+ *
+ * * the value of 'offset' is read. this is a _byte_ distance from the
+ * location of the 'offset' field itself into a slice of the 'glyph_ids'
+ * table. Let's call it 'slice' (it's a USHORT[] too)
+ *
+ * * the value 'slice[ char.lo - first ]' is read. If it is 0, there is
+ * no glyph for the charcode. Otherwise, the value of 'delta' is added
+ * to it (modulo 65536) to form a new glyph index
+ *
+ * it is up to the validation routine to check that all offsets fall within
+ * the glyph ids table (and not within the 'subs' table itself or outside
+ * of the CMap).
+ */
+
+#ifdef TT_CONFIG_CMAP_FORMAT_2
+
+ /*
+  * Validate a format 2 charmap.
+  *
+  *   table :: start of the sub-table (its `format' field)
+  *   valid :: validator; `limit', `level' and `num_glyphs' are read
+  *
+  * Checks, in order: the declared length, the 256 sub-header keys
+  * (multiples of 8 at FT_VALIDATE_PARANOID), and every sub-header up to
+  * the highest one referenced by a key.  A sub-header with a non-zero
+  * offset must point to a slice lying entirely inside the glyph id
+  * array; at FT_VALIDATE_TIGHT the glyph indices of the slice are
+  * checked against `num_glyphs' as well.
+  */
+ static void
+ tt_cmap2_validate( FT_Byte*     table,
+                    FT_Validator valid )
+ {
+   FT_Byte* p        = table + 2;             /* skip format */
+   FT_UInt  length   = TT_PEEK_UShort( p );
+   FT_UInt  max_subs = 0;                     /* highest sub-header index */
+   FT_UInt  n;
+   FT_Byte* keys;                             /* keys table */
+   FT_Byte* subs;                             /* sub-headers */
+   FT_Byte* glyph_ids;                        /* glyph id array */
+
+
+   if ( table + length > valid->limit || length < 6 + 512 )
+     TOO_SHORT;
+
+   keys = table + 6;
+
+   /* parse keys to compute sub-headers count */
+   p = keys;
+   for ( n = 0; n < 256; n++ )
+   {
+     FT_UInt idx = TT_NEXT_UShort( p );
+
+
+     /* value must be multiple of 8 */
+     if ( valid->level >= FT_VALIDATE_PARANOID && ( idx & 7 ) != 0 )
+       INVALID_DATA;
+
+     idx >>= 3;
+
+     if ( idx > max_subs )
+       max_subs = idx;
+   }
+
+   subs      = p;
+   glyph_ids = subs + ( max_subs + 1 ) * 8;
+   if ( glyph_ids > valid->limit )
+     TOO_SHORT;
+
+   /* parse sub-headers; `p' must advance by exactly 8 bytes per turn */
+   for ( n = 0; n <= max_subs; n++ )
+   {
+     FT_UInt  first_code, code_count, offset;
+     FT_Int   delta;
+     FT_Byte* ids;
+
+
+     first_code = TT_NEXT_UShort( p );
+     code_count = TT_NEXT_UShort( p );
+     delta      = TT_NEXT_Short( p );
+     offset     = TT_NEXT_UShort( p );
+
+     /* check range within 0..255 */
+     if ( valid->level >= FT_VALIDATE_PARANOID )
+     {
+       if ( first_code >= 256 || first_code + code_count > 256 )
+         INVALID_DATA;
+     }
+
+     /* check offset */
+     if ( offset != 0 )
+     {
+       /* the offset is relative to the `offset' field itself, which */
+       /* `p' has just stepped over                                  */
+       ids = p - 2 + offset;
+       if ( ids < glyph_ids || ids + code_count * 2 > table + length )
+         INVALID_DATA;
+
+       /* check glyph ids */
+       if ( valid->level >= FT_VALIDATE_TIGHT )
+       {
+         /* use a private cursor over the slice so that `p' keeps */
+         /* pointing at the next sub-header                       */
+         FT_Byte* q     = ids;
+         FT_Byte* limit = ids + code_count * 2;
+         FT_UInt  idx;
+
+
+         while ( q < limit )
+         {
+           idx = TT_NEXT_UShort( q );
+           if ( idx != 0 )
+           {
+             idx = ( idx + delta ) & 0xFFFFU;
+             if ( idx >= valid->num_glyphs )
+               INVALID_GLYPH_ID;
+           }
+         }
+       }
+     }
+   }
+ }
+
+
+ /*
+  * Return the format 2 sub-header that applies to `char_code', or NULL
+  * when the code is not valid for this charmap:
+  *
+  *  - codes of 0x10000 and above are never valid;
+  *  - a one-byte code (high byte 0) uses sub-header 0, but only if the
+  *    key of its low byte is 0;
+  *  - a two-byte code uses the sub-header selected by the key of its
+  *    high byte, which must not be sub-header 0.
+  */
+ static FT_Byte*
+ tt_cmap2_get_subheader( FT_Byte* table,
+                         FT_ULong char_code )
+ {
+   FT_Byte* keys = table + 6;        /* keys table */
+   FT_Byte* subs = keys + 512;       /* sub-headers table */
+   FT_Byte* p;
+   FT_UInt  char_lo, char_hi, key;
+
+
+   if ( char_code >= 0x10000UL )
+     return NULL;
+
+   char_lo = (FT_UInt)( char_code & 0xFF );
+   char_hi = (FT_UInt)( char_code >> 8 );
+
+   if ( char_hi == 0 )
+   {
+     /* an 8-bit character code: a key of 0 for this byte indicates  */
+     /* that it is really a one-byte value; otherwise the byte can   */
+     /* only start a two-byte code and is rejected here              */
+     p = keys + char_lo * 2;
+     if ( TT_PEEK_UShort( p ) != 0 )
+       return NULL;
+
+     return subs;
+   }
+
+   /* a 16-bit character code; the key is a byte offset into `subs' */
+   p   = keys + char_hi * 2;
+   key = TT_PEEK_UShort( p );
+
+   /* a key of 0 means the high byte is a valid one-byte value */
+   if ( key == 0 )
+     return NULL;
+
+   return subs + key;
+ }
+
+
+ /*
+  * Format 2 lookup.  Returns the glyph index for `char_code', or 0 when
+  * the code has no sub-header, falls outside the sub-header's low-byte
+  * range, the sub-header has a zero offset, or the glyph id slot holds 0.
+  */
+ static FT_UInt
+ tt_cmap2_char_index( FT_Byte* table,
+                      FT_ULong char_code )
+ {
+   FT_UInt  result    = 0;
+   FT_Byte* subheader = tt_cmap2_get_subheader( table, char_code );
+
+
+   if ( subheader )
+   {
+     FT_Byte* p   = subheader;
+     FT_UInt  idx = (FT_UInt)( char_code & 0xFF );
+     FT_UInt  start, count, offset;
+     FT_Int   delta;
+
+
+     start  = TT_NEXT_UShort( p );
+     count  = TT_NEXT_UShort( p );
+     delta  = TT_NEXT_Short( p );
+     offset = TT_PEEK_UShort( p );   /* `p' stays on the offset field */
+
+     /* unsigned subtraction: a low byte below `start' wraps to a */
+     /* huge value and fails the range test                       */
+     idx -= start;
+     if ( idx < count && offset != 0 )
+     {
+       p  += offset + 2 * idx;
+       idx = TT_PEEK_UShort( p );
+
+       if ( idx != 0 )
+         result = (FT_UInt)( idx + delta ) & 0xFFFFU;
+     }
+   }
+
+   return result;
+ }
+
+
+ /*
+  * Return the first valid character code of a format 2 sub-header.
+  *
+  *   subheader :: start of the sub-header
+  *   char_hi   :: high byte the sub-header is being used for
+  *   agindex   :: if not NULL, receives the glyph index (0 if none)
+  *
+  * Returns `char_hi * 256 + low byte' of the first code found, or 0.
+  */
+ static FT_ULong
+ tt_cmap2_subheader_first( FT_Byte* subheader,
+                           FT_UInt  char_hi,
+                           FT_UInt *agindex )
+ {
+   FT_ULong result = 0;
+   FT_UInt  gindex = 0;
+   FT_Byte* p      = subheader;
+   FT_UInt  start  = TT_NEXT_UShort( p );
+   FT_UInt  count  = TT_NEXT_UShort( p );
+
+
+   if ( count > 0 )
+   {
+     FT_Int  delta  = TT_NEXT_Short( p );
+     FT_UInt offset = TT_NEXT_UShort( p );
+
+
+     if ( offset == 0 )
+     {
+       /* simple difference, compute directly */
+       result = (FT_ULong)char_hi * 256 + start;
+       gindex = (FT_UInt)( start + delta ) & 0xFFFFU;
+     }
+     else
+     {
+       FT_UInt i, idx;
+
+
+       /* the offset is relative to its own field, which `p' has */
+       /* already stepped over, hence the `- 2'                  */
+       p += offset - 2;
+
+       /* parse glyph id table for non-0 indices */
+       for ( i = 0; i < count; i++ )
+       {
+         idx = TT_NEXT_UShort( p );
+         if ( idx != 0 )
+         {
+           result = (FT_ULong)char_hi * 256 + start + i;
+           gindex = (FT_UInt)( idx + delta ) & 0xFFFFU;
+           break;
+         }
+       }
+     }
+   }
+
+   if ( agindex )
+     *agindex = gindex;
+
+   return result;
+ }
+
+
+ /*
+  * Format 2 iteration: find the first character code greater than
+  * `char_code' that maps to a non-zero glyph index.
+  *
+  * Returns that code, or 0 when the charmap holds no further code.
+  * When `agindex' is not NULL it receives the glyph index (0 if none).
+  * The return type is FT_ULong, like the other `char_next' functions
+  * of this file.
+  *
+  * NOTE(review): a one-byte code whose key is not 0 has no sub-header,
+  * and the search then moves on to high byte 1, skipping any remaining
+  * one-byte codes -- confirm that this is intended.
+  */
+ static FT_ULong
+ tt_cmap2_char_next( FT_Byte* table,
+                     FT_ULong char_code,
+                     FT_UInt *agindex )
+ {
+   FT_ULong result = 0;
+   FT_UInt  gindex = 0;
+   FT_Byte* subheader;
+
+
+   ++char_code;
+   for (;;)
+   {
+     /* remember the 256-code block being scanned: the inner loop may */
+     /* push `char_code' past the end of that block                   */
+     FT_ULong char_base = char_code & ~(FT_ULong)0xFF;
+
+
+     subheader = tt_cmap2_get_subheader( table, char_code );
+     if ( subheader )
+     {
+       FT_Byte* p       = subheader;
+       FT_UInt  start   = TT_NEXT_UShort( p );
+       FT_UInt  count   = TT_NEXT_UShort( p );
+       FT_Int   delta   = TT_NEXT_Short( p );
+       FT_UInt  offset  = TT_PEEK_UShort( p );
+       FT_UInt  char_lo = (FT_UInt)( char_code & 0xFF );
+       FT_UInt  pos, idx;
+
+
+       if ( offset != 0 )
+       {
+         if ( char_lo < start )
+         {
+           char_lo = start;
+           pos     = 0;
+         }
+         else
+           pos = (FT_UInt)( char_lo - start );
+
+         p        += offset + pos * 2;
+         char_code = char_base + char_lo;
+
+         for ( ; pos < count; pos++, char_code++ )
+         {
+           idx = TT_NEXT_UShort( p );
+
+           if ( idx != 0 )
+           {
+             gindex = ( idx + delta ) & 0xFFFFU;
+             if ( gindex != 0 )
+             {
+               result = char_code;
+               goto Exit;
+             }
+           }
+         }
+       }
+     }
+
+     /* jump to next sub-header, i.e. higher byte value */
+     char_code = char_base + 256;
+     if ( char_code >= 0x10000UL )
+       break;
+   }
+
+ Exit:
+   if ( agindex )
+     *agindex = gindex;
+
+   return result;
+ }
+ /* Method table for format 2 charmaps: validator, single-code lookup
+  * and next-code iterator, in that order.
+  * NOTE(review): TT_Cmap_ClassRec and the TT_CMap_*Func pointer types
+  * are declared outside this file -- confirm the field order there.
+  */
+ static const TT_Cmap_ClassRec tt_cmap2_class_rec =
+ {
+ (TT_CMap_ValidateFunc) tt_cmap2_validate,
+ (TT_CMap_CharIndexFunc) tt_cmap2_char_index,
+ (TT_CMap_CharNextFunc) tt_cmap2_char_next
+ };
+
+#endif /* TT_CONFIG_CMAP_FORMAT_2 */
+
+
+ /************************************************************************/
+ /************************************************************************/
+ /***** *****/
+ /***** FORMAT 4 *****/
+ /***** *****/
+ /************************************************************************/
+ /************************************************************************/
+
+#ifdef TT_CONFIG_CMAP_FORMAT_4
+
+ /*
+  * Validate a format 4 charmap.
+  *
+  * Layout used below (byte offsets from `table'):
+  *
+  *   format 0, length 2, language 4, segCountX2 6, searchRange 8,
+  *   entrySelector 10, rangeShift 12, then the arrays
+  *   ends[segs], 2 bytes of padding, starts[segs], deltas[segs],
+  *   offsets[segs] and finally the glyph id array.
+  *
+  *   table :: start of the sub-table (its `format' field)
+  *   valid :: validator; `limit' and `level' are read
+  *
+  * Segments must satisfy start <= end, be sorted and must not overlap;
+  * a non-zero range offset must select a slice lying entirely inside
+  * the glyph id array.  The binary search parameters and the final
+  * 0xFFFF end code are only checked at FT_VALIDATE_PARANOID.
+  */
+ static void
+ tt_cmap4_validate( FT_Byte*     table,
+                    FT_Validator valid )
+ {
+   FT_Byte* p      = table + 2;              /* skip format */
+   FT_UInt  length = TT_NEXT_UShort( p );
+   FT_Byte  *ends, *starts, *offsets, *glyph_ids;
+   FT_UInt  num_segs;
+
+
+   if ( table + length > valid->limit || length < 16 )
+     TOO_SHORT;
+
+   p += 2;                                   /* skip language */
+
+   num_segs = TT_NEXT_UShort( p );           /* read segCountX2 */
+
+   if ( valid->level >= FT_VALIDATE_PARANOID )
+   {
+     /* check that we have an even value here */
+     if ( num_segs & 1 )
+       INVALID_DATA;
+   }
+
+   num_segs /= 2;
+
+   /* check the search parameters - even though we never use them */
+   if ( valid->level >= FT_VALIDATE_PARANOID )
+   {
+     FT_UInt search_range   = TT_NEXT_UShort( p );
+     FT_UInt entry_selector = TT_NEXT_UShort( p );
+     FT_UInt range_shift    = TT_NEXT_UShort( p );
+
+
+     if ( ( search_range | range_shift ) & 1 )  /* must be even values */
+       INVALID_DATA;
+
+     search_range /= 2;
+     range_shift  /= 2;
+
+     /* `search_range' is the greatest power of 2 that is <= num_segs; */
+     /* an entry selector of 16 or more cannot describe it, and is     */
+     /* rejected before it is used as a shift count                    */
+     if ( search_range                > num_segs                 ||
+          search_range * 2            < num_segs                 ||
+          search_range + range_shift != num_segs                 ||
+          entry_selector             >= 16                       ||
+          search_range               != ( 1U << entry_selector ) )
+       INVALID_DATA;
+   }
+   else
+     p += 6;
+
+   ends      = p;
+   starts    = ends + num_segs * 2 + 2;      /* skip the padding word */
+   offsets   = starts + num_segs * 4;        /* skip starts and deltas */
+   glyph_ids = offsets + num_segs * 2;
+
+   /* the glyph id array may be empty, so `==' is acceptable here */
+   if ( glyph_ids > table + length )
+     TOO_SHORT;
+
+   /* check last segment, its end count must be FFFF */
+   if ( valid->level >= FT_VALIDATE_PARANOID )
+   {
+     if ( num_segs == 0 )
+       INVALID_DATA;
+     else
+     {
+       p = ends + ( num_segs - 1 ) * 2;
+       if ( TT_PEEK_UShort( p ) != 0xFFFFU )
+         INVALID_DATA;
+     }
+   }
+
+   /* check that segments are sorted in increasing order and do not */
+   /* overlap; check also the offsets                               */
+   {
+     FT_UInt start, end, last = 0, offset, n;
+
+
+     for ( n = 0; n < num_segs; n++ )
+     {
+       p = starts  + n * 2;  start  = TT_PEEK_UShort( p );
+       p = ends    + n * 2;  end    = TT_PEEK_UShort( p );
+       p = offsets + n * 2;  offset = TT_PEEK_UShort( p );
+
+       if ( start > end )
+         INVALID_DATA;
+
+       if ( n > 0 && start <= last )
+         INVALID_DATA;
+
+       if ( offset )
+       {
+         /* the offset is relative to its own slot in `offsets' */
+         p += offset;                        /* start of glyph id slice */
+
+         /* check that we point within the glyph ids table only; */
+         /* every entry of the slice takes two bytes             */
+         if ( p < glyph_ids                             ||
+              p + ( end - start + 1 ) * 2 > table + length )
+           INVALID_DATA;
+
+         /* XXXX: check glyph ids !! */
+       }
+
+       last = end;
+     }
+   }
+ }
+
+
+
+ /*
+  * Format 4 lookup.  Returns the glyph index of `char_code', or 0 when
+  * the code is 0x10000 or above, lies in no segment, or maps to a zero
+  * entry of the glyph id array.
+  *
+  * For a segment with a zero range offset the glyph index is
+  * `(code + delta) mod 65536'; otherwise the glyph id array entry is
+  * read and `delta' is added to it unless the entry is 0.
+  */
+ static FT_UInt
+ tt_cmap4_char_index( FT_Byte* table,
+                      FT_ULong char_code )
+ {
+   FT_UInt result = 0;
+
+
+   if ( char_code < 0x10000UL )
+   {
+     FT_Byte  *p, *q;
+     FT_UInt  code = (FT_UInt)char_code;
+     FT_UInt  start, end, offset, idx, num_segs2, n;
+     FT_Int   delta;
+
+
+     p         = table + 6;
+     num_segs2 = (FT_UInt)TT_PEEK_UShort( p ) & ~1U;  /* ensure even-ness */
+
+     p = table + 14;                /* ends table */
+     q = table + 16 + num_segs2;    /* starts table */
+
+     for ( n = 0; n < num_segs2; n += 2 )
+     {
+       end   = TT_NEXT_UShort( p );
+       start = TT_NEXT_UShort( q );
+
+       /* segments are sorted, no later one can contain the code */
+       if ( code < start )
+         break;
+
+       if ( code > end )
+         continue;
+
+       /* `q' is already one entry past starts[n], hence the `- 2' */
+       p  = q + num_segs2 - 2;  delta  = TT_PEEK_Short( p );
+       p += num_segs2;          offset = TT_PEEK_UShort( p );
+
+       if ( offset != 0 )
+       {
+         /* the offset is relative to its own slot in `offsets' */
+         p  += offset + 2 * ( code - start );
+         idx = TT_PEEK_UShort( p );
+
+         if ( idx != 0 )
+           result = (FT_UInt)( idx + delta ) & 0xFFFFU;
+       }
+       else
+         result = (FT_UInt)( code + delta ) & 0xFFFFU;
+
+       /* `p' no longer walks the ends table, so stop scanning */
+       break;
+     }
+   }
+
+   return result;
+ }
+
+
+
+ /*
+  * Format 4 iteration: find the first character code greater than
+  * `char_code' that maps to a non-zero glyph index.
+  *
+  * Returns that code, or 0 when there is none.  When `agindex' is not
+  * NULL it always receives the glyph index (0 when nothing was found).
+  *
+  * The segments are visited once, in table order; the candidate code
+  * only ever moves forward, so a segment that has been passed never
+  * needs to be looked at again.
+  */
+ static FT_ULong
+ tt_cmap4_char_next( FT_Byte* table,
+                     FT_ULong char_code,
+                     FT_UInt *agindex )
+ {
+   FT_ULong result = 0;
+   FT_UInt  gindex = 0;
+   FT_Byte  *p, *ends, *starts, *deltas, *offsets;
+   FT_ULong code;
+   FT_UInt  num_segs2, n;
+
+
+   code = char_code + 1;
+   if ( code >= 0x10000UL )
+     goto Exit;
+
+   p         = table + 6;
+   num_segs2 = (FT_UInt)TT_PEEK_UShort( p ) & ~1U;   /* ensure even-ness */
+
+   ends    = table + 14;
+   starts  = ends + num_segs2 + 2;         /* skip the padding word */
+   deltas  = starts + num_segs2;
+   offsets = deltas + num_segs2;
+
+   for ( n = 0; n < num_segs2; n += 2 )
+   {
+     FT_ULong start, end;
+     FT_UInt  offset;
+     FT_Int   delta;
+
+
+     p = ends   + n;  end   = TT_PEEK_UShort( p );
+     p = starts + n;  start = TT_PEEK_UShort( p );
+
+     if ( code < start )
+       code = start;
+
+     if ( code > end )
+       continue;
+
+     p = deltas  + n;  delta  = TT_PEEK_Short( p );
+     p = offsets + n;  offset = TT_PEEK_UShort( p );
+
+     if ( offset != 0 )
+     {
+       /* parse the glyph ids array for non-0 index; the offset is */
+       /* relative to its own slot in `offsets'                    */
+       p += offset + ( code - start ) * 2;
+       for ( ; code <= end; code++ )
+       {
+         gindex = TT_NEXT_UShort( p );
+         if ( gindex != 0 )
+         {
+           gindex = (FT_UInt)( gindex + delta ) & 0xFFFFU;
+           if ( gindex != 0 )
+           {
+             result = code;
+             goto Exit;
+           }
+         }
+       }
+     }
+     else
+     {
+       for ( ; code <= end; code++ )
+       {
+         gindex = (FT_UInt)( code + delta ) & 0xFFFFU;
+         if ( gindex != 0 )
+         {
+           result = code;
+           goto Exit;
+         }
+       }
+     }
+
+     /* segment exhausted: `code' is now `end + 1', try the next one */
+   }
+
+   gindex = 0;                             /* nothing found */
+
+ Exit:
+   if ( agindex )
+     *agindex = gindex;
+
+   return result;
+ }
+ /* Method table for format 4 charmaps: validator, single-code lookup
+  * and next-code iterator, in that order.
+  * NOTE(review): TT_Cmap_ClassRec and the TT_CMap_*Func pointer types
+  * are declared outside this file -- confirm the field order there.
+  */
+ static const TT_Cmap_ClassRec tt_cmap4_class_rec =
+ {
+ (TT_CMap_ValidateFunc) tt_cmap4_validate,
+ (TT_CMap_CharIndexFunc) tt_cmap4_char_index,
+ (TT_CMap_CharNextFunc) tt_cmap4_char_next
+ };
+
+#endif /* TT_CONFIG_CMAP_FORMAT_4 */
+
+ /************************************************************************/
+ /************************************************************************/
+ /***** *****/
+ /***** FORMAT 6 *****/
+ /***** *****/
+ /************************************************************************/
+ /************************************************************************/
+
+#ifdef TT_CONFIG_CMAP_FORMAT_6
+
+ /*
+  * Validate a format 6 charmap.
+  *
+  * Layout used below (byte offsets from `table'): format 0, length 2,
+  * language 4, first code 6, entry count 8, glyph ids from 10 on.
+  *
+  *   table :: start of the sub-table (its `format' field)
+  *   valid :: validator; `limit', `level' and `num_glyphs' are read
+  *
+  * The table must hold the 10-byte header plus two bytes per entry; at
+  * FT_VALIDATE_TIGHT each glyph id must be below `num_glyphs'.
+  */
+ static void
+ tt_cmap6_validate( FT_Byte*     table,
+                    FT_Validator valid )
+ {
+   FT_Byte* p = table + 2;                 /* skip format */
+   FT_UInt  length, count;
+
+
+   if ( table + 10 > valid->limit )
+     INVALID_TOO_SHORT;
+
+   length = TT_NEXT_UShort( p );
+   p      = table + 8;                     /* skip language and first code */
+   count  = TT_NEXT_UShort( p );
+
+   if ( table + length > valid->limit || length < 10 + count * 2 )
+     INVALID_TOO_SHORT;
+
+   /* check glyph indices; `p' now points at the glyph id array */
+   if ( valid->level >= FT_VALIDATE_TIGHT )
+   {
+     FT_UInt gindex;
+
+
+     for ( ; count > 0; count-- )
+     {
+       gindex = TT_NEXT_UShort( p );
+       if ( gindex >= valid->num_glyphs )
+         INVALID_GLYPH_ID;
+     }
+   }
+ }
+
+
+ /*
+  * Format 6 lookup.  Returns the glyph id stored for `char_code', or 0
+  * when the code lies outside [first code, first code + count).
+  */
+ static FT_UInt
+ tt_cmap6_char_index( FT_Byte* table,
+                      FT_ULong char_code )
+ {
+   FT_UInt  result = 0;
+   FT_Byte* p      = table + 6;
+   FT_UInt  start  = TT_NEXT_UShort( p );
+   FT_UInt  count  = TT_NEXT_UShort( p );
+   FT_ULong idx;
+
+
+   /* keep the full width of `char_code': the unsigned subtraction   */
+   /* wraps to a huge value for codes below `start', and large codes */
+   /* are not folded back into range by a narrowing cast             */
+   idx = char_code - start;
+
+   if ( idx < count )
+   {
+     p     += 2 * idx;                /* `p' pointed at the glyph ids */
+     result = TT_PEEK_UShort( p );
+   }
+
+   return result;
+ }
+
+
+ /*
+  * Format 6 iteration: find the first character code greater than
+  * `char_code' whose glyph id is not 0.
+  *
+  * Returns that code, or 0 when there is none.  When `agindex' is not
+  * NULL it receives the glyph id (0 when nothing was found).
+  */
+ static FT_ULong
+ tt_cmap6_char_next( FT_Byte* table,
+                     FT_ULong char_code,
+                     FT_UInt *agindex )
+ {
+   FT_ULong result = 0;
+   FT_UInt  gindex = 0;
+   FT_Byte* p      = table + 6;
+   FT_UInt  start  = TT_NEXT_UShort( p );
+   FT_UInt  count  = TT_NEXT_UShort( p );
+   FT_UInt  idx;
+
+
+   char_code++;
+   if ( char_code >= 0x10000UL )
+     goto Exit;
+
+   if ( char_code < start )
+     char_code = start;
+
+   idx = (FT_UInt)( char_code - start );
+
+   /* only move the cursor when the slot really exists */
+   if ( idx < count )
+   {
+     p += 2 * idx;
+
+     for ( ; idx < count; idx++, char_code++ )
+     {
+       gindex = TT_NEXT_UShort( p );
+       if ( gindex != 0 )
+       {
+         result = char_code;
+         break;
+       }
+     }
+   }
+
+ Exit:
+   if ( agindex )
+     *agindex = gindex;
+
+   return result;
+ }
+ /* Method table for format 6 charmaps: validator, single-code lookup
+  * and next-code iterator, in that order.
+  * NOTE(review): TT_Cmap_ClassRec and the TT_CMap_*Func pointer types
+  * are declared outside this file -- confirm the field order there.
+  */
+ static const TT_Cmap_ClassRec tt_cmap6_class_rec =
+ {
+ (TT_CMap_ValidateFunc) tt_cmap6_validate,
+ (TT_CMap_CharIndexFunc) tt_cmap6_char_index,
+ (TT_CMap_CharNextFunc) tt_cmap6_char_next
+ };
+
+#endif /* TT_CONFIG_CMAP_FORMAT_6 */
+
+
+ /************************************************************************/
+ /************************************************************************/
+ /***** *****/
+ /***** FORMAT 8 *****/
+ /***** *****/
+ /***** It's hard to completely understand what the OpenType *****/
+ /***** spec says about this format, but here are my conclusions: *****/
+ /***** *****/
+ /***** the purpose of this format is to easily map UTF-16 text *****/
+ /***** to glyph indices. Basically, the 'char_code' must be in *****/
+ /***** one of the following formats: *****/
+ /***** *****/
+ /***** - a 16-bit value that isn't part of the Unicode *****/
+ /***** Surrogates Area (i.e. U+D800-U+DFFF) *****/
+ /***** *****/
+ /***** - a 32-bit value, made of two surrogate values, i.e. *****/
+ /***** if "char_code = (char_hi << 16) | char_lo", then *****/
+ /***** both 'char_hi' and 'char_lo' must be in the Surrogates *****/
+ /***** Area. *****/
+ /***** *****/
+ /***** The 'is32' table embedded in the charmap indicates *****/
+ /***** whether a given 16-bit value is in the surrogates area *****/
+ /***** or not. *****/
+ /***** *****/
+ /***** so, for any given "char_code", we can assert the following *****/
+ /***** *****/
+ /***** if 'char_hi == 0' then we must have 'is32[char_lo] == 0' *****/
+ /***** *****/
+ /***** if 'char_hi != 0' then we must have both *****/
+ /***** 'is32[char_hi] != 0' and 'is32[char_lo] != 0' *****/
+ /***** *****/
+ /***** *****/
+ /************************************************************************/
+ /************************************************************************/
+
+#ifdef TT_CONFIG_CMAP_FORMAT_8
+
+ /*
+  * Validate a format 8 charmap.
+  *
+  * Layout used below (byte offsets from `table'): format 0,
+  * reserved 2, length 4 (32 bits), language 8, `is32' bit array 12
+  * (8192 bytes), group count 8204, then 12-byte groups made of
+  * start code, end code and start glyph id.
+  *
+  *   table :: start of the sub-table (its `format' field)
+  *   valid :: validator; `limit', `level' and `num_glyphs' are read
+  *
+  * Groups must satisfy start <= end and be strictly increasing.  At
+  * FT_VALIDATE_TIGHT the glyph ids must stay below `num_glyphs' and the
+  * `is32' bits must agree with the codes of each group: both halves of
+  * a 32-bit code need their bit set, a 16-bit code needs it cleared.
+  */
+ static void
+ tt_cmap8_validate( FT_Byte*     table,
+                    FT_Validator valid )
+ {
+   FT_Byte* p = table + 4;                 /* skip format and reserved */
+   FT_Byte* is32;
+   FT_ULong length;
+   FT_ULong num_groups;
+
+
+   if ( table + 16 + 8192 > valid->limit )
+     INVALID_TOO_SHORT;
+
+   length = TT_NEXT_ULong( p );
+   if ( table + length > valid->limit || length < 16 + 8192 )
+     INVALID_TOO_SHORT;
+
+   is32       = p + 4;                     /* skip language */
+   p          = is32 + 8192;               /* skip `is32' array */
+   num_groups = TT_NEXT_ULong( p );
+
+   /* divide rather than multiply so that a huge group count cannot */
+   /* overflow the comparison                                       */
+   if ( num_groups > (FT_ULong)( valid->limit - p ) / 12 )
+     INVALID_TOO_SHORT;
+
+   /* check groups, they must be in increasing order */
+   {
+     FT_ULong n, start, end, start_id, count, last = 0;
+
+
+     for ( n = 0; n < num_groups; n++ )
+     {
+       FT_UInt hi, lo;
+
+
+       start    = TT_NEXT_ULong( p );
+       end      = TT_NEXT_ULong( p );
+       start_id = TT_NEXT_ULong( p );
+
+       if ( start > end )
+         INVALID_DATA;
+
+       if ( n > 0 && start <= last )
+         INVALID_DATA;
+
+       if ( valid->level >= FT_VALIDATE_TIGHT )
+       {
+         if ( start_id + end - start >= valid->num_glyphs )
+           INVALID_GLYPH_ID;
+
+         count = (FT_ULong)( end - start + 1 );
+
+         if ( start & ~0xFFFFUL )
+         {
+           /* start_hi != 0, check that is32[i] is 1 for each i in */
+           /* the `hi' and `lo' of the range [start..end]          */
+           for ( ; count > 0; count--, start++ )
+           {
+             hi = (FT_UInt)( start >> 16 );
+             lo = (FT_UInt)( start & 0xFFFFU );
+
+             /* `==' binds tighter than `&': the mask test needs */
+             /* its own parentheses                              */
+             if ( ( is32[hi >> 3] & ( 0x80 >> ( hi & 7 ) ) ) == 0 )
+               INVALID_DATA;
+
+             if ( ( is32[lo >> 3] & ( 0x80 >> ( lo & 7 ) ) ) == 0 )
+               INVALID_DATA;
+           }
+         }
+         else
+         {
+           /* start_hi == 0, check that is32[i] is 0 for each i in */
+           /* the range [start..end]                               */
+
+           /* end_hi cannot be != 0 !! */
+           if ( end & ~0xFFFFUL )
+             INVALID_DATA;
+
+           for ( ; count > 0; count--, start++ )
+           {
+             lo = (FT_UInt)( start & 0xFFFFU );
+
+             if ( ( is32[lo >> 3] & ( 0x80 >> ( lo & 7 ) ) ) != 0 )
+               INVALID_DATA;
+           }
+         }
+       }
+
+       /* remember where this group ended for the ordering check */
+       last = end;
+     }
+   }
+ }
+
+
+ /*
+  * Format 8 lookup.  Walks the groups (stored after the 8192-byte
+  * `is32' array) in table order and returns
+  * `start_id + (char_code - start)' for the group containing
+  * `char_code', or 0 when no group contains it.
+  */
+ static FT_UInt
+ tt_cmap8_char_index( FT_Byte* table,
+                      FT_ULong char_code )
+ {
+   FT_UInt  result     = 0;
+   FT_Byte* p          = table + 12 + 8192;
+   FT_ULong num_groups = TT_NEXT_ULong( p );
+   FT_ULong start, end, start_id;
+
+
+   for ( ; num_groups > 0; num_groups-- )
+   {
+     start    = TT_NEXT_ULong( p );
+     end      = TT_NEXT_ULong( p );
+     start_id = TT_NEXT_ULong( p );
+
+     /* this loop stops at the first group starting beyond the code */
+     if ( char_code < start )
+       break;
+
+     if ( char_code <= end )
+     {
+       result = (FT_UInt)( start_id + char_code - start );
+       break;
+     }
+   }
+
+   return result;
+ }
+
+
+ /*
+  * Format 8 iteration: find the first character code greater than
+  * `char_code' that maps to a non-zero glyph index.
+  *
+  * Returns that code, or 0 when there is none.  When `agindex' is not
+  * NULL it receives the glyph index (0 when nothing was found).
+  */
+ static FT_ULong
+ tt_cmap8_char_next( FT_Byte* table,
+                     FT_ULong char_code,
+                     FT_UInt *agindex )
+ {
+   FT_ULong result     = 0;
+   FT_UInt  gindex     = 0;
+   FT_Byte* p          = table + 12 + 8192;
+   /* the group count is a 32-bit field; reading it leaves `p' on */
+   /* the first group at `table + 16 + 8192'                      */
+   FT_ULong num_groups = TT_NEXT_ULong( p );
+   FT_ULong n, start, end, start_id;
+
+
+   ++char_code;
+
+   for ( n = 0; n < num_groups; n++ )
+   {
+     start    = TT_NEXT_ULong( p );
+     end      = TT_NEXT_ULong( p );
+     start_id = TT_NEXT_ULong( p );
+
+     if ( char_code < start )
+       char_code = start;
+
+     if ( char_code <= end )
+     {
+       gindex = (FT_UInt)( char_code - start + start_id );
+
+       /* glyph 0 means `missing'; glyph ids are consecutive within */
+       /* a group, so the next code of the group cannot map to 0    */
+       if ( gindex == 0 && char_code < end )
+       {
+         char_code++;
+         gindex++;
+       }
+
+       if ( gindex != 0 )
+       {
+         result = char_code;
+         goto Exit;
+       }
+     }
+   }
+
+ Exit:
+   if ( agindex )
+     *agindex = gindex;
+
+   return result;
+ }
+
+ /* Method table for format 8 charmaps: validator, single-code lookup
+  * and next-code iterator, in that order.
+  * NOTE(review): TT_Cmap_ClassRec and the TT_CMap_*Func pointer types
+  * are declared outside this file -- confirm the field order there.
+  */
+ static const TT_Cmap_ClassRec tt_cmap8_class_rec =
+ {
+ (TT_CMap_ValidateFunc) tt_cmap8_validate,
+ (TT_CMap_CharIndexFunc) tt_cmap8_char_index,
+ (TT_CMap_CharNextFunc) tt_cmap8_char_next
+ };
+
+#endif /* TT_CONFIG_CMAP_FORMAT_8 */
+
+ /************************************************************************/
+ /************************************************************************/
+ /***** *****/
+ /***** FORMAT 10 *****/
+ /***** *****/
+ /************************************************************************/
+ /************************************************************************/
+
+#ifdef TT_CONFIG_CMAP_FORMAT_10
+
+ /*
+  * Validate a format 10 charmap.
+  *
+  * Layout used below (byte offsets from `table'), matching what the
+  * format 10 lookup of this file reads: format 0, reserved 2,
+  * length 4 (32 bits), language 8, start code 12, entry count 16,
+  * 16-bit glyph ids from 20 on.
+  *
+  *   table :: start of the sub-table (its `format' field)
+  *   valid :: validator; `limit', `level' and `num_glyphs' are read
+  *
+  * The table must hold the 20-byte header plus two bytes per entry; at
+  * FT_VALIDATE_TIGHT each glyph id must be below `num_glyphs'.
+  */
+ static void
+ tt_cmap10_validate( FT_Byte*     table,
+                     FT_Validator valid )
+ {
+   FT_Byte* p = table + 4;                 /* skip format and reserved */
+   FT_ULong length, count;
+
+
+   if ( table + 20 > valid->limit )
+     INVALID_TOO_SHORT;
+
+   length = TT_NEXT_ULong( p );
+   p      = table + 16;                    /* skip language and start code */
+   count  = TT_NEXT_ULong( p );
+
+   /* compare entry counts instead of byte sizes so that `count * 2' */
+   /* cannot overflow                                                */
+   if ( table + length > valid->limit ||
+        length < 20                   ||
+        ( length - 20 ) / 2 < count   )
+     INVALID_TOO_SHORT;
+
+   /* check glyph indices; `p' now points at the glyph id array */
+   if ( valid->level >= FT_VALIDATE_TIGHT )
+   {
+     FT_UInt gindex;
+
+
+     for ( ; count > 0; count-- )
+     {
+       gindex = TT_NEXT_UShort( p );
+       if ( gindex >= valid->num_glyphs )
+         INVALID_GLYPH_ID;
+     }
+   }
+ }
+
+
+ /*
+  * Format 10 lookup.  Reads the 32-bit start code and entry count at
+  * offsets 12 and 16 and returns the 16-bit glyph id stored for
+  * `char_code', or 0 when the code lies outside
+  * [start, start + count).
+  */
+ static FT_UInt
+ tt_cmap10_char_index( FT_Byte* table,
+                       FT_ULong char_code )
+ {
+   FT_UInt  result = 0;
+   FT_Byte* p      = table + 12;
+   FT_ULong start  = TT_NEXT_ULong( p );
+   FT_ULong count  = TT_NEXT_ULong( p );
+   /* unsigned subtraction: codes below `start' wrap to a huge value */
+   FT_ULong idx    = (FT_ULong)( char_code - start );
+
+
+   if ( idx < count )
+   {
+     p     += 2 * idx;                /* `p' pointed at the glyph ids */
+     result = TT_PEEK_UShort( p );
+   }
+
+   return result;
+ }
+
+
+ /*
+  * Format 10 iteration: find the first character code greater than
+  * `char_code' whose glyph id is not 0.
+  *
+  * Returns that code, or 0 when there is none.  When `agindex' is not
+  * NULL it receives the glyph id (0 when nothing was found).
+  *
+  * Start code and entry count are 32-bit fields here, so codes of
+  * 0x10000 and above are enumerated like any other.
+  */
+ static FT_ULong
+ tt_cmap10_char_next( FT_Byte* table,
+                      FT_ULong char_code,
+                      FT_UInt *agindex )
+ {
+   FT_ULong result = 0;
+   FT_UInt  gindex = 0;
+   FT_Byte* p      = table + 12;
+   FT_ULong start  = TT_NEXT_ULong( p );
+   FT_ULong count  = TT_NEXT_ULong( p );
+   FT_ULong idx;
+
+
+   char_code++;
+
+   if ( char_code < start )
+     char_code = start;
+
+   idx = (FT_ULong)( char_code - start );
+
+   /* only move the cursor when the slot really exists */
+   if ( idx < count )
+   {
+     p += 2 * idx;
+
+     for ( ; idx < count; idx++, char_code++ )
+     {
+       gindex = TT_NEXT_UShort( p );
+       if ( gindex != 0 )
+       {
+         result = char_code;
+         break;
+       }
+     }
+   }
+
+   if ( agindex )
+     *agindex = gindex;
+
+   return result;
+ }
+ /* Method table for format 10 charmaps: validator, single-code lookup
+  * and next-code iterator, in that order.
+  * NOTE(review): TT_Cmap_ClassRec and the TT_CMap_*Func pointer types
+  * are declared outside this file -- confirm the field order there.
+  */
+ static const TT_Cmap_ClassRec tt_cmap10_class_rec =
+ {
+ (TT_CMap_ValidateFunc) tt_cmap10_validate,
+ (TT_CMap_CharIndexFunc) tt_cmap10_char_index,
+ (TT_CMap_CharNextFunc) tt_cmap10_char_next
+ };
+
+#endif /* TT_CONFIG_CMAP_FORMAT_10 */
+
+
+ /************************************************************************/
+ /************************************************************************/
+ /***** *****/
+ /***** FORMAT 12 *****/
+ /***** *****/
+ /************************************************************************/
+ /************************************************************************/
+
+#ifdef TT_CONFIG_CMAP_FORMAT_12
+
+ /*
+  * Validate a format 12 (segmented coverage) charmap.
+  *
+  * TABLE OVERVIEW (per the OpenType `cmap' specification):
+  *
+  *   NAME        OFFSET  TYPE       DESCRIPTION
+  *
+  *   format      0       USHORT     must be 12
+  *   reserved    2       USHORT     reserved
+  *   length      4       ULONG      table length in bytes
+  *   language    8       ULONG      Mac language code
+  *   count       12      ULONG      number of groups
+  *   groups      16      GROUP[]    12 bytes each: start code, end
+  *                                  code, start glyph id (all ULONG)
+  *
+  *   table :: start of the sub-table (its `format' field)
+  *   valid :: validator; `limit', `level' and `num_glyphs' are read
+  *
+  * Groups must satisfy start <= end and be strictly increasing; at
+  * FT_VALIDATE_TIGHT the last glyph id of each group must be below
+  * `num_glyphs'.
+  */
+ static void
+ tt_cmap12_validate( FT_Byte*     table,
+                     FT_Validator valid )
+ {
+   FT_Byte* p;
+   FT_ULong length, num_groups;
+   FT_ULong n, start, end, start_id, last = 0;
+
+
+   if ( table + 16 > valid->limit )
+     INVALID_TOO_SHORT;
+
+   p      = table + 4;                     /* skip format and reserved */
+   length = TT_NEXT_ULong( p );
+
+   p          = table + 12;                /* skip language */
+   num_groups = TT_NEXT_ULong( p );
+
+   /* compare group counts instead of byte sizes so that the product */
+   /* `num_groups * 12' cannot overflow                              */
+   if ( table + length > valid->limit    ||
+        length < 16                      ||
+        ( length - 16 ) / 12 < num_groups )
+     INVALID_TOO_SHORT;
+
+   /* check groups, they must be in increasing order */
+   for ( n = 0; n < num_groups; n++ )
+   {
+     start    = TT_NEXT_ULong( p );
+     end      = TT_NEXT_ULong( p );
+     start_id = TT_NEXT_ULong( p );
+
+     if ( start > end )
+       INVALID_DATA;
+
+     if ( n > 0 && start <= last )
+       INVALID_DATA;
+
+     if ( valid->level >= FT_VALIDATE_TIGHT )
+     {
+       if ( start_id + end - start >= valid->num_glyphs )
+         INVALID_GLYPH_ID;
+     }
+
+     last = end;
+   }
+ }
+
+
+ /*
+  * Format 12 lookup.  The group count is the ULONG at offset 12 and
+  * the 12-byte groups (start code, end code, start glyph id) follow at
+  * offset 16, per the OpenType `cmap' specification.
+  *
+  * Returns `start_id + (char_code - start)' for the group containing
+  * `char_code', or 0 when no group contains it.
+  */
+ static FT_UInt
+ tt_cmap12_char_index( FT_Byte* table,
+                       FT_ULong char_code )
+ {
+   FT_UInt  result     = 0;
+   FT_Byte* p          = table + 12;
+   FT_ULong num_groups = TT_NEXT_ULong( p );
+   FT_ULong start, end, start_id;
+
+
+   for ( ; num_groups > 0; num_groups-- )
+   {
+     start    = TT_NEXT_ULong( p );
+     end      = TT_NEXT_ULong( p );
+     start_id = TT_NEXT_ULong( p );
+
+     /* groups are sorted, no later one can contain the code */
+     if ( char_code < start )
+       break;
+
+     if ( char_code <= end )
+     {
+       result = (FT_UInt)( start_id + char_code - start );
+       break;
+     }
+   }
+
+   return result;
+ }
+
+
+ /*
+  * Format 12 iteration: find the first character code greater than
+  * `char_code' that maps to a non-zero glyph index.  The group count
+  * is the ULONG at offset 12 and the 12-byte groups follow at
+  * offset 16, per the OpenType `cmap' specification.
+  *
+  * Returns that code, or 0 when there is none.  When `agindex' is not
+  * NULL it receives the glyph index (0 when nothing was found).
+  */
+ static FT_ULong
+ tt_cmap12_char_next( FT_Byte* table,
+                      FT_ULong char_code,
+                      FT_UInt *agindex )
+ {
+   FT_ULong result     = 0;
+   FT_UInt  gindex     = 0;
+   FT_Byte* p          = table + 12;
+   FT_ULong num_groups = TT_NEXT_ULong( p );
+   FT_ULong n, start, end, start_id;
+
+
+   ++char_code;
+
+   for ( n = 0; n < num_groups; n++ )
+   {
+     start    = TT_NEXT_ULong( p );
+     end      = TT_NEXT_ULong( p );
+     start_id = TT_NEXT_ULong( p );
+
+     if ( char_code < start )
+       char_code = start;
+
+     if ( char_code <= end )
+     {
+       gindex = (FT_UInt)( char_code - start + start_id );
+
+       /* glyph 0 means `missing'; glyph ids are consecutive within */
+       /* a group, so the next code of the group cannot map to 0    */
+       if ( gindex == 0 && char_code < end )
+       {
+         char_code++;
+         gindex++;
+       }
+
+       if ( gindex != 0 )
+       {
+         result = char_code;
+         break;
+       }
+     }
+   }
+
+   if ( agindex )
+     *agindex = gindex;
+
+   return result;
+ }
+
+ /* Method table for format 12 charmaps: validator, single-code lookup
+  * and next-code iterator, in that order.
+  * NOTE(review): TT_Cmap_ClassRec and the TT_CMap_*Func pointer types
+  * are declared outside this file -- confirm the field order there.
+  */
+ static const TT_Cmap_ClassRec tt_cmap12_class_rec =
+ {
+ (TT_CMap_ValidateFunc) tt_cmap12_validate,
+ (TT_CMap_CharIndexFunc) tt_cmap12_char_index,
+ (TT_CMap_CharNextFunc) tt_cmap12_char_next
+ };
+
+#endif /* TT_CONFIG_CMAP_FORMAT_12 */
+
--- /dev/null
+++ b/src/sfnt/ttcmap0.h
@@ -1,0 +1,45 @@
+/***************************************************************************/
+/* */
+/* ttcmap.h */
+/* */
+/* TrueType character mapping table (cmap) support (specification). */
+/* */
+/* Copyright 1996-2001 by */
+/* David Turner, Robert Wilhelm, and Werner Lemberg. */
+/* */
+/* This file is part of the FreeType project, and may only be used, */
+/* modified, and distributed under the terms of the FreeType project */
+/* license, LICENSE.TXT. By continuing to use, modify, or distribute */
+/* this file you indicate that you have read the license and */
+/* understand and accept it fully. */
+/* */
+/***************************************************************************/
+
+
+#ifndef __TTCMAP_H__
+#define __TTCMAP_H__
+
+
+#include <ft2build.h>
+#include FT_INTERNAL_TRUETYPE_TYPES_H
+#include FT_INTERNAL_OBJECTS_H
+
+
+FT_BEGIN_HEADER
+ /* Declaration of TT_CharMap_Load: takes a face, a pointer to a
+  * `TT_CMapTable' and an input stream, and returns an FT_Error.
+  * NOTE(review): presumably loads one charmap sub-table from `input'
+  * into `cmap'; the definition is not part of this file -- confirm
+  * there.
+  */
+ FT_LOCAL FT_Error
+ TT_CharMap_Load( TT_Face face,
+ TT_CMapTable* cmap,
+ FT_Stream input );
+ /* Declaration of TT_CharMap_Free: takes a face and a pointer to a
+  * `TT_CMapTable', and returns an FT_Error.
+  * NOTE(review): presumably releases whatever TT_CharMap_Load
+  * allocated for `cmap'; the definition is not part of this file --
+  * confirm there.
+  */
+ FT_LOCAL FT_Error
+ TT_CharMap_Free( TT_Face face,
+ TT_CMapTable* cmap );
+
+
+FT_END_HEADER
+
+#endif /* __TTCMAP_H__ */
+
+
+/* END */