diff options
author | Bernhard Haumacher <[email protected]> | 2020-05-10 14:09:51 +0200 |
---|---|---|
committer | Sven Göthel <[email protected]> | 2024-02-03 02:00:45 +0100 |
commit | 24a4f764c1f7b07c81a31991bf65808c2406e5bd (patch) | |
tree | d5cfe67cd09aaa16aa8feed04edf08a8b4f040e8 | |
parent | 468a75d6b84b0cdcbd860f8a52492725be8771d9 (diff) |
Improved documentation of Cmap table.
8 files changed, 331 insertions, 6 deletions
diff --git a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat.java b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat.java index 1d4bb48bd..e4e2e9ff7 100644 --- a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat.java +++ b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat.java @@ -23,6 +23,8 @@ import java.io.DataInput; import java.io.IOException; /** + * Entry in the {@link CmapTable}. + * * @author <a href="mailto:[email protected]">David Schweinsberg</a> */ public abstract class CmapFormat { @@ -64,8 +66,20 @@ public abstract class CmapFormat { } } + /** + * The format version. + * + * @see CmapFormat0 + * @see CmapFormat2 + * @see CmapFormat4 + * @see CmapFormat6 + * @see CmapFormat12 + */ protected abstract int getFormat(); + /** + * The length in bytes of the subtable. + */ public abstract int getLength(); protected abstract int getLanguage(); @@ -75,6 +89,12 @@ public abstract class CmapFormat { public abstract Range getRange(int index) throws ArrayIndexOutOfBoundsException; + /** + * Maps the given character to the index of the glyph to use for this + * character. + * + * @see GlyfTable#getDescription(int) + */ public abstract int mapCharCode(int charCode); @Override diff --git a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat0.java b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat0.java index ed3d6cd6c..451a5204f 100644 --- a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat0.java +++ b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat0.java @@ -23,14 +23,32 @@ import java.io.DataInput; import java.io.IOException; /** + * Format 0: Byte encoding table + * + * <p> * Simple Macintosh cmap table, mapping only the ASCII character set to glyphs. + * </p> + * + * @see "https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-0-byte-encoding-table" * * @author <a href="mailto:[email protected]">David Schweinsberg</a> */ public class CmapFormat0 extends CmapFormat { + /** + * uint16 + * + * @see #getLength() + */ private final int _length; + + /** + * uint16 + * + * @see #getLanguage() + */ private final int _language; + private final int[] _glyphIdArray = new int[256]; CmapFormat0(DataInput di) throws IOException { diff --git a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat12.java b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat12.java index 530470cae..2554d30bb 100644 --- a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat12.java +++ b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat12.java @@ -21,6 +21,21 @@ import java.io.DataInput; import java.io.IOException; /** + * Format 12: Segmented coverage + * + * <p> + * This is the standard character-to-glyph-index mapping table for the Windows + * platform for fonts supporting Unicode supplementary-plane characters (U+10000 + * to U+10FFFF). + * </p> + * + * <p> + * Format 12 is similar to {@link CmapFormat4 format 4} in that it defines + * segments for sparse representation. It differs, however, in that it uses + * 32-bit character codes. + * </p> + * + * @see "https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-12-segmented-coverage" * * @author <a href="mailto:[email protected]">David Schweinsberg</a> */ diff --git a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat2.java b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat2.java index 41fbbb88a..7b3265810 100644 --- a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat2.java +++ b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat2.java @@ -23,23 +23,97 @@ import java.io.DataInput; import java.io.IOException; /** - * High-byte mapping through table cmap format. + * Format 2: High-byte mapping through table. + * + * @see "https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table" + * * @author <a href="mailto:[email protected]">David Schweinsberg</a> */ public class CmapFormat2 extends CmapFormat { - private static class SubHeader { + static class SubHeader { + /** + * uint16 + * + * First valid low byte for this SubHeader. + * + * @see #_entryCount + */ int _firstCode; + + /** + * uint16 + * + * Number of valid low bytes for this SubHeader. + * + * <p> + * The {@link #_firstCode} and {@link #_entryCount} values specify a + * subrange that begins at {@link #_firstCode} and has a length equal to + * the value of {@link #_entryCount}. This subrange stays within the + * 0-255 range of the byte being mapped. Bytes outside of this subrange + * are mapped to glyph index 0 (missing glyph). The offset of the byte + * within this subrange is then used as index into a corresponding + * subarray of {@link #_glyphIndexArray}. This subarray is also of + * length {@link #_entryCount}. The value of the {@link #_idRangeOffset} + * is the number of bytes past the actual location of the + * {@link #_idRangeOffset} word where the {@link #_glyphIndexArray} + * element corresponding to {@link #_firstCode} appears. + * </p> + * <p> + * Finally, if the value obtained from the subarray is not 0 (which + * indicates the missing glyph), you should add {@link #_idDelta} to it + * in order to get the glyphIndex. The value {@link #_idDelta} permits + * the same subarray to be used for several different subheaders. The + * {@link #_idDelta} arithmetic is modulo 65536. + * </p> + */ int _entryCount; + + /** + * @see #_entryCount + */ short _idDelta; + + /** + * @see #_entryCount + */ int _idRangeOffset; + int _arrayIndex; } + /** + * uint16 + * + * @see #getLength() + */ private final int _length; + + /** + * uint16 + * + * @see #getLanguage() + */ private final int _language; + + /** + * uint16[256] + * + * Array that maps high bytes to subHeaders: value is subHeader index × 8. + */ private final int[] _subHeaderKeys = new int[256]; + + /** + * Variable-length array of SubHeader records. + */ private final SubHeader[] _subHeaders; + + /** + * uint16 + * + * Variable-length array containing subarrays used for mapping the low byte + * of 2-byte characters. + */ private final int[] _glyphIndexArray; CmapFormat2(DataInput di) throws IOException { diff --git a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat4.java b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat4.java index a371b5d6a..a9d0e1378 100644 --- a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat4.java +++ b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat4.java @@ -24,21 +24,92 @@ import java.io.IOException; import java.util.Arrays; /** + * Format 4: Segment mapping to delta values + * + * <p> + * This is the standard character-to-glyph-index mapping table for the Windows + * platform for fonts that support Unicode BMP characters. + * </p> + * + * <p> + * This format is used when the character codes for the characters represented + * by a font fall into several contiguous ranges, possibly with holes in some or + * all of the ranges (that is, some of the codes in a range may not have a + * representation in the font). The format-dependent data is divided into three + * parts, which must occur in the following order: + * </p> + * <ol> + * <li>A four-word header gives parameters for an optimized search of the + * segment list; + * <li>Four parallel arrays describe the segments (one segment for each + * contiguous range of codes); + * <li>A variable-length array of glyph IDs (unsigned words). + * </ol> + * + * @see "https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-4-segment-mapping-to-delta-values" + * * @author <a href="mailto:[email protected]">David Schweinsberg</a> */ public class CmapFormat4 extends CmapFormat { + /** + * uint16 length This is the length in bytes of the subtable. + */ private final int _length; + + /** + * uint16 language + */ private final int _language; + + /** + * uint16 segCountX2 2 × segCount. + */ private final int _segCountX2; + + /** + * uint16 searchRange 2 × (2**floor(log2(segCount))) + */ private final int _searchRange; + + /** + * uint16 entrySelector log2(searchRange/2) + */ private final int _entrySelector; + + /** + * uint16 rangeShift 2 × segCount - searchRange + */ private final int _rangeShift; + + /** + * uint16 endCode[segCount] End characterCode for each segment, last=0xFFFF. + */ private final int[] _endCode; + + /** + * uint16 startCode[segCount] Start character code for each segment. + */ private final int[] _startCode; + + /** + * int16 idDelta[segCount] Delta for all character codes in segment. + */ private final int[] _idDelta; + + /** + * uint16 idRangeOffset[segCount] Offsets into glyphIdArray or 0 + */ private final int[] _idRangeOffset; + + /** + * uint16 glyphIdArray[ ] Glyph index array (arbitrary length) + */ private final int[] _glyphIdArray; + + /** + * @see #_segCountX2 + */ private final int _segCount; CmapFormat4(DataInput di) throws IOException { diff --git a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat6.java b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat6.java index 80039aac0..daf12675c 100644 --- a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat6.java +++ b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapFormat6.java @@ -23,6 +23,9 @@ import java.io.IOException; /** * Format 6: Trimmed table mapping + * + * @see "https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-6-trimmed-table-mapping" + * * @author <a href="mailto:[email protected]">David Schweinsberg</a> */ public class CmapFormat6 extends CmapFormat { diff --git a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapIndexEntry.java b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapIndexEntry.java index 102ea2f71..99de551f5 100644 --- a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapIndexEntry.java +++ b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapIndexEntry.java @@ -54,13 +54,35 @@ import java.io.DataInput; import java.io.IOException; /** + * Encoding record. + * + * <p> + * The array of encoding records specifies particular encodings and the offset + * to the subtable for each encoding. + * </p> + * * @author <a href="mailto:[email protected]">David Schweinsberg</a> */ public class CmapIndexEntry implements Comparable<CmapIndexEntry> { + /** + * @see #getPlatformId() + */ private int _platformId; + + /** + * @see #getEncodingId() + */ private int _encodingId; + + /** + * @see #getOffset() + */ private int _offset; + + /** + * @see #getFormat() + */ private CmapFormat _format; CmapIndexEntry(DataInput di) throws IOException { @@ -69,14 +91,43 @@ public class CmapIndexEntry implements Comparable<CmapIndexEntry> { _offset = di.readInt(); } + /** + * uint16 + * + * Platform ID. + * + * <p> + * Complete details on platform IDs and platform-specific encoding and + * language IDs are provided in the {@link NameTable}. + * </p> + * + * @see xxxx + */ public int getPlatformId() { return _platformId; } + /** + * uint16 + * + * Platform-specific encoding ID. + * + * <p> + * The platform ID and platform-specific encoding ID in the encoding record + * are used to specify a particular character encoding. In the case of the + * Macintosh platform, a language field within the mapping subtable is also + * used for this purpose. + * </p> + */ public int getEncodingId() { return _encodingId; } + /** + * Offset32 + * + * Byte offset from beginning of table to the subtable for this encoding. + */ public int getOffset() { return _offset; } diff --git a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapTable.java b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapTable.java index 35fdb1c22..3a2dffca2 100644 --- a/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapTable.java +++ b/src/jogl/classes/jogamp/graph/font/typecast/ot/table/CmapTable.java @@ -55,21 +55,81 @@ import java.io.IOException; import java.util.Arrays; /** + * Character to Glyph Index Mapping Table + * + * <p> + * This table defines the mapping of character codes to the glyph index values + * used in the font. It may contain more than one subtable, in order to support + * more than one character encoding scheme. + * </p> + * + * <h2>Overview</h2> + * + * <p> + * This table defines mapping of character codes to a default glyph index. + * Different subtables may be defined that each contain mappings for different + * character encoding schemes. The table header indicates the character + * encodings for which subtables are present. + * </p> + * + * <p> + * Regardless of the encoding scheme, character codes that do not correspond to + * any glyph in the font should be mapped to glyph index 0. The glyph at this + * location must be a special glyph representing a missing character, commonly + * known as .notdef. + * </p> + * + * <p> + * Each subtable is in one of seven possible formats and begins with a format + * field indicating the format used. The first four formats — formats 0, 2, 4 + * and 6 — were originally defined prior to Unicode 2.0. These formats allow for + * 8-bit single-byte, 8-bit multi-byte, and 16-bit encodings. With the + * introduction of supplementary planes in Unicode 2.0, the Unicode addressable + * code space extends beyond 16 bits. To accommodate this, three additional + * formats were added — formats 8, 10 and 12 — that allow for 32-bit encoding + * schemes. + * </p> + * + * <p> + * Other enhancements in Unicode led to the addition of other subtable formats. + * Subtable format 13 allows for an efficient mapping of many characters to a + * single glyph; this is useful for “last-resort” fonts that provide fallback + * rendering for all possible Unicode characters with a distinct fallback glyph + * for different Unicode ranges. Subtable format 14 provides a unified mechanism + * for supporting Unicode variation sequences. + * </p> + * * @author <a href="mailto:[email protected]">David Schweinsberg</a> */ public class CmapTable implements Table { - private int _version; + /** + * @see #getVersion() + */ + public static final int VERSION = 0x0000; + + private int _version = VERSION; private int _numTables; private CmapIndexEntry[] _entries; + /** + * Creates a {@link CmapTable}. + * + * @param di The reader to read from. + */ public CmapTable(DataInput di) throws IOException { _version = di.readUnsignedShort(); _numTables = di.readUnsignedShort(); long bytesRead = 4; - _entries = new CmapIndexEntry[_numTables]; - + // Get each of the index entries + + // Note: The encoding record entries in the 'cmap' header must be sorted + // first by platform ID, then by platform-specific encoding ID, and then + // by the language field in the corresponding subtable. Each platform + // ID, platform-specific encoding ID, and subtable language combination + // may appear only once in the 'cmap' table. + _entries = new CmapIndexEntry[_numTables]; for (int i = 0; i < _numTables; i++) { _entries[i] = new CmapIndexEntry(di); bytesRead += 8; @@ -79,7 +139,7 @@ public class CmapTable implements Table { Arrays.sort(_entries); // Get each of the tables - int lastOffset = 0; + int lastOffset = -1; CmapFormat lastFormat = null; for (int i = 0; i < _numTables; i++) { if (_entries[i].getOffset() == lastOffset) { @@ -107,10 +167,23 @@ public class CmapTable implements Table { return cmap; } + /** + * uint16 Table version number ({@link #VERSION}}). + * + * <p> + * Note: The 'cmap' table version number remains at {@link #VERSION} for + * fonts that make use of the newer subtable formats. + * </p> + */ public int getVersion() { return _version; } + /** + * uint16 + * + * Number of encoding tables that follow. + */ public int getNumTables() { return _numTables; } |