CoCalc -- CMap.java

GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/sun/font/CMap.java
⁴⁷¹⁵² views
1
/*
2
 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.  Oracle designates this
8
 * particular file as subject to the "Classpath" exception as provided
9
 * by Oracle in the LICENSE file that accompanied this code.
10
 *
11
 * This code is distributed in the hope that it will be useful, but WITHOUT
12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
 * version 2 for more details (a copy is included in the LICENSE file that
15
 * accompanied this code).
16
 *
17
 * You should have received a copy of the GNU General Public License version
18
 * 2 along with this work; if not, write to the Free Software Foundation,
19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
 *
21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
 * or visit www.oracle.com if you need additional information or have any
23
 * questions.
24
 */
25

26
package sun.font;
27

28
import java.nio.ByteBuffer;
29
import java.nio.CharBuffer;
30
import java.nio.IntBuffer;
31
import java.util.Locale;
32
import java.nio.charset.*;
33

34
/*
35
 * A tt font has a CMAP table which is in turn made up of sub-tables which
36
 * describe the char to glyph mapping in (possibly) multiple ways.
37
 * CMAP subtables are described by 3 values.
38
 * 1. Platform ID (eg 3=Microsoft, which is the id we look for in JDK)
39
 * 2. Encoding (eg 0=symbol, 1=unicode)
40
 * 3. TrueType subtable format (how the char->glyph mapping for the encoding
41
 * is stored in the subtable). See the TrueType spec. Format 4 is required
42
 * by MS in fonts for windows. It uses segmented mapping to delta values.
43
 * The combination we most typically see is (3,1,4) :
44
 * CMAP Platform ID=3 is what we use.
45
 * Encodings that are used in practice by JDK on Solaris are
46
 *  symbol (3,0)
47
 *  unicode (3,1)
48
 *  GBK (3,5) (note that solaris zh fonts report 3,4 but are really 3,5)
49
 * The format for almost all subtables is 4. However the solaris (3,5)
50
 * encodings are typically in format 2.
51
 */
52
abstract class CMap {
53

54
//     static char WingDings_b2c[] = {
55
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
56
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
57
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
58
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
59
//         0xfffd, 0xfffd, 0x2702, 0x2701, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
60
//         0xfffd, 0x2706, 0x2709, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
61
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
62
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2707, 0x270d,
63
//         0xfffd, 0x270c, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
64
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
65
//         0xfffd, 0x2708, 0xfffd, 0xfffd, 0x2744, 0xfffd, 0x271e, 0xfffd,
66
//         0x2720, 0x2721, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
67
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
68
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
69
//         0xfffd, 0x2751, 0x2752, 0xfffd, 0xfffd, 0x2756, 0xfffd, 0xfffd,
70
//         0xfffd, 0xfffd, 0xfffd, 0x2740, 0x273f, 0x275d, 0x275e, 0xfffd,
71
//         0xfffd, 0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786,
72
//         0x2787, 0x2788, 0x2789, 0xfffd, 0x278a, 0x278b, 0x278c, 0x278d,
73
//         0x278e, 0x278f, 0x2790, 0x2791, 0x2792, 0x2793, 0xfffd, 0xfffd,
74
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
75
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x274d, 0xfffd,
76
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2736, 0x2734, 0xfffd, 0x2735,
77
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x272a, 0x2730, 0xfffd,
78
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
79
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x27a5, 0xfffd, 0x27a6, 0xfffd,
80
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
81
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
82
//         0x27a2, 0xfffd, 0xfffd, 0xfffd, 0x27b3, 0xfffd, 0xfffd, 0xfffd,
83
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
84
//         0x27a1, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
85
//         0x27a9, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
86
//         0xfffd, 0xfffd, 0xfffd, 0x2717, 0x2713, 0xfffd, 0xfffd, 0xfffd,
87
//    };
88

89
//     static char Symbols_b2c[] = {
90
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
91
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
92
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
93
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
94
//         0xfffd, 0xfffd, 0x2200, 0xfffd, 0x2203, 0xfffd, 0xfffd, 0x220d,
95
//         0xfffd, 0xfffd, 0x2217, 0xfffd, 0xfffd, 0x2212, 0xfffd, 0xfffd,
96
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
97
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
98
//         0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393,
99
//         0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f,
100
//         0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9,
101
//         0x039e, 0x03a8, 0x0396, 0xfffd, 0x2234, 0xfffd, 0x22a5, 0xfffd,
102
//         0xfffd, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03c6, 0x03b3,
103
//         0x03b7, 0x03b9, 0x03d5, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf,
104
//         0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9,
105
//         0x03be, 0x03c8, 0x03b6, 0xfffd, 0xfffd, 0xfffd, 0x223c, 0xfffd,
106
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
107
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
108
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
109
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
110
//         0xfffd, 0x03d2, 0xfffd, 0x2264, 0x2215, 0x221e, 0xfffd, 0xfffd,
111
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
112
//         0x2218, 0xfffd, 0xfffd, 0x2265, 0xfffd, 0x221d, 0xfffd, 0x2219,
113
//         0xfffd, 0x2260, 0x2261, 0x2248, 0x22ef, 0x2223, 0xfffd, 0xfffd,
114
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2297, 0x2295, 0x2205, 0x2229,
115
//         0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209,
116
//         0xfffd, 0x2207, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x221a, 0x22c5,
117
//         0xfffd, 0x2227, 0x2228, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
118
//         0x22c4, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2211, 0xfffd, 0xfffd,
119
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
120
//         0xfffd, 0xfffd, 0x222b, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
121
//         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
122
//     };
123

124
    static final short ShiftJISEncoding = 2;
125
    static final short GBKEncoding      = 3;
126
    static final short Big5Encoding     = 4;
127
    static final short WansungEncoding  = 5;
128
    static final short JohabEncoding    = 6;
129
    static final short MSUnicodeSurrogateEncoding = 10;
130

131
    static final char noSuchChar = (char)0xfffd;
132
    static final int SHORTMASK = 0x0000ffff;
133
    static final int INTMASK   = 0x7fffffff;
134

135
    static final char[][] converterMaps = new char[7][];
136

137
    /*
138
     * Unicode->other encoding translation array. A pre-computed look up
139
     * which can be shared across all fonts using that encoding.
140
     * Using this saves running character coverters repeatedly.
141
     */
142
    char[] xlat;
143

144
    static CMap initialize(TrueTypeFont font) {
145

146
        CMap cmap = null;
147

148
        int offset, platformID, encodingID=-1;
149

150
        int three0=0, three1=0, three2=0, three3=0, three4=0, three5=0,
151
            three6=0, three10=0;
152
        boolean threeStar = false;
153

154
        ByteBuffer cmapBuffer = font.getTableBuffer(TrueTypeFont.cmapTag);
155
        int cmapTableOffset = font.getTableSize(TrueTypeFont.cmapTag);
156
        short numberSubTables = cmapBuffer.getShort(2);
157

158
        /* locate the offsets of all 3,*  (ie Microsoft platform) encodings */
159
        for (int i=0; i<numberSubTables; i++) {
160
            cmapBuffer.position(i * 8 + 4);
161
            platformID = cmapBuffer.getShort();
162
            if (platformID == 3) {
163
                threeStar = true;
164
                encodingID = cmapBuffer.getShort();
165
                offset     = cmapBuffer.getInt();
166
                switch (encodingID) {
167
                case 0:  three0  = offset; break; // MS Symbol encoding
168
                case 1:  three1  = offset; break; // MS Unicode cmap
169
                case 2:  three2  = offset; break; // ShiftJIS cmap.
170
                case 3:  three3  = offset; break; // GBK cmap
171
                case 4:  three4  = offset; break; // Big 5 cmap
172
                case 5:  three5  = offset; break; // Wansung
173
                case 6:  three6  = offset; break; // Johab
174
                case 10: three10 = offset; break; // MS Unicode surrogates
175
                }
176
            }
177
        }
178

179
        /* This defines the preference order for cmap subtables */
180
        if (threeStar) {
181
            if (three10 != 0) {
182
                cmap = createCMap(cmapBuffer, three10, null);
183
            }
184
            else if  (three0 != 0) {
185
                /* The special case treatment of these fonts leads to
186
                 * anomalies where a user can view "wingdings" and "wingdings2"
187
                 * and the latter shows all its code points in the unicode
188
                 * private use area at 0xF000->0XF0FF and the former shows
189
                 * a scattered subset of its glyphs that are known mappings to
190
                 * unicode code points.
191
                 * The primary purpose of these mappings was to facilitate
192
                 * display of symbol chars etc in composite fonts, however
193
                 * this is not needed as all these code points are covered
194
                 * by Lucida Sans Regular.
195
                 * Commenting this out reduces the role of these two files
196
                 * (assuming that they continue to be used in font.properties)
197
                 * to just one of contributing to the overall composite
198
                 * font metrics, and also AWT can still access the fonts.
199
                 * Clients which explicitly accessed these fonts as names
200
                 * "Symbol" and "Wingdings" (ie as physical fonts) and
201
                 * expected to see a scattering of these characters will
202
                 * see them now as missing. How much of a problem is this?
203
                 * Perhaps we could still support this mapping just for
204
                 * "Symbol.ttf" but I suspect some users would prefer it
205
                 * to be mapped in to the Latin range as that is how
206
                 * the "symbol" font is used in native apps.
207
                 */
208
//              String name = font.platName.toLowerCase(Locale.ENGLISH);
209
//              if (name.endsWith("symbol.ttf")) {
210
//                  cmap = createSymbolCMap(cmapBuffer, three0, Symbols_b2c);
211
//              } else if (name.endsWith("wingding.ttf")) {
212
//                  cmap = createSymbolCMap(cmapBuffer, three0, WingDings_b2c);
213
//              } else {
214
                    cmap = createCMap(cmapBuffer, three0, null);
215
//              }
216
            }
217
            else if (three1 != 0) {
218
                cmap = createCMap(cmapBuffer, three1, null);
219
            }
220
            else if (three2 != 0) {
221
                cmap = createCMap(cmapBuffer, three2,
222
                                  getConverterMap(ShiftJISEncoding));
223
            }
224
            else if (three3 != 0) {
225
                cmap = createCMap(cmapBuffer, three3,
226
                                  getConverterMap(GBKEncoding));
227
            }
228
            else if (three4 != 0) {
229
                /* GB2312 TrueType fonts on Solaris have wrong encoding ID for
230
                 * cmap table, these fonts have EncodingID 4 which is Big5
231
                 * encoding according the TrueType spec, but actually the
232
                 * fonts are using gb2312 encoding, have to use this
233
                 * workaround to make Solaris zh_CN locale work.  -sherman
234
                 */
235
                if (FontUtilities.isSolaris && font.platName != null &&
236
                    (font.platName.startsWith(
237
                     "/usr/openwin/lib/locale/zh_CN.EUC/X11/fonts/TrueType") ||
238
                     font.platName.startsWith(
239
                     "/usr/openwin/lib/locale/zh_CN/X11/fonts/TrueType") ||
240
                     font.platName.startsWith(
241
                     "/usr/openwin/lib/locale/zh/X11/fonts/TrueType"))) {
242
                    cmap = createCMap(cmapBuffer, three4,
243
                                       getConverterMap(GBKEncoding));
244
                }
245
                else {
246
                    cmap = createCMap(cmapBuffer, three4,
247
                                      getConverterMap(Big5Encoding));
248
                }
249
            }
250
            else if (three5 != 0) {
251
                cmap = createCMap(cmapBuffer, three5,
252
                                  getConverterMap(WansungEncoding));
253
            }
254
            else if (three6 != 0) {
255
                cmap = createCMap(cmapBuffer, three6,
256
                                  getConverterMap(JohabEncoding));
257
            }
258
        } else {
259
            /* No 3,* subtable was found. Just use whatever is the first
260
             * table listed. Not very useful but maybe better than
261
             * rejecting the font entirely?
262
             */
263
            cmap = createCMap(cmapBuffer, cmapBuffer.getInt(8), null);
264
        }
265
        return cmap;
266
    }
267

268
    /* speed up the converting by setting the range for double
269
     * byte characters;
270
     */
271
    static char[] getConverter(short encodingID) {
272
        int dBegin = 0x8000;
273
        int dEnd   = 0xffff;
274
        String encoding;
275

276
        switch (encodingID) {
277
        case ShiftJISEncoding:
278
            dBegin = 0x8140;
279
            dEnd   = 0xfcfc;
280
            encoding = "SJIS";
281
            break;
282
        case GBKEncoding:
283
            dBegin = 0x8140;
284
            dEnd   = 0xfea0;
285
            encoding = "GBK";
286
            break;
287
        case Big5Encoding:
288
            dBegin = 0xa140;
289
            dEnd   = 0xfefe;
290
            encoding = "Big5";
291
            break;
292
        case WansungEncoding:
293
            dBegin = 0xa1a1;
294
            dEnd   = 0xfede;
295
            encoding = "EUC_KR";
296
            break;
297
        case JohabEncoding:
298
            dBegin = 0x8141;
299
            dEnd   = 0xfdfe;
300
            encoding = "Johab";
301
            break;
302
        default:
303
            return null;
304
        }
305

306
        try {
307
            char[] convertedChars = new char[65536];
308
            for (int i=0; i<65536; i++) {
309
                convertedChars[i] = noSuchChar;
310
            }
311

312
            byte[] inputBytes = new byte[(dEnd-dBegin+1)*2];
313
            char[] outputChars = new char[(dEnd-dBegin+1)];
314

315
            int j = 0;
316
            int firstByte;
317
            if (encodingID == ShiftJISEncoding) {
318
                for (int i = dBegin; i <= dEnd; i++) {
319
                    firstByte = (i >> 8 & 0xff);
320
                    if (firstByte >= 0xa1 && firstByte <= 0xdf) {
321
                        //sjis halfwidth katakana
322
                        inputBytes[j++] = (byte)0xff;
323
                        inputBytes[j++] = (byte)0xff;
324
                    } else {
325
                        inputBytes[j++] = (byte)firstByte;
326
                        inputBytes[j++] = (byte)(i & 0xff);
327
                    }
328
                }
329
            } else {
330
                for (int i = dBegin; i <= dEnd; i++) {
331
                    inputBytes[j++] = (byte)(i>>8 & 0xff);
332
                    inputBytes[j++] = (byte)(i & 0xff);
333
                }
334
            }
335

336
            Charset.forName(encoding).newDecoder()
337
            .onMalformedInput(CodingErrorAction.REPLACE)
338
            .onUnmappableCharacter(CodingErrorAction.REPLACE)
339
            .replaceWith("\u0000")
340
            .decode(ByteBuffer.wrap(inputBytes, 0, inputBytes.length),
341
                    CharBuffer.wrap(outputChars, 0, outputChars.length),
342
                    true);
343

344
            // ensure single byte ascii
345
            for (int i = 0x20; i <= 0x7e; i++) {
346
                convertedChars[i] = (char)i;
347
            }
348

349
            //sjis halfwidth katakana
350
            if (encodingID == ShiftJISEncoding) {
351
                for (int i = 0xa1; i <= 0xdf; i++) {
352
                    convertedChars[i] = (char)(i - 0xa1 + 0xff61);
353
                }
354
            }
355

356
            /* It would save heap space (approx 60Kbytes for each of these
357
             * converters) if stored only valid ranges (ie returned
358
             * outputChars directly. But this is tricky since want to
359
             * include the ASCII range too.
360
             */
361
//          System.err.println("oc.len="+outputChars.length);
362
//          System.err.println("cc.len="+convertedChars.length);
363
//          System.err.println("dbegin="+dBegin);
364
            System.arraycopy(outputChars, 0, convertedChars, dBegin,
365
                             outputChars.length);
366

367
            //return convertedChars;
368
            /* invert this map as now want it to map from Unicode
369
             * to other encoding.
370
             */
371
            char [] invertedChars = new char[65536];
372
            for (int i=0;i<65536;i++) {
373
                if (convertedChars[i] != noSuchChar) {
374
                    invertedChars[convertedChars[i]] = (char)i;
375
                }
376
            }
377
            return invertedChars;
378

379
        } catch (Exception e) {
380
            e.printStackTrace();
381
        }
382
        return null;
383
    }
384

385
    /*
386
     * The returned array maps to unicode from some other 2 byte encoding
387
     * eg for a 2byte index which represents a SJIS char, the indexed
388
     * value is the corresponding unicode char.
389
     */
390
    static char[] getConverterMap(short encodingID) {
391
        if (converterMaps[encodingID] == null) {
392
           converterMaps[encodingID] = getConverter(encodingID);
393
        }
394
        return converterMaps[encodingID];
395
    }
396

397

398
    static CMap createCMap(ByteBuffer buffer, int offset, char[] xlat) {
399
        /* First do a sanity check that this cmap subtable is contained
400
         * within the cmap table.
401
         */
402
        int subtableFormat = buffer.getChar(offset);
403
        long subtableLength;
404
        if (subtableFormat < 8) {
405
            subtableLength = buffer.getChar(offset+2);
406
        } else {
407
            subtableLength = buffer.getInt(offset+4) & INTMASK;
408
        }
409
        if (offset+subtableLength > buffer.capacity()) {
410
            if (FontUtilities.isLogging()) {
411
                FontUtilities.getLogger().warning("Cmap subtable overflows buffer.");
412
            }
413
        }
414
        switch (subtableFormat) {
415
        case 0:  return new CMapFormat0(buffer, offset);
416
        case 2:  return new CMapFormat2(buffer, offset, xlat);
417
        case 4:  return new CMapFormat4(buffer, offset, xlat);
418
        case 6:  return new CMapFormat6(buffer, offset, xlat);
419
        case 8:  return new CMapFormat8(buffer, offset, xlat);
420
        case 10: return new CMapFormat10(buffer, offset, xlat);
421
        case 12: return new CMapFormat12(buffer, offset, xlat);
422
        default: throw new RuntimeException("Cmap format unimplemented: " +
423
                                            (int)buffer.getChar(offset));
424
        }
425
    }
426

427
/*
428
    final char charVal(byte[] cmap, int index) {
429
        return (char)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));
430
    }
431

432
    final short shortVal(byte[] cmap, int index) {
433
        return (short)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));
434
    }
435
*/
436
    /*
     * Maps a character code to a glyph code using this cmap subtable.
     * In the implementations visible in this file, 0 is returned when the
     * subtable has no mapping for the character code.
     */
    abstract char getGlyph(int charCode);
437

438
    /* Format 4 Header is
439
     * ushort format (off=0)
440
     * ushort length (off=2)
441
     * ushort language (off=4)
442
     * ushort segCountX2 (off=6)
443
     * ushort searchRange (off=8)
444
     * ushort entrySelector (off=10)
445
     * ushort rangeShift (off=12)
446
     * ushort endCount[segCount] (off=14)
447
     * ushort reservedPad
448
     * ushort startCount[segCount]
449
     * short idDelta[segCount]
450
     * idRangeOFfset[segCount]
451
     * ushort glyphIdArray[]
452
     */
453
    static class CMapFormat4 extends CMap {
454
        int segCount;
455
        int entrySelector;
456
        int rangeShift;
457
        char[] endCount;
458
        char[] startCount;
459
        short[] idDelta;
460
        char[] idRangeOffset;
461
        char[] glyphIds;
462

463
        CMapFormat4(ByteBuffer bbuffer, int offset, char[] xlat) {
464

465
            this.xlat = xlat;
466

467
            bbuffer.position(offset);
468
            CharBuffer buffer = bbuffer.asCharBuffer();
469
            buffer.get(); // skip, we already know format=4
470
            int subtableLength = buffer.get();
471
            /* Try to recover from some bad fonts which specify a subtable
472
             * length that would overflow the byte buffer holding the whole
473
             * cmap table. If this isn't a recoverable situation an exception
474
             * may be thrown which is caught higher up the call stack.
475
             * Whilst this may seem lenient, in practice, unless the "bad"
476
             * subtable we are using is the last one in the cmap table we
477
             * would have no way of knowing about this problem anyway.
478
             */
479
            if (offset+subtableLength > bbuffer.capacity()) {
480
                subtableLength = bbuffer.capacity() - offset;
481
            }
482
            buffer.get(); // skip language
483
            segCount = buffer.get()/2;
484
            int searchRange = buffer.get();
485
            entrySelector = buffer.get();
486
            rangeShift    = buffer.get()/2;
487
            startCount = new char[segCount];
488
            endCount = new char[segCount];
489
            idDelta = new short[segCount];
490
            idRangeOffset = new char[segCount];
491

492
            for (int i=0; i<segCount; i++) {
493
                endCount[i] = buffer.get();
494
            }
495
            buffer.get(); // 2 bytes for reserved pad
496
            for (int i=0; i<segCount; i++) {
497
                startCount[i] = buffer.get();
498
            }
499

500
            for (int i=0; i<segCount; i++) {
501
                idDelta[i] = (short)buffer.get();
502
            }
503

504
            for (int i=0; i<segCount; i++) {
505
                char ctmp = buffer.get();
506
                idRangeOffset[i] = (char)((ctmp>>1)&0xffff);
507
            }
508
            /* Can calculate the number of glyph IDs by subtracting
509
             * "pos" from the length of the cmap
510
             */
511
            int pos = (segCount*8+16)/2;
512
            buffer.position(pos);
513
            int numGlyphIds = (subtableLength/2 - pos);
514
            glyphIds = new char[numGlyphIds];
515
            for (int i=0;i<numGlyphIds;i++) {
516
                glyphIds[i] = buffer.get();
517
            }
518
/*
519
            System.err.println("segcount="+segCount);
520
            System.err.println("entrySelector="+entrySelector);
521
            System.err.println("rangeShift="+rangeShift);
522
            for (int j=0;j<segCount;j++) {
523
              System.err.println("j="+j+ " sc="+(int)(startCount[j]&0xffff)+
524
                                 " ec="+(int)(endCount[j]&0xffff)+
525
                                 " delta="+idDelta[j] +
526
                                 " ro="+(int)idRangeOffset[j]);
527
            }
528

529
            //System.err.println("numglyphs="+glyphIds.length);
530
            for (int i=0;i<numGlyphIds;i++) {
531
                  System.err.println("gid["+i+"]="+(int)glyphIds[i]);
532
            }
533
*/
534
        }
535

536
        char getGlyph(int charCode) {
537

538
            int index = 0;
539
            char glyphCode = 0;
540

541
            int controlGlyph = getControlCodeGlyph(charCode, true);
542
            if (controlGlyph >= 0) {
543
                return (char)controlGlyph;
544
            }
545

546
            /* presence of translation array indicates that this
547
             * cmap is in some other (non-unicode encoding).
548
             * In order to look-up a char->glyph mapping we need to
549
             * translate the unicode code point to the encoding of
550
             * the cmap.
551
             * REMIND: VALID CHARCODES??
552
             */
553
            if (xlat != null) {
554
                charCode = xlat[charCode];
555
            }
556

557
            /*
558
             * Citation from the TrueType (and OpenType) spec:
559
             *   The segments are sorted in order of increasing endCode
560
             *   values, and the segment values are specified in four parallel
561
             *   arrays. You search for the first endCode that is greater than
562
             *   or equal to the character code you want to map. If the
563
             *   corresponding startCode is less than or equal to the
564
             *   character code, then you use the corresponding idDelta and
565
             *   idRangeOffset to map the character code to a glyph index
566
             *   (otherwise, the missingGlyph is returned).
567
             */
568

569
            /*
570
             * CMAP format4 defines several fields for optimized search of
571
             * the segment list (entrySelector, searchRange, rangeShift).
572
             * However, benefits are neglible and some fonts have incorrect
573
             * data - so we use straightforward binary search (see bug 6247425)
574
             */
575
            int left = 0, right = startCount.length;
576
            index = startCount.length >> 1;
577
            while (left < right) {
578
                if (endCount[index] < charCode) {
579
                    left = index + 1;
580
                } else {
581
                    right = index;
582
                }
583
                index = (left + right) >> 1;
584
            }
585

586
            if (charCode >= startCount[index] && charCode <= endCount[index]) {
587
                int rangeOffset = idRangeOffset[index];
588

589
                if (rangeOffset == 0) {
590
                    glyphCode = (char)(charCode + idDelta[index]);
591
                } else {
592
                    /* Calculate an index into the glyphIds array */
593

594
/*
595
                    System.err.println("rangeoffset="+rangeOffset+
596
                                       " charCode=" + charCode +
597
                                       " scnt["+index+"]="+(int)startCount[index] +
598
                                       " segCnt="+segCount);
599
*/
600

601
                    int glyphIDIndex = rangeOffset - segCount + index
602
                                         + (charCode - startCount[index]);
603
                    glyphCode = glyphIds[glyphIDIndex];
604
                    if (glyphCode != 0) {
605
                        glyphCode = (char)(glyphCode + idDelta[index]);
606
                    }
607
                }
608
            }
609
            if (glyphCode != 0) {
610
            //System.err.println("cc="+Integer.toHexString((int)charCode) + " gc="+(int)glyphCode);
611
            }
612
            return glyphCode;
613
        }
614
    }
615

616
    // Format 0: Byte Encoding table
617
    static class CMapFormat0 extends CMap {
618
        byte [] cmap;
619

620
        CMapFormat0(ByteBuffer buffer, int offset) {
621

622
            /* skip 6 bytes of format, length, and version */
623
            int len = buffer.getChar(offset+2);
624
            cmap = new byte[len-6];
625
            buffer.position(offset+6);
626
            buffer.get(cmap);
627
        }
628

629
        char getGlyph(int charCode) {
630
            if (charCode < 256) {
631
                if (charCode < 0x0010) {
632
                    switch (charCode) {
633
                    case 0x0009:
634
                    case 0x000a:
635
                    case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
636
                    }
637
                }
638
                return (char)(0xff & cmap[charCode]);
639
            } else {
640
                return 0;
641
            }
642
        }
643
    }
644

645
//     static CMap createSymbolCMap(ByteBuffer buffer, int offset, char[] syms) {
646

647
//      CMap cmap = createCMap(buffer, offset, null);
648
//      if (cmap == null) {
649
//          return null;
650
//      } else {
651
//          return new CMapFormatSymbol(cmap, syms);
652
//      }
653
//     }
654

655
//     static class CMapFormatSymbol extends CMap {
656

657
//      CMap cmap;
658
//      static final int NUM_BUCKETS = 128;
659
//      Bucket[] buckets = new Bucket[NUM_BUCKETS];
660

661
//      class Bucket {
662
//          char unicode;
663
//          char glyph;
664
//          Bucket next;
665

666
//          Bucket(char u, char g) {
667
//              unicode = u;
668
//              glyph = g;
669
//          }
670
//      }
671

672
//      CMapFormatSymbol(CMap cmap, char[] syms) {
673

674
//          this.cmap = cmap;
675

676
//          for (int i=0;i<syms.length;i++) {
677
//              char unicode = syms[i];
678
//              if (unicode != noSuchChar) {
679
//                  char glyph = cmap.getGlyph(i + 0xf000);
680
//                  int hash = unicode % NUM_BUCKETS;
681
//                  Bucket bucket = new Bucket(unicode, glyph);
682
//                  if (buckets[hash] == null) {
683
//                      buckets[hash] = bucket;
684
//                  } else {
685
//                      Bucket b = buckets[hash];
686
//                      while (b.next != null) {
687
//                          b = b.next;
688
//                      }
689
//                      b.next = bucket;
690
//                  }
691
//              }
692
//          }
693
//      }
694

695
//      char getGlyph(int unicode) {
696
//          if (unicode >= 0x1000) {
697
//              return 0;
698
//          }
699
//          else if (unicode >=0xf000 && unicode < 0xf100) {
700
//              return cmap.getGlyph(unicode);
701
//          } else {
702
//              Bucket b = buckets[unicode % NUM_BUCKETS];
703
//              while (b != null) {
704
//                  if (b.unicode == unicode) {
705
//                      return b.glyph;
706
//                  } else {
707
//                      b = b.next;
708
//                  }
709
//              }
710
//              return 0;
711
//          }
712
//      }
713
//     }
714

715
    // Format 2: High-byte mapping through table
716
    static class CMapFormat2 extends CMap {
717

718
        char[] subHeaderKey = new char[256];
719
         /* Store subheaders in individual arrays
720
          * A SubHeader entry theortically looks like {
721
          *   char firstCode;
722
          *   char entryCount;
723
          *   short idDelta;
724
          *   char idRangeOffset;
725
          * }
726
          */
727
        char[] firstCodeArray;
728
        char[] entryCountArray;
729
        short[] idDeltaArray;
730
        char[] idRangeOffSetArray;
731

732
        char[] glyphIndexArray;
733

734
        CMapFormat2(ByteBuffer buffer, int offset, char[] xlat) {
735

736
            this.xlat = xlat;
737

738
            int tableLen = buffer.getChar(offset+2);
739
            buffer.position(offset+6);
740
            CharBuffer cBuffer = buffer.asCharBuffer();
741
            char maxSubHeader = 0;
742
            for (int i=0;i<256;i++) {
743
                subHeaderKey[i] = cBuffer.get();
744
                if (subHeaderKey[i] > maxSubHeader) {
745
                    maxSubHeader = subHeaderKey[i];
746
                }
747
            }
748
            /* The value of the subHeaderKey is 8 * the subHeader index,
749
             * so the number of subHeaders can be obtained by dividing
750
             * this value bv 8 and adding 1.
751
             */
752
            int numSubHeaders = (maxSubHeader >> 3) +1;
753
            firstCodeArray = new char[numSubHeaders];
754
            entryCountArray = new char[numSubHeaders];
755
            idDeltaArray  = new short[numSubHeaders];
756
            idRangeOffSetArray  = new char[numSubHeaders];
757
            for (int i=0; i<numSubHeaders; i++) {
758
                firstCodeArray[i] = cBuffer.get();
759
                entryCountArray[i] = cBuffer.get();
760
                idDeltaArray[i] = (short)cBuffer.get();
761
                idRangeOffSetArray[i] = cBuffer.get();
762
//              System.out.println("sh["+i+"]:fc="+(int)firstCodeArray[i]+
763
//                                 " ec="+(int)entryCountArray[i]+
764
//                                 " delta="+(int)idDeltaArray[i]+
765
//                                 " offset="+(int)idRangeOffSetArray[i]);
766
            }
767

768
            int glyphIndexArrSize = (tableLen-518-numSubHeaders*8)/2;
769
            glyphIndexArray = new char[glyphIndexArrSize];
770
            for (int i=0; i<glyphIndexArrSize;i++) {
771
                glyphIndexArray[i] = cBuffer.get();
772
            }
773
        }
774

775
        char getGlyph(int charCode) {
776
            int controlGlyph = getControlCodeGlyph(charCode, true);
777
            if (controlGlyph >= 0) {
778
                return (char)controlGlyph;
779
            }
780

781
            if (xlat != null) {
782
                charCode = xlat[charCode];
783
            }
784

785
            char highByte = (char)(charCode >> 8);
786
            char lowByte = (char)(charCode & 0xff);
787
            int key = subHeaderKey[highByte]>>3; // index into subHeaders
788
            char mapMe;
789

790
            if (key != 0) {
791
                mapMe = lowByte;
792
            } else {
793
                mapMe = highByte;
794
                if (mapMe == 0) {
795
                    mapMe = lowByte;
796
                }
797
            }
798

799
//          System.err.println("charCode="+Integer.toHexString(charCode)+
800
//                             " key="+key+ " mapMe="+Integer.toHexString(mapMe));
801
            char firstCode = firstCodeArray[key];
802
            if (mapMe < firstCode) {
803
                return 0;
804
            } else {
805
                mapMe -= firstCode;
806
            }
807

808
            if (mapMe < entryCountArray[key]) {
809
                /* "address" arithmetic is needed to calculate the offset
810
                 * into glyphIndexArray. "idRangeOffSetArray[key]" specifies
811
                 * the number of bytes from that location in the table where
812
                 * the subarray of glyphIndexes starting at "firstCode" begins.
813
                 * Each entry in the subHeader table is 8 bytes, and the
814
                 * idRangeOffSetArray field is at offset 6 in the entry.
815
                 * The glyphIndexArray immediately follows the subHeaders.
816
                 * So if there are "N" entries then the number of bytes to the
817
                 * start of glyphIndexArray is (N-key)*8-6.
818
                 * Subtract this from the idRangeOffSetArray value to get
819
                 * the number of bytes into glyphIndexArray and divide by 2 to
820
                 * get the (char) array index.
821
                 */
822
                int glyphArrayOffset = ((idRangeOffSetArray.length-key)*8)-6;
823
                int glyphSubArrayStart =
824
                        (idRangeOffSetArray[key] - glyphArrayOffset)/2;
825
                char glyphCode = glyphIndexArray[glyphSubArrayStart+mapMe];
826
                if (glyphCode != 0) {
827
                    glyphCode += idDeltaArray[key]; //idDelta
828
                    return glyphCode;
829
                }
830
            }
831
            return 0;
832
        }
833
    }
834

835
    // Format 6: Trimmed table mapping
836
    static class CMapFormat6 extends CMap {
837

838
        char firstCode;
839
        char entryCount;
840
        char[] glyphIdArray;
841

842
        CMapFormat6(ByteBuffer bbuffer, int offset, char[] xlat) {
843

844
             bbuffer.position(offset+6);
845
             CharBuffer buffer = bbuffer.asCharBuffer();
846
             firstCode = buffer.get();
847
             entryCount = buffer.get();
848
             glyphIdArray = new char[entryCount];
849
             for (int i=0; i< entryCount; i++) {
850
                 glyphIdArray[i] = buffer.get();
851
             }
852
         }
853

854
         char getGlyph(int charCode) {
855
            int controlGlyph = getControlCodeGlyph(charCode, true);
856
            if (controlGlyph >= 0) {
857
                return (char)controlGlyph;
858
            }
859

860
             if (xlat != null) {
861
                 charCode = xlat[charCode];
862
             }
863

864
             charCode -= firstCode;
865
             if (charCode < 0 || charCode >= entryCount) {
866
                  return 0;
867
             } else {
868
                  return glyphIdArray[charCode];
869
             }
870
         }
871
    }
872

873
    // Format 8: mixed 16-bit and 32-bit coverage
874
    // Seems unlikely this code will ever get tested as we look for
875
    // MS platform Cmaps and MS states (in the Opentype spec on their website)
876
    // that MS doesn't support this format
877
    static class CMapFormat8 extends CMap {
878
         byte[] is32 = new byte[8192];
879
         int nGroups;
880
         int[] startCharCode;
881
         int[] endCharCode;
882
         int[] startGlyphID;
883

884
         CMapFormat8(ByteBuffer bbuffer, int offset, char[] xlat) {
885

886
             bbuffer.position(12);
887
             bbuffer.get(is32);
888
             nGroups = bbuffer.getInt() & INTMASK;
889
             // A map group record is three uint32's making for 12 bytes total
890
             if (bbuffer.remaining() < (12 * (long)nGroups)) {
891
                 throw new RuntimeException("Format 8 table exceeded");
892
             }
893
             startCharCode = new int[nGroups];
894
             endCharCode   = new int[nGroups];
895
             startGlyphID  = new int[nGroups];
896
         }
897

898
        char getGlyph(int charCode) {
899
            if (xlat != null) {
900
                throw new RuntimeException("xlat array for cmap fmt=8");
901
            }
902
            return 0;
903
        }
904

905
    }
906

907

908
    // Format 10: Trimmed array mapping (4-byte character codes)
909
    // Seems unlikely this code will ever get tested as we look for
910
    // MS platform Cmaps and MS states (in the Opentype spec on their website)
911
    // that MS doesn't support this format
912
    static class CMapFormat10 extends CMap {
913

914
         long firstCode;
915
         int entryCount;
916
         char[] glyphIdArray;
917

918
         CMapFormat10(ByteBuffer bbuffer, int offset, char[] xlat) {
919

920
             bbuffer.position(offset+12);
921
             firstCode = bbuffer.getInt() & INTMASK;
922
             entryCount = bbuffer.getInt() & INTMASK;
923
             // each glyph is a uint16, so 2 bytes per value.
924
             if (bbuffer.remaining() < (2 * (long)entryCount)) {
925
                 throw new RuntimeException("Format 10 table exceeded");
926
             }
927
             CharBuffer buffer = bbuffer.asCharBuffer();
928
             glyphIdArray = new char[entryCount];
929
             for (int i=0; i< entryCount; i++) {
930
                 glyphIdArray[i] = buffer.get();
931
             }
932
         }
933

934
         char getGlyph(int charCode) {
935

936
             if (xlat != null) {
937
                 throw new RuntimeException("xlat array for cmap fmt=10");
938
             }
939

940
             int code = (int)(charCode - firstCode);
941
             if (code < 0 || code >= entryCount) {
942
                 return 0;
943
             } else {
944
                 return glyphIdArray[code];
945
             }
946
         }
947
    }
948

949
    // Format 12: Segmented coverage for UCS-4 (fonts supporting
950
    // surrogate pairs)
951
    static class CMapFormat12 extends CMap {
952

953
        int numGroups;
954
        int highBit =0;
955
        int power;
956
        int extra;
957
        long[] startCharCode;
958
        long[] endCharCode;
959
        int[] startGlyphID;
960

961
        CMapFormat12(ByteBuffer buffer, int offset, char[] xlat) {
962
            if (xlat != null) {
963
                throw new RuntimeException("xlat array for cmap fmt=12");
964
            }
965

966
            buffer.position(offset+12);
967
            numGroups = buffer.getInt() & INTMASK;
968
            // A map group record is three uint32's making for 12 bytes total
969
            if (buffer.remaining() < (12 * (long)numGroups)) {
970
                throw new RuntimeException("Format 12 table exceeded");
971
            }
972
            startCharCode = new long[numGroups];
973
            endCharCode = new long[numGroups];
974
            startGlyphID = new int[numGroups];
975
            buffer = buffer.slice();
976
            IntBuffer ibuffer = buffer.asIntBuffer();
977
            for (int i=0; i<numGroups; i++) {
978
                startCharCode[i] = ibuffer.get() & INTMASK;
979
                endCharCode[i] = ibuffer.get() & INTMASK;
980
                startGlyphID[i] = ibuffer.get() & INTMASK;
981
            }
982

983
            /* Finds the high bit by binary searching through the bits */
984
            int value = numGroups;
985

986
            if (value >= 1 << 16) {
987
                value >>= 16;
988
                highBit += 16;
989
            }
990

991
            if (value >= 1 << 8) {
992
                value >>= 8;
993
                highBit += 8;
994
            }
995

996
            if (value >= 1 << 4) {
997
                value >>= 4;
998
                highBit += 4;
999
            }
1000

1001
            if (value >= 1 << 2) {
1002
                value >>= 2;
1003
                highBit += 2;
1004
            }
1005

1006
            if (value >= 1 << 1) {
1007
                value >>= 1;
1008
                highBit += 1;
1009
            }
1010

1011
            power = 1 << highBit;
1012
            extra = numGroups - power;
1013
        }
1014

1015
        char getGlyph(int charCode) {
1016
            int controlGlyph = getControlCodeGlyph(charCode, false);
1017
            if (controlGlyph >= 0) {
1018
                return (char)controlGlyph;
1019
            }
1020
            int probe = power;
1021
            int range = 0;
1022

1023
            if (startCharCode[extra] <= charCode) {
1024
                range = extra;
1025
            }
1026

1027
            while (probe > 1) {
1028
                probe >>= 1;
1029

1030
                if (startCharCode[range+probe] <= charCode) {
1031
                    range += probe;
1032
                }
1033
            }
1034

1035
            if (startCharCode[range] <= charCode &&
1036
                  endCharCode[range] >= charCode) {
1037
                return (char)
1038
                    (startGlyphID[range] + (charCode - startCharCode[range]));
1039
            }
1040

1041
            return 0;
1042
        }
1043

1044
    }
1045

1046
    /* Used to substitute for bad Cmaps. */
1047
    static class NullCMapClass extends CMap {
1048

1049
        char getGlyph(int charCode) {
1050
            return 0;
1051
        }
1052
    }
1053

1054
    /* Shared NullCMapClass instance, whose getGlyph always returns 0. */
    public static final NullCMapClass theNullCmap = new NullCMapClass();
1055

1056
    final int getControlCodeGlyph(int charCode, boolean noSurrogates) {
1057
        if (charCode < 0x0010) {
1058
            switch (charCode) {
1059
            case 0x0009:
1060
            case 0x000a:
1061
            case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
1062
            }
1063
        } else if (charCode >= 0x200c) {
1064
            if ((charCode <= 0x200f) ||
1065
                (charCode >= 0x2028 && charCode <= 0x202e) ||
1066
                (charCode >= 0x206a && charCode <= 0x206f)) {
1067
                return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
1068
            } else if (noSurrogates && charCode >= 0xFFFF) {
1069
                return 0;
1070
            }
1071
        }
1072
        return -1;
1073
    }
1074
}
1075

1076
Product

Resources

Company