Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/sun/font/CMap.java
38829 views
1
/*
2
* Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
26
package sun.font;
27
28
import java.nio.ByteBuffer;
29
import java.nio.CharBuffer;
30
import java.nio.IntBuffer;
31
import java.util.Locale;
32
import java.nio.charset.*;
33
34
/*
35
* A tt font has a CMAP table which is in turn made up of sub-tables which
36
* describe the char to glyph mapping in (possibly) multiple ways.
37
* CMAP subtables are described by 3 values.
38
* 1. Platform ID (eg 3=Microsoft, which is the id we look for in JDK)
39
* 2. Encoding (eg 0=symbol, 1=unicode)
40
* 3. TrueType subtable format (how the char->glyph mapping for the encoding
41
* is stored in the subtable). See the TrueType spec. Format 4 is required
42
* by MS in fonts for Windows. It uses segmented mapping to delta values.
43
* Most typically we see are (3,1,4) :
44
* CMAP Platform ID=3 is what we use.
45
* Encodings that are used in practice by JDK on Solaris are
46
* symbol (3,0)
47
* unicode (3,1)
48
* GBK (3,5) (note that solaris zh fonts report 3,4 but are really 3,5)
49
* The format for almost all subtables is 4. However the solaris (3,5)
50
* encodings are typically in format 2.
51
*/
52
abstract class CMap {
53
54
// static char WingDings_b2c[] = {
55
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
56
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
57
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
58
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
59
// 0xfffd, 0xfffd, 0x2702, 0x2701, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
60
// 0xfffd, 0x2706, 0x2709, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
61
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
62
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2707, 0x270d,
63
// 0xfffd, 0x270c, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
64
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
65
// 0xfffd, 0x2708, 0xfffd, 0xfffd, 0x2744, 0xfffd, 0x271e, 0xfffd,
66
// 0x2720, 0x2721, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
67
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
68
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
69
// 0xfffd, 0x2751, 0x2752, 0xfffd, 0xfffd, 0x2756, 0xfffd, 0xfffd,
70
// 0xfffd, 0xfffd, 0xfffd, 0x2740, 0x273f, 0x275d, 0x275e, 0xfffd,
71
// 0xfffd, 0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786,
72
// 0x2787, 0x2788, 0x2789, 0xfffd, 0x278a, 0x278b, 0x278c, 0x278d,
73
// 0x278e, 0x278f, 0x2790, 0x2791, 0x2792, 0x2793, 0xfffd, 0xfffd,
74
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
75
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x274d, 0xfffd,
76
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2736, 0x2734, 0xfffd, 0x2735,
77
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x272a, 0x2730, 0xfffd,
78
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
79
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x27a5, 0xfffd, 0x27a6, 0xfffd,
80
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
81
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
82
// 0x27a2, 0xfffd, 0xfffd, 0xfffd, 0x27b3, 0xfffd, 0xfffd, 0xfffd,
83
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
84
// 0x27a1, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
85
// 0x27a9, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
86
// 0xfffd, 0xfffd, 0xfffd, 0x2717, 0x2713, 0xfffd, 0xfffd, 0xfffd,
87
// };
88
89
// static char Symbols_b2c[] = {
90
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
91
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
92
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
93
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
94
// 0xfffd, 0xfffd, 0x2200, 0xfffd, 0x2203, 0xfffd, 0xfffd, 0x220d,
95
// 0xfffd, 0xfffd, 0x2217, 0xfffd, 0xfffd, 0x2212, 0xfffd, 0xfffd,
96
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
97
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
98
// 0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393,
99
// 0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f,
100
// 0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9,
101
// 0x039e, 0x03a8, 0x0396, 0xfffd, 0x2234, 0xfffd, 0x22a5, 0xfffd,
102
// 0xfffd, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03c6, 0x03b3,
103
// 0x03b7, 0x03b9, 0x03d5, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf,
104
// 0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9,
105
// 0x03be, 0x03c8, 0x03b6, 0xfffd, 0xfffd, 0xfffd, 0x223c, 0xfffd,
106
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
107
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
108
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
109
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
110
// 0xfffd, 0x03d2, 0xfffd, 0x2264, 0x2215, 0x221e, 0xfffd, 0xfffd,
111
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
112
// 0x2218, 0xfffd, 0xfffd, 0x2265, 0xfffd, 0x221d, 0xfffd, 0x2219,
113
// 0xfffd, 0x2260, 0x2261, 0x2248, 0x22ef, 0x2223, 0xfffd, 0xfffd,
114
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2297, 0x2295, 0x2205, 0x2229,
115
// 0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209,
116
// 0xfffd, 0x2207, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x221a, 0x22c5,
117
// 0xfffd, 0x2227, 0x2228, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
118
// 0x22c4, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2211, 0xfffd, 0xfffd,
119
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
120
// 0xfffd, 0xfffd, 0x222b, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
121
// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
122
// };
123
124
static final short ShiftJISEncoding = 2;
125
static final short GBKEncoding = 3;
126
static final short Big5Encoding = 4;
127
static final short WansungEncoding = 5;
128
static final short JohabEncoding = 6;
129
static final short MSUnicodeSurrogateEncoding = 10;
130
131
static final char noSuchChar = (char)0xfffd;
132
static final int SHORTMASK = 0x0000ffff;
133
static final int INTMASK = 0x7fffffff;
134
135
static final char[][] converterMaps = new char[7][];
136
137
/*
138
* Unicode->other encoding translation array. A pre-computed look up
139
* which can be shared across all fonts using that encoding.
140
* Using this saves running character coverters repeatedly.
141
*/
142
char[] xlat;
143
144
static CMap initialize(TrueTypeFont font) {
145
146
CMap cmap = null;
147
148
int offset, platformID, encodingID=-1;
149
150
int three0=0, three1=0, three2=0, three3=0, three4=0, three5=0,
151
three6=0, three10=0;
152
boolean threeStar = false;
153
154
ByteBuffer cmapBuffer = font.getTableBuffer(TrueTypeFont.cmapTag);
155
int cmapTableOffset = font.getTableSize(TrueTypeFont.cmapTag);
156
short numberSubTables = cmapBuffer.getShort(2);
157
158
/* locate the offsets of all 3,* (ie Microsoft platform) encodings */
159
for (int i=0; i<numberSubTables; i++) {
160
cmapBuffer.position(i * 8 + 4);
161
platformID = cmapBuffer.getShort();
162
if (platformID == 3) {
163
threeStar = true;
164
encodingID = cmapBuffer.getShort();
165
offset = cmapBuffer.getInt();
166
switch (encodingID) {
167
case 0: three0 = offset; break; // MS Symbol encoding
168
case 1: three1 = offset; break; // MS Unicode cmap
169
case 2: three2 = offset; break; // ShiftJIS cmap.
170
case 3: three3 = offset; break; // GBK cmap
171
case 4: three4 = offset; break; // Big 5 cmap
172
case 5: three5 = offset; break; // Wansung
173
case 6: three6 = offset; break; // Johab
174
case 10: three10 = offset; break; // MS Unicode surrogates
175
}
176
}
177
}
178
179
/* This defines the preference order for cmap subtables */
180
if (threeStar) {
181
if (three10 != 0) {
182
cmap = createCMap(cmapBuffer, three10, null);
183
}
184
else if (three0 != 0) {
185
/* The special case treatment of these fonts leads to
186
* anomalies where a user can view "wingdings" and "wingdings2"
187
* and the latter shows all its code points in the unicode
188
* private use area at 0xF000->0XF0FF and the former shows
189
* a scattered subset of its glyphs that are known mappings to
190
* unicode code points.
191
* The primary purpose of these mappings was to facilitate
192
* display of symbol chars etc in composite fonts, however
193
* this is not needed as all these code points are covered
194
* by Lucida Sans Regular.
195
* Commenting this out reduces the role of these two files
196
* (assuming that they continue to be used in font.properties)
197
* to just one of contributing to the overall composite
198
* font metrics, and also AWT can still access the fonts.
199
* Clients which explicitly accessed these fonts as names
200
* "Symbol" and "Wingdings" (ie as physical fonts) and
201
* expected to see a scattering of these characters will
202
* see them now as missing. How much of a problem is this?
203
* Perhaps we could still support this mapping just for
204
* "Symbol.ttf" but I suspect some users would prefer it
205
* to be mapped in to the Latin range as that is how
206
* the "symbol" font is used in native apps.
207
*/
208
// String name = font.platName.toLowerCase(Locale.ENGLISH);
209
// if (name.endsWith("symbol.ttf")) {
210
// cmap = createSymbolCMap(cmapBuffer, three0, Symbols_b2c);
211
// } else if (name.endsWith("wingding.ttf")) {
212
// cmap = createSymbolCMap(cmapBuffer, three0, WingDings_b2c);
213
// } else {
214
cmap = createCMap(cmapBuffer, three0, null);
215
// }
216
}
217
else if (three1 != 0) {
218
cmap = createCMap(cmapBuffer, three1, null);
219
}
220
else if (three2 != 0) {
221
cmap = createCMap(cmapBuffer, three2,
222
getConverterMap(ShiftJISEncoding));
223
}
224
else if (three3 != 0) {
225
cmap = createCMap(cmapBuffer, three3,
226
getConverterMap(GBKEncoding));
227
}
228
else if (three4 != 0) {
229
/* GB2312 TrueType fonts on Solaris have wrong encoding ID for
230
* cmap table, these fonts have EncodingID 4 which is Big5
231
* encoding according the TrueType spec, but actually the
232
* fonts are using gb2312 encoding, have to use this
233
* workaround to make Solaris zh_CN locale work. -sherman
234
*/
235
if (FontUtilities.isSolaris && font.platName != null &&
236
(font.platName.startsWith(
237
"/usr/openwin/lib/locale/zh_CN.EUC/X11/fonts/TrueType") ||
238
font.platName.startsWith(
239
"/usr/openwin/lib/locale/zh_CN/X11/fonts/TrueType") ||
240
font.platName.startsWith(
241
"/usr/openwin/lib/locale/zh/X11/fonts/TrueType"))) {
242
cmap = createCMap(cmapBuffer, three4,
243
getConverterMap(GBKEncoding));
244
}
245
else {
246
cmap = createCMap(cmapBuffer, three4,
247
getConverterMap(Big5Encoding));
248
}
249
}
250
else if (three5 != 0) {
251
cmap = createCMap(cmapBuffer, three5,
252
getConverterMap(WansungEncoding));
253
}
254
else if (three6 != 0) {
255
cmap = createCMap(cmapBuffer, three6,
256
getConverterMap(JohabEncoding));
257
}
258
} else {
259
/* No 3,* subtable was found. Just use whatever is the first
260
* table listed. Not very useful but maybe better than
261
* rejecting the font entirely?
262
*/
263
cmap = createCMap(cmapBuffer, cmapBuffer.getInt(8), null);
264
}
265
return cmap;
266
}
267
268
/* speed up the converting by setting the range for double
269
* byte characters;
270
*/
271
static char[] getConverter(short encodingID) {
272
int dBegin = 0x8000;
273
int dEnd = 0xffff;
274
String encoding;
275
276
switch (encodingID) {
277
case ShiftJISEncoding:
278
dBegin = 0x8140;
279
dEnd = 0xfcfc;
280
encoding = "SJIS";
281
break;
282
case GBKEncoding:
283
dBegin = 0x8140;
284
dEnd = 0xfea0;
285
encoding = "GBK";
286
break;
287
case Big5Encoding:
288
dBegin = 0xa140;
289
dEnd = 0xfefe;
290
encoding = "Big5";
291
break;
292
case WansungEncoding:
293
dBegin = 0xa1a1;
294
dEnd = 0xfede;
295
encoding = "EUC_KR";
296
break;
297
case JohabEncoding:
298
dBegin = 0x8141;
299
dEnd = 0xfdfe;
300
encoding = "Johab";
301
break;
302
default:
303
return null;
304
}
305
306
try {
307
char[] convertedChars = new char[65536];
308
for (int i=0; i<65536; i++) {
309
convertedChars[i] = noSuchChar;
310
}
311
312
byte[] inputBytes = new byte[(dEnd-dBegin+1)*2];
313
char[] outputChars = new char[(dEnd-dBegin+1)];
314
315
int j = 0;
316
int firstByte;
317
if (encodingID == ShiftJISEncoding) {
318
for (int i = dBegin; i <= dEnd; i++) {
319
firstByte = (i >> 8 & 0xff);
320
if (firstByte >= 0xa1 && firstByte <= 0xdf) {
321
//sjis halfwidth katakana
322
inputBytes[j++] = (byte)0xff;
323
inputBytes[j++] = (byte)0xff;
324
} else {
325
inputBytes[j++] = (byte)firstByte;
326
inputBytes[j++] = (byte)(i & 0xff);
327
}
328
}
329
} else {
330
for (int i = dBegin; i <= dEnd; i++) {
331
inputBytes[j++] = (byte)(i>>8 & 0xff);
332
inputBytes[j++] = (byte)(i & 0xff);
333
}
334
}
335
336
Charset.forName(encoding).newDecoder()
337
.onMalformedInput(CodingErrorAction.REPLACE)
338
.onUnmappableCharacter(CodingErrorAction.REPLACE)
339
.replaceWith("\u0000")
340
.decode(ByteBuffer.wrap(inputBytes, 0, inputBytes.length),
341
CharBuffer.wrap(outputChars, 0, outputChars.length),
342
true);
343
344
// ensure single byte ascii
345
for (int i = 0x20; i <= 0x7e; i++) {
346
convertedChars[i] = (char)i;
347
}
348
349
//sjis halfwidth katakana
350
if (encodingID == ShiftJISEncoding) {
351
for (int i = 0xa1; i <= 0xdf; i++) {
352
convertedChars[i] = (char)(i - 0xa1 + 0xff61);
353
}
354
}
355
356
/* It would save heap space (approx 60Kbytes for each of these
357
* converters) if stored only valid ranges (ie returned
358
* outputChars directly. But this is tricky since want to
359
* include the ASCII range too.
360
*/
361
// System.err.println("oc.len="+outputChars.length);
362
// System.err.println("cc.len="+convertedChars.length);
363
// System.err.println("dbegin="+dBegin);
364
System.arraycopy(outputChars, 0, convertedChars, dBegin,
365
outputChars.length);
366
367
//return convertedChars;
368
/* invert this map as now want it to map from Unicode
369
* to other encoding.
370
*/
371
char [] invertedChars = new char[65536];
372
for (int i=0;i<65536;i++) {
373
if (convertedChars[i] != noSuchChar) {
374
invertedChars[convertedChars[i]] = (char)i;
375
}
376
}
377
return invertedChars;
378
379
} catch (Exception e) {
380
e.printStackTrace();
381
}
382
return null;
383
}
384
385
/*
386
* The returned array maps to unicode from some other 2 byte encoding
387
* eg for a 2byte index which represents a SJIS char, the indexed
388
* value is the corresponding unicode char.
389
*/
390
static char[] getConverterMap(short encodingID) {
391
if (converterMaps[encodingID] == null) {
392
converterMaps[encodingID] = getConverter(encodingID);
393
}
394
return converterMaps[encodingID];
395
}
396
397
398
static CMap createCMap(ByteBuffer buffer, int offset, char[] xlat) {
399
/* First do a sanity check that this cmap subtable is contained
400
* within the cmap table.
401
*/
402
int subtableFormat = buffer.getChar(offset);
403
long subtableLength;
404
if (subtableFormat < 8) {
405
subtableLength = buffer.getChar(offset+2);
406
} else {
407
subtableLength = buffer.getInt(offset+4) & INTMASK;
408
}
409
if (offset+subtableLength > buffer.capacity()) {
410
if (FontUtilities.isLogging()) {
411
FontUtilities.getLogger().warning("Cmap subtable overflows buffer.");
412
}
413
}
414
switch (subtableFormat) {
415
case 0: return new CMapFormat0(buffer, offset);
416
case 2: return new CMapFormat2(buffer, offset, xlat);
417
case 4: return new CMapFormat4(buffer, offset, xlat);
418
case 6: return new CMapFormat6(buffer, offset, xlat);
419
case 8: return new CMapFormat8(buffer, offset, xlat);
420
case 10: return new CMapFormat10(buffer, offset, xlat);
421
case 12: return new CMapFormat12(buffer, offset, xlat);
422
default: throw new RuntimeException("Cmap format unimplemented: " +
423
(int)buffer.getChar(offset));
424
}
425
}
426
427
/*
428
final char charVal(byte[] cmap, int index) {
429
return (char)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));
430
}
431
432
final short shortVal(byte[] cmap, int index) {
433
return (short)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));
434
}
435
*/
436
/*
 * Maps a character code to a glyph code using this subtable.
 * Every implementation in this file returns 0 when it has no mapping
 * for charCode; malformed table data may surface as an unchecked
 * exception instead.
 */
abstract char getGlyph(int charCode);
437
438
/* Format 4 Header is
439
* ushort format (off=0)
440
* ushort length (off=2)
441
* ushort language (off=4)
442
* ushort segCountX2 (off=6)
443
* ushort searchRange (off=8)
444
* ushort entrySelector (off=10)
445
* ushort rangeShift (off=12)
446
* ushort endCount[segCount] (off=14)
447
* ushort reservedPad
448
* ushort startCount[segCount]
449
* short idDelta[segCount]
450
* idRangeOFfset[segCount]
451
* ushort glyphIdArray[]
452
*/
453
static class CMapFormat4 extends CMap {
454
int segCount;
455
int entrySelector;
456
int rangeShift;
457
char[] endCount;
458
char[] startCount;
459
short[] idDelta;
460
char[] idRangeOffset;
461
char[] glyphIds;
462
463
CMapFormat4(ByteBuffer bbuffer, int offset, char[] xlat) {
464
465
this.xlat = xlat;
466
467
bbuffer.position(offset);
468
CharBuffer buffer = bbuffer.asCharBuffer();
469
buffer.get(); // skip, we already know format=4
470
int subtableLength = buffer.get();
471
/* Try to recover from some bad fonts which specify a subtable
472
* length that would overflow the byte buffer holding the whole
473
* cmap table. If this isn't a recoverable situation an exception
474
* may be thrown which is caught higher up the call stack.
475
* Whilst this may seem lenient, in practice, unless the "bad"
476
* subtable we are using is the last one in the cmap table we
477
* would have no way of knowing about this problem anyway.
478
*/
479
if (offset+subtableLength > bbuffer.capacity()) {
480
subtableLength = bbuffer.capacity() - offset;
481
}
482
buffer.get(); // skip language
483
segCount = buffer.get()/2;
484
int searchRange = buffer.get();
485
entrySelector = buffer.get();
486
rangeShift = buffer.get()/2;
487
startCount = new char[segCount];
488
endCount = new char[segCount];
489
idDelta = new short[segCount];
490
idRangeOffset = new char[segCount];
491
492
for (int i=0; i<segCount; i++) {
493
endCount[i] = buffer.get();
494
}
495
buffer.get(); // 2 bytes for reserved pad
496
for (int i=0; i<segCount; i++) {
497
startCount[i] = buffer.get();
498
}
499
500
for (int i=0; i<segCount; i++) {
501
idDelta[i] = (short)buffer.get();
502
}
503
504
for (int i=0; i<segCount; i++) {
505
char ctmp = buffer.get();
506
idRangeOffset[i] = (char)((ctmp>>1)&0xffff);
507
}
508
/* Can calculate the number of glyph IDs by subtracting
509
* "pos" from the length of the cmap
510
*/
511
int pos = (segCount*8+16)/2;
512
buffer.position(pos);
513
int numGlyphIds = (subtableLength/2 - pos);
514
glyphIds = new char[numGlyphIds];
515
for (int i=0;i<numGlyphIds;i++) {
516
glyphIds[i] = buffer.get();
517
}
518
/*
519
System.err.println("segcount="+segCount);
520
System.err.println("entrySelector="+entrySelector);
521
System.err.println("rangeShift="+rangeShift);
522
for (int j=0;j<segCount;j++) {
523
System.err.println("j="+j+ " sc="+(int)(startCount[j]&0xffff)+
524
" ec="+(int)(endCount[j]&0xffff)+
525
" delta="+idDelta[j] +
526
" ro="+(int)idRangeOffset[j]);
527
}
528
529
//System.err.println("numglyphs="+glyphIds.length);
530
for (int i=0;i<numGlyphIds;i++) {
531
System.err.println("gid["+i+"]="+(int)glyphIds[i]);
532
}
533
*/
534
}
535
536
char getGlyph(int charCode) {
537
538
int index = 0;
539
char glyphCode = 0;
540
541
int controlGlyph = getControlCodeGlyph(charCode, true);
542
if (controlGlyph >= 0) {
543
return (char)controlGlyph;
544
}
545
546
/* presence of translation array indicates that this
547
* cmap is in some other (non-unicode encoding).
548
* In order to look-up a char->glyph mapping we need to
549
* translate the unicode code point to the encoding of
550
* the cmap.
551
* REMIND: VALID CHARCODES??
552
*/
553
if (xlat != null) {
554
charCode = xlat[charCode];
555
}
556
557
/*
558
* Citation from the TrueType (and OpenType) spec:
559
* The segments are sorted in order of increasing endCode
560
* values, and the segment values are specified in four parallel
561
* arrays. You search for the first endCode that is greater than
562
* or equal to the character code you want to map. If the
563
* corresponding startCode is less than or equal to the
564
* character code, then you use the corresponding idDelta and
565
* idRangeOffset to map the character code to a glyph index
566
* (otherwise, the missingGlyph is returned).
567
*/
568
569
/*
570
* CMAP format4 defines several fields for optimized search of
571
* the segment list (entrySelector, searchRange, rangeShift).
572
* However, benefits are neglible and some fonts have incorrect
573
* data - so we use straightforward binary search (see bug 6247425)
574
*/
575
int left = 0, right = startCount.length;
576
index = startCount.length >> 1;
577
while (left < right) {
578
if (endCount[index] < charCode) {
579
left = index + 1;
580
} else {
581
right = index;
582
}
583
index = (left + right) >> 1;
584
}
585
586
if (charCode >= startCount[index] && charCode <= endCount[index]) {
587
int rangeOffset = idRangeOffset[index];
588
589
if (rangeOffset == 0) {
590
glyphCode = (char)(charCode + idDelta[index]);
591
} else {
592
/* Calculate an index into the glyphIds array */
593
594
/*
595
System.err.println("rangeoffset="+rangeOffset+
596
" charCode=" + charCode +
597
" scnt["+index+"]="+(int)startCount[index] +
598
" segCnt="+segCount);
599
*/
600
601
int glyphIDIndex = rangeOffset - segCount + index
602
+ (charCode - startCount[index]);
603
glyphCode = glyphIds[glyphIDIndex];
604
if (glyphCode != 0) {
605
glyphCode = (char)(glyphCode + idDelta[index]);
606
}
607
}
608
}
609
if (glyphCode != 0) {
610
//System.err.println("cc="+Integer.toHexString((int)charCode) + " gc="+(int)glyphCode);
611
}
612
return glyphCode;
613
}
614
}
615
616
// Format 0: Byte Encoding table
617
static class CMapFormat0 extends CMap {
618
byte [] cmap;
619
620
CMapFormat0(ByteBuffer buffer, int offset) {
621
622
/* skip 6 bytes of format, length, and version */
623
int len = buffer.getChar(offset+2);
624
cmap = new byte[len-6];
625
buffer.position(offset+6);
626
buffer.get(cmap);
627
}
628
629
char getGlyph(int charCode) {
630
if (charCode < 256) {
631
if (charCode < 0x0010) {
632
switch (charCode) {
633
case 0x0009:
634
case 0x000a:
635
case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
636
}
637
}
638
return (char)(0xff & cmap[charCode]);
639
} else {
640
return 0;
641
}
642
}
643
}
644
645
// static CMap createSymbolCMap(ByteBuffer buffer, int offset, char[] syms) {
646
647
// CMap cmap = createCMap(buffer, offset, null);
648
// if (cmap == null) {
649
// return null;
650
// } else {
651
// return new CMapFormatSymbol(cmap, syms);
652
// }
653
// }
654
655
// static class CMapFormatSymbol extends CMap {
656
657
// CMap cmap;
658
// static final int NUM_BUCKETS = 128;
659
// Bucket[] buckets = new Bucket[NUM_BUCKETS];
660
661
// class Bucket {
662
// char unicode;
663
// char glyph;
664
// Bucket next;
665
666
// Bucket(char u, char g) {
667
// unicode = u;
668
// glyph = g;
669
// }
670
// }
671
672
// CMapFormatSymbol(CMap cmap, char[] syms) {
673
674
// this.cmap = cmap;
675
676
// for (int i=0;i<syms.length;i++) {
677
// char unicode = syms[i];
678
// if (unicode != noSuchChar) {
679
// char glyph = cmap.getGlyph(i + 0xf000);
680
// int hash = unicode % NUM_BUCKETS;
681
// Bucket bucket = new Bucket(unicode, glyph);
682
// if (buckets[hash] == null) {
683
// buckets[hash] = bucket;
684
// } else {
685
// Bucket b = buckets[hash];
686
// while (b.next != null) {
687
// b = b.next;
688
// }
689
// b.next = bucket;
690
// }
691
// }
692
// }
693
// }
694
695
// char getGlyph(int unicode) {
696
// if (unicode >= 0x1000) {
697
// return 0;
698
// }
699
// else if (unicode >=0xf000 && unicode < 0xf100) {
700
// return cmap.getGlyph(unicode);
701
// } else {
702
// Bucket b = buckets[unicode % NUM_BUCKETS];
703
// while (b != null) {
704
// if (b.unicode == unicode) {
705
// return b.glyph;
706
// } else {
707
// b = b.next;
708
// }
709
// }
710
// return 0;
711
// }
712
// }
713
// }
714
715
// Format 2: High-byte mapping through table
716
static class CMapFormat2 extends CMap {
717
718
char[] subHeaderKey = new char[256];
719
/* Store subheaders in individual arrays
720
* A SubHeader entry theortically looks like {
721
* char firstCode;
722
* char entryCount;
723
* short idDelta;
724
* char idRangeOffset;
725
* }
726
*/
727
char[] firstCodeArray;
728
char[] entryCountArray;
729
short[] idDeltaArray;
730
char[] idRangeOffSetArray;
731
732
char[] glyphIndexArray;
733
734
CMapFormat2(ByteBuffer buffer, int offset, char[] xlat) {
735
736
this.xlat = xlat;
737
738
int tableLen = buffer.getChar(offset+2);
739
buffer.position(offset+6);
740
CharBuffer cBuffer = buffer.asCharBuffer();
741
char maxSubHeader = 0;
742
for (int i=0;i<256;i++) {
743
subHeaderKey[i] = cBuffer.get();
744
if (subHeaderKey[i] > maxSubHeader) {
745
maxSubHeader = subHeaderKey[i];
746
}
747
}
748
/* The value of the subHeaderKey is 8 * the subHeader index,
749
* so the number of subHeaders can be obtained by dividing
750
* this value bv 8 and adding 1.
751
*/
752
int numSubHeaders = (maxSubHeader >> 3) +1;
753
firstCodeArray = new char[numSubHeaders];
754
entryCountArray = new char[numSubHeaders];
755
idDeltaArray = new short[numSubHeaders];
756
idRangeOffSetArray = new char[numSubHeaders];
757
for (int i=0; i<numSubHeaders; i++) {
758
firstCodeArray[i] = cBuffer.get();
759
entryCountArray[i] = cBuffer.get();
760
idDeltaArray[i] = (short)cBuffer.get();
761
idRangeOffSetArray[i] = cBuffer.get();
762
// System.out.println("sh["+i+"]:fc="+(int)firstCodeArray[i]+
763
// " ec="+(int)entryCountArray[i]+
764
// " delta="+(int)idDeltaArray[i]+
765
// " offset="+(int)idRangeOffSetArray[i]);
766
}
767
768
int glyphIndexArrSize = (tableLen-518-numSubHeaders*8)/2;
769
glyphIndexArray = new char[glyphIndexArrSize];
770
for (int i=0; i<glyphIndexArrSize;i++) {
771
glyphIndexArray[i] = cBuffer.get();
772
}
773
}
774
775
char getGlyph(int charCode) {
776
int controlGlyph = getControlCodeGlyph(charCode, true);
777
if (controlGlyph >= 0) {
778
return (char)controlGlyph;
779
}
780
781
if (xlat != null) {
782
charCode = xlat[charCode];
783
}
784
785
char highByte = (char)(charCode >> 8);
786
char lowByte = (char)(charCode & 0xff);
787
int key = subHeaderKey[highByte]>>3; // index into subHeaders
788
char mapMe;
789
790
if (key != 0) {
791
mapMe = lowByte;
792
} else {
793
mapMe = highByte;
794
if (mapMe == 0) {
795
mapMe = lowByte;
796
}
797
}
798
799
// System.err.println("charCode="+Integer.toHexString(charCode)+
800
// " key="+key+ " mapMe="+Integer.toHexString(mapMe));
801
char firstCode = firstCodeArray[key];
802
if (mapMe < firstCode) {
803
return 0;
804
} else {
805
mapMe -= firstCode;
806
}
807
808
if (mapMe < entryCountArray[key]) {
809
/* "address" arithmetic is needed to calculate the offset
810
* into glyphIndexArray. "idRangeOffSetArray[key]" specifies
811
* the number of bytes from that location in the table where
812
* the subarray of glyphIndexes starting at "firstCode" begins.
813
* Each entry in the subHeader table is 8 bytes, and the
814
* idRangeOffSetArray field is at offset 6 in the entry.
815
* The glyphIndexArray immediately follows the subHeaders.
816
* So if there are "N" entries then the number of bytes to the
817
* start of glyphIndexArray is (N-key)*8-6.
818
* Subtract this from the idRangeOffSetArray value to get
819
* the number of bytes into glyphIndexArray and divide by 2 to
820
* get the (char) array index.
821
*/
822
int glyphArrayOffset = ((idRangeOffSetArray.length-key)*8)-6;
823
int glyphSubArrayStart =
824
(idRangeOffSetArray[key] - glyphArrayOffset)/2;
825
char glyphCode = glyphIndexArray[glyphSubArrayStart+mapMe];
826
if (glyphCode != 0) {
827
glyphCode += idDeltaArray[key]; //idDelta
828
return glyphCode;
829
}
830
}
831
return 0;
832
}
833
}
834
835
// Format 6: Trimmed table mapping
836
static class CMapFormat6 extends CMap {
837
838
char firstCode;
839
char entryCount;
840
char[] glyphIdArray;
841
842
CMapFormat6(ByteBuffer bbuffer, int offset, char[] xlat) {
843
844
bbuffer.position(offset+6);
845
CharBuffer buffer = bbuffer.asCharBuffer();
846
firstCode = buffer.get();
847
entryCount = buffer.get();
848
glyphIdArray = new char[entryCount];
849
for (int i=0; i< entryCount; i++) {
850
glyphIdArray[i] = buffer.get();
851
}
852
}
853
854
char getGlyph(int charCode) {
855
int controlGlyph = getControlCodeGlyph(charCode, true);
856
if (controlGlyph >= 0) {
857
return (char)controlGlyph;
858
}
859
860
if (xlat != null) {
861
charCode = xlat[charCode];
862
}
863
864
charCode -= firstCode;
865
if (charCode < 0 || charCode >= entryCount) {
866
return 0;
867
} else {
868
return glyphIdArray[charCode];
869
}
870
}
871
}
872
873
// Format 8: mixed 16-bit and 32-bit coverage
874
// Seems unlikely this code will ever get tested as we look for
875
// MS platform Cmaps and MS states (in the Opentype spec on their website)
876
// that MS doesn't support this format
877
static class CMapFormat8 extends CMap {
878
byte[] is32 = new byte[8192];
879
int nGroups;
880
int[] startCharCode;
881
int[] endCharCode;
882
int[] startGlyphID;
883
884
CMapFormat8(ByteBuffer bbuffer, int offset, char[] xlat) {
885
886
bbuffer.position(12);
887
bbuffer.get(is32);
888
nGroups = bbuffer.getInt() & INTMASK;
889
// A map group record is three uint32's making for 12 bytes total
890
if (bbuffer.remaining() < (12 * (long)nGroups)) {
891
throw new RuntimeException("Format 8 table exceeded");
892
}
893
startCharCode = new int[nGroups];
894
endCharCode = new int[nGroups];
895
startGlyphID = new int[nGroups];
896
}
897
898
char getGlyph(int charCode) {
899
if (xlat != null) {
900
throw new RuntimeException("xlat array for cmap fmt=8");
901
}
902
return 0;
903
}
904
905
}
906
907
908
// Format 4-byte 10: Trimmed table mapping
909
// Seems unlikely this code will ever get tested as we look for
910
// MS platform Cmaps and MS states (in the Opentype spec on their website)
911
// that MS doesn't support this format
912
static class CMapFormat10 extends CMap {
913
914
long firstCode;
915
int entryCount;
916
char[] glyphIdArray;
917
918
CMapFormat10(ByteBuffer bbuffer, int offset, char[] xlat) {
919
920
bbuffer.position(offset+12);
921
firstCode = bbuffer.getInt() & INTMASK;
922
entryCount = bbuffer.getInt() & INTMASK;
923
// each glyph is a uint16, so 2 bytes per value.
924
if (bbuffer.remaining() < (2 * (long)entryCount)) {
925
throw new RuntimeException("Format 10 table exceeded");
926
}
927
CharBuffer buffer = bbuffer.asCharBuffer();
928
glyphIdArray = new char[entryCount];
929
for (int i=0; i< entryCount; i++) {
930
glyphIdArray[i] = buffer.get();
931
}
932
}
933
934
char getGlyph(int charCode) {
935
936
if (xlat != null) {
937
throw new RuntimeException("xlat array for cmap fmt=10");
938
}
939
940
int code = (int)(charCode - firstCode);
941
if (code < 0 || code >= entryCount) {
942
return 0;
943
} else {
944
return glyphIdArray[code];
945
}
946
}
947
}
948
949
// Format 12: Segmented coverage for UCS-4 (fonts supporting
950
// surrogate pairs)
951
static class CMapFormat12 extends CMap {
952
953
int numGroups;
954
int highBit =0;
955
int power;
956
int extra;
957
long[] startCharCode;
958
long[] endCharCode;
959
int[] startGlyphID;
960
961
CMapFormat12(ByteBuffer buffer, int offset, char[] xlat) {
962
if (xlat != null) {
963
throw new RuntimeException("xlat array for cmap fmt=12");
964
}
965
966
buffer.position(offset+12);
967
numGroups = buffer.getInt() & INTMASK;
968
// A map group record is three uint32's making for 12 bytes total
969
if (buffer.remaining() < (12 * (long)numGroups)) {
970
throw new RuntimeException("Format 12 table exceeded");
971
}
972
startCharCode = new long[numGroups];
973
endCharCode = new long[numGroups];
974
startGlyphID = new int[numGroups];
975
buffer = buffer.slice();
976
IntBuffer ibuffer = buffer.asIntBuffer();
977
for (int i=0; i<numGroups; i++) {
978
startCharCode[i] = ibuffer.get() & INTMASK;
979
endCharCode[i] = ibuffer.get() & INTMASK;
980
startGlyphID[i] = ibuffer.get() & INTMASK;
981
}
982
983
/* Finds the high bit by binary searching through the bits */
984
int value = numGroups;
985
986
if (value >= 1 << 16) {
987
value >>= 16;
988
highBit += 16;
989
}
990
991
if (value >= 1 << 8) {
992
value >>= 8;
993
highBit += 8;
994
}
995
996
if (value >= 1 << 4) {
997
value >>= 4;
998
highBit += 4;
999
}
1000
1001
if (value >= 1 << 2) {
1002
value >>= 2;
1003
highBit += 2;
1004
}
1005
1006
if (value >= 1 << 1) {
1007
value >>= 1;
1008
highBit += 1;
1009
}
1010
1011
power = 1 << highBit;
1012
extra = numGroups - power;
1013
}
1014
1015
char getGlyph(int charCode) {
1016
int controlGlyph = getControlCodeGlyph(charCode, false);
1017
if (controlGlyph >= 0) {
1018
return (char)controlGlyph;
1019
}
1020
int probe = power;
1021
int range = 0;
1022
1023
if (startCharCode[extra] <= charCode) {
1024
range = extra;
1025
}
1026
1027
while (probe > 1) {
1028
probe >>= 1;
1029
1030
if (startCharCode[range+probe] <= charCode) {
1031
range += probe;
1032
}
1033
}
1034
1035
if (startCharCode[range] <= charCode &&
1036
endCharCode[range] >= charCode) {
1037
return (char)
1038
(startGlyphID[range] + (charCode - startCharCode[range]));
1039
}
1040
1041
return 0;
1042
}
1043
1044
}
1045
1046
/* Used to substitute for bad Cmaps. */
1047
static class NullCMapClass extends CMap {
1048
1049
char getGlyph(int charCode) {
1050
return 0;
1051
}
1052
}
1053
1054
public static final NullCMapClass theNullCmap = new NullCMapClass();
1055
1056
final int getControlCodeGlyph(int charCode, boolean noSurrogates) {
1057
if (charCode < 0x0010) {
1058
switch (charCode) {
1059
case 0x0009:
1060
case 0x000a:
1061
case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
1062
}
1063
} else if (charCode >= 0x200c) {
1064
if ((charCode <= 0x200f) ||
1065
(charCode >= 0x2028 && charCode <= 0x202e) ||
1066
(charCode >= 0x206a && charCode <= 0x206f)) {
1067
return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
1068
} else if (noSurrogates && charCode >= 0xFFFF) {
1069
return 0;
1070
}
1071
}
1072
return -1;
1073
}
1074
}
1075
1076