Path: blob/master/src/java.desktop/share/classes/sun/font/CMap.java
66645 views
/*1* Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425package sun.font;2627import java.nio.ByteBuffer;28import java.nio.CharBuffer;29import java.nio.IntBuffer;30import java.util.Locale;31import java.nio.charset.*;3233/*34* A tt font has a CMAP table which is in turn made up of sub-tables which35* describe the char to glyph mapping in (possibly) multiple ways.36* CMAP subtables are described by 3 values.37* 1. Platform ID (eg 3=Microsoft, which is the id we look for in JDK)38* 2. Encoding (eg 0=symbol, 1=unicode)39* 3. TrueType subtable format (how the char->glyph mapping for the encoding40* is stored in the subtable). See the TrueType spec. Format 4 is required41* by MS in fonts for windows. 
Its uses segmented mapping to delta values.42* Most typically we see are (3,1,4) :43* CMAP Platform ID=3 is what we use.44* Encodings that are used in practice by JDK on Solaris are45* symbol (3,0)46* unicode (3,1)47* GBK (3,5) (note that solaris zh fonts report 3,4 but are really 3,5)48* The format for almost all subtables is 4. However the solaris (3,5)49* encodings are typically in format 2.50*/51abstract class CMap {5253// static char WingDings_b2c[] = {54// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,55// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,56// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,57// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,58// 0xfffd, 0xfffd, 0x2702, 0x2701, 0xfffd, 0xfffd, 0xfffd, 0xfffd,59// 0xfffd, 0x2706, 0x2709, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,60// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,61// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2707, 0x270d,62// 0xfffd, 0x270c, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,63// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,64// 0xfffd, 0x2708, 0xfffd, 0xfffd, 0x2744, 0xfffd, 0x271e, 0xfffd,65// 0x2720, 0x2721, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,66// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,67// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,68// 0xfffd, 0x2751, 0x2752, 0xfffd, 0xfffd, 0x2756, 0xfffd, 0xfffd,69// 0xfffd, 0xfffd, 0xfffd, 0x2740, 0x273f, 0x275d, 0x275e, 0xfffd,70// 0xfffd, 0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786,71// 0x2787, 0x2788, 0x2789, 0xfffd, 0x278a, 0x278b, 0x278c, 0x278d,72// 0x278e, 0x278f, 0x2790, 0x2791, 0x2792, 0x2793, 0xfffd, 0xfffd,73// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,74// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x274d, 0xfffd,75// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2736, 0x2734, 0xfffd, 0x2735,76// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 
0xfffd, 0x272a, 0x2730, 0xfffd,77// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,78// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x27a5, 0xfffd, 0x27a6, 0xfffd,79// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,80// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,81// 0x27a2, 0xfffd, 0xfffd, 0xfffd, 0x27b3, 0xfffd, 0xfffd, 0xfffd,82// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,83// 0x27a1, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,84// 0x27a9, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,85// 0xfffd, 0xfffd, 0xfffd, 0x2717, 0x2713, 0xfffd, 0xfffd, 0xfffd,86// };8788// static char Symbols_b2c[] = {89// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,90// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,91// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,92// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,93// 0xfffd, 0xfffd, 0x2200, 0xfffd, 0x2203, 0xfffd, 0xfffd, 0x220d,94// 0xfffd, 0xfffd, 0x2217, 0xfffd, 0xfffd, 0x2212, 0xfffd, 0xfffd,95// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,96// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,97// 0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393,98// 0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f,99// 0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9,100// 0x039e, 0x03a8, 0x0396, 0xfffd, 0x2234, 0xfffd, 0x22a5, 0xfffd,101// 0xfffd, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03c6, 0x03b3,102// 0x03b7, 0x03b9, 0x03d5, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf,103// 0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9,104// 0x03be, 0x03c8, 0x03b6, 0xfffd, 0xfffd, 0xfffd, 0x223c, 0xfffd,105// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,106// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,107// 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,108// 0xfffd, 
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//        0xfffd, 0x03d2, 0xfffd, 0x2264, 0x2215, 0x221e, 0xfffd, 0xfffd,
//        0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//        0x2218, 0xfffd, 0xfffd, 0x2265, 0xfffd, 0x221d, 0xfffd, 0x2219,
//        0xfffd, 0x2260, 0x2261, 0x2248, 0x22ef, 0x2223, 0xfffd, 0xfffd,
//        0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2297, 0x2295, 0x2205, 0x2229,
//        0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209,
//        0xfffd, 0x2207, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x221a, 0x22c5,
//        0xfffd, 0x2227, 0x2228, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//        0x22c4, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2211, 0xfffd, 0xfffd,
//        0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//        0xfffd, 0xfffd, 0x222b, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//        0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
//    };

    /* Encoding IDs of the platform-3 cmap subtables that need a
     * Unicode->native translation array. They double as indices
     * into converterMaps.
     */
    static final short ShiftJISEncoding = 2;
    static final short GBKEncoding      = 3;
    static final short Big5Encoding     = 4;
    static final short WansungEncoding  = 5;
    static final short JohabEncoding    = 6;
    static final short MSUnicodeSurrogateEncoding = 10;

    /* Marker stored in conversion tables for "no mapping". */
    static final char noSuchChar = (char)0xfffd;
    /* Masks the low 16 bits of an int. */
    static final int SHORTMASK = 0x0000ffff;
    /* Clears the sign bit of an int read from a table. */
    static final int INTMASK   = 0x7fffffff;

    /* Lazily filled cache of translation arrays, indexed by encoding ID. */
    static final char[][] converterMaps = new char[7][];

    /*
     * Unicode->other encoding translation array. 
A pre-computed look up138* which can be shared across all fonts using that encoding.139* Using this saves running character coverters repeatedly.140*/141char[] xlat;142UVS uvs = null;143144static CMap initialize(TrueTypeFont font) {145146CMap cmap = null;147148int offset, platformID, encodingID=-1;149150int three0=0, three1=0, three2=0, three3=0, three4=0, three5=0,151three6=0, three10=0;152int zero5 = 0; // for Unicode Variation Sequences153boolean threeStar = false;154155ByteBuffer cmapBuffer = font.getTableBuffer(TrueTypeFont.cmapTag);156int cmapTableOffset = font.getTableSize(TrueTypeFont.cmapTag);157short numberSubTables = cmapBuffer.getShort(2);158159/* locate the offsets of all 3,* (ie Microsoft platform) encodings */160for (int i=0; i<numberSubTables; i++) {161cmapBuffer.position(i * 8 + 4);162platformID = cmapBuffer.getShort();163if (platformID == 3) {164threeStar = true;165encodingID = cmapBuffer.getShort();166offset = cmapBuffer.getInt();167switch (encodingID) {168case 0: three0 = offset; break; // MS Symbol encoding169case 1: three1 = offset; break; // MS Unicode cmap170case 2: three2 = offset; break; // ShiftJIS cmap.171case 3: three3 = offset; break; // GBK cmap172case 4: three4 = offset; break; // Big 5 cmap173case 5: three5 = offset; break; // Wansung174case 6: three6 = offset; break; // Johab175case 10: three10 = offset; break; // MS Unicode surrogates176}177} else if (platformID == 0) {178encodingID = cmapBuffer.getShort();179offset = cmapBuffer.getInt();180if (encodingID == 5) {181zero5 = offset;182}183}184}185186/* This defines the preference order for cmap subtables */187if (threeStar) {188if (three10 != 0) {189cmap = createCMap(cmapBuffer, three10, null);190}191else if (three0 != 0) {192/* The special case treatment of these fonts leads to193* anomalies where a user can view "wingdings" and "wingdings2"194* and the latter shows all its code points in the unicode195* private use area at 0xF000->0XF0FF and the former shows196* a scattered subset 
of its glyphs that are known mappings to197* unicode code points.198* The primary purpose of these mappings was to facilitate199* display of symbol chars etc in composite fonts, however200* this is not needed as all these code points are covered201* by some other platform symbol font.202* Commenting this out reduces the role of these two files203* (assuming that they continue to be used in font.properties)204* to just one of contributing to the overall composite205* font metrics, and also AWT can still access the fonts.206* Clients which explicitly accessed these fonts as names207* "Symbol" and "Wingdings" (ie as physical fonts) and208* expected to see a scattering of these characters will209* see them now as missing. How much of a problem is this?210* Perhaps we could still support this mapping just for211* "Symbol.ttf" but I suspect some users would prefer it212* to be mapped in to the Latin range as that is how213* the "symbol" font is used in native apps.214*/215// String name = font.platName.toLowerCase(Locale.ENGLISH);216// if (name.endsWith("symbol.ttf")) {217// cmap = createSymbolCMap(cmapBuffer, three0, Symbols_b2c);218// } else if (name.endsWith("wingding.ttf")) {219// cmap = createSymbolCMap(cmapBuffer, three0, WingDings_b2c);220// } else {221cmap = createCMap(cmapBuffer, three0, null);222// }223}224else if (three1 != 0) {225cmap = createCMap(cmapBuffer, three1, null);226}227else if (three2 != 0) {228cmap = createCMap(cmapBuffer, three2,229getConverterMap(ShiftJISEncoding));230}231else if (three3 != 0) {232cmap = createCMap(cmapBuffer, three3,233getConverterMap(GBKEncoding));234}235else if (three4 != 0) {236cmap = createCMap(cmapBuffer, three4,237getConverterMap(Big5Encoding));238}239else if (three5 != 0) {240cmap = createCMap(cmapBuffer, three5,241getConverterMap(WansungEncoding));242}243else if (three6 != 0) {244cmap = createCMap(cmapBuffer, three6,245getConverterMap(JohabEncoding));246}247} else {248/* No 3,* subtable was found. 
Just use whatever is the first249* table listed. Not very useful but maybe better than250* rejecting the font entirely?251*/252cmap = createCMap(cmapBuffer, cmapBuffer.getInt(8), null);253}254// For Unicode Variation Sequences255if (cmap != null && zero5 != 0) {256cmap.createUVS(cmapBuffer, zero5);257}258return cmap;259}260261/* speed up the converting by setting the range for double262* byte characters;263*/264static char[] getConverter(short encodingID) {265int dBegin = 0x8000;266int dEnd = 0xffff;267String encoding;268269switch (encodingID) {270case ShiftJISEncoding:271dBegin = 0x8140;272dEnd = 0xfcfc;273encoding = "SJIS";274break;275case GBKEncoding:276dBegin = 0x8140;277dEnd = 0xfea0;278encoding = "GBK";279break;280case Big5Encoding:281dBegin = 0xa140;282dEnd = 0xfefe;283encoding = "Big5";284break;285case WansungEncoding:286dBegin = 0xa1a1;287dEnd = 0xfede;288encoding = "EUC_KR";289break;290case JohabEncoding:291dBegin = 0x8141;292dEnd = 0xfdfe;293encoding = "Johab";294break;295default:296return null;297}298299try {300char[] convertedChars = new char[65536];301for (int i=0; i<65536; i++) {302convertedChars[i] = noSuchChar;303}304305byte[] inputBytes = new byte[(dEnd-dBegin+1)*2];306char[] outputChars = new char[(dEnd-dBegin+1)];307308int j = 0;309int firstByte;310if (encodingID == ShiftJISEncoding) {311for (int i = dBegin; i <= dEnd; i++) {312firstByte = (i >> 8 & 0xff);313if (firstByte >= 0xa1 && firstByte <= 0xdf) {314//sjis halfwidth katakana315inputBytes[j++] = (byte)0xff;316inputBytes[j++] = (byte)0xff;317} else {318inputBytes[j++] = (byte)firstByte;319inputBytes[j++] = (byte)(i & 0xff);320}321}322} else {323for (int i = dBegin; i <= dEnd; i++) {324inputBytes[j++] = (byte)(i>>8 & 0xff);325inputBytes[j++] = (byte)(i & 0xff);326}327}328329Charset.forName(encoding).newDecoder()330.onMalformedInput(CodingErrorAction.REPLACE)331.onUnmappableCharacter(CodingErrorAction.REPLACE)332.replaceWith("\u0000")333.decode(ByteBuffer.wrap(inputBytes, 0, 
inputBytes.length),334CharBuffer.wrap(outputChars, 0, outputChars.length),335true);336337// ensure single byte ascii338for (int i = 0x20; i <= 0x7e; i++) {339convertedChars[i] = (char)i;340}341342//sjis halfwidth katakana343if (encodingID == ShiftJISEncoding) {344for (int i = 0xa1; i <= 0xdf; i++) {345convertedChars[i] = (char)(i - 0xa1 + 0xff61);346}347}348349/* It would save heap space (approx 60Kbytes for each of these350* converters) if stored only valid ranges (ie returned351* outputChars directly. But this is tricky since want to352* include the ASCII range too.353*/354// System.err.println("oc.len="+outputChars.length);355// System.err.println("cc.len="+convertedChars.length);356// System.err.println("dbegin="+dBegin);357System.arraycopy(outputChars, 0, convertedChars, dBegin,358outputChars.length);359360//return convertedChars;361/* invert this map as now want it to map from Unicode362* to other encoding.363*/364char [] invertedChars = new char[65536];365for (int i=0;i<65536;i++) {366if (convertedChars[i] != noSuchChar) {367invertedChars[convertedChars[i]] = (char)i;368}369}370return invertedChars;371372} catch (Exception e) {373e.printStackTrace();374}375return null;376}377378/*379* The returned array maps to unicode from some other 2 byte encoding380* eg for a 2byte index which represents a SJIS char, the indexed381* value is the corresponding unicode char.382*/383static char[] getConverterMap(short encodingID) {384if (converterMaps[encodingID] == null) {385converterMaps[encodingID] = getConverter(encodingID);386}387return converterMaps[encodingID];388}389390391static CMap createCMap(ByteBuffer buffer, int offset, char[] xlat) {392/* First do a sanity check that this cmap subtable is contained393* within the cmap table.394*/395int subtableFormat = buffer.getChar(offset);396long subtableLength;397if (subtableFormat < 8) {398subtableLength = buffer.getChar(offset+2);399} else {400subtableLength = buffer.getInt(offset+4) & INTMASK;401}402if 
(FontUtilities.isLogging() && offset + subtableLength > buffer.capacity()) {403FontUtilities.logWarning("Cmap subtable overflows buffer.");404}405switch (subtableFormat) {406case 0: return new CMapFormat0(buffer, offset);407case 2: return new CMapFormat2(buffer, offset, xlat);408case 4: return new CMapFormat4(buffer, offset, xlat);409case 6: return new CMapFormat6(buffer, offset, xlat);410case 8: return new CMapFormat8(buffer, offset, xlat);411case 10: return new CMapFormat10(buffer, offset, xlat);412case 12: return new CMapFormat12(buffer, offset, xlat);413default: throw new RuntimeException("Cmap format unimplemented: " +414(int)buffer.getChar(offset));415}416}417418private void createUVS(ByteBuffer buffer, int offset) {419int subtableFormat = buffer.getChar(offset);420if (subtableFormat == 14) {421long subtableLength = buffer.getInt(offset + 2) & INTMASK;422if (FontUtilities.isLogging() && offset + subtableLength > buffer.capacity()) {423FontUtilities.logWarning("Cmap UVS subtable overflows buffer.");424}425try {426this.uvs = new UVS(buffer, offset);427} catch (Throwable t) {428}429}430return;431}432433/*434final char charVal(byte[] cmap, int index) {435return (char)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));436}437438final short shortVal(byte[] cmap, int index) {439return (short)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));440}441*/442abstract char getGlyph(int charCode);443444/* Format 4 Header is445* ushort format (off=0)446* ushort length (off=2)447* ushort language (off=4)448* ushort segCountX2 (off=6)449* ushort searchRange (off=8)450* ushort entrySelector (off=10)451* ushort rangeShift (off=12)452* ushort endCount[segCount] (off=14)453* ushort reservedPad454* ushort startCount[segCount]455* short idDelta[segCount]456* idRangeOFfset[segCount]457* ushort glyphIdArray[]458*/459static class CMapFormat4 extends CMap {460int segCount;461int entrySelector;462int rangeShift;463char[] endCount;464char[] startCount;465short[] idDelta;466char[] 
idRangeOffset;467char[] glyphIds;468469CMapFormat4(ByteBuffer bbuffer, int offset, char[] xlat) {470471this.xlat = xlat;472473bbuffer.position(offset);474CharBuffer buffer = bbuffer.asCharBuffer();475buffer.get(); // skip, we already know format=4476int subtableLength = buffer.get();477/* Try to recover from some bad fonts which specify a subtable478* length that would overflow the byte buffer holding the whole479* cmap table. If this isn't a recoverable situation an exception480* may be thrown which is caught higher up the call stack.481* Whilst this may seem lenient, in practice, unless the "bad"482* subtable we are using is the last one in the cmap table we483* would have no way of knowing about this problem anyway.484*/485if (offset+subtableLength > bbuffer.capacity()) {486subtableLength = bbuffer.capacity() - offset;487}488buffer.get(); // skip language489segCount = buffer.get()/2;490int searchRange = buffer.get();491entrySelector = buffer.get();492rangeShift = buffer.get()/2;493startCount = new char[segCount];494endCount = new char[segCount];495idDelta = new short[segCount];496idRangeOffset = new char[segCount];497498for (int i=0; i<segCount; i++) {499endCount[i] = buffer.get();500}501buffer.get(); // 2 bytes for reserved pad502for (int i=0; i<segCount; i++) {503startCount[i] = buffer.get();504}505506for (int i=0; i<segCount; i++) {507idDelta[i] = (short)buffer.get();508}509510for (int i=0; i<segCount; i++) {511char ctmp = buffer.get();512idRangeOffset[i] = (char)((ctmp>>1)&0xffff);513}514/* Can calculate the number of glyph IDs by subtracting515* "pos" from the length of the cmap516*/517int pos = (segCount*8+16)/2;518buffer.position(pos);519int numGlyphIds = (subtableLength/2 - pos);520glyphIds = new char[numGlyphIds];521for (int i=0;i<numGlyphIds;i++) {522glyphIds[i] = buffer.get();523}524/*525System.err.println("segcount="+segCount);526System.err.println("entrySelector="+entrySelector);527System.err.println("rangeShift="+rangeShift);528for (int 
j=0;j<segCount;j++) {529System.err.println("j="+j+ " sc="+(int)(startCount[j]&0xffff)+530" ec="+(int)(endCount[j]&0xffff)+531" delta="+idDelta[j] +532" ro="+(int)idRangeOffset[j]);533}534535//System.err.println("numglyphs="+glyphIds.length);536for (int i=0;i<numGlyphIds;i++) {537System.err.println("gid["+i+"]="+(int)glyphIds[i]);538}539*/540}541542char getGlyph(int charCode) {543544final int origCharCode = charCode;545int index = 0;546char glyphCode = 0;547548int controlGlyph = getControlCodeGlyph(charCode, true);549if (controlGlyph >= 0) {550return (char)controlGlyph;551}552553/* presence of translation array indicates that this554* cmap is in some other (non-unicode encoding).555* In order to look-up a char->glyph mapping we need to556* translate the unicode code point to the encoding of557* the cmap.558* REMIND: VALID CHARCODES??559*/560if (xlat != null) {561charCode = xlat[charCode];562}563564/*565* Citation from the TrueType (and OpenType) spec:566* The segments are sorted in order of increasing endCode567* values, and the segment values are specified in four parallel568* arrays. You search for the first endCode that is greater than569* or equal to the character code you want to map. 
If the570* corresponding startCode is less than or equal to the571* character code, then you use the corresponding idDelta and572* idRangeOffset to map the character code to a glyph index573* (otherwise, the missingGlyph is returned).574*/575576/*577* CMAP format4 defines several fields for optimized search of578* the segment list (entrySelector, searchRange, rangeShift).579* However, benefits are neglible and some fonts have incorrect580* data - so we use straightforward binary search (see bug 6247425)581*/582int left = 0, right = startCount.length;583index = startCount.length >> 1;584while (left < right) {585if (endCount[index] < charCode) {586left = index + 1;587} else {588right = index;589}590index = (left + right) >> 1;591}592593if (charCode >= startCount[index] && charCode <= endCount[index]) {594int rangeOffset = idRangeOffset[index];595596if (rangeOffset == 0) {597glyphCode = (char)(charCode + idDelta[index]);598} else {599/* Calculate an index into the glyphIds array */600601/*602System.err.println("rangeoffset="+rangeOffset+603" charCode=" + charCode +604" scnt["+index+"]="+(int)startCount[index] +605" segCnt="+segCount);606*/607608int glyphIDIndex = rangeOffset - segCount + index609+ (charCode - startCount[index]);610glyphCode = glyphIds[glyphIDIndex];611if (glyphCode != 0) {612glyphCode = (char)(glyphCode + idDelta[index]);613}614}615}616if (glyphCode == 0) {617glyphCode = getFormatCharGlyph(origCharCode);618}619return glyphCode;620}621}622623// Format 0: Byte Encoding table624static class CMapFormat0 extends CMap {625byte [] cmap;626627CMapFormat0(ByteBuffer buffer, int offset) {628629/* skip 6 bytes of format, length, and version */630int len = buffer.getChar(offset+2);631cmap = new byte[len-6];632buffer.position(offset+6);633buffer.get(cmap);634}635636char getGlyph(int charCode) {637if (charCode < 256) {638if (charCode < 0x0010) {639switch (charCode) {640case 0x0009:641case 0x000a:642case 0x000d: return 
CharToGlyphMapper.INVISIBLE_GLYPH_ID;643}644}645return (char)(0xff & cmap[charCode]);646} else {647return 0;648}649}650}651652// static CMap createSymbolCMap(ByteBuffer buffer, int offset, char[] syms) {653654// CMap cmap = createCMap(buffer, offset, null);655// if (cmap == null) {656// return null;657// } else {658// return new CMapFormatSymbol(cmap, syms);659// }660// }661662// static class CMapFormatSymbol extends CMap {663664// CMap cmap;665// static final int NUM_BUCKETS = 128;666// Bucket[] buckets = new Bucket[NUM_BUCKETS];667668// class Bucket {669// char unicode;670// char glyph;671// Bucket next;672673// Bucket(char u, char g) {674// unicode = u;675// glyph = g;676// }677// }678679// CMapFormatSymbol(CMap cmap, char[] syms) {680681// this.cmap = cmap;682683// for (int i=0;i<syms.length;i++) {684// char unicode = syms[i];685// if (unicode != noSuchChar) {686// char glyph = cmap.getGlyph(i + 0xf000);687// int hash = unicode % NUM_BUCKETS;688// Bucket bucket = new Bucket(unicode, glyph);689// if (buckets[hash] == null) {690// buckets[hash] = bucket;691// } else {692// Bucket b = buckets[hash];693// while (b.next != null) {694// b = b.next;695// }696// b.next = bucket;697// }698// }699// }700// }701702// char getGlyph(int unicode) {703// if (unicode >= 0x1000) {704// return 0;705// }706// else if (unicode >=0xf000 && unicode < 0xf100) {707// return cmap.getGlyph(unicode);708// } else {709// Bucket b = buckets[unicode % NUM_BUCKETS];710// while (b != null) {711// if (b.unicode == unicode) {712// return b.glyph;713// } else {714// b = b.next;715// }716// }717// return 0;718// }719// }720// }721722// Format 2: High-byte mapping through table723static class CMapFormat2 extends CMap {724725char[] subHeaderKey = new char[256];726/* Store subheaders in individual arrays727* A SubHeader entry theortically looks like {728* char firstCode;729* char entryCount;730* short idDelta;731* char idRangeOffset;732* }733*/734char[] firstCodeArray;735char[] 
entryCountArray;736short[] idDeltaArray;737char[] idRangeOffSetArray;738739char[] glyphIndexArray;740741CMapFormat2(ByteBuffer buffer, int offset, char[] xlat) {742743this.xlat = xlat;744745int tableLen = buffer.getChar(offset+2);746buffer.position(offset+6);747CharBuffer cBuffer = buffer.asCharBuffer();748char maxSubHeader = 0;749for (int i=0;i<256;i++) {750subHeaderKey[i] = cBuffer.get();751if (subHeaderKey[i] > maxSubHeader) {752maxSubHeader = subHeaderKey[i];753}754}755/* The value of the subHeaderKey is 8 * the subHeader index,756* so the number of subHeaders can be obtained by dividing757* this value bv 8 and adding 1.758*/759int numSubHeaders = (maxSubHeader >> 3) +1;760firstCodeArray = new char[numSubHeaders];761entryCountArray = new char[numSubHeaders];762idDeltaArray = new short[numSubHeaders];763idRangeOffSetArray = new char[numSubHeaders];764for (int i=0; i<numSubHeaders; i++) {765firstCodeArray[i] = cBuffer.get();766entryCountArray[i] = cBuffer.get();767idDeltaArray[i] = (short)cBuffer.get();768idRangeOffSetArray[i] = cBuffer.get();769// System.out.println("sh["+i+"]:fc="+(int)firstCodeArray[i]+770// " ec="+(int)entryCountArray[i]+771// " delta="+(int)idDeltaArray[i]+772// " offset="+(int)idRangeOffSetArray[i]);773}774775int glyphIndexArrSize = (tableLen-518-numSubHeaders*8)/2;776glyphIndexArray = new char[glyphIndexArrSize];777for (int i=0; i<glyphIndexArrSize;i++) {778glyphIndexArray[i] = cBuffer.get();779}780}781782char getGlyph(int charCode) {783final int origCharCode = charCode;784int controlGlyph = getControlCodeGlyph(charCode, true);785if (controlGlyph >= 0) {786return (char)controlGlyph;787}788789if (xlat != null) {790charCode = xlat[charCode];791}792793char highByte = (char)(charCode >> 8);794char lowByte = (char)(charCode & 0xff);795int key = subHeaderKey[highByte]>>3; // index into subHeaders796char mapMe;797798if (key != 0) {799mapMe = lowByte;800} else {801mapMe = highByte;802if (mapMe == 0) {803mapMe = lowByte;804}805}806807// 
System.err.println("charCode="+Integer.toHexString(charCode)+808// " key="+key+ " mapMe="+Integer.toHexString(mapMe));809char firstCode = firstCodeArray[key];810if (mapMe < firstCode) {811return 0;812} else {813mapMe -= firstCode;814}815816if (mapMe < entryCountArray[key]) {817/* "address" arithmetic is needed to calculate the offset818* into glyphIndexArray. "idRangeOffSetArray[key]" specifies819* the number of bytes from that location in the table where820* the subarray of glyphIndexes starting at "firstCode" begins.821* Each entry in the subHeader table is 8 bytes, and the822* idRangeOffSetArray field is at offset 6 in the entry.823* The glyphIndexArray immediately follows the subHeaders.824* So if there are "N" entries then the number of bytes to the825* start of glyphIndexArray is (N-key)*8-6.826* Subtract this from the idRangeOffSetArray value to get827* the number of bytes into glyphIndexArray and divide by 2 to828* get the (char) array index.829*/830int glyphArrayOffset = ((idRangeOffSetArray.length-key)*8)-6;831int glyphSubArrayStart =832(idRangeOffSetArray[key] - glyphArrayOffset)/2;833char glyphCode = glyphIndexArray[glyphSubArrayStart+mapMe];834if (glyphCode != 0) {835glyphCode += idDeltaArray[key]; //idDelta836return glyphCode;837}838}839return getFormatCharGlyph(origCharCode);840}841}842843// Format 6: Trimmed table mapping844static class CMapFormat6 extends CMap {845846char firstCode;847char entryCount;848char[] glyphIdArray;849850CMapFormat6(ByteBuffer bbuffer, int offset, char[] xlat) {851852bbuffer.position(offset+6);853CharBuffer buffer = bbuffer.asCharBuffer();854firstCode = buffer.get();855entryCount = buffer.get();856glyphIdArray = new char[entryCount];857for (int i=0; i< entryCount; i++) {858glyphIdArray[i] = buffer.get();859}860}861862char getGlyph(int charCode) {863final int origCharCode = charCode;864int controlGlyph = getControlCodeGlyph(charCode, true);865if (controlGlyph >= 0) {866return (char)controlGlyph;867}868869if (xlat != null) 
{870charCode = xlat[charCode];871}872873charCode -= firstCode;874if (charCode < 0 || charCode >= entryCount) {875return getFormatCharGlyph(origCharCode);876} else {877return glyphIdArray[charCode];878}879}880}881882// Format 8: mixed 16-bit and 32-bit coverage883// Seems unlikely this code will ever get tested as we look for884// MS platform Cmaps and MS states (in the Opentype spec on their website)885// that MS doesn't support this format886static class CMapFormat8 extends CMap {887byte[] is32 = new byte[8192];888int nGroups;889int[] startCharCode;890int[] endCharCode;891int[] startGlyphID;892893CMapFormat8(ByteBuffer bbuffer, int offset, char[] xlat) {894895bbuffer.position(12);896bbuffer.get(is32);897nGroups = bbuffer.getInt() & INTMASK;898// A map group record is three uint32's making for 12 bytes total899if (bbuffer.remaining() < (12 * (long)nGroups)) {900throw new RuntimeException("Format 8 table exceeded");901}902startCharCode = new int[nGroups];903endCharCode = new int[nGroups];904startGlyphID = new int[nGroups];905}906907char getGlyph(int charCode) {908if (xlat != null) {909throw new RuntimeException("xlat array for cmap fmt=8");910}911return 0;912}913914}915916917// Format 4-byte 10: Trimmed table mapping918// Seems unlikely this code will ever get tested as we look for919// MS platform Cmaps and MS states (in the Opentype spec on their website)920// that MS doesn't support this format921static class CMapFormat10 extends CMap {922923long firstCode;924int entryCount;925char[] glyphIdArray;926927CMapFormat10(ByteBuffer bbuffer, int offset, char[] xlat) {928929bbuffer.position(offset+12);930firstCode = bbuffer.getInt() & INTMASK;931entryCount = bbuffer.getInt() & INTMASK;932// each glyph is a uint16, so 2 bytes per value.933if (bbuffer.remaining() < (2 * (long)entryCount)) {934throw new RuntimeException("Format 10 table exceeded");935}936CharBuffer buffer = bbuffer.asCharBuffer();937glyphIdArray = new char[entryCount];938for (int i=0; i< entryCount; i++) 
{939glyphIdArray[i] = buffer.get();940}941}942943char getGlyph(int charCode) {944945if (xlat != null) {946throw new RuntimeException("xlat array for cmap fmt=10");947}948949int code = (int)(charCode - firstCode);950if (code < 0 || code >= entryCount) {951return 0;952} else {953return glyphIdArray[code];954}955}956}957958// Format 12: Segmented coverage for UCS-4 (fonts supporting959// surrogate pairs)960static class CMapFormat12 extends CMap {961962int numGroups;963int highBit =0;964int power;965int extra;966long[] startCharCode;967long[] endCharCode;968int[] startGlyphID;969970CMapFormat12(ByteBuffer buffer, int offset, char[] xlat) {971if (xlat != null) {972throw new RuntimeException("xlat array for cmap fmt=12");973}974975buffer.position(offset+12);976numGroups = buffer.getInt() & INTMASK;977// A map group record is three uint32's making for 12 bytes total978if (buffer.remaining() < (12 * (long)numGroups)) {979throw new RuntimeException("Format 12 table exceeded");980}981startCharCode = new long[numGroups];982endCharCode = new long[numGroups];983startGlyphID = new int[numGroups];984buffer = buffer.slice();985IntBuffer ibuffer = buffer.asIntBuffer();986for (int i=0; i<numGroups; i++) {987startCharCode[i] = ibuffer.get() & INTMASK;988endCharCode[i] = ibuffer.get() & INTMASK;989startGlyphID[i] = ibuffer.get() & INTMASK;990}991992/* Finds the high bit by binary searching through the bits */993int value = numGroups;994995if (value >= 1 << 16) {996value >>= 16;997highBit += 16;998}9991000if (value >= 1 << 8) {1001value >>= 8;1002highBit += 8;1003}10041005if (value >= 1 << 4) {1006value >>= 4;1007highBit += 4;1008}10091010if (value >= 1 << 2) {1011value >>= 2;1012highBit += 2;1013}10141015if (value >= 1 << 1) {1016value >>= 1;1017highBit += 1;1018}10191020power = 1 << highBit;1021extra = numGroups - power;1022}10231024char getGlyph(int charCode) {1025final int origCharCode = charCode;1026int controlGlyph = getControlCodeGlyph(charCode, false);1027if (controlGlyph >= 0) 
{1028return (char)controlGlyph;1029}1030int probe = power;1031int range = 0;10321033if (startCharCode[extra] <= charCode) {1034range = extra;1035}10361037while (probe > 1) {1038probe >>= 1;10391040if (startCharCode[range+probe] <= charCode) {1041range += probe;1042}1043}10441045if (startCharCode[range] <= charCode &&1046endCharCode[range] >= charCode) {1047return (char)1048(startGlyphID[range] + (charCode - startCharCode[range]));1049}10501051return getFormatCharGlyph(origCharCode);1052}10531054}10551056/* Used to substitute for bad Cmaps. */1057static class NullCMapClass extends CMap {10581059char getGlyph(int charCode) {1060return 0;1061}1062}10631064public static final NullCMapClass theNullCmap = new NullCMapClass();10651066final int getControlCodeGlyph(int charCode, boolean noSurrogates) {1067if (charCode < 0x0010) {1068switch (charCode) {1069case 0x0009:1070case 0x000a:1071case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;1072}1073} else if (noSurrogates && charCode >= 0xFFFF) {1074return 0;1075}1076return -1;1077}10781079final char getFormatCharGlyph(int charCode) {1080if (charCode >= 0x200c) {1081if ((charCode <= 0x200f) ||1082(charCode >= 0x2028 && charCode <= 0x202e) ||1083(charCode >= 0x206a && charCode <= 0x206f)) {1084return (char)CharToGlyphMapper.INVISIBLE_GLYPH_ID;1085}1086}1087return 0;1088}10891090static class UVS {1091int numSelectors;1092int[] selector;10931094//for Non-Default UVS Table1095int[] numUVSMapping;1096int[][] unicodeValue;1097char[][] glyphID;10981099UVS(ByteBuffer buffer, int offset) {1100buffer.position(offset+6);1101numSelectors = buffer.getInt() & INTMASK;1102// A variation selector record is one 3 byte int + two int32's1103// making for 11 bytes per record.1104if (buffer.remaining() < (11 * (long)numSelectors)) {1105throw new RuntimeException("Variations exceed buffer");1106}1107selector = new int[numSelectors];1108numUVSMapping = new int[numSelectors];1109unicodeValue = new int[numSelectors][];1110glyphID = new 
char[numSelectors][];11111112for (int i = 0; i < numSelectors; i++) {1113buffer.position(offset + 10 + i * 11);1114selector[i] = (buffer.get() & 0xff) << 16; //UINT241115selector[i] += (buffer.get() & 0xff) << 8;1116selector[i] += buffer.get() & 0xff;11171118//skip Default UVS Table11191120//for Non-Default UVS Table1121int tableOffset = buffer.getInt(offset + 10 + i * 11 + 7);1122if (tableOffset == 0) {1123numUVSMapping[i] = 0;1124} else if (tableOffset > 0) {1125buffer.position(offset+tableOffset);1126numUVSMapping[i] = buffer.getInt() & INTMASK;1127// a UVS mapping record is one 3 byte int + uint161128// making for 5 bytes per record.1129if (buffer.remaining() < (5 * (long)numUVSMapping[i])) {1130throw new RuntimeException("Variations exceed buffer");1131}1132unicodeValue[i] = new int[numUVSMapping[i]];1133glyphID[i] = new char[numUVSMapping[i]];11341135for (int j = 0; j < numUVSMapping[i]; j++) {1136int temp = (buffer.get() & 0xff) << 16; //UINT241137temp += (buffer.get() & 0xff) << 8;1138temp += buffer.get() & 0xff;1139unicodeValue[i][j] = temp;1140glyphID[i][j] = buffer.getChar();1141}1142}1143}1144}11451146static final int VS_NOGLYPH = 0;1147private int getGlyph(int charCode, int variationSelector) {1148int targetSelector = -1;1149for (int i = 0; i < numSelectors; i++) {1150if (selector[i] == variationSelector) {1151targetSelector = i;1152break;1153}1154}1155if (targetSelector == -1) {1156return VS_NOGLYPH;1157}1158if (numUVSMapping[targetSelector] > 0) {1159int index = java.util.Arrays.binarySearch(1160unicodeValue[targetSelector], charCode);1161if (index >= 0) {1162return glyphID[targetSelector][index];1163}1164}1165return VS_NOGLYPH;1166}1167}11681169char getVariationGlyph(int charCode, int variationSelector) {1170char glyph = 0;1171if (uvs == null) {1172glyph = getGlyph(charCode);1173} else {1174int result = uvs.getGlyph(charCode, variationSelector);1175if (result > 0) {1176glyph = (char)(result & 0xFFFF);1177} else {1178glyph = 
getGlyph(charCode);1179}1180}1181return glyph;1182}1183}118411851186