Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/sun/font/CMap.java
38829 views
/*
 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.font;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.IntBuffer;
import java.util.Locale;
import java.nio.charset.*;

/*
 * A tt font has a CMAP table which is in turn made up of sub-tables which
 * describe the char to glyph mapping in (possibly) multiple ways.
 * CMAP subtables are described by 3 values.
 * 1. Platform ID (eg 3=Microsoft, which is the id we look for in JDK)
 * 2. Encoding (eg 0=symbol, 1=unicode)
 * 3. TrueType subtable format (how the char->glyph mapping for the encoding
 *    is stored in the subtable). See the TrueType spec. Format 4 is required
 *    by MS in fonts for windows. It uses segmented mapping to delta values.
 * Most typically we see are (3,1,4) :
 * CMAP Platform ID=3 is what we use.
 * Encodings that are used in practice by JDK on Solaris are
 *   symbol  (3,0)
 *   unicode (3,1)
 *   GBK     (3,5) (note that solaris zh fonts report 3,4 but are really 3,5)
 * The format for almost all subtables is 4. However the solaris (3,5)
 * encodings are typically in format 2.
 */
abstract class CMap {

    static final short ShiftJISEncoding = 2;
    static final short GBKEncoding = 3;
    static final short Big5Encoding = 4;
    static final short WansungEncoding = 5;
    static final short JohabEncoding = 6;
    static final short MSUnicodeSurrogateEncoding = 10;

    static final char noSuchChar = (char)0xfffd;
    static final int SHORTMASK = 0x0000ffff;
    static final int INTMASK = 0x7fffffff;

    /* Lazily populated cache of translation arrays, indexed by encoding ID. */
    static final char[][] converterMaps = new char[7][];

    /*
     * Unicode->other encoding translation array. A pre-computed look up
     * which can be shared across all fonts using that encoding.
     * Using this saves running character converters repeatedly.
     */
    char[] xlat;

    /*
     * Selects and parses the preferred cmap subtable of the given font.
     *
     * All Microsoft platform (ID 3) subtables are located first and then one
     * is chosen by a fixed preference order. If the font has no platform 3
     * subtable at all, the first subtable listed is used.
     *
     * Returns null if platform 3 subtables exist but none of them uses an
     * encoding ID handled below. Exceptions raised while parsing a subtable
     * (see createCMap and the format constructors) propagate to the caller.
     */
    static CMap initialize(TrueTypeFont font) {

        CMap cmap = null;

        int offset, platformID, encodingID=-1;

        int three0=0, three1=0, three2=0, three3=0, three4=0, three5=0,
            three6=0, three10=0;
        boolean threeStar = false;

        ByteBuffer cmapBuffer = font.getTableBuffer(TrueTypeFont.cmapTag);
        // NOTE(review): this value is not read anywhere in this method.
        int cmapTableOffset = font.getTableSize(TrueTypeFont.cmapTag);
        short numberSubTables = cmapBuffer.getShort(2);

        /* locate the offsets of all 3,* (ie Microsoft platform) encodings */
        for (int i=0; i<numberSubTables; i++) {
            // Each encoding record is 8 bytes and the records start at byte 4.
            cmapBuffer.position(i * 8 + 4);
            platformID = cmapBuffer.getShort();
            if (platformID == 3) {
                threeStar = true;
                encodingID = cmapBuffer.getShort();
                offset     = cmapBuffer.getInt();
                switch (encodingID) {
                case 0:  three0  = offset; break; // MS Symbol encoding
                case 1:  three1  = offset; break; // MS Unicode cmap
                case 2:  three2  = offset; break; // ShiftJIS cmap.
                case 3:  three3  = offset; break; // GBK cmap
                case 4:  three4  = offset; break; // Big 5 cmap
                case 5:  three5  = offset; break; // Wansung
                case 6:  three6  = offset; break; // Johab
                case 10: three10 = offset; break; // MS Unicode surrogates
                }
            }
        }

        /* This defines the preference order for cmap subtables */
        if (threeStar) {
            if (three10 != 0) {
                cmap = createCMap(cmapBuffer, three10, null);
            }
            else if (three0 != 0) {
                /* Symbol-encoded (3,0) subtables are used as-is.
                 *
                 * Special case treatment of the "Symbol" and "Wingdings"
                 * fonts (re-mapping their glyphs onto known unicode code
                 * points) led to anomalies where a user can view "wingdings"
                 * and "wingdings2" and the latter shows all its code points
                 * in the unicode private use area at 0xF000->0XF0FF and the
                 * former shows a scattered subset of its glyphs that are
                 * known mappings to unicode code points.
                 * The primary purpose of these mappings was to facilitate
                 * display of symbol chars etc in composite fonts, however
                 * this is not needed as all these code points are covered
                 * by Lucida Sans Regular.
                 * Disabling the special case reduces the role of these two
                 * files (assuming that they continue to be used in
                 * font.properties) to just one of contributing to the overall
                 * composite font metrics, and also AWT can still access the
                 * fonts.
                 * Clients which explicitly accessed these fonts as names
                 * "Symbol" and "Wingdings" (ie as physical fonts) and
                 * expected to see a scattering of these characters will
                 * see them now as missing. How much of a problem is this?
                 * Perhaps we could still support this mapping just for
                 * "Symbol.ttf" but I suspect some users would prefer it
                 * to be mapped in to the Latin range as that is how
                 * the "symbol" font is used in native apps.
                 */
                cmap = createCMap(cmapBuffer, three0, null);
            }
            else if (three1 != 0) {
                cmap = createCMap(cmapBuffer, three1, null);
            }
            else if (three2 != 0) {
                cmap = createCMap(cmapBuffer, three2,
                                  getConverterMap(ShiftJISEncoding));
            }
            else if (three3 != 0) {
                cmap = createCMap(cmapBuffer, three3,
                                  getConverterMap(GBKEncoding));
            }
            else if (three4 != 0) {
                /* GB2312 TrueType fonts on Solaris have wrong encoding ID for
                 * cmap table, these fonts have EncodingID 4 which is Big5
                 * encoding according the TrueType spec, but actually the
                 * fonts are using gb2312 encoding, have to use this
                 * workaround to make Solaris zh_CN locale work.  -sherman
                 */
                if (FontUtilities.isSolaris && font.platName != null &&
                    (font.platName.startsWith(
                     "/usr/openwin/lib/locale/zh_CN.EUC/X11/fonts/TrueType") ||
                     font.platName.startsWith(
                     "/usr/openwin/lib/locale/zh_CN/X11/fonts/TrueType") ||
                     font.platName.startsWith(
                     "/usr/openwin/lib/locale/zh/X11/fonts/TrueType"))) {
                    cmap = createCMap(cmapBuffer, three4,
                                      getConverterMap(GBKEncoding));
                }
                else {
                    cmap = createCMap(cmapBuffer, three4,
                                      getConverterMap(Big5Encoding));
                }
            }
            else if (three5 != 0) {
                cmap = createCMap(cmapBuffer, three5,
                                  getConverterMap(WansungEncoding));
            }
            else if (three6 != 0) {
                cmap = createCMap(cmapBuffer, three6,
                                  getConverterMap(JohabEncoding));
            }
        } else {
            /* No 3,* subtable was found. Just use whatever is the first
             * table listed. Not very useful but maybe better than
             * rejecting the font entirely?
             */
            cmap = createCMap(cmapBuffer, cmapBuffer.getInt(8), null);
        }
        return cmap;
    }

    /*
     * Builds the Unicode->encoding translation array for one of the
     * double byte encodings above. The array has 65536 entries and is
     * indexed by the unicode char; the value is the code in the target
     * encoding.
     *
     * Speeds up the converting by setting the range for double
     * byte characters.
     *
     * Returns null for an encoding ID that is not handled here, or if
     * building the table fails (the exception is printed to stderr).
     */
    static char[] getConverter(short encodingID) {
        int dBegin = 0x8000;
        int dEnd   = 0xffff;
        String encoding;

        switch (encodingID) {
        case ShiftJISEncoding:
            dBegin = 0x8140;
            dEnd   = 0xfcfc;
            encoding = "SJIS";
            break;
        case GBKEncoding:
            dBegin = 0x8140;
            dEnd   = 0xfea0;
            encoding = "GBK";
            break;
        case Big5Encoding:
            dBegin = 0xa140;
            dEnd   = 0xfefe;
            encoding = "Big5";
            break;
        case WansungEncoding:
            dBegin = 0xa1a1;
            dEnd   = 0xfede;
            encoding = "EUC_KR";
            break;
        case JohabEncoding:
            dBegin = 0x8141;
            dEnd   = 0xfdfe;
            encoding = "Johab";
            break;
        default:
            return null;
        }

        try {
            /* convertedChars maps encoding code -> unicode char. */
            char[] convertedChars = new char[65536];
            for (int i=0; i<65536; i++) {
                convertedChars[i] = noSuchChar;
            }

            byte[] inputBytes = new byte[(dEnd-dBegin+1)*2];
            char[] outputChars = new char[(dEnd-dBegin+1)];

            /* Lay out every two byte code in [dBegin, dEnd] as decoder input */
            int j = 0;
            int firstByte;
            if (encodingID == ShiftJISEncoding) {
                for (int i = dBegin; i <= dEnd; i++) {
                    firstByte = (i >> 8 & 0xff);
                    if (firstByte >= 0xa1 && firstByte <= 0xdf) {
                        //sjis halfwidth katakana
                        inputBytes[j++] = (byte)0xff;
                        inputBytes[j++] = (byte)0xff;
                    } else {
                        inputBytes[j++] = (byte)firstByte;
                        inputBytes[j++] = (byte)(i & 0xff);
                    }
                }
            } else {
                for (int i = dBegin; i <= dEnd; i++) {
                    inputBytes[j++] = (byte)(i>>8 & 0xff);
                    inputBytes[j++] = (byte)(i & 0xff);
                }
            }

            Charset.forName(encoding).newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE)
            .replaceWith("\u0000")
            .decode(ByteBuffer.wrap(inputBytes, 0, inputBytes.length),
                    CharBuffer.wrap(outputChars, 0, outputChars.length),
                    true);

            // ensure single byte ascii
            for (int i = 0x20; i <= 0x7e; i++) {
                convertedChars[i] = (char)i;
            }

            //sjis halfwidth katakana
            if (encodingID == ShiftJISEncoding) {
                for (int i = 0xa1; i <= 0xdf; i++) {
                    convertedChars[i] = (char)(i - 0xa1 + 0xff61);
                }
            }

            /* It would save heap space (approx 60Kbytes for each of these
             * converters) if stored only valid ranges (ie returned
             * outputChars directly. But this is tricky since want to
             * include the ASCII range too.
             */
            System.arraycopy(outputChars, 0, convertedChars, dBegin,
                             outputChars.length);

            /* invert this map as now want it to map from Unicode
             * to other encoding.
             */
            char [] invertedChars = new char[65536];
            for (int i=0;i<65536;i++) {
                if (convertedChars[i] != noSuchChar) {
                    invertedChars[convertedChars[i]] = (char)i;
                }
            }
            return invertedChars;

        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /*
     * Returns the (cached) translation array for the given encoding,
     * building it with getConverter() on first use.
     * The returned array maps from unicode to the other 2 byte encoding,
     * eg for a unicode char used as the index, the indexed value is the
     * corresponding SJIS code. It is suitable for use as an "xlat" array.
     * The encodingID must be a valid index into converterMaps.
     */
    static char[] getConverterMap(short encodingID) {
        if (converterMaps[encodingID] == null) {
           converterMaps[encodingID] = getConverter(encodingID);
        }
        return converterMaps[encodingID];
    }


    /*
     * Instantiates the CMap implementation matching the format of the
     * subtable found at "offset" in "buffer". "xlat" is the optional
     * unicode->encoding translation array (null for unicode subtables).
     * Throws RuntimeException for a subtable format with no implementation.
     */
    static CMap createCMap(ByteBuffer buffer, int offset, char[] xlat) {
        /* First do a sanity check that this cmap subtable is contained
         * within the cmap table. A failure is only logged, it is not fatal.
         */
        int subtableFormat = buffer.getChar(offset);
        long subtableLength;
        if (subtableFormat < 8) {
            // 16 bit length field follows the 16 bit format field.
            subtableLength = buffer.getChar(offset+2);
        } else {
            // 32 bit length field at byte 4 of the subtable.
            subtableLength = buffer.getInt(offset+4) & INTMASK;
        }
        if (offset+subtableLength > buffer.capacity()) {
            if (FontUtilities.isLogging()) {
                FontUtilities.getLogger().warning("Cmap subtable overflows buffer.");
            }
        }
        switch (subtableFormat) {
        case 0:  return new CMapFormat0(buffer, offset);
        case 2:  return new CMapFormat2(buffer, offset, xlat);
        case 4:  return new CMapFormat4(buffer, offset, xlat);
        case 6:  return new CMapFormat6(buffer, offset, xlat);
        case 8:  return new CMapFormat8(buffer, offset, xlat);
        case 10: return new CMapFormat10(buffer, offset, xlat);
        case 12: return new CMapFormat12(buffer, offset, xlat);
        default: throw new RuntimeException("Cmap format unimplemented: " +
                                            (int)buffer.getChar(offset));
        }
    }

    /*
     * Maps a character code to a glyph code; 0 means no glyph is mapped.
     */
    abstract char getGlyph(int charCode);

    /* Format 4 Header is
     * ushort format (off=0)
     * ushort length (off=2)
     * ushort language (off=4)
     * ushort segCountX2 (off=6)
     * ushort searchRange (off=8)
     * ushort entrySelector (off=10)
     * ushort rangeShift (off=12)
     * ushort endCount[segCount] (off=14)
     * ushort reservedPad
     * ushort startCount[segCount]
     * short idDelta[segCount]
     * ushort idRangeOffset[segCount]
     * ushort glyphIdArray[]
     */
    static class CMapFormat4 extends CMap {
        int segCount;
        int entrySelector;
        int rangeShift;
        char[] endCount;
        char[] startCount;
        short[] idDelta;
        char[] idRangeOffset;   // stored as a char (2 byte) count, not bytes
        char[] glyphIds;

        CMapFormat4(ByteBuffer bbuffer, int offset, char[] xlat) {

            this.xlat = xlat;

            bbuffer.position(offset);
            // View of the subtable as 16 bit values; positions are relative
            // to the start of the subtable.
            CharBuffer buffer = bbuffer.asCharBuffer();
            buffer.get(); // skip, we already know format=4
            int subtableLength = buffer.get();
            /* Try to recover from some bad fonts which specify a subtable
             * length that would overflow the byte buffer holding the whole
             * cmap table. If this isn't a recoverable situation an exception
             * may be thrown which is caught higher up the call stack.
             * Whilst this may seem lenient, in practice, unless the "bad"
             * subtable we are using is the last one in the cmap table we
             * would have no way of knowing about this problem anyway.
             */
            if (offset+subtableLength > bbuffer.capacity()) {
                subtableLength = bbuffer.capacity() - offset;
            }
            buffer.get(); // skip language
            segCount = buffer.get()/2;
            buffer.get(); // skip searchRange, it is not used
            entrySelector = buffer.get();
            rangeShift    = buffer.get()/2;
            startCount = new char[segCount];
            endCount = new char[segCount];
            idDelta = new short[segCount];
            idRangeOffset = new char[segCount];

            for (int i=0; i<segCount; i++) {
                endCount[i] = buffer.get();
            }
            buffer.get(); // 2 bytes for reserved pad
            for (int i=0; i<segCount; i++) {
                startCount[i] = buffer.get();
            }

            for (int i=0; i<segCount; i++) {
                idDelta[i] = (short)buffer.get();
            }

            for (int i=0; i<segCount; i++) {
                char ctmp = buffer.get();
                // Halve the byte offset so it can index a char array.
                idRangeOffset[i] = (char)((ctmp>>1)&0xffff);
            }
            /* Can calculate the number of glyph IDs by subtracting
             * "pos" from the length of the cmap
             */
            int pos = (segCount*8+16)/2;
            buffer.position(pos);
            int numGlyphIds = (subtableLength/2 - pos);
            glyphIds = new char[numGlyphIds];
            for (int i=0;i<numGlyphIds;i++) {
                glyphIds[i] = buffer.get();
            }
        }

        @Override
        char getGlyph(int charCode) {

            int index = 0;
            char glyphCode = 0;

            int controlGlyph = getControlCodeGlyph(charCode, true);
            if (controlGlyph >= 0) {
                return (char)controlGlyph;
            }

            /* presence of translation array indicates that this
             * cmap is in some other (non-unicode encoding).
             * In order to look-up a char->glyph mapping we need to
             * translate the unicode code point to the encoding of
             * the cmap.
             * REMIND: VALID CHARCODES??
             */
            if (xlat != null) {
                charCode = xlat[charCode];
            }

            /*
             * Citation from the TrueType (and OpenType) spec:
             *   The segments are sorted in order of increasing endCode
             *   values, and the segment values are specified in four parallel
             *   arrays. You search for the first endCode that is greater than
             *   or equal to the character code you want to map. If the
             *   corresponding startCode is less than or equal to the
             *   character code, then you use the corresponding idDelta and
             *   idRangeOffset to map the character code to a glyph index
             *   (otherwise, the missingGlyph is returned).
             */

            /*
             * CMAP format4 defines several fields for optimized search of
             * the segment list (entrySelector, searchRange, rangeShift).
             * However, benefits are negligible and some fonts have incorrect
             * data - so we use straightforward binary search (see bug 6247425)
             */
            int left = 0, right = startCount.length;
            index = startCount.length >> 1;
            while (left < right) {
                if (endCount[index] < charCode) {
                    left = index + 1;
                } else {
                    right = index;
                }
                index = (left + right) >> 1;
            }

            if (charCode >= startCount[index] && charCode <= endCount[index]) {
                int rangeOffset = idRangeOffset[index];

                if (rangeOffset == 0) {
                    glyphCode = (char)(charCode + idDelta[index]);
                } else {
                    /* Calculate an index into the glyphIds array */
                    int glyphIDIndex = rangeOffset - segCount + index
                                         + (charCode - startCount[index]);
                    glyphCode = glyphIds[glyphIDIndex];
                    if (glyphCode != 0) {
                        glyphCode = (char)(glyphCode + idDelta[index]);
                    }
                }
            }
            return glyphCode;
        }
    }

    // Format 0: Byte Encoding table
    static class CMapFormat0 extends CMap {
        byte [] cmap;

        CMapFormat0(ByteBuffer buffer, int offset) {

            /* skip 6 bytes of format, length, and version */
            int len = buffer.getChar(offset+2);
            cmap = new byte[len-6];
            buffer.position(offset+6);
            buffer.get(cmap);
        }

        @Override
        char getGlyph(int charCode) {
            if (charCode < 256) {
                if (charCode < 0x0010) {
                    // tab, line feed and carriage return are never rendered
                    switch (charCode) {
                    case 0x0009:
                    case 0x000a:
                    case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
                    }
                }
                return (char)(0xff & cmap[charCode]);
            } else {
                return 0;
            }
        }
    }

    // Format 2: High-byte mapping through table
    static class CMapFormat2 extends CMap {

        char[] subHeaderKey = new char[256];
         /* Store subheaders in individual arrays
          * A SubHeader entry theoretically looks like {
          *   char firstCode;
          *   char entryCount;
          *   short idDelta;
          *   char idRangeOffset;
          * }
          */
        char[] firstCodeArray;
        char[] entryCountArray;
        short[] idDeltaArray;
        char[] idRangeOffSetArray;

        char[] glyphIndexArray;

        CMapFormat2(ByteBuffer buffer, int offset, char[] xlat) {

            this.xlat = xlat;

            int tableLen = buffer.getChar(offset+2);
            // Skip the 6 byte header; the 256 subHeaderKeys come next.
            buffer.position(offset+6);
            CharBuffer cBuffer = buffer.asCharBuffer();
            char maxSubHeader = 0;
            for (int i=0;i<256;i++) {
                subHeaderKey[i] = cBuffer.get();
                if (subHeaderKey[i] > maxSubHeader) {
                    maxSubHeader = subHeaderKey[i];
                }
            }
            /* The value of the subHeaderKey is 8 * the subHeader index,
             * so the number of subHeaders can be obtained by dividing
             * this value by 8 and adding 1.
             */
            int numSubHeaders = (maxSubHeader >> 3) +1;
            firstCodeArray = new char[numSubHeaders];
            entryCountArray = new char[numSubHeaders];
            idDeltaArray  = new short[numSubHeaders];
            idRangeOffSetArray  = new char[numSubHeaders];
            for (int i=0; i<numSubHeaders; i++) {
                firstCodeArray[i] = cBuffer.get();
                entryCountArray[i] = cBuffer.get();
                idDeltaArray[i] = (short)cBuffer.get();
                idRangeOffSetArray[i] = cBuffer.get();
            }

            // 518 = 6 byte header + 256 two byte subHeaderKeys.
            int glyphIndexArrSize = (tableLen-518-numSubHeaders*8)/2;
            glyphIndexArray = new char[glyphIndexArrSize];
            for (int i=0; i<glyphIndexArrSize;i++) {
                glyphIndexArray[i] = cBuffer.get();
            }
        }

        @Override
        char getGlyph(int charCode) {
            int controlGlyph = getControlCodeGlyph(charCode, true);
            if (controlGlyph >= 0) {
                return (char)controlGlyph;
            }

            if (xlat != null) {
                charCode = xlat[charCode];
            }

            char highByte = (char)(charCode >> 8);
            char lowByte = (char)(charCode & 0xff);
            int key = subHeaderKey[highByte]>>3; // index into subHeaders
            char mapMe;

            if (key != 0) {
                mapMe = lowByte;
            } else {
                mapMe = highByte;
                if (mapMe == 0) {
                    mapMe = lowByte;
                }
            }

            char firstCode = firstCodeArray[key];
            if (mapMe < firstCode) {
                return 0;
            } else {
                mapMe -= firstCode;
            }

            if (mapMe < entryCountArray[key]) {
                /* "address" arithmetic is needed to calculate the offset
                 * into glyphIndexArray. "idRangeOffSetArray[key]" specifies
                 * the number of bytes from that location in the table where
                 * the subarray of glyphIndexes starting at "firstCode" begins.
                 * Each entry in the subHeader table is 8 bytes, and the
                 * idRangeOffSetArray field is at offset 6 in the entry.
                 * The glyphIndexArray immediately follows the subHeaders.
                 * So if there are "N" entries then the number of bytes to the
                 * start of glyphIndexArray is (N-key)*8-6.
                 * Subtract this from the idRangeOffSetArray value to get
                 * the number of bytes into glyphIndexArray and divide by 2 to
                 * get the (char) array index.
                 */
                int glyphArrayOffset = ((idRangeOffSetArray.length-key)*8)-6;
                int glyphSubArrayStart =
                        (idRangeOffSetArray[key] - glyphArrayOffset)/2;
                char glyphCode = glyphIndexArray[glyphSubArrayStart+mapMe];
                if (glyphCode != 0) {
                    glyphCode += idDeltaArray[key]; //idDelta
                    return glyphCode;
                }
            }
            return 0;
        }
    }

    // Format 6: Trimmed table mapping
    static class CMapFormat6 extends CMap {

        char firstCode;
        char entryCount;
        char[] glyphIdArray;

        CMapFormat6(ByteBuffer bbuffer, int offset, char[] xlat) {

             /* Retain the translation array, as formats 2 and 4 do, so that
              * getGlyph() can translate unicode into the subtable's encoding.
              */
             this.xlat = xlat;

             // Skip the 6 bytes of format, length and language.
             bbuffer.position(offset+6);
             CharBuffer buffer = bbuffer.asCharBuffer();
             firstCode = buffer.get();
             entryCount = buffer.get();
             glyphIdArray = new char[entryCount];
             for (int i=0; i< entryCount; i++) {
                 glyphIdArray[i] = buffer.get();
             }
         }

         @Override
         char getGlyph(int charCode) {
            int controlGlyph = getControlCodeGlyph(charCode, true);
            if (controlGlyph >= 0) {
                return (char)controlGlyph;
            }

             if (xlat != null) {
                 charCode = xlat[charCode];
             }

             charCode -= firstCode;
             if (charCode < 0 || charCode >= entryCount) {
                  return 0;
             } else {
                  return glyphIdArray[charCode];
             }
         }
    }

    // Format 8: mixed 16-bit and 32-bit coverage
    // Seems unlikely this code will ever get tested as we look for
    // MS platform Cmaps and MS states (in the Opentype spec on their website)
    // that MS doesn't support this format
    // Only the header is validated here: the group arrays are allocated but
    // not filled in, and getGlyph() always reports no glyph.
    static class CMapFormat8 extends CMap {
         byte[] is32 = new byte[8192];
         int nGroups;
         int[] startCharCode;
         int[] endCharCode;
         int[] startGlyphID;

         CMapFormat8(ByteBuffer bbuffer, int offset, char[] xlat) {

             /* Skip the 12 byte subtable header. The position must be
              * relative to the start of this subtable, not to the start of
              * the whole cmap table.
              */
             bbuffer.position(offset+12);
             bbuffer.get(is32);
             nGroups = bbuffer.getInt() & INTMASK;
             // A map group record is three uint32's making for 12 bytes total
             if (bbuffer.remaining() < (12 * (long)nGroups)) {
                 throw new RuntimeException("Format 8 table exceeded");
             }
             startCharCode = new int[nGroups];
             endCharCode   = new int[nGroups];
             startGlyphID  = new int[nGroups];
         }

        @Override
        char getGlyph(int charCode) {
            if (xlat != null) {
                throw new RuntimeException("xlat array for cmap fmt=8");
            }
            return 0;
        }

    }


    // Format 4-byte 10: Trimmed table mapping
    // Seems unlikely this code will ever get tested as we look for
    // MS platform Cmaps and MS states (in the Opentype spec on their website)
    // that MS doesn't support this format
    static class CMapFormat10 extends CMap {

         long firstCode;
         int entryCount;
         char[] glyphIdArray;

         CMapFormat10(ByteBuffer bbuffer, int offset, char[] xlat) {

             // Skip the 12 byte subtable header.
             bbuffer.position(offset+12);
             firstCode = bbuffer.getInt() & INTMASK;
             entryCount = bbuffer.getInt() & INTMASK;
             // each glyph is a uint16, so 2 bytes per value.
             if (bbuffer.remaining() < (2 * (long)entryCount)) {
                 throw new RuntimeException("Format 10 table exceeded");
             }
             CharBuffer buffer = bbuffer.asCharBuffer();
             glyphIdArray = new char[entryCount];
             for (int i=0; i< entryCount; i++) {
                 glyphIdArray[i] = buffer.get();
             }
         }

         @Override
         char getGlyph(int charCode) {

             if (xlat != null) {
                 throw new RuntimeException("xlat array for cmap fmt=10");
             }

             int code = (int)(charCode - firstCode);
             if (code < 0 || code >= entryCount) {
                 return 0;
             } else {
                 return glyphIdArray[code];
             }
         }
    }

    // Format 12: Segmented coverage for UCS-4 (fonts supporting
    // surrogate pairs)
    static class CMapFormat12 extends CMap {

        int numGroups;
        int highBit =0;
        int power;      // largest power of two that is <= numGroups
        int extra;      // numGroups - power
        long[] startCharCode;
        long[] endCharCode;
        int[] startGlyphID;

        CMapFormat12(ByteBuffer buffer, int offset, char[] xlat) {
            if (xlat != null) {
                throw new RuntimeException("xlat array for cmap fmt=12");
            }

            // Skip the 12 byte subtable header.
            buffer.position(offset+12);
            numGroups = buffer.getInt() & INTMASK;
            // A map group record is three uint32's making for 12 bytes total
            if (buffer.remaining() < (12 * (long)numGroups)) {
                throw new RuntimeException("Format 12 table exceeded");
            }
            startCharCode = new long[numGroups];
            endCharCode = new long[numGroups];
            startGlyphID = new int[numGroups];
            buffer = buffer.slice();
            IntBuffer ibuffer = buffer.asIntBuffer();
            for (int i=0; i<numGroups; i++) {
                startCharCode[i] = ibuffer.get() & INTMASK;
                endCharCode[i] = ibuffer.get() & INTMASK;
                startGlyphID[i] = ibuffer.get() & INTMASK;
            }

            /* Finds the high bit by binary searching through the bits */
            int value = numGroups;

            if (value >= 1 << 16) {
                value >>= 16;
                highBit += 16;
            }

            if (value >= 1 << 8) {
                value >>= 8;
                highBit += 8;
            }

            if (value >= 1 << 4) {
                value >>= 4;
                highBit += 4;
            }

            if (value >= 1 << 2) {
                value >>= 2;
                highBit += 2;
            }

            if (value >= 1 << 1) {
                value >>= 1;
                highBit += 1;
            }

            power = 1 << highBit;
            extra = numGroups - power;
        }

        @Override
        char getGlyph(int charCode) {
            int controlGlyph = getControlCodeGlyph(charCode, false);
            if (controlGlyph >= 0) {
                return (char)controlGlyph;
            }
            /* Binary search for the last group whose startCharCode is
             * <= charCode, using the precomputed power of two step size.
             */
            int probe = power;
            int range = 0;

            if (startCharCode[extra] <= charCode) {
                range = extra;
            }

            while (probe > 1) {
                probe >>= 1;

                if (startCharCode[range+probe] <= charCode) {
                    range += probe;
                }
            }

            if (startCharCode[range] <= charCode &&
                  endCharCode[range] >= charCode) {
                return (char)
                    (startGlyphID[range] + (charCode - startCharCode[range]));
            }

            return 0;
        }

    }

    /* Used to substitute for bad Cmaps. */
    static class NullCMapClass extends CMap {

        @Override
        char getGlyph(int charCode) {
            return 0;
        }
    }

    public static final NullCMapClass theNullCmap = new NullCMapClass();

    /*
     * Common handling of code points which are never looked up in the cmap.
     * Returns CharToGlyphMapper.INVISIBLE_GLYPH_ID for tab, line feed and
     * carriage return and for the code points in the ranges 0x200c-0x200f,
     * 0x2028-0x202e and 0x206a-0x206f.
     * Returns 0 if "noSurrogates" is true and charCode is >= 0xFFFF.
     * Returns -1 in all other cases, meaning the caller should map the
     * code point through its cmap data.
     */
    final int getControlCodeGlyph(int charCode, boolean noSurrogates) {
        if (charCode < 0x0010) {
            switch (charCode) {
            case 0x0009:
            case 0x000a:
            case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
            }
        } else if (charCode >= 0x200c) {
            if ((charCode <= 0x200f) ||
                (charCode >= 0x2028 && charCode <= 0x202e) ||
                (charCode >= 0x206a && charCode <= 0x206f)) {
                return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
            } else if (noSurrogates && charCode >= 0xFFFF) {
                return 0;
            }
        }
        return -1;
    }
}