Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/sun/text/normalizer/CharTrie.java
38830 views
/*1* Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/24/*25*******************************************************************************26* (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved *27* *28* The original version of this source code and documentation is copyrighted *29* and owned by IBM, These materials are provided under terms of a License *30* Agreement between IBM and Sun. This technology is protected by multiple *31* US and International patents. This notice and attribution to IBM may not *32* to removed. *33*******************************************************************************34*/3536package sun.text.normalizer;3738import java.io.InputStream;39import java.io.DataInputStream;40import java.io.IOException;4142/**43* Trie implementation which stores data in char, 16 bits.44* @author synwee45* @see com.ibm.icu.impl.Trie46* @since release 2.1, Jan 01 200247*/4849// note that i need to handle the block calculations later, since chartrie50// in icu4c uses the same index array.51public class CharTrie extends Trie52{53// public constructors ---------------------------------------------5455/**56* <p>Creates a new Trie with the settings for the trie data.</p>57* <p>Unserialize the 32-bit-aligned input stream and use the data for the58* trie.</p>59* @param inputStream file input stream to a ICU data file, containing60* the trie61* @param dataManipulate object which provides methods to parse the char62* data63* @throws IOException thrown when data reading fails64* @draft 2.165*/66public CharTrie(InputStream inputStream,67DataManipulate dataManipulate) throws IOException68{69super(inputStream, dataManipulate);7071if (!isCharTrie()) {72throw new IllegalArgumentException(73"Data given does not belong to a char trie.");74}75m_friendAgent_ = new FriendAgent();76}7778/**79* Make a dummy CharTrie.80* A dummy trie is an empty runtime trie, used when a real data trie cannot81* be loaded.82*83* The trie always returns the initialValue,84* or the leadUnitValue for lead surrogate code points.85* The Latin-1 part is always set up to be linear.86*87* @param initialValue the initial value that is set for all code points88* @param leadUnitValue the value for lead surrogate code _units_ that do not89* have associated supplementary data90* @param dataManipulate object which provides methods to parse the char data91*/92public CharTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {93super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);9495int dataLength, latin1Length, i, limit;96char block;9798/* calculate the actual size of the dummy trie data */99100/* max(Latin-1, block 0) */101dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH;102if(leadUnitValue!=initialValue) {103dataLength+=DATA_BLOCK_LENGTH;104}105m_data_=new char[dataLength];106m_dataLength_=dataLength;107108m_initialValue_=(char)initialValue;109110/* fill the index and data arrays */111112/* indexes are preset to 0 (block 0) */113114/* Latin-1 data */115for(i=0; i<latin1Length; ++i) {116m_data_[i]=(char)initialValue;117}118119if(leadUnitValue!=initialValue) {120/* indexes for lead surrogate code units to the block after Latin-1 */121block=(char)(latin1Length>>INDEX_STAGE_2_SHIFT_);122i=0xd800>>INDEX_STAGE_1_SHIFT_;123limit=0xdc00>>INDEX_STAGE_1_SHIFT_;124for(; i<limit; ++i) {125m_index_[i]=block;126}127128/* data for lead surrogate code units */129limit=latin1Length+DATA_BLOCK_LENGTH;130for(i=latin1Length; i<limit; ++i) {131m_data_[i]=(char)leadUnitValue;132}133}134135m_friendAgent_ = new FriendAgent();136}137138/**139* Java friend implementation140*/141public class FriendAgent142{143/**144* Gives out the index array of the trie145* @return index array of trie146*/147public char[] getPrivateIndex()148{149return m_index_;150}151/**152* Gives out the data array of the trie153* @return data array of trie154*/155public char[] getPrivateData()156{157return m_data_;158}159/**160* Gives out the data offset in the trie161* @return data offset in the trie162*/163public int getPrivateInitialValue()164{165return m_initialValue_;166}167}168169// public methods --------------------------------------------------170171/**172* Java friend implementation173* To store the index and data array into the argument.174* @param friend java friend UCharacterProperty object to store the array175*/176public void putIndexData(UCharacterProperty friend)177{178friend.setIndexData(m_friendAgent_);179}180181/**182* Gets the value associated with the codepoint.183* If no value is associated with the codepoint, a default value will be184* returned.185* @param ch codepoint186* @return offset to data187* @draft 2.1188*/189public final char getCodePointValue(int ch)190{191int offset;192193// fastpath for U+0000..U+D7FF194if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {195// copy of getRawOffset()196offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)197+ (ch & INDEX_STAGE_3_MASK_);198return m_data_[offset];199}200201// handle U+D800..U+10FFFF202offset = getCodePointOffset(ch);203204// return -1 if there is an error, in this case we return the default205// value: m_initialValue_206return (offset >= 0) ? m_data_[offset] : m_initialValue_;207}208209/**210* Gets the value to the data which this lead surrogate character points211* to.212* Returned data may contain folding offset information for the next213* trailing surrogate character.214* This method does not guarantee correct results for trail surrogates.215* @param ch lead surrogate character216* @return data value217* @draft 2.1218*/219public final char getLeadValue(char ch)220{221return m_data_[getLeadOffset(ch)];222}223224/**225* Get the value associated with a pair of surrogates.226* @param lead a lead surrogate227* @param trail a trail surrogate228* @draft 2.1229*/230public final char getSurrogateValue(char lead, char trail)231{232int offset = getSurrogateOffset(lead, trail);233if (offset > 0) {234return m_data_[offset];235}236return m_initialValue_;237}238239/**240* <p>Get a value from a folding offset (from the value of a lead surrogate)241* and a trail surrogate.</p>242* <p>If the243* @param leadvalue value associated with the lead surrogate which contains244* the folding offset245* @param trail surrogate246* @return trie data value associated with the trail character247* @draft 2.1248*/249public final char getTrailValue(int leadvalue, char trail)250{251if (m_dataManipulate_ == null) {252throw new NullPointerException(253"The field DataManipulate in this Trie is null");254}255int offset = m_dataManipulate_.getFoldingOffset(leadvalue);256if (offset > 0) {257return m_data_[getRawOffset(offset,258(char)(trail & SURROGATE_MASK_))];259}260return m_initialValue_;261}262263// protected methods -----------------------------------------------264265/**266* <p>Parses the input stream and stores its trie content into a index and267* data array</p>268* @param inputStream data input stream containing trie data269* @exception IOException thrown when data reading fails270*/271protected final void unserialize(InputStream inputStream)272throws IOException273{274DataInputStream input = new DataInputStream(inputStream);275int indexDataLength = m_dataOffset_ + m_dataLength_;276m_index_ = new char[indexDataLength];277for (int i = 0; i < indexDataLength; i ++) {278m_index_[i] = input.readChar();279}280m_data_ = m_index_;281m_initialValue_ = m_data_[m_dataOffset_];282}283284/**285* Gets the offset to the data which the surrogate pair points to.286* @param lead lead surrogate287* @param trail trailing surrogate288* @return offset to data289* @draft 2.1290*/291protected final int getSurrogateOffset(char lead, char trail)292{293if (m_dataManipulate_ == null) {294throw new NullPointerException(295"The field DataManipulate in this Trie is null");296}297298// get fold position for the next trail surrogate299int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));300301// get the real data from the folded lead/trail units302if (offset > 0) {303return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));304}305306// return -1 if there is an error, in this case we return the default307// value: m_initialValue_308return -1;309}310311/**312* Gets the value at the argument index.313* For use internally in TrieIterator.314* @param index value at index will be retrieved315* @return 32 bit value316* @see com.ibm.icu.impl.TrieIterator317* @draft 2.1318*/319protected final int getValue(int index)320{321return m_data_[index];322}323324/**325* Gets the default initial value326* @return 32 bit value327* @draft 2.1328*/329protected final int getInitialValue()330{331return m_initialValue_;332}333334// private data members --------------------------------------------335336/**337* Default value338*/339private char m_initialValue_;340/**341* Array of char data342*/343private char m_data_[];344/**345* Agent for friends346*/347private FriendAgent m_friendAgent_;348}349350351