CoCalc -- BreakDictionary.java

GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/sun/util/locale/provider/BreakDictionary.java
⁴⁷⁸⁰⁶ views
1
/*
2
 * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.  Oracle designates this
8
 * particular file as subject to the "Classpath" exception as provided
9
 * by Oracle in the LICENSE file that accompanied this code.
10
 *
11
 * This code is distributed in the hope that it will be useful, but WITHOUT
12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
 * version 2 for more details (a copy is included in the LICENSE file that
15
 * accompanied this code).
16
 *
17
 * You should have received a copy of the GNU General Public License version
18
 * 2 along with this work; if not, write to the Free Software Foundation,
19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
 *
21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
 * or visit www.oracle.com if you need additional information or have any
23
 * questions.
24
 */
25

26
/*
27
 *
28
 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
29
 * (C) Copyright IBM Corp. 1996 - 2002 - All Rights Reserved
30
 *
31
 * The original version of this source code and documentation
32
 * is copyrighted and owned by Taligent, Inc., a wholly-owned
33
 * subsidiary of IBM. These materials are provided under terms
34
 * of a License Agreement between Taligent and Sun. This technology
35
 * is protected by multiple US and International patents.
36
 *
37
 * This notice and attribution to Taligent may not be removed.
38
 * Taligent is a registered trademark of Taligent, Inc.
39
 */
40
package sun.util.locale.provider;
41

42
import java.io.BufferedInputStream;
43
import java.io.IOException;
44
import java.security.AccessController;
45
import java.security.PrivilegedActionException;
46
import java.security.PrivilegedExceptionAction;
47
import java.util.MissingResourceException;
48
import sun.text.CompactByteArray;
49
import sun.text.SupplementaryCharacterData;
50

51
/**
52
 * This is the class that represents the list of known words used by
53
 * DictionaryBasedBreakIterator.  The conceptual data structure used
54
 * here is a trie: there is a node hanging off the root node for every
55
 * letter that can start a word.  Each of these nodes has a node hanging
56
 * off of it for every letter that can be the second letter of a word
57
 * if this node is the first letter, and so on.  The trie is represented
58
 * as a two-dimensional array that can be treated as a table of state
59
 * transitions.  Indexes are used to compress this array, taking
60
 * advantage of the fact that this array will always be very sparse.
61
 */
62
class BreakDictionary {
63

64
    //=========================================================================
65
    // data members
66
    //=========================================================================
67

68
    /**
69
      * The version of the dictionary that was read in.
70
      */
71
    private static int supportedVersion = 1;
72

73
    /**
74
     * Maps from characters to column numbers.  The main use of this is to
75
     * avoid making room in the array for empty columns.
76
     */
77
    private CompactByteArray columnMap = null;
78
    private SupplementaryCharacterData supplementaryCharColumnMap = null;
79

80
    /**
81
     * The number of actual columns in the table
82
     */
83
    private int numCols;
84

85
    /**
86
     * Columns are organized into groups of 32.  This says how many
87
     * column groups.  (We could calculate this, but we store the
88
     * value to avoid having to repeatedly calculate it.)
89
     */
90
    private int numColGroups;
91

92
    /**
93
     * The actual compressed state table.  Each conceptual row represents
94
     * a state, and the cells in it contain the row numbers of the states
95
     * to transition to for each possible letter.  0 is used to indicate
96
     * an illegal combination of letters (i.e., the error state).  The
97
     * table is compressed by eliminating all the unpopulated (i.e., zero)
98
     * cells.  Multiple conceptual rows can then be doubled up in a single
99
     * physical row by sliding them up and possibly shifting them to one
100
     * side or the other so the populated cells don't collide.  Indexes
101
     * are used to identify unpopulated cells and to locate populated cells.
102
     */
103
    private short[] table = null;
104

105
    /**
106
     * This index maps logical row numbers to physical row numbers
107
     */
108
    private short[] rowIndex = null;
109

110
    /**
111
     * A bitmap is used to tell which cells in the comceptual table are
112
     * populated.  This array contains all the unique bit combinations
113
     * in that bitmap.  If the table is more than 32 columns wide,
114
     * successive entries in this array are used for a single row.
115
     */
116
    private int[] rowIndexFlags = null;
117

118
    /**
119
     * This index maps from a logical row number into the bitmap table above.
120
     * (This keeps us from storing duplicate bitmap combinations.)  Since there
121
     * are a lot of rows with only one populated cell, instead of wasting space
122
     * in the bitmap table, we just store a negative number in this index for
123
     * rows with one populated cell.  The absolute value of that number is
124
     * the column number of the populated cell.
125
     */
126
    private short[] rowIndexFlagsIndex = null;
127

128
    /**
129
     * For each logical row, this index contains a constant that is added to
130
     * the logical column number to get the physical column number
131
     */
132
    private byte[] rowIndexShifts = null;
133

134
    //=========================================================================
135
    // deserialization
136
    //=========================================================================
137

138
    BreakDictionary(String dictionaryName)
139
        throws IOException, MissingResourceException {
140

141
        readDictionaryFile(dictionaryName);
142
    }
143

144
    private void readDictionaryFile(final String dictionaryName)
145
        throws IOException, MissingResourceException {
146

147
        BufferedInputStream in;
148
        try {
149
            in = AccessController.doPrivileged(
150
                new PrivilegedExceptionAction<BufferedInputStream>() {
151
                    @Override
152
                    public BufferedInputStream run() throws Exception {
153
                        return new BufferedInputStream(getClass().getResourceAsStream("/sun/text/resources/" + dictionaryName));
154
                    }
155
                }
156
            );
157
        }
158
        catch (PrivilegedActionException e) {
159
            throw new InternalError(e.toString(), e);
160
        }
161

162
        byte[] buf = new byte[8];
163
        if (in.read(buf) != 8) {
164
            throw new MissingResourceException("Wrong data length",
165
                                               dictionaryName, "");
166
        }
167

168
        // check version
169
        int version = RuleBasedBreakIterator.getInt(buf, 0);
170
        if (version != supportedVersion) {
171
            throw new MissingResourceException("Dictionary version(" + version + ") is unsupported",
172
                                                           dictionaryName, "");
173
        }
174

175
        // get data size
176
        int len = RuleBasedBreakIterator.getInt(buf, 4);
177
        buf = new byte[len];
178
        if (in.read(buf) != len) {
179
            throw new MissingResourceException("Wrong data length",
180
                                               dictionaryName, "");
181
        }
182

183
        // close the stream
184
        in.close();
185

186
        int l;
187
        int offset = 0;
188

189
        // read in the column map for BMP characteres (this is serialized in
190
        // its internal form: an index array followed by a data array)
191
        l = RuleBasedBreakIterator.getInt(buf, offset);
192
        offset += 4;
193
        short[] temp = new short[l];
194
        for (int i = 0; i < l; i++, offset+=2) {
195
            temp[i] = RuleBasedBreakIterator.getShort(buf, offset);
196
        }
197
        l = RuleBasedBreakIterator.getInt(buf, offset);
198
        offset += 4;
199
        byte[] temp2 = new byte[l];
200
        for (int i = 0; i < l; i++, offset++) {
201
            temp2[i] = buf[offset];
202
        }
203
        columnMap = new CompactByteArray(temp, temp2);
204

205
        // read in numCols and numColGroups
206
        numCols = RuleBasedBreakIterator.getInt(buf, offset);
207
        offset += 4;
208
        numColGroups = RuleBasedBreakIterator.getInt(buf, offset);
209
        offset += 4;
210

211
        // read in the row-number index
212
        l = RuleBasedBreakIterator.getInt(buf, offset);
213
        offset += 4;
214
        rowIndex = new short[l];
215
        for (int i = 0; i < l; i++, offset+=2) {
216
            rowIndex[i] = RuleBasedBreakIterator.getShort(buf, offset);
217
        }
218

219
        // load in the populated-cells bitmap: index first, then bitmap list
220
        l = RuleBasedBreakIterator.getInt(buf, offset);
221
        offset += 4;
222
        rowIndexFlagsIndex = new short[l];
223
        for (int i = 0; i < l; i++, offset+=2) {
224
            rowIndexFlagsIndex[i] = RuleBasedBreakIterator.getShort(buf, offset);
225
        }
226
        l = RuleBasedBreakIterator.getInt(buf, offset);
227
        offset += 4;
228
        rowIndexFlags = new int[l];
229
        for (int i = 0; i < l; i++, offset+=4) {
230
            rowIndexFlags[i] = RuleBasedBreakIterator.getInt(buf, offset);
231
        }
232

233
        // load in the row-shift index
234
        l = RuleBasedBreakIterator.getInt(buf, offset);
235
        offset += 4;
236
        rowIndexShifts = new byte[l];
237
        for (int i = 0; i < l; i++, offset++) {
238
            rowIndexShifts[i] = buf[offset];
239
        }
240

241
        // load in the actual state table
242
        l = RuleBasedBreakIterator.getInt(buf, offset);
243
        offset += 4;
244
        table = new short[l];
245
        for (int i = 0; i < l; i++, offset+=2) {
246
            table[i] = RuleBasedBreakIterator.getShort(buf, offset);
247
        }
248

249
        // finally, prepare the column map for supplementary characters
250
        l = RuleBasedBreakIterator.getInt(buf, offset);
251
        offset += 4;
252
        int[] temp3 = new int[l];
253
        for (int i = 0; i < l; i++, offset+=4) {
254
            temp3[i] = RuleBasedBreakIterator.getInt(buf, offset);
255
        }
256
        supplementaryCharColumnMap = new SupplementaryCharacterData(temp3);
257
    }
258

259
    //=========================================================================
260
    // access to the words
261
    //=========================================================================
262

263
    /**
264
     * Uses the column map to map the character to a column number, then
265
     * passes the row and column number to getNextState()
266
     * @param row The current state
267
     * @param ch The character whose column we're interested in
268
     * @return The new state to transition to
269
     */
270
    public final short getNextStateFromCharacter(int row, int ch) {
271
        int col;
272
        if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
273
            col = columnMap.elementAt((char)ch);
274
        } else {
275
            col = supplementaryCharColumnMap.getValue(ch);
276
        }
277
        return getNextState(row, col);
278
    }
279

280
    /**
281
     * Returns the value in the cell with the specified (logical) row and
282
     * column numbers.  In DictionaryBasedBreakIterator, the row number is
283
     * a state number, the column number is an input, and the return value
284
     * is the row number of the new state to transition to.  (0 is the
285
     * "error" state, and -1 is the "end of word" state in a dictionary)
286
     * @param row The row number of the current state
287
     * @param col The column number of the input character (0 means "not a
288
     * dictionary character")
289
     * @return The row number of the new state to transition to
290
     */
291
    public final short getNextState(int row, int col) {
292
        if (cellIsPopulated(row, col)) {
293
            // we map from logical to physical row number by looking up the
294
            // mapping in rowIndex; we map from logical column number to
295
            // physical column number by looking up a shift value for this
296
            // logical row and offsetting the logical column number by
297
            // the shift amount.  Then we can use internalAt() to actually
298
            // get the value out of the table.
299
            return internalAt(rowIndex[row], col + rowIndexShifts[row]);
300
        }
301
        else {
302
            return 0;
303
        }
304
    }
305

306
    /**
307
     * Given (logical) row and column numbers, returns true if the
308
     * cell in that position is populated
309
     */
310
    private boolean cellIsPopulated(int row, int col) {
311
        // look up the entry in the bitmap index for the specified row.
312
        // If it's a negative number, it's the column number of the only
313
        // populated cell in the row
314
        if (rowIndexFlagsIndex[row] < 0) {
315
            return col == -rowIndexFlagsIndex[row];
316
        }
317

318
        // if it's a positive number, it's the offset of an entry in the bitmap
319
        // list.  If the table is more than 32 columns wide, the bitmap is stored
320
        // successive entries in the bitmap list, so we have to divide the column
321
        // number by 32 and offset the number we got out of the index by the result.
322
        // Once we have the appropriate piece of the bitmap, test the appropriate
323
        // bit and return the result.
324
        else {
325
            int flags = rowIndexFlags[rowIndexFlagsIndex[row] + (col >> 5)];
326
            return (flags & (1 << (col & 0x1f))) != 0;
327
        }
328
    }
329

330
    /**
331
     * Implementation of getNextState() when we know the specified cell is
332
     * populated.
333
     * @param row The PHYSICAL row number of the cell
334
     * @param col The PHYSICAL column number of the cell
335
     * @return The value stored in the cell
336
     */
337
    private short internalAt(int row, int col) {
338
        // the table is a one-dimensional array, so this just does the math necessary
339
        // to treat it as a two-dimensional array (we don't just use a two-dimensional
340
        // array because two-dimensional arrays are inefficient in Java)
341
        return table[row * numCols + col];
342
    }
343
}
344

345
Product

Resources

Company