CoCalc -- collationdatawriter.cpp

GitHub Repository: wine-mirror/wine
Path: blob/master/libs/icui18n/collationdatawriter.cpp
¹²³⁴³ views
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 2013-2015, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
* collationdatawriter.cpp
9
*
10
* created on: 2013aug06
11
* created by: Markus W. Scherer
12
*/
13

14
#include "unicode/utypes.h"
15

16
#if !UCONFIG_NO_COLLATION
17

18
#include "unicode/tblcoll.h"
19
#include "unicode/udata.h"
20
#include "unicode/uniset.h"
21
#include "cmemory.h"
22
#include "collationdata.h"
23
#include "collationdatabuilder.h"
24
#include "collationdatareader.h"
25
#include "collationdatawriter.h"
26
#include "collationfastlatin.h"
27
#include "collationsettings.h"
28
#include "collationtailoring.h"
29
#include "uassert.h"
30
#include "ucmndata.h"
31

32
U_NAMESPACE_BEGIN
33

34
uint8_t *
35
RuleBasedCollator::cloneRuleData(int32_t &length, UErrorCode &errorCode) const {
36
    if(U_FAILURE(errorCode)) { return NULL; }
37
    LocalMemory<uint8_t> buffer((uint8_t *)uprv_malloc(20000));
38
    if(buffer.isNull()) {
39
        errorCode = U_MEMORY_ALLOCATION_ERROR;
40
        return NULL;
41
    }
42
    length = cloneBinary(buffer.getAlias(), 20000, errorCode);
43
    if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
44
        if(buffer.allocateInsteadAndCopy(length, 0) == NULL) {
45
            errorCode = U_MEMORY_ALLOCATION_ERROR;
46
            return NULL;
47
        }
48
        errorCode = U_ZERO_ERROR;
49
        length = cloneBinary(buffer.getAlias(), length, errorCode);
50
    }
51
    if(U_FAILURE(errorCode)) { return NULL; }
52
    return buffer.orphan();
53
}
54

55
int32_t
56
RuleBasedCollator::cloneBinary(uint8_t *dest, int32_t capacity, UErrorCode &errorCode) const {
57
    int32_t indexes[CollationDataReader::IX_TOTAL_SIZE + 1];
58
    return CollationDataWriter::writeTailoring(
59
            *tailoring, *settings, indexes, dest, capacity,
60
            errorCode);
61
}
62

63
// Template for the UDataInfo part of the header of serialized tailorings.
// write() copies it into each DataHeader and then replaces dataVersion
// with the version passed by its caller.
static const UDataInfo dataInfo = {
    sizeof(UDataInfo),                  // size
    0,                                  // reservedWord

    U_IS_BIG_ENDIAN,                    // isBigEndian
    U_CHARSET_FAMILY,                   // charsetFamily
    U_SIZEOF_UCHAR,                     // sizeofUChar
    0,                                  // reservedByte

    { 0x55, 0x43, 0x6f, 0x6c },         // dataFormat="UCol"
    { 5, 0, 0, 0 },                     // formatVersion
    { 6, 3, 0, 0 }                      // dataVersion
};
76

77
int32_t
78
CollationDataWriter::writeBase(const CollationData &data, const CollationSettings &settings,
79
                               const void *rootElements, int32_t rootElementsLength,
80
                               int32_t indexes[], uint8_t *dest, int32_t capacity,
81
                               UErrorCode &errorCode) {
82
    return write(true, NULL,
83
                 data, settings,
84
                 rootElements, rootElementsLength,
85
                 indexes, dest, capacity, errorCode);
86
}
87

88
int32_t
89
CollationDataWriter::writeTailoring(const CollationTailoring &t, const CollationSettings &settings,
90
                                    int32_t indexes[], uint8_t *dest, int32_t capacity,
91
                                    UErrorCode &errorCode) {
92
    return write(false, t.version,
93
                 *t.data, settings,
94
                 NULL, 0,
95
                 indexes, dest, capacity, errorCode);
96
}
97

98
int32_t
99
CollationDataWriter::write(UBool isBase, const UVersionInfo dataVersion,
100
                           const CollationData &data, const CollationSettings &settings,
101
                           const void *rootElements, int32_t rootElementsLength,
102
                           int32_t indexes[], uint8_t *dest, int32_t capacity,
103
                           UErrorCode &errorCode) {
104
    if(U_FAILURE(errorCode)) { return 0; }
105
    if(capacity < 0 || (capacity > 0 && dest == NULL)) {
106
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
107
        return 0;
108
    }
109

110
    // Figure out which data items to write before settling on
111
    // the indexes length and writing offsets.
112
    // For any data item, we need to write the start and limit offsets,
113
    // so the indexes length must be at least index-of-start-offset + 2.
114
    int32_t indexesLength;
115
    UBool hasMappings;
116
    UnicodeSet unsafeBackwardSet;
117
    const CollationData *baseData = data.base;
118

119
    int32_t fastLatinVersion;
120
    if(data.fastLatinTable != NULL) {
121
        fastLatinVersion = (int32_t)CollationFastLatin::VERSION << 16;
122
    } else {
123
        fastLatinVersion = 0;
124
    }
125
    int32_t fastLatinTableLength = 0;
126

127
    if(isBase) {
128
        // For the root collator, we write an even number of indexes
129
        // so that we start with an 8-aligned offset.
130
        indexesLength = CollationDataReader::IX_TOTAL_SIZE + 1;
131
        U_ASSERT(settings.reorderCodesLength == 0);
132
        hasMappings = true;
133
        unsafeBackwardSet = *data.unsafeBackwardSet;
134
        fastLatinTableLength = data.fastLatinTableLength;
135
    } else if(baseData == NULL) {
136
        hasMappings = false;
137
        if(settings.reorderCodesLength == 0) {
138
            // only options
139
            indexesLength = CollationDataReader::IX_OPTIONS + 1;  // no limit offset here
140
        } else {
141
            // only options, reorder codes, and the reorder table
142
            indexesLength = CollationDataReader::IX_REORDER_TABLE_OFFSET + 2;
143
        }
144
    } else {
145
        hasMappings = true;
146
        // Tailored mappings, and what else?
147
        // Check in ascending order of optional tailoring data items.
148
        indexesLength = CollationDataReader::IX_CE32S_OFFSET + 2;
149
        if(data.contextsLength != 0) {
150
            indexesLength = CollationDataReader::IX_CONTEXTS_OFFSET + 2;
151
        }
152
        unsafeBackwardSet.addAll(*data.unsafeBackwardSet).removeAll(*baseData->unsafeBackwardSet);
153
        if(!unsafeBackwardSet.isEmpty()) {
154
            indexesLength = CollationDataReader::IX_UNSAFE_BWD_OFFSET + 2;
155
        }
156
        if(data.fastLatinTable != baseData->fastLatinTable) {
157
            fastLatinTableLength = data.fastLatinTableLength;
158
            indexesLength = CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET + 2;
159
        }
160
    }
161

162
    UVector32 codesAndRanges(errorCode);
163
    const int32_t *reorderCodes = settings.reorderCodes;
164
    int32_t reorderCodesLength = settings.reorderCodesLength;
165
    if(settings.hasReordering() &&
166
            CollationSettings::reorderTableHasSplitBytes(settings.reorderTable)) {
167
        // Rebuild the full list of reorder ranges.
168
        // The list in the settings is truncated for efficiency.
169
        data.makeReorderRanges(reorderCodes, reorderCodesLength, codesAndRanges, errorCode);
170
        // Write the codes, then the ranges.
171
        for(int32_t i = 0; i < reorderCodesLength; ++i) {
172
            codesAndRanges.insertElementAt(reorderCodes[i], i, errorCode);
173
        }
174
        if(U_FAILURE(errorCode)) { return 0; }
175
        reorderCodes = codesAndRanges.getBuffer();
176
        reorderCodesLength = codesAndRanges.size();
177
    }
178

179
    int32_t headerSize;
180
    if(isBase) {
181
        headerSize = 0;  // udata_create() writes the header
182
    } else {
183
        DataHeader header;
184
        header.dataHeader.magic1 = 0xda;
185
        header.dataHeader.magic2 = 0x27;
186
        uprv_memcpy(&header.info, &dataInfo, sizeof(UDataInfo));
187
        uprv_memcpy(header.info.dataVersion, dataVersion, sizeof(UVersionInfo));
188
        headerSize = (int32_t)sizeof(header);
189
        U_ASSERT((headerSize & 3) == 0);  // multiple of 4 bytes
190
        if(hasMappings && data.cesLength != 0) {
191
            // Sum of the sizes of the data items which are
192
            // not automatically multiples of 8 bytes and which are placed before the CEs.
193
            int32_t sum = headerSize + (indexesLength + reorderCodesLength) * 4;
194
            if((sum & 7) != 0) {
195
                // We need to add padding somewhere so that the 64-bit CEs are 8-aligned.
196
                // We add to the header size here.
197
                // Alternatively, we could increment the indexesLength
198
                // or add a few bytes to the reorderTable.
199
                headerSize += 4;
200
            }
201
        }
202
        header.dataHeader.headerSize = (uint16_t)headerSize;
203
        if(headerSize <= capacity) {
204
            uprv_memcpy(dest, &header, sizeof(header));
205
            // Write 00 bytes so that the padding is not mistaken for a copyright string.
206
            uprv_memset(dest + sizeof(header), 0, headerSize - (int32_t)sizeof(header));
207
            dest += headerSize;
208
            capacity -= headerSize;
209
        } else {
210
            dest = NULL;
211
            capacity = 0;
212
        }
213
    }
214

215
    indexes[CollationDataReader::IX_INDEXES_LENGTH] = indexesLength;
216
    U_ASSERT((settings.options & ~0xffff) == 0);
217
    indexes[CollationDataReader::IX_OPTIONS] =
218
            data.numericPrimary | fastLatinVersion | settings.options;
219
    indexes[CollationDataReader::IX_RESERVED2] = 0;
220
    indexes[CollationDataReader::IX_RESERVED3] = 0;
221

222
    // Byte offsets of data items all start from the start of the indexes.
223
    // We add the headerSize at the very end.
224
    int32_t totalSize = indexesLength * 4;
225

226
    if(hasMappings && (isBase || data.jamoCE32s != baseData->jamoCE32s)) {
227
        indexes[CollationDataReader::IX_JAMO_CE32S_START] = static_cast<int32_t>(data.jamoCE32s - data.ce32s);
228
    } else {
229
        indexes[CollationDataReader::IX_JAMO_CE32S_START] = -1;
230
    }
231

232
    indexes[CollationDataReader::IX_REORDER_CODES_OFFSET] = totalSize;
233
    totalSize += reorderCodesLength * 4;
234

235
    indexes[CollationDataReader::IX_REORDER_TABLE_OFFSET] = totalSize;
236
    if(settings.reorderTable != NULL) {
237
        totalSize += 256;
238
    }
239

240
    indexes[CollationDataReader::IX_TRIE_OFFSET] = totalSize;
241
    if(hasMappings) {
242
        UErrorCode errorCode2 = U_ZERO_ERROR;
243
        int32_t length;
244
        if(totalSize < capacity) {
245
            length = utrie2_serialize(data.trie, dest + totalSize,
246
                                      capacity - totalSize, &errorCode2);
247
        } else {
248
            length = utrie2_serialize(data.trie, NULL, 0, &errorCode2);
249
        }
250
        if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) {
251
            errorCode = errorCode2;
252
            return 0;
253
        }
254
        // The trie size should be a multiple of 8 bytes due to the way
255
        // compactIndex2(UNewTrie2 *trie) currently works.
256
        U_ASSERT((length & 7) == 0);
257
        totalSize += length;
258
    }
259

260
    indexes[CollationDataReader::IX_RESERVED8_OFFSET] = totalSize;
261
    indexes[CollationDataReader::IX_CES_OFFSET] = totalSize;
262
    if(hasMappings && data.cesLength != 0) {
263
        U_ASSERT(((headerSize + totalSize) & 7) == 0);
264
        totalSize += data.cesLength * 8;
265
    }
266

267
    indexes[CollationDataReader::IX_RESERVED10_OFFSET] = totalSize;
268
    indexes[CollationDataReader::IX_CE32S_OFFSET] = totalSize;
269
    if(hasMappings) {
270
        totalSize += data.ce32sLength * 4;
271
    }
272

273
    indexes[CollationDataReader::IX_ROOT_ELEMENTS_OFFSET] = totalSize;
274
    totalSize += rootElementsLength * 4;
275

276
    indexes[CollationDataReader::IX_CONTEXTS_OFFSET] = totalSize;
277
    if(hasMappings) {
278
        totalSize += data.contextsLength * 2;
279
    }
280

281
    indexes[CollationDataReader::IX_UNSAFE_BWD_OFFSET] = totalSize;
282
    if(hasMappings && !unsafeBackwardSet.isEmpty()) {
283
        UErrorCode errorCode2 = U_ZERO_ERROR;
284
        int32_t length;
285
        if(totalSize < capacity) {
286
            uint16_t *p = reinterpret_cast<uint16_t *>(dest + totalSize);
287
            length = unsafeBackwardSet.serialize(
288
                    p, (capacity - totalSize) / 2, errorCode2);
289
        } else {
290
            length = unsafeBackwardSet.serialize(NULL, 0, errorCode2);
291
        }
292
        if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) {
293
            errorCode = errorCode2;
294
            return 0;
295
        }
296
        totalSize += length * 2;
297
    }
298

299
    indexes[CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET] = totalSize;
300
    totalSize += fastLatinTableLength * 2;
301

302
    UnicodeString scripts;
303
    indexes[CollationDataReader::IX_SCRIPTS_OFFSET] = totalSize;
304
    if(isBase) {
305
        scripts.append((UChar)data.numScripts);
306
        scripts.append(reinterpret_cast<const UChar *>(data.scriptsIndex), data.numScripts + 16);
307
        scripts.append(reinterpret_cast<const UChar *>(data.scriptStarts), data.scriptStartsLength);
308
        totalSize += scripts.length() * 2;
309
    }
310

311
    indexes[CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET] = totalSize;
312
    if(isBase) {
313
        totalSize += 256;
314
    }
315

316
    indexes[CollationDataReader::IX_RESERVED18_OFFSET] = totalSize;
317
    indexes[CollationDataReader::IX_TOTAL_SIZE] = totalSize;
318

319
    if(totalSize > capacity) {
320
        errorCode = U_BUFFER_OVERFLOW_ERROR;
321
        return headerSize + totalSize;
322
    }
323

324
    uprv_memcpy(dest, indexes, indexesLength * 4);
325
    copyData(indexes, CollationDataReader::IX_REORDER_CODES_OFFSET, reorderCodes, dest);
326
    copyData(indexes, CollationDataReader::IX_REORDER_TABLE_OFFSET, settings.reorderTable, dest);
327
    // The trie has already been serialized into the dest buffer.
328
    copyData(indexes, CollationDataReader::IX_CES_OFFSET, data.ces, dest);
329
    copyData(indexes, CollationDataReader::IX_CE32S_OFFSET, data.ce32s, dest);
330
    copyData(indexes, CollationDataReader::IX_ROOT_ELEMENTS_OFFSET, rootElements, dest);
331
    copyData(indexes, CollationDataReader::IX_CONTEXTS_OFFSET, data.contexts, dest);
332
    // The unsafeBackwardSet has already been serialized into the dest buffer.
333
    copyData(indexes, CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET, data.fastLatinTable, dest);
334
    copyData(indexes, CollationDataReader::IX_SCRIPTS_OFFSET, scripts.getBuffer(), dest);
335
    copyData(indexes, CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET, data.compressibleBytes, dest);
336

337
    return headerSize + totalSize;
338
}
339

340
void
341
CollationDataWriter::copyData(const int32_t indexes[], int32_t startIndex,
342
                              const void *src, uint8_t *dest) {
343
    int32_t start = indexes[startIndex];
344
    int32_t limit = indexes[startIndex + 1];
345
    if(start < limit) {
346
        uprv_memcpy(dest + start, src, limit - start);
347
    }
348
}
349

350
U_NAMESPACE_END
351

352
#endif  // !UCONFIG_NO_COLLATION
353

354
Product

Resources

Company