Path: blob/master/libs/icui18n/collationdatawriter.cpp
12343 views
// © 2016 and later: Unicode, Inc. and others.1// License & terms of use: http://www.unicode.org/copyright.html2/*3*******************************************************************************4* Copyright (C) 2013-2015, International Business Machines5* Corporation and others. All Rights Reserved.6*******************************************************************************7* collationdatawriter.cpp8*9* created on: 2013aug0610* created by: Markus W. Scherer11*/1213#include "unicode/utypes.h"1415#if !UCONFIG_NO_COLLATION1617#include "unicode/tblcoll.h"18#include "unicode/udata.h"19#include "unicode/uniset.h"20#include "cmemory.h"21#include "collationdata.h"22#include "collationdatabuilder.h"23#include "collationdatareader.h"24#include "collationdatawriter.h"25#include "collationfastlatin.h"26#include "collationsettings.h"27#include "collationtailoring.h"28#include "uassert.h"29#include "ucmndata.h"3031U_NAMESPACE_BEGIN3233uint8_t *34RuleBasedCollator::cloneRuleData(int32_t &length, UErrorCode &errorCode) const {35if(U_FAILURE(errorCode)) { return NULL; }36LocalMemory<uint8_t> buffer((uint8_t *)uprv_malloc(20000));37if(buffer.isNull()) {38errorCode = U_MEMORY_ALLOCATION_ERROR;39return NULL;40}41length = cloneBinary(buffer.getAlias(), 20000, errorCode);42if(errorCode == U_BUFFER_OVERFLOW_ERROR) {43if(buffer.allocateInsteadAndCopy(length, 0) == NULL) {44errorCode = U_MEMORY_ALLOCATION_ERROR;45return NULL;46}47errorCode = U_ZERO_ERROR;48length = cloneBinary(buffer.getAlias(), length, errorCode);49}50if(U_FAILURE(errorCode)) { return NULL; }51return buffer.orphan();52}5354int32_t55RuleBasedCollator::cloneBinary(uint8_t *dest, int32_t capacity, UErrorCode &errorCode) const {56int32_t indexes[CollationDataReader::IX_TOTAL_SIZE + 1];57return CollationDataWriter::writeTailoring(58*tailoring, *settings, indexes, dest, capacity,59errorCode);60}6162static const UDataInfo dataInfo = {63sizeof(UDataInfo),640,6566U_IS_BIG_ENDIAN,67U_CHARSET_FAMILY,68U_SIZEOF_UCHAR,690,7071{ 0x55, 0x43, 0x6f, 0x6c }, // dataFormat="UCol"72{ 5, 0, 0, 0 }, // formatVersion73{ 6, 3, 0, 0 } // dataVersion74};7576int32_t77CollationDataWriter::writeBase(const CollationData &data, const CollationSettings &settings,78const void *rootElements, int32_t rootElementsLength,79int32_t indexes[], uint8_t *dest, int32_t capacity,80UErrorCode &errorCode) {81return write(true, NULL,82data, settings,83rootElements, rootElementsLength,84indexes, dest, capacity, errorCode);85}8687int32_t88CollationDataWriter::writeTailoring(const CollationTailoring &t, const CollationSettings &settings,89int32_t indexes[], uint8_t *dest, int32_t capacity,90UErrorCode &errorCode) {91return write(false, t.version,92*t.data, settings,93NULL, 0,94indexes, dest, capacity, errorCode);95}9697int32_t98CollationDataWriter::write(UBool isBase, const UVersionInfo dataVersion,99const CollationData &data, const CollationSettings &settings,100const void *rootElements, int32_t rootElementsLength,101int32_t indexes[], uint8_t *dest, int32_t capacity,102UErrorCode &errorCode) {103if(U_FAILURE(errorCode)) { return 0; }104if(capacity < 0 || (capacity > 0 && dest == NULL)) {105errorCode = U_ILLEGAL_ARGUMENT_ERROR;106return 0;107}108109// Figure out which data items to write before settling on110// the indexes length and writing offsets.111// For any data item, we need to write the start and limit offsets,112// so the indexes length must be at least index-of-start-offset + 2.113int32_t indexesLength;114UBool hasMappings;115UnicodeSet unsafeBackwardSet;116const CollationData *baseData = data.base;117118int32_t fastLatinVersion;119if(data.fastLatinTable != NULL) {120fastLatinVersion = (int32_t)CollationFastLatin::VERSION << 16;121} else {122fastLatinVersion = 0;123}124int32_t fastLatinTableLength = 0;125126if(isBase) {127// For the root collator, we write an even number of indexes128// so that we start with an 8-aligned offset.129indexesLength = CollationDataReader::IX_TOTAL_SIZE + 1;130U_ASSERT(settings.reorderCodesLength == 0);131hasMappings = true;132unsafeBackwardSet = *data.unsafeBackwardSet;133fastLatinTableLength = data.fastLatinTableLength;134} else if(baseData == NULL) {135hasMappings = false;136if(settings.reorderCodesLength == 0) {137// only options138indexesLength = CollationDataReader::IX_OPTIONS + 1; // no limit offset here139} else {140// only options, reorder codes, and the reorder table141indexesLength = CollationDataReader::IX_REORDER_TABLE_OFFSET + 2;142}143} else {144hasMappings = true;145// Tailored mappings, and what else?146// Check in ascending order of optional tailoring data items.147indexesLength = CollationDataReader::IX_CE32S_OFFSET + 2;148if(data.contextsLength != 0) {149indexesLength = CollationDataReader::IX_CONTEXTS_OFFSET + 2;150}151unsafeBackwardSet.addAll(*data.unsafeBackwardSet).removeAll(*baseData->unsafeBackwardSet);152if(!unsafeBackwardSet.isEmpty()) {153indexesLength = CollationDataReader::IX_UNSAFE_BWD_OFFSET + 2;154}155if(data.fastLatinTable != baseData->fastLatinTable) {156fastLatinTableLength = data.fastLatinTableLength;157indexesLength = CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET + 2;158}159}160161UVector32 codesAndRanges(errorCode);162const int32_t *reorderCodes = settings.reorderCodes;163int32_t reorderCodesLength = settings.reorderCodesLength;164if(settings.hasReordering() &&165CollationSettings::reorderTableHasSplitBytes(settings.reorderTable)) {166// Rebuild the full list of reorder ranges.167// The list in the settings is truncated for efficiency.168data.makeReorderRanges(reorderCodes, reorderCodesLength, codesAndRanges, errorCode);169// Write the codes, then the ranges.170for(int32_t i = 0; i < reorderCodesLength; ++i) {171codesAndRanges.insertElementAt(reorderCodes[i], i, errorCode);172}173if(U_FAILURE(errorCode)) { return 0; }174reorderCodes = codesAndRanges.getBuffer();175reorderCodesLength = codesAndRanges.size();176}177178int32_t headerSize;179if(isBase) {180headerSize = 0; // udata_create() writes the header181} else {182DataHeader header;183header.dataHeader.magic1 = 0xda;184header.dataHeader.magic2 = 0x27;185uprv_memcpy(&header.info, &dataInfo, sizeof(UDataInfo));186uprv_memcpy(header.info.dataVersion, dataVersion, sizeof(UVersionInfo));187headerSize = (int32_t)sizeof(header);188U_ASSERT((headerSize & 3) == 0); // multiple of 4 bytes189if(hasMappings && data.cesLength != 0) {190// Sum of the sizes of the data items which are191// not automatically multiples of 8 bytes and which are placed before the CEs.192int32_t sum = headerSize + (indexesLength + reorderCodesLength) * 4;193if((sum & 7) != 0) {194// We need to add padding somewhere so that the 64-bit CEs are 8-aligned.195// We add to the header size here.196// Alternatively, we could increment the indexesLength197// or add a few bytes to the reorderTable.198headerSize += 4;199}200}201header.dataHeader.headerSize = (uint16_t)headerSize;202if(headerSize <= capacity) {203uprv_memcpy(dest, &header, sizeof(header));204// Write 00 bytes so that the padding is not mistaken for a copyright string.205uprv_memset(dest + sizeof(header), 0, headerSize - (int32_t)sizeof(header));206dest += headerSize;207capacity -= headerSize;208} else {209dest = NULL;210capacity = 0;211}212}213214indexes[CollationDataReader::IX_INDEXES_LENGTH] = indexesLength;215U_ASSERT((settings.options & ~0xffff) == 0);216indexes[CollationDataReader::IX_OPTIONS] =217data.numericPrimary | fastLatinVersion | settings.options;218indexes[CollationDataReader::IX_RESERVED2] = 0;219indexes[CollationDataReader::IX_RESERVED3] = 0;220221// Byte offsets of data items all start from the start of the indexes.222// We add the headerSize at the very end.223int32_t totalSize = indexesLength * 4;224225if(hasMappings && (isBase || data.jamoCE32s != baseData->jamoCE32s)) {226indexes[CollationDataReader::IX_JAMO_CE32S_START] = static_cast<int32_t>(data.jamoCE32s - data.ce32s);227} else {228indexes[CollationDataReader::IX_JAMO_CE32S_START] = -1;229}230231indexes[CollationDataReader::IX_REORDER_CODES_OFFSET] = totalSize;232totalSize += reorderCodesLength * 4;233234indexes[CollationDataReader::IX_REORDER_TABLE_OFFSET] = totalSize;235if(settings.reorderTable != NULL) {236totalSize += 256;237}238239indexes[CollationDataReader::IX_TRIE_OFFSET] = totalSize;240if(hasMappings) {241UErrorCode errorCode2 = U_ZERO_ERROR;242int32_t length;243if(totalSize < capacity) {244length = utrie2_serialize(data.trie, dest + totalSize,245capacity - totalSize, &errorCode2);246} else {247length = utrie2_serialize(data.trie, NULL, 0, &errorCode2);248}249if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) {250errorCode = errorCode2;251return 0;252}253// The trie size should be a multiple of 8 bytes due to the way254// compactIndex2(UNewTrie2 *trie) currently works.255U_ASSERT((length & 7) == 0);256totalSize += length;257}258259indexes[CollationDataReader::IX_RESERVED8_OFFSET] = totalSize;260indexes[CollationDataReader::IX_CES_OFFSET] = totalSize;261if(hasMappings && data.cesLength != 0) {262U_ASSERT(((headerSize + totalSize) & 7) == 0);263totalSize += data.cesLength * 8;264}265266indexes[CollationDataReader::IX_RESERVED10_OFFSET] = totalSize;267indexes[CollationDataReader::IX_CE32S_OFFSET] = totalSize;268if(hasMappings) {269totalSize += data.ce32sLength * 4;270}271272indexes[CollationDataReader::IX_ROOT_ELEMENTS_OFFSET] = totalSize;273totalSize += rootElementsLength * 4;274275indexes[CollationDataReader::IX_CONTEXTS_OFFSET] = totalSize;276if(hasMappings) {277totalSize += data.contextsLength * 2;278}279280indexes[CollationDataReader::IX_UNSAFE_BWD_OFFSET] = totalSize;281if(hasMappings && !unsafeBackwardSet.isEmpty()) {282UErrorCode errorCode2 = U_ZERO_ERROR;283int32_t length;284if(totalSize < capacity) {285uint16_t *p = reinterpret_cast<uint16_t *>(dest + totalSize);286length = unsafeBackwardSet.serialize(287p, (capacity - totalSize) / 2, errorCode2);288} else {289length = unsafeBackwardSet.serialize(NULL, 0, errorCode2);290}291if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) {292errorCode = errorCode2;293return 0;294}295totalSize += length * 2;296}297298indexes[CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET] = totalSize;299totalSize += fastLatinTableLength * 2;300301UnicodeString scripts;302indexes[CollationDataReader::IX_SCRIPTS_OFFSET] = totalSize;303if(isBase) {304scripts.append((UChar)data.numScripts);305scripts.append(reinterpret_cast<const UChar *>(data.scriptsIndex), data.numScripts + 16);306scripts.append(reinterpret_cast<const UChar *>(data.scriptStarts), data.scriptStartsLength);307totalSize += scripts.length() * 2;308}309310indexes[CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET] = totalSize;311if(isBase) {312totalSize += 256;313}314315indexes[CollationDataReader::IX_RESERVED18_OFFSET] = totalSize;316indexes[CollationDataReader::IX_TOTAL_SIZE] = totalSize;317318if(totalSize > capacity) {319errorCode = U_BUFFER_OVERFLOW_ERROR;320return headerSize + totalSize;321}322323uprv_memcpy(dest, indexes, indexesLength * 4);324copyData(indexes, CollationDataReader::IX_REORDER_CODES_OFFSET, reorderCodes, dest);325copyData(indexes, CollationDataReader::IX_REORDER_TABLE_OFFSET, settings.reorderTable, dest);326// The trie has already been serialized into the dest buffer.327copyData(indexes, CollationDataReader::IX_CES_OFFSET, data.ces, dest);328copyData(indexes, CollationDataReader::IX_CE32S_OFFSET, data.ce32s, dest);329copyData(indexes, CollationDataReader::IX_ROOT_ELEMENTS_OFFSET, rootElements, dest);330copyData(indexes, CollationDataReader::IX_CONTEXTS_OFFSET, data.contexts, dest);331// The unsafeBackwardSet has already been serialized into the dest buffer.332copyData(indexes, CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET, data.fastLatinTable, dest);333copyData(indexes, CollationDataReader::IX_SCRIPTS_OFFSET, scripts.getBuffer(), dest);334copyData(indexes, CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET, data.compressibleBytes, dest);335336return headerSize + totalSize;337}338339void340CollationDataWriter::copyData(const int32_t indexes[], int32_t startIndex,341const void *src, uint8_t *dest) {342int32_t start = indexes[startIndex];343int32_t limit = indexes[startIndex + 1];344if(start < limit) {345uprv_memcpy(dest + start, src, limit - start);346}347}348349U_NAMESPACE_END350351#endif // !UCONFIG_NO_COLLATION352353354