Path: blob/master/thirdparty/brotli/common/transform.c
21549 views
/* Copyright 2013 Google Inc. All Rights Reserved.12Distributed under MIT license.3See file LICENSE for detail or copy at https://opensource.org/licenses/MIT4*/56#include "platform.h"7#include "transform.h"89#if defined(__cplusplus) || defined(c_plusplus)10extern "C" {11#endif1213/* RFC 7932 transforms string data */14static const BROTLI_MODEL("small") char kPrefixSuffix[217] =15"\1 \2, \10 of the \4 of \2s \1.\5 and \4 "16/* 0x _0 _2 __5 _E _3 _6 _8 _E */17"in \1\"\4 to \2\">\1\n\2. \1]\5 for \3 a \6 "18/* 2x _3_ _5 _A_ _D_ _F _2 _4 _A _E */19"that \1\'\6 with \6 from \4 by \1(\6. T"20/* 4x _5_ _7 _E _5 _A _C */21"he \4 on \4 as \4 is \4ing \2\n\t\1:\3ed "22/* 6x _3 _8 _D _2 _7_ _ _A _C */23"\2=\"\4 at \3ly \1,\2=\'\5.com/\7. This \5"24/* 8x _0 _ _3 _8 _C _E _ _1 _7 _F */25" not \3er \3al \4ful \4ive \5less \4es"26/* Ax _5 _9 _D _2 _7 _D */27"t \4ize \2\xc2\xa0\4ous \5 the \2e "; /* \0 - implicit trailing zero. */28/* Cx _2 _7___ ___ _A _F _5 _8 */2930static const BROTLI_MODEL("small") uint16_t kPrefixSuffixMap[50] = {310x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25,320x2A, 0x2D, 0x2F, 0x32, 0x34, 0x3A, 0x3E, 0x45, 0x47, 0x4E,330x55, 0x5A, 0x5C, 0x63, 0x68, 0x6D, 0x72, 0x77, 0x7A, 0x7C,340x80, 0x83, 0x88, 0x8C, 0x8E, 0x91, 0x97, 0x9F, 0xA5, 0xA9,350xAD, 0xB2, 0xB7, 0xBD, 0xC2, 0xC7, 0xCA, 0xCF, 0xD5, 0xD836};3738/* RFC 7932 transforms */39static const BROTLI_MODEL("small") uint8_t kTransformsData[] = {4049, BROTLI_TRANSFORM_IDENTITY, 49,4149, BROTLI_TRANSFORM_IDENTITY, 0,420, BROTLI_TRANSFORM_IDENTITY, 0,4349, BROTLI_TRANSFORM_OMIT_FIRST_1, 49,4449, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,4549, BROTLI_TRANSFORM_IDENTITY, 47,460, BROTLI_TRANSFORM_IDENTITY, 49,474, BROTLI_TRANSFORM_IDENTITY, 0,4849, BROTLI_TRANSFORM_IDENTITY, 3,4949, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,5049, BROTLI_TRANSFORM_IDENTITY, 6,5149, BROTLI_TRANSFORM_OMIT_FIRST_2, 49,5249, BROTLI_TRANSFORM_OMIT_LAST_1, 49,531, BROTLI_TRANSFORM_IDENTITY, 0,5449, BROTLI_TRANSFORM_IDENTITY, 1,550, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,5649, BROTLI_TRANSFORM_IDENTITY, 7,5749, BROTLI_TRANSFORM_IDENTITY, 9,5848, BROTLI_TRANSFORM_IDENTITY, 0,5949, BROTLI_TRANSFORM_IDENTITY, 8,6049, BROTLI_TRANSFORM_IDENTITY, 5,6149, BROTLI_TRANSFORM_IDENTITY, 10,6249, BROTLI_TRANSFORM_IDENTITY, 11,6349, BROTLI_TRANSFORM_OMIT_LAST_3, 49,6449, BROTLI_TRANSFORM_IDENTITY, 13,6549, BROTLI_TRANSFORM_IDENTITY, 14,6649, BROTLI_TRANSFORM_OMIT_FIRST_3, 49,6749, BROTLI_TRANSFORM_OMIT_LAST_2, 49,6849, BROTLI_TRANSFORM_IDENTITY, 15,6949, BROTLI_TRANSFORM_IDENTITY, 16,700, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,7149, BROTLI_TRANSFORM_IDENTITY, 12,725, BROTLI_TRANSFORM_IDENTITY, 49,730, BROTLI_TRANSFORM_IDENTITY, 1,7449, BROTLI_TRANSFORM_OMIT_FIRST_4, 49,7549, BROTLI_TRANSFORM_IDENTITY, 18,7649, BROTLI_TRANSFORM_IDENTITY, 17,7749, BROTLI_TRANSFORM_IDENTITY, 19,7849, BROTLI_TRANSFORM_IDENTITY, 20,7949, BROTLI_TRANSFORM_OMIT_FIRST_5, 49,8049, BROTLI_TRANSFORM_OMIT_FIRST_6, 49,8147, BROTLI_TRANSFORM_IDENTITY, 49,8249, BROTLI_TRANSFORM_OMIT_LAST_4, 49,8349, BROTLI_TRANSFORM_IDENTITY, 22,8449, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,8549, BROTLI_TRANSFORM_IDENTITY, 23,8649, BROTLI_TRANSFORM_IDENTITY, 24,8749, BROTLI_TRANSFORM_IDENTITY, 25,8849, BROTLI_TRANSFORM_OMIT_LAST_7, 49,8949, BROTLI_TRANSFORM_OMIT_LAST_1, 26,9049, BROTLI_TRANSFORM_IDENTITY, 27,9149, BROTLI_TRANSFORM_IDENTITY, 28,920, BROTLI_TRANSFORM_IDENTITY, 12,9349, BROTLI_TRANSFORM_IDENTITY, 29,9449, BROTLI_TRANSFORM_OMIT_FIRST_9, 49,9549, BROTLI_TRANSFORM_OMIT_FIRST_7, 49,9649, BROTLI_TRANSFORM_OMIT_LAST_6, 49,9749, BROTLI_TRANSFORM_IDENTITY, 21,9849, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,9949, BROTLI_TRANSFORM_OMIT_LAST_8, 49,10049, BROTLI_TRANSFORM_IDENTITY, 31,10149, BROTLI_TRANSFORM_IDENTITY, 32,10247, BROTLI_TRANSFORM_IDENTITY, 3,10349, BROTLI_TRANSFORM_OMIT_LAST_5, 49,10449, BROTLI_TRANSFORM_OMIT_LAST_9, 49,1050, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,10649, BROTLI_TRANSFORM_UPPERCASE_FIRST, 8,1075, BROTLI_TRANSFORM_IDENTITY, 21,10849, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,10949, BROTLI_TRANSFORM_UPPERCASE_FIRST, 10,11049, BROTLI_TRANSFORM_IDENTITY, 30,1110, BROTLI_TRANSFORM_IDENTITY, 5,11235, BROTLI_TRANSFORM_IDENTITY, 49,11347, BROTLI_TRANSFORM_IDENTITY, 2,11449, BROTLI_TRANSFORM_UPPERCASE_FIRST, 17,11549, BROTLI_TRANSFORM_IDENTITY, 36,11649, BROTLI_TRANSFORM_IDENTITY, 33,1175, BROTLI_TRANSFORM_IDENTITY, 0,11849, BROTLI_TRANSFORM_UPPERCASE_FIRST, 21,11949, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,12049, BROTLI_TRANSFORM_IDENTITY, 37,1210, BROTLI_TRANSFORM_IDENTITY, 30,12249, BROTLI_TRANSFORM_IDENTITY, 38,1230, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,12449, BROTLI_TRANSFORM_IDENTITY, 39,1250, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,12649, BROTLI_TRANSFORM_IDENTITY, 34,12749, BROTLI_TRANSFORM_UPPERCASE_ALL, 8,12849, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,1290, BROTLI_TRANSFORM_IDENTITY, 21,13049, BROTLI_TRANSFORM_IDENTITY, 40,1310, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,13249, BROTLI_TRANSFORM_IDENTITY, 41,13349, BROTLI_TRANSFORM_IDENTITY, 42,13449, BROTLI_TRANSFORM_UPPERCASE_ALL, 17,13549, BROTLI_TRANSFORM_IDENTITY, 43,1360, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,13749, BROTLI_TRANSFORM_UPPERCASE_ALL, 10,1380, BROTLI_TRANSFORM_IDENTITY, 34,13949, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,14049, BROTLI_TRANSFORM_IDENTITY, 44,14149, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,14245, BROTLI_TRANSFORM_IDENTITY, 49,1430, BROTLI_TRANSFORM_IDENTITY, 33,14449, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,14549, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,14649, BROTLI_TRANSFORM_IDENTITY, 46,14749, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,14849, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,1490, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,1500, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,1510, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,15249, BROTLI_TRANSFORM_UPPERCASE_ALL, 33,15349, BROTLI_TRANSFORM_UPPERCASE_ALL, 21,15449, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,1550, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,15649, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,1570, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,1580, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,1590, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,1600, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,161};162163static const BROTLI_MODEL("small")164BrotliTransforms kBrotliTransforms = {165sizeof(kPrefixSuffix),166(const uint8_t*)kPrefixSuffix,167kPrefixSuffixMap,168sizeof(kTransformsData) / (3 * sizeof(kTransformsData[0])),169kTransformsData,170NULL, /* no extra parameters */171{0, 12, 27, 23, 42, 63, 56, 48, 59, 64}172};173174const BrotliTransforms* BrotliGetTransforms(void) {175return &kBrotliTransforms;176}177178static int ToUpperCase(uint8_t* p) {179if (p[0] < 0xC0) {180if (p[0] >= 'a' && p[0] <= 'z') {181p[0] ^= 32;182}183return 1;184}185/* An overly simplified uppercasing model for UTF-8. */186if (p[0] < 0xE0) {187p[1] ^= 32;188return 2;189}190/* An arbitrary transform for three byte characters. */191p[2] ^= 5;192return 3;193}194195static int Shift(uint8_t* word, int word_len, uint16_t parameter) {196/* Limited sign extension: scalar < (1 << 24). */197uint32_t scalar =198(parameter & 0x7FFFu) + (0x1000000u - (parameter & 0x8000u));199if (word[0] < 0x80) {200/* 1-byte rune / 0sssssss / 7 bit scalar (ASCII). */201scalar += (uint32_t)word[0];202word[0] = (uint8_t)(scalar & 0x7Fu);203return 1;204} else if (word[0] < 0xC0) {205/* Continuation / 10AAAAAA. */206return 1;207} else if (word[0] < 0xE0) {208/* 2-byte rune / 110sssss AAssssss / 11 bit scalar. */209if (word_len < 2) return 1;210scalar += (uint32_t)((word[1] & 0x3Fu) | ((word[0] & 0x1Fu) << 6u));211word[0] = (uint8_t)(0xC0 | ((scalar >> 6u) & 0x1F));212word[1] = (uint8_t)((word[1] & 0xC0) | (scalar & 0x3F));213return 2;214} else if (word[0] < 0xF0) {215/* 3-byte rune / 1110ssss AAssssss BBssssss / 16 bit scalar. */216if (word_len < 3) return word_len;217scalar += (uint32_t)((word[2] & 0x3Fu) | ((word[1] & 0x3Fu) << 6u) |218((word[0] & 0x0Fu) << 12u));219word[0] = (uint8_t)(0xE0 | ((scalar >> 12u) & 0x0F));220word[1] = (uint8_t)((word[1] & 0xC0) | ((scalar >> 6u) & 0x3F));221word[2] = (uint8_t)((word[2] & 0xC0) | (scalar & 0x3F));222return 3;223} else if (word[0] < 0xF8) {224/* 4-byte rune / 11110sss AAssssss BBssssss CCssssss / 21 bit scalar. */225if (word_len < 4) return word_len;226scalar += (uint32_t)((word[3] & 0x3Fu) | ((word[2] & 0x3Fu) << 6u) |227((word[1] & 0x3Fu) << 12u) | ((word[0] & 0x07u) << 18u));228word[0] = (uint8_t)(0xF0 | ((scalar >> 18u) & 0x07));229word[1] = (uint8_t)((word[1] & 0xC0) | ((scalar >> 12u) & 0x3F));230word[2] = (uint8_t)((word[2] & 0xC0) | ((scalar >> 6u) & 0x3F));231word[3] = (uint8_t)((word[3] & 0xC0) | (scalar & 0x3F));232return 4;233}234return 1;235}236237int BrotliTransformDictionaryWord(uint8_t* dst, const uint8_t* word, int len,238const BrotliTransforms* transforms, int transform_idx) {239int idx = 0;240const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, transform_idx);241uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, transform_idx);242const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, transform_idx);243{244int prefix_len = *prefix++;245while (prefix_len--) { dst[idx++] = *prefix++; }246}247{248const int t = type;249int i = 0;250if (t <= BROTLI_TRANSFORM_OMIT_LAST_9) {251len -= t;252} else if (t >= BROTLI_TRANSFORM_OMIT_FIRST_1253&& t <= BROTLI_TRANSFORM_OMIT_FIRST_9) {254int skip = t - (BROTLI_TRANSFORM_OMIT_FIRST_1 - 1);255word += skip;256len -= skip;257}258while (i < len) { dst[idx++] = word[i++]; }259if (t == BROTLI_TRANSFORM_UPPERCASE_FIRST) {260ToUpperCase(&dst[idx - len]);261} else if (t == BROTLI_TRANSFORM_UPPERCASE_ALL) {262uint8_t* uppercase = &dst[idx - len];263while (len > 0) {264int step = ToUpperCase(uppercase);265uppercase += step;266len -= step;267}268} else if (t == BROTLI_TRANSFORM_SHIFT_FIRST) {269uint16_t param = (uint16_t)(transforms->params[transform_idx * 2]270+ (transforms->params[transform_idx * 2 + 1] << 8u));271Shift(&dst[idx - len], len, param);272} else if (t == BROTLI_TRANSFORM_SHIFT_ALL) {273uint16_t param = (uint16_t)(transforms->params[transform_idx * 2]274+ (transforms->params[transform_idx * 2 + 1] << 8u));275uint8_t* shift = &dst[idx - len];276while (len > 0) {277int step = Shift(shift, len, param);278shift += step;279len -= step;280}281}282}283{284int suffix_len = *suffix++;285while (suffix_len--) { dst[idx++] = *suffix++; }286return idx;287}288}289290#if defined(__cplusplus) || defined(c_plusplus)291} /* extern "C" */292#endif293294295