Path: blob/master/thirdparty/brotli/common/shared_dictionary.c
21512 views
/* Copyright 2017 Google Inc. All Rights Reserved.12Distributed under MIT license.3See file LICENSE for detail or copy at https://opensource.org/licenses/MIT4*/56/* Shared Dictionary definition and utilities. */78#include <brotli/shared_dictionary.h>910#include "dictionary.h"11#include "platform.h"12#include "shared_dictionary_internal.h"1314#if defined(__cplusplus) || defined(c_plusplus)15extern "C" {16#endif1718#if defined(BROTLI_EXPERIMENTAL)1920#define BROTLI_NUM_ENCODED_LENGTHS (SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH \21- SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH + 1)2223/* Max allowed by spec */24#define BROTLI_MAX_SIZE_BITS 15u2526/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */27static BROTLI_BOOL ReadBool(const uint8_t* encoded, size_t size, size_t* pos,28BROTLI_BOOL* result) {29uint8_t value;30size_t position = *pos;31if (position >= size) return BROTLI_FALSE; /* past file end */32value = encoded[position++];33if (value > 1) return BROTLI_FALSE; /* invalid bool */34*result = TO_BROTLI_BOOL(value);35*pos = position;36return BROTLI_TRUE; /* success */37}3839/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */40static BROTLI_BOOL ReadUint8(const uint8_t* encoded, size_t size, size_t* pos,41uint8_t* result) {42size_t position = *pos;43if (position + sizeof(uint8_t) > size) return BROTLI_FALSE;44*result = encoded[position++];45*pos = position;46return BROTLI_TRUE;47}4849/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */50static BROTLI_BOOL ReadUint16(const uint8_t* encoded, size_t size, size_t* pos,51uint16_t* result) {52size_t position = *pos;53if (position + sizeof(uint16_t) > size) return BROTLI_FALSE;54*result = BROTLI_UNALIGNED_LOAD16LE(&encoded[position]);55position += 2;56*pos = position;57return BROTLI_TRUE;58}5960/* Reads a varint into a uint32_t, and returns error if it's too large */61/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */62static BROTLI_BOOL ReadVarint32(const uint8_t* encoded, size_t size,63size_t* pos, uint32_t* result) {64int num = 0;65uint8_t byte;66*result = 0;67for (;;) {68if (*pos >= size) return BROTLI_FALSE;69byte = encoded[(*pos)++];70if (num == 4 && byte > 15) return BROTLI_FALSE;71*result |= (uint32_t)(byte & 127) << (num * 7);72if (byte < 128) return BROTLI_TRUE;73num++;74}75}7677/* Returns the total length of word list. */78static size_t BrotliSizeBitsToOffsets(const uint8_t* size_bits_by_length,79uint32_t* offsets_by_length) {80uint32_t pos = 0;81uint32_t i;82for (i = 0; i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {83offsets_by_length[i] = pos;84if (size_bits_by_length[i] != 0) {85pos += i << size_bits_by_length[i];86}87}88return pos;89}9091static BROTLI_BOOL ParseWordList(size_t size, const uint8_t* encoded,92size_t* pos, BrotliDictionary* out) {93size_t offset;94size_t i;95size_t position = *pos;96if (position + BROTLI_NUM_ENCODED_LENGTHS > size) {97return BROTLI_FALSE;98}99100memset(out->size_bits_by_length, 0, SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH);101memcpy(out->size_bits_by_length + SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH,102&encoded[position], BROTLI_NUM_ENCODED_LENGTHS);103for (i = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;104i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {105if (out->size_bits_by_length[i] > BROTLI_MAX_SIZE_BITS) {106return BROTLI_FALSE;107}108}109position += BROTLI_NUM_ENCODED_LENGTHS;110offset = BrotliSizeBitsToOffsets(111out->size_bits_by_length, out->offsets_by_length);112113out->data = &encoded[position];114out->data_size = offset;115position += offset;116if (position > size) return BROTLI_FALSE;117*pos = position;118return BROTLI_TRUE;119}120121/* Computes the cutOffTransforms of a BrotliTransforms which already has the122transforms data correctly filled in. */123static void ComputeCutoffTransforms(BrotliTransforms* transforms) {124uint32_t i;125for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) {126transforms->cutOffTransforms[i] = -1;127}128for (i = 0; i < transforms->num_transforms; i++) {129const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, i);130uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, i);131const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, i);132if (type <= BROTLI_TRANSFORM_OMIT_LAST_9 && *prefix == 0 && *suffix == 0 &&133transforms->cutOffTransforms[type] == -1) {134transforms->cutOffTransforms[type] = (int16_t)i;135}136}137}138139static BROTLI_BOOL ParsePrefixSuffixTable(size_t size, const uint8_t* encoded,140size_t* pos, BrotliTransforms* out, uint16_t* out_table,141size_t* out_table_size) {142size_t position = *pos;143size_t offset = 0;144size_t stringlet_count = 0; /* NUM_PREFIX_SUFFIX */145size_t data_length = 0;146147/* PREFIX_SUFFIX_LENGTH */148if (!ReadUint16(encoded, size, &position, &out->prefix_suffix_size)) {149return BROTLI_FALSE;150}151data_length = out->prefix_suffix_size;152153/* Must at least have space for null terminator. */154if (data_length < 1) return BROTLI_FALSE;155out->prefix_suffix = &encoded[position];156if (position + data_length >= size) return BROTLI_FALSE;157while (BROTLI_TRUE) {158/* STRING_LENGTH */159size_t stringlet_len = encoded[position + offset];160out_table[stringlet_count] = (uint16_t)offset;161stringlet_count++;162offset++;163if (stringlet_len == 0) {164if (offset == data_length) {165break;166} else {167return BROTLI_FALSE;168}169}170if (stringlet_count > 255) return BROTLI_FALSE;171offset += stringlet_len;172if (offset >= data_length) return BROTLI_FALSE;173}174175position += data_length;176*pos = position;177*out_table_size = (uint16_t)stringlet_count;178return BROTLI_TRUE;179}180181static BROTLI_BOOL ParseTransformsList(size_t size, const uint8_t* encoded,182size_t* pos, BrotliTransforms* out, uint16_t* prefix_suffix_table,183size_t* prefix_suffix_count) {184uint32_t i;185BROTLI_BOOL has_params = BROTLI_FALSE;186BROTLI_BOOL prefix_suffix_ok = BROTLI_FALSE;187size_t position = *pos;188size_t stringlet_cnt = 0;189if (position >= size) return BROTLI_FALSE;190191prefix_suffix_ok = ParsePrefixSuffixTable(192size, encoded, &position, out, prefix_suffix_table, &stringlet_cnt);193if (!prefix_suffix_ok) return BROTLI_FALSE;194out->prefix_suffix_map = prefix_suffix_table;195*prefix_suffix_count = stringlet_cnt;196197out->num_transforms = encoded[position++];198out->transforms = &encoded[position];199position += (size_t)out->num_transforms * 3;200if (position > size) return BROTLI_FALSE;201/* Check for errors and read extra parameters. */202for (i = 0; i < out->num_transforms; i++) {203uint8_t prefix_id = BROTLI_TRANSFORM_PREFIX_ID(out, i);204uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);205uint8_t suffix_id = BROTLI_TRANSFORM_SUFFIX_ID(out, i);206if (prefix_id >= stringlet_cnt) return BROTLI_FALSE;207if (type >= BROTLI_NUM_TRANSFORM_TYPES) return BROTLI_FALSE;208if (suffix_id >= stringlet_cnt) return BROTLI_FALSE;209if (type == BROTLI_TRANSFORM_SHIFT_FIRST ||210type == BROTLI_TRANSFORM_SHIFT_ALL) {211has_params = BROTLI_TRUE;212}213}214if (has_params) {215out->params = &encoded[position];216position += (size_t)out->num_transforms * 2;217if (position > size) return BROTLI_FALSE;218for (i = 0; i < out->num_transforms; i++) {219uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);220if (type != BROTLI_TRANSFORM_SHIFT_FIRST &&221type != BROTLI_TRANSFORM_SHIFT_ALL) {222if (out->params[i * 2] != 0 || out->params[i * 2 + 1] != 0) {223return BROTLI_FALSE;224}225}226}227} else {228out->params = NULL;229}230ComputeCutoffTransforms(out);231*pos = position;232return BROTLI_TRUE;233}234235static BROTLI_BOOL DryParseDictionary(const uint8_t* encoded,236size_t size, uint32_t* num_prefix, BROTLI_BOOL* is_custom_static_dict) {237size_t pos = 0;238uint32_t chunk_size = 0;239uint8_t num_word_lists;240uint8_t num_transform_lists;241*is_custom_static_dict = BROTLI_FALSE;242*num_prefix = 0;243244/* Skip magic header bytes. */245pos += 2;246247/* LZ77_DICTIONARY_LENGTH */248if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;249if (chunk_size != 0) {250/* This limitation is not specified but the 32-bit Brotli decoder for now */251if (chunk_size > 1073741823) return BROTLI_FALSE;252*num_prefix = 1;253if (pos + chunk_size > size) return BROTLI_FALSE;254pos += chunk_size;255}256257if (!ReadUint8(encoded, size, &pos, &num_word_lists)) {258return BROTLI_FALSE;259}260if (!ReadUint8(encoded, size, &pos, &num_transform_lists)) {261return BROTLI_FALSE;262}263264if (num_word_lists > 0 || num_transform_lists > 0) {265*is_custom_static_dict = BROTLI_TRUE;266}267268return BROTLI_TRUE;269}270271static BROTLI_BOOL ParseDictionary(const uint8_t* encoded, size_t size,272BrotliSharedDictionary* dict) {273uint32_t i;274size_t pos = 0;275uint32_t chunk_size = 0;276size_t total_prefix_suffix_count = 0;277size_t transform_list_start[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];278uint16_t temporary_prefix_suffix_table[256];279280/* Skip magic header bytes. */281pos += 2;282283/* LZ77_DICTIONARY_LENGTH */284if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;285if (chunk_size != 0) {286if (pos + chunk_size > size) return BROTLI_FALSE;287dict->prefix_size[dict->num_prefix] = chunk_size;288dict->prefix[dict->num_prefix] = &encoded[pos];289dict->num_prefix++;290/* LZ77_DICTIONARY_LENGTH bytes. */291pos += chunk_size;292}293294/* NUM_WORD_LISTS */295if (!ReadUint8(encoded, size, &pos, &dict->num_word_lists)) {296return BROTLI_FALSE;297}298if (dict->num_word_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {299return BROTLI_FALSE;300}301302if (dict->num_word_lists != 0) {303dict->words_instances = (BrotliDictionary*)dict->alloc_func(304dict->memory_manager_opaque,305dict->num_word_lists * sizeof(*dict->words_instances));306if (!dict->words_instances) return BROTLI_FALSE; /* OOM */307}308for (i = 0; i < dict->num_word_lists; i++) {309if (!ParseWordList(size, encoded, &pos, &dict->words_instances[i])) {310return BROTLI_FALSE;311}312}313314/* NUM_TRANSFORM_LISTS */315if (!ReadUint8(encoded, size, &pos, &dict->num_transform_lists)) {316return BROTLI_FALSE;317}318if (dict->num_transform_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {319return BROTLI_FALSE;320}321322if (dict->num_transform_lists != 0) {323dict->transforms_instances = (BrotliTransforms*)dict->alloc_func(324dict->memory_manager_opaque,325dict->num_transform_lists * sizeof(*dict->transforms_instances));326if (!dict->transforms_instances) return BROTLI_FALSE; /* OOM */327}328for (i = 0; i < dict->num_transform_lists; i++) {329BROTLI_BOOL ok = BROTLI_FALSE;330size_t prefix_suffix_count = 0;331transform_list_start[i] = pos;332dict->transforms_instances[i].prefix_suffix_map =333temporary_prefix_suffix_table;334ok = ParseTransformsList(335size, encoded, &pos, &dict->transforms_instances[i],336temporary_prefix_suffix_table, &prefix_suffix_count);337if (!ok) return BROTLI_FALSE;338total_prefix_suffix_count += prefix_suffix_count;339}340if (total_prefix_suffix_count != 0) {341dict->prefix_suffix_maps = (uint16_t*)dict->alloc_func(342dict->memory_manager_opaque,343total_prefix_suffix_count * sizeof(*dict->prefix_suffix_maps));344if (!dict->prefix_suffix_maps) return BROTLI_FALSE; /* OOM */345}346total_prefix_suffix_count = 0;347for (i = 0; i < dict->num_transform_lists; i++) {348size_t prefix_suffix_count = 0;349size_t position = transform_list_start[i];350uint16_t* prefix_suffix_map =351&dict->prefix_suffix_maps[total_prefix_suffix_count];352BROTLI_BOOL ok = ParsePrefixSuffixTable(353size, encoded, &position, &dict->transforms_instances[i],354prefix_suffix_map, &prefix_suffix_count);355if (!ok) return BROTLI_FALSE;356dict->transforms_instances[i].prefix_suffix_map = prefix_suffix_map;357total_prefix_suffix_count += prefix_suffix_count;358}359360if (dict->num_word_lists != 0 || dict->num_transform_lists != 0) {361if (!ReadUint8(encoded, size, &pos, &dict->num_dictionaries)) {362return BROTLI_FALSE;363}364if (dict->num_dictionaries == 0 ||365dict->num_dictionaries > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {366return BROTLI_FALSE;367}368for (i = 0; i < dict->num_dictionaries; i++) {369uint8_t words_index;370uint8_t transforms_index;371if (!ReadUint8(encoded, size, &pos, &words_index)) {372return BROTLI_FALSE;373}374if (words_index > dict->num_word_lists) return BROTLI_FALSE;375if (!ReadUint8(encoded, size, &pos, &transforms_index)) {376return BROTLI_FALSE;377}378if (transforms_index > dict->num_transform_lists) return BROTLI_FALSE;379dict->words[i] = words_index == dict->num_word_lists ?380BrotliGetDictionary() : &dict->words_instances[words_index];381dict->transforms[i] = transforms_index == dict->num_transform_lists ?382BrotliGetTransforms(): &dict->transforms_instances[transforms_index];383}384/* CONTEXT_ENABLED */385if (!ReadBool(encoded, size, &pos, &dict->context_based)) {386return BROTLI_FALSE;387}388389/* CONTEXT_MAP */390if (dict->context_based) {391for (i = 0; i < SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS; i++) {392if (!ReadUint8(encoded, size, &pos, &dict->context_map[i])) {393return BROTLI_FALSE;394}395if (dict->context_map[i] >= dict->num_dictionaries) {396return BROTLI_FALSE;397}398}399}400} else {401dict->context_based = BROTLI_FALSE;402dict->num_dictionaries = 1;403dict->words[0] = BrotliGetDictionary();404dict->transforms[0] = BrotliGetTransforms();405}406407return BROTLI_TRUE;408}409410/* Decodes shared dictionary and verifies correctness.411Returns BROTLI_TRUE if dictionary is valid, BROTLI_FALSE otherwise.412The BrotliSharedDictionary must already have been initialized. If the413BrotliSharedDictionary already contains data, compound dictionaries414will be appended, but an error will be returned if it already has415custom words or transforms.416TODO(lode): link to RFC for shared brotli once published. */417static BROTLI_BOOL DecodeSharedDictionary(418const uint8_t* encoded, size_t size, BrotliSharedDictionary* dict) {419uint32_t num_prefix = 0;420BROTLI_BOOL is_custom_static_dict = BROTLI_FALSE;421BROTLI_BOOL has_custom_static_dict =422dict->num_word_lists > 0 || dict->num_transform_lists > 0;423424/* Check magic header bytes. */425if (size < 2) return BROTLI_FALSE;426if (encoded[0] != 0x91 || encoded[1] != 0) return BROTLI_FALSE;427428if (!DryParseDictionary(encoded, size, &num_prefix, &is_custom_static_dict)) {429return BROTLI_FALSE;430}431432if (num_prefix + dict->num_prefix > SHARED_BROTLI_MAX_COMPOUND_DICTS) {433return BROTLI_FALSE;434}435436/* Cannot combine different static dictionaries, only prefix dictionaries */437if (has_custom_static_dict && is_custom_static_dict) return BROTLI_FALSE;438439return ParseDictionary(encoded, size, dict);440}441442#endif /* BROTLI_EXPERIMENTAL */443444void BrotliSharedDictionaryDestroyInstance(445BrotliSharedDictionary* dict) {446if (!dict) {447return;448} else {449brotli_free_func free_func = dict->free_func;450void* opaque = dict->memory_manager_opaque;451/* Cleanup. */452free_func(opaque, dict->words_instances);453free_func(opaque, dict->transforms_instances);454free_func(opaque, dict->prefix_suffix_maps);455/* Self-destruction. */456free_func(opaque, dict);457}458}459460BROTLI_BOOL BrotliSharedDictionaryAttach(461BrotliSharedDictionary* dict, BrotliSharedDictionaryType type,462size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)]) {463if (!dict) {464return BROTLI_FALSE;465}466#if defined(BROTLI_EXPERIMENTAL)467if (type == BROTLI_SHARED_DICTIONARY_SERIALIZED) {468return DecodeSharedDictionary(data, data_size, dict);469}470#endif /* BROTLI_EXPERIMENTAL */471if (type == BROTLI_SHARED_DICTIONARY_RAW) {472if (dict->num_prefix >= SHARED_BROTLI_MAX_COMPOUND_DICTS) {473return BROTLI_FALSE;474}475dict->prefix_size[dict->num_prefix] = data_size;476dict->prefix[dict->num_prefix] = data;477dict->num_prefix++;478return BROTLI_TRUE;479}480return BROTLI_FALSE;481}482483BrotliSharedDictionary* BrotliSharedDictionaryCreateInstance(484brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {485BrotliSharedDictionary* dict = 0;486if (!alloc_func && !free_func) {487dict = (BrotliSharedDictionary*)malloc(sizeof(BrotliSharedDictionary));488} else if (alloc_func && free_func) {489dict = (BrotliSharedDictionary*)alloc_func(490opaque, sizeof(BrotliSharedDictionary));491}492if (dict == 0) {493return 0;494}495496/* TODO(eustas): explicitly initialize all the fields? */497memset(dict, 0, sizeof(BrotliSharedDictionary));498499dict->context_based = BROTLI_FALSE;500dict->num_dictionaries = 1;501dict->num_word_lists = 0;502dict->num_transform_lists = 0;503504dict->words[0] = BrotliGetDictionary();505dict->transforms[0] = BrotliGetTransforms();506507dict->alloc_func = alloc_func ? alloc_func : BrotliDefaultAllocFunc;508dict->free_func = free_func ? free_func : BrotliDefaultFreeFunc;509dict->memory_manager_opaque = opaque;510511return dict;512}513514#if defined(__cplusplus) || defined(c_plusplus)515} /* extern "C" */516#endif517518519