Path: blob/master/thirdparty/brotli/common/shared_dictionary.c
9906 views
/* Copyright 2017 Google Inc. All Rights Reserved.12Distributed under MIT license.3See file LICENSE for detail or copy at https://opensource.org/licenses/MIT4*/56/* Shared Dictionary definition and utilities. */78#include <brotli/shared_dictionary.h>910#include <memory.h>11#include <stdlib.h> /* malloc, free */12#include <stdio.h>1314#include "dictionary.h"15#include "platform.h"16#include "shared_dictionary_internal.h"1718#if defined(__cplusplus) || defined(c_plusplus)19extern "C" {20#endif2122#if defined(BROTLI_EXPERIMENTAL)2324#define BROTLI_NUM_ENCODED_LENGTHS (SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH \25- SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH + 1)2627/* Max allowed by spec */28#define BROTLI_MAX_SIZE_BITS 15u2930/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */31static BROTLI_BOOL ReadBool(const uint8_t* encoded, size_t size, size_t* pos,32BROTLI_BOOL* result) {33uint8_t value;34size_t position = *pos;35if (position >= size) return BROTLI_FALSE; /* past file end */36value = encoded[position++];37if (value > 1) return BROTLI_FALSE; /* invalid bool */38*result = TO_BROTLI_BOOL(value);39*pos = position;40return BROTLI_TRUE; /* success */41}4243/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */44static BROTLI_BOOL ReadUint8(const uint8_t* encoded, size_t size, size_t* pos,45uint8_t* result) {46size_t position = *pos;47if (position + sizeof(uint8_t) > size) return BROTLI_FALSE;48*result = encoded[position++];49*pos = position;50return BROTLI_TRUE;51}5253/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */54static BROTLI_BOOL ReadUint16(const uint8_t* encoded, size_t size, size_t* pos,55uint16_t* result) {56size_t position = *pos;57if (position + sizeof(uint16_t) > size) return BROTLI_FALSE;58*result = BROTLI_UNALIGNED_LOAD16LE(&encoded[position]);59position += 2;60*pos = position;61return BROTLI_TRUE;62}6364/* Reads a varint into a uint32_t, and returns error if it's too large */65/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */66static BROTLI_BOOL ReadVarint32(const uint8_t* encoded, size_t size,67size_t* pos, uint32_t* result) {68int num = 0;69uint8_t byte;70*result = 0;71for (;;) {72if (*pos >= size) return BROTLI_FALSE;73byte = encoded[(*pos)++];74if (num == 4 && byte > 15) return BROTLI_FALSE;75*result |= (uint32_t)(byte & 127) << (num * 7);76if (byte < 128) return BROTLI_TRUE;77num++;78}79}8081/* Returns the total length of word list. */82static size_t BrotliSizeBitsToOffsets(const uint8_t* size_bits_by_length,83uint32_t* offsets_by_length) {84uint32_t pos = 0;85uint32_t i;86for (i = 0; i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {87offsets_by_length[i] = pos;88if (size_bits_by_length[i] != 0) {89pos += i << size_bits_by_length[i];90}91}92return pos;93}9495static BROTLI_BOOL ParseWordList(size_t size, const uint8_t* encoded,96size_t* pos, BrotliDictionary* out) {97size_t offset;98size_t i;99size_t position = *pos;100if (position + BROTLI_NUM_ENCODED_LENGTHS > size) {101return BROTLI_FALSE;102}103104memset(out->size_bits_by_length, 0, SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH);105memcpy(out->size_bits_by_length + SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH,106&encoded[position], BROTLI_NUM_ENCODED_LENGTHS);107for (i = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;108i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {109if (out->size_bits_by_length[i] > BROTLI_MAX_SIZE_BITS) {110return BROTLI_FALSE;111}112}113position += BROTLI_NUM_ENCODED_LENGTHS;114offset = BrotliSizeBitsToOffsets(115out->size_bits_by_length, out->offsets_by_length);116117out->data = &encoded[position];118out->data_size = offset;119position += offset;120if (position > size) return BROTLI_FALSE;121*pos = position;122return BROTLI_TRUE;123}124125/* Computes the cutOffTransforms of a BrotliTransforms which already has the126transforms data correctly filled in. */127static void ComputeCutoffTransforms(BrotliTransforms* transforms) {128uint32_t i;129for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) {130transforms->cutOffTransforms[i] = -1;131}132for (i = 0; i < transforms->num_transforms; i++) {133const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, i);134uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, i);135const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, i);136if (type <= BROTLI_TRANSFORM_OMIT_LAST_9 && *prefix == 0 && *suffix == 0 &&137transforms->cutOffTransforms[type] == -1) {138transforms->cutOffTransforms[type] = (int16_t)i;139}140}141}142143static BROTLI_BOOL ParsePrefixSuffixTable(size_t size, const uint8_t* encoded,144size_t* pos, BrotliTransforms* out, uint16_t* out_table,145size_t* out_table_size) {146size_t position = *pos;147size_t offset = 0;148size_t stringlet_count = 0; /* NUM_PREFIX_SUFFIX */149size_t data_length = 0;150151/* PREFIX_SUFFIX_LENGTH */152if (!ReadUint16(encoded, size, &position, &out->prefix_suffix_size)) {153return BROTLI_FALSE;154}155data_length = out->prefix_suffix_size;156157/* Must at least have space for null terminator. */158if (data_length < 1) return BROTLI_FALSE;159out->prefix_suffix = &encoded[position];160if (position + data_length >= size) return BROTLI_FALSE;161while (BROTLI_TRUE) {162/* STRING_LENGTH */163size_t stringlet_len = encoded[position + offset];164out_table[stringlet_count] = (uint16_t)offset;165stringlet_count++;166offset++;167if (stringlet_len == 0) {168if (offset == data_length) {169break;170} else {171return BROTLI_FALSE;172}173}174if (stringlet_count > 255) return BROTLI_FALSE;175offset += stringlet_len;176if (offset >= data_length) return BROTLI_FALSE;177}178179position += data_length;180*pos = position;181*out_table_size = (uint16_t)stringlet_count;182return BROTLI_TRUE;183}184185static BROTLI_BOOL ParseTransformsList(size_t size, const uint8_t* encoded,186size_t* pos, BrotliTransforms* out, uint16_t* prefix_suffix_table,187size_t* prefix_suffix_count) {188uint32_t i;189BROTLI_BOOL has_params = BROTLI_FALSE;190BROTLI_BOOL prefix_suffix_ok = BROTLI_FALSE;191size_t position = *pos;192size_t stringlet_cnt = 0;193if (position >= size) return BROTLI_FALSE;194195prefix_suffix_ok = ParsePrefixSuffixTable(196size, encoded, &position, out, prefix_suffix_table, &stringlet_cnt);197if (!prefix_suffix_ok) return BROTLI_FALSE;198out->prefix_suffix_map = prefix_suffix_table;199*prefix_suffix_count = stringlet_cnt;200201out->num_transforms = encoded[position++];202out->transforms = &encoded[position];203position += (size_t)out->num_transforms * 3;204if (position > size) return BROTLI_FALSE;205/* Check for errors and read extra parameters. */206for (i = 0; i < out->num_transforms; i++) {207uint8_t prefix_id = BROTLI_TRANSFORM_PREFIX_ID(out, i);208uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);209uint8_t suffix_id = BROTLI_TRANSFORM_SUFFIX_ID(out, i);210if (prefix_id >= stringlet_cnt) return BROTLI_FALSE;211if (type >= BROTLI_NUM_TRANSFORM_TYPES) return BROTLI_FALSE;212if (suffix_id >= stringlet_cnt) return BROTLI_FALSE;213if (type == BROTLI_TRANSFORM_SHIFT_FIRST ||214type == BROTLI_TRANSFORM_SHIFT_ALL) {215has_params = BROTLI_TRUE;216}217}218if (has_params) {219out->params = &encoded[position];220position += (size_t)out->num_transforms * 2;221if (position > size) return BROTLI_FALSE;222for (i = 0; i < out->num_transforms; i++) {223uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);224if (type != BROTLI_TRANSFORM_SHIFT_FIRST &&225type != BROTLI_TRANSFORM_SHIFT_ALL) {226if (out->params[i * 2] != 0 || out->params[i * 2 + 1] != 0) {227return BROTLI_FALSE;228}229}230}231} else {232out->params = NULL;233}234ComputeCutoffTransforms(out);235*pos = position;236return BROTLI_TRUE;237}238239static BROTLI_BOOL DryParseDictionary(const uint8_t* encoded,240size_t size, uint32_t* num_prefix, BROTLI_BOOL* is_custom_static_dict) {241size_t pos = 0;242uint32_t chunk_size = 0;243uint8_t num_word_lists;244uint8_t num_transform_lists;245*is_custom_static_dict = BROTLI_FALSE;246*num_prefix = 0;247248/* Skip magic header bytes. */249pos += 2;250251/* LZ77_DICTIONARY_LENGTH */252if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;253if (chunk_size != 0) {254/* This limitation is not specified but the 32-bit Brotli decoder for now */255if (chunk_size > 1073741823) return BROTLI_FALSE;256*num_prefix = 1;257if (pos + chunk_size > size) return BROTLI_FALSE;258pos += chunk_size;259}260261if (!ReadUint8(encoded, size, &pos, &num_word_lists)) {262return BROTLI_FALSE;263}264if (!ReadUint8(encoded, size, &pos, &num_transform_lists)) {265return BROTLI_FALSE;266}267268if (num_word_lists > 0 || num_transform_lists > 0) {269*is_custom_static_dict = BROTLI_TRUE;270}271272return BROTLI_TRUE;273}274275static BROTLI_BOOL ParseDictionary(const uint8_t* encoded, size_t size,276BrotliSharedDictionary* dict) {277uint32_t i;278size_t pos = 0;279uint32_t chunk_size = 0;280size_t total_prefix_suffix_count = 0;281size_t trasform_list_start[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];282uint16_t temporary_prefix_suffix_table[256];283284/* Skip magic header bytes. */285pos += 2;286287/* LZ77_DICTIONARY_LENGTH */288if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;289if (chunk_size != 0) {290if (pos + chunk_size > size) return BROTLI_FALSE;291dict->prefix_size[dict->num_prefix] = chunk_size;292dict->prefix[dict->num_prefix] = &encoded[pos];293dict->num_prefix++;294/* LZ77_DICTIONARY_LENGTH bytes. */295pos += chunk_size;296}297298/* NUM_WORD_LISTS */299if (!ReadUint8(encoded, size, &pos, &dict->num_word_lists)) {300return BROTLI_FALSE;301}302if (dict->num_word_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {303return BROTLI_FALSE;304}305306if (dict->num_word_lists != 0) {307dict->words_instances = (BrotliDictionary*)dict->alloc_func(308dict->memory_manager_opaque,309dict->num_word_lists * sizeof(*dict->words_instances));310if (!dict->words_instances) return BROTLI_FALSE; /* OOM */311}312for (i = 0; i < dict->num_word_lists; i++) {313if (!ParseWordList(size, encoded, &pos, &dict->words_instances[i])) {314return BROTLI_FALSE;315}316}317318/* NUM_TRANSFORM_LISTS */319if (!ReadUint8(encoded, size, &pos, &dict->num_transform_lists)) {320return BROTLI_FALSE;321}322if (dict->num_transform_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {323return BROTLI_FALSE;324}325326if (dict->num_transform_lists != 0) {327dict->transforms_instances = (BrotliTransforms*)dict->alloc_func(328dict->memory_manager_opaque,329dict->num_transform_lists * sizeof(*dict->transforms_instances));330if (!dict->transforms_instances) return BROTLI_FALSE; /* OOM */331}332for (i = 0; i < dict->num_transform_lists; i++) {333BROTLI_BOOL ok = BROTLI_FALSE;334size_t prefix_suffix_count = 0;335trasform_list_start[i] = pos;336dict->transforms_instances[i].prefix_suffix_map =337temporary_prefix_suffix_table;338ok = ParseTransformsList(339size, encoded, &pos, &dict->transforms_instances[i],340temporary_prefix_suffix_table, &prefix_suffix_count);341if (!ok) return BROTLI_FALSE;342total_prefix_suffix_count += prefix_suffix_count;343}344if (total_prefix_suffix_count != 0) {345dict->prefix_suffix_maps = (uint16_t*)dict->alloc_func(346dict->memory_manager_opaque,347total_prefix_suffix_count * sizeof(*dict->prefix_suffix_maps));348if (!dict->prefix_suffix_maps) return BROTLI_FALSE; /* OOM */349}350total_prefix_suffix_count = 0;351for (i = 0; i < dict->num_transform_lists; i++) {352size_t prefix_suffix_count = 0;353size_t position = trasform_list_start[i];354uint16_t* prefix_suffix_map =355&dict->prefix_suffix_maps[total_prefix_suffix_count];356BROTLI_BOOL ok = ParsePrefixSuffixTable(357size, encoded, &position, &dict->transforms_instances[i],358prefix_suffix_map, &prefix_suffix_count);359if (!ok) return BROTLI_FALSE;360dict->transforms_instances[i].prefix_suffix_map = prefix_suffix_map;361total_prefix_suffix_count += prefix_suffix_count;362}363364if (dict->num_word_lists != 0 || dict->num_transform_lists != 0) {365if (!ReadUint8(encoded, size, &pos, &dict->num_dictionaries)) {366return BROTLI_FALSE;367}368if (dict->num_dictionaries == 0 ||369dict->num_dictionaries > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {370return BROTLI_FALSE;371}372for (i = 0; i < dict->num_dictionaries; i++) {373uint8_t words_index;374uint8_t transforms_index;375if (!ReadUint8(encoded, size, &pos, &words_index)) {376return BROTLI_FALSE;377}378if (words_index > dict->num_word_lists) return BROTLI_FALSE;379if (!ReadUint8(encoded, size, &pos, &transforms_index)) {380return BROTLI_FALSE;381}382if (transforms_index > dict->num_transform_lists) return BROTLI_FALSE;383dict->words[i] = words_index == dict->num_word_lists ?384BrotliGetDictionary() : &dict->words_instances[words_index];385dict->transforms[i] = transforms_index == dict->num_transform_lists ?386BrotliGetTransforms(): &dict->transforms_instances[transforms_index];387}388/* CONTEXT_ENABLED */389if (!ReadBool(encoded, size, &pos, &dict->context_based)) {390return BROTLI_FALSE;391}392393/* CONTEXT_MAP */394if (dict->context_based) {395for (i = 0; i < SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS; i++) {396if (!ReadUint8(encoded, size, &pos, &dict->context_map[i])) {397return BROTLI_FALSE;398}399if (dict->context_map[i] >= dict->num_dictionaries) {400return BROTLI_FALSE;401}402}403}404} else {405dict->context_based = BROTLI_FALSE;406dict->num_dictionaries = 1;407dict->words[0] = BrotliGetDictionary();408dict->transforms[0] = BrotliGetTransforms();409}410411return BROTLI_TRUE;412}413414/* Decodes shared dictionary and verifies correctness.415Returns BROTLI_TRUE if dictionary is valid, BROTLI_FALSE otherwise.416The BrotliSharedDictionary must already have been initialized. If the417BrotliSharedDictionary already contains data, compound dictionaries418will be appended, but an error will be returned if it already has419custom words or transforms.420TODO(lode): link to RFC for shared brotli once published. */421static BROTLI_BOOL DecodeSharedDictionary(422const uint8_t* encoded, size_t size, BrotliSharedDictionary* dict) {423uint32_t num_prefix = 0;424BROTLI_BOOL is_custom_static_dict = BROTLI_FALSE;425BROTLI_BOOL has_custom_static_dict =426dict->num_word_lists > 0 || dict->num_transform_lists > 0;427428/* Check magic header bytes. */429if (size < 2) return BROTLI_FALSE;430if (encoded[0] != 0x91 || encoded[1] != 0) return BROTLI_FALSE;431432if (!DryParseDictionary(encoded, size, &num_prefix, &is_custom_static_dict)) {433return BROTLI_FALSE;434}435436if (num_prefix + dict->num_prefix > SHARED_BROTLI_MAX_COMPOUND_DICTS) {437return BROTLI_FALSE;438}439440/* Cannot combine different static dictionaries, only prefix dictionaries */441if (has_custom_static_dict && is_custom_static_dict) return BROTLI_FALSE;442443return ParseDictionary(encoded, size, dict);444}445446#endif /* BROTLI_EXPERIMENTAL */447448void BrotliSharedDictionaryDestroyInstance(449BrotliSharedDictionary* dict) {450if (!dict) {451return;452} else {453brotli_free_func free_func = dict->free_func;454void* opaque = dict->memory_manager_opaque;455/* Cleanup. */456free_func(opaque, dict->words_instances);457free_func(opaque, dict->transforms_instances);458free_func(opaque, dict->prefix_suffix_maps);459/* Self-destruction. */460free_func(opaque, dict);461}462}463464BROTLI_BOOL BrotliSharedDictionaryAttach(465BrotliSharedDictionary* dict, BrotliSharedDictionaryType type,466size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)]) {467if (!dict) {468return BROTLI_FALSE;469}470#if defined(BROTLI_EXPERIMENTAL)471if (type == BROTLI_SHARED_DICTIONARY_SERIALIZED) {472return DecodeSharedDictionary(data, data_size, dict);473}474#endif /* BROTLI_EXPERIMENTAL */475if (type == BROTLI_SHARED_DICTIONARY_RAW) {476if (dict->num_prefix >= SHARED_BROTLI_MAX_COMPOUND_DICTS) {477return BROTLI_FALSE;478}479dict->prefix_size[dict->num_prefix] = data_size;480dict->prefix[dict->num_prefix] = data;481dict->num_prefix++;482return BROTLI_TRUE;483}484return BROTLI_FALSE;485}486487BrotliSharedDictionary* BrotliSharedDictionaryCreateInstance(488brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {489BrotliSharedDictionary* dict = 0;490if (!alloc_func && !free_func) {491dict = (BrotliSharedDictionary*)malloc(sizeof(BrotliSharedDictionary));492} else if (alloc_func && free_func) {493dict = (BrotliSharedDictionary*)alloc_func(494opaque, sizeof(BrotliSharedDictionary));495}496if (dict == 0) {497return 0;498}499500/* TODO(eustas): explicitly initialize all the fields? */501memset(dict, 0, sizeof(BrotliSharedDictionary));502503dict->context_based = BROTLI_FALSE;504dict->num_dictionaries = 1;505dict->num_word_lists = 0;506dict->num_transform_lists = 0;507508dict->words[0] = BrotliGetDictionary();509dict->transforms[0] = BrotliGetTransforms();510511dict->alloc_func = alloc_func ? alloc_func : BrotliDefaultAllocFunc;512dict->free_func = free_func ? free_func : BrotliDefaultFreeFunc;513dict->memory_manager_opaque = opaque;514515return dict;516}517518#if defined(__cplusplus) || defined(c_plusplus)519} /* extern "C" */520#endif521522523