/* Path: blob/main/contrib/libcbor/src/cbor/internal/unicode.c (39566 views) */
/*1* Copyright (c) 2014-2020 Pavel Kalvoda <[email protected]>2*3* libcbor is free software; you can redistribute it and/or modify4* it under the terms of the MIT license. See LICENSE for details.5*/67#include "unicode.h"8#include <stdint.h>910#define UTF8_ACCEPT 011#define UTF8_REJECT 11213static const uint8_t utf8d[] = {140, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,150, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,160, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00..1f */170, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,180, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,190, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20..3f */200, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,220, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40..5f */230, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,240, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,250, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60..7f */261, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,271, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9,289, 9, 9, 9, 9, 9, 9, 9, 9, 9, /* 80..9f */297, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,307, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,317, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* a0..bf */328, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2,332, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,342, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* c0..df */350xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3,360x3, 0x3, 0x4, 0x3, 0x3, /* e0..ef */370xb, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,380x8, 0x8, 0x8, 0x8, 0x8, /* f0..ff */390x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4,400x6, 0x1, 0x1, 0x1, 0x1, /* s0..s0 */411, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,421, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,431, 0, 1, 0, 1, 1, 1, 1, 1, 1, /* s1..s2 */441, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1,451, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,461, 2, 1, 1, 1, 1, 1, 1, 1, 1, /* s3..s4 */471, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1,481, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,491, 3, 1, 3, 1, 1, 1, 1, 1, 1, /* s5..s6 */501, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1,511, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1,521, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s7..s8 */53};5455/* Copyright of this function: (c) 2008-2009 Bjoern Hoehrmann56* <[email protected]> */57/* See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for 
details. */58uint32_t _cbor_unicode_decode(uint32_t* state, uint32_t* codep, uint32_t byte) {59uint32_t type = utf8d[byte];6061*codep = (*state != UTF8_ACCEPT) ? (byte & 0x3fu) | (*codep << 6)62: (0xff >> type) & (byte);6364*state = utf8d[256 + *state * 16 + type];65return *state;66}6768size_t _cbor_unicode_codepoint_count(cbor_data source, size_t source_length,69struct _cbor_unicode_status* status) {70*status =71(struct _cbor_unicode_status){.location = 0, .status = _CBOR_UNICODE_OK};72uint32_t codepoint, state = UTF8_ACCEPT, res;73size_t pos = 0, count = 0;7475for (; pos < source_length; pos++) {76res = _cbor_unicode_decode(&state, &codepoint, source[pos]);7778if (res == UTF8_ACCEPT) {79count++;80} else if (res == UTF8_REJECT) {81goto error;82}83}8485/* Unfinished multibyte codepoint */86if (state != UTF8_ACCEPT) goto error;8788return count;8990error:91*status = (struct _cbor_unicode_status){.location = pos,92.status = _CBOR_UNICODE_BADCP};93return 0;94}959697