/* Path: blob/master/compat/jansson/load.c (1299 views) */
/*1* Copyright (c) 2009-2013 Petri Lehtinen <[email protected]>2*3* Jansson is free software; you can redistribute it and/or modify4* it under the terms of the MIT license. See LICENSE for details.5*/67#ifndef _GNU_SOURCE8#define _GNU_SOURCE9#endif1011#include <errno.h>12#include <limits.h>13#include <stdio.h>14#include <stdlib.h>15#include <string.h>16#include <assert.h>1718#include "jansson.h"19#include "jansson_private.h"20#include "strbuffer.h"21#include "utf.h"2223#define STREAM_STATE_OK 024#define STREAM_STATE_EOF -125#define STREAM_STATE_ERROR -22627#define TOKEN_INVALID -128#define TOKEN_EOF 029#define TOKEN_STRING 25630#define TOKEN_INTEGER 25731#define TOKEN_REAL 25832#define TOKEN_TRUE 25933#define TOKEN_FALSE 26034#define TOKEN_NULL 2613536/* Locale independent versions of isxxx() functions */37#define l_isupper(c) ('A' <= (c) && (c) <= 'Z')38#define l_islower(c) ('a' <= (c) && (c) <= 'z')39#define l_isalpha(c) (l_isupper(c) || l_islower(c))40#define l_isdigit(c) ('0' <= (c) && (c) <= '9')41#define l_isxdigit(c) \42(l_isdigit(c) || ('A' <= (c) && (c) <= 'F') || ('a' <= (c) && (c) <= 'f'))4344/* Read one byte from stream, convert to unsigned char, then int, and45return. return EOF on end of file. This corresponds to the46behaviour of fgetc(). 
*/47typedef int (*get_func)(void *data);4849typedef struct {50get_func get;51void *data;52char buffer[5];53size_t buffer_pos;54int state;55int line;56int column, last_column;57size_t position;58} stream_t;5960typedef struct {61stream_t stream;62strbuffer_t saved_text;63int token;64union {65char *string;66json_int_t integer;67double real;68} value;69} lex_t;7071#define stream_to_lex(stream) container_of(stream, lex_t, stream)727374/*** error reporting ***/7576static void error_set(json_error_t *error, const lex_t *lex,77const char *msg, ...)78{79va_list ap;80char msg_text[JSON_ERROR_TEXT_LENGTH];81char msg_with_context[JSON_ERROR_TEXT_LENGTH];8283int line = -1, col = -1;84size_t pos = 0;85const char *result = msg_text;8687if(!error)88return;8990va_start(ap, msg);91vsnprintf(msg_text, JSON_ERROR_TEXT_LENGTH, msg, ap);92msg_text[JSON_ERROR_TEXT_LENGTH - 1] = '\0';93va_end(ap);9495if(lex)96{97const char *saved_text = strbuffer_value(&lex->saved_text);9899line = lex->stream.line;100col = lex->stream.column;101pos = lex->stream.position;102103if(saved_text && saved_text[0])104{105if(lex->saved_text.length <= 20) {106snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH,107"%s near '%s'", msg_text, saved_text);108msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0';109result = msg_with_context;110}111}112else113{114if(lex->stream.state == STREAM_STATE_ERROR) {115/* No context for UTF-8 decoding errors */116result = msg_text;117}118else {119snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH,120"%s near end of file", msg_text);121msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0';122result = msg_with_context;123}124}125}126127jsonp_error_set(error, line, col, pos, "%s", result);128}129130131/*** lexical analyzer ***/132133static void134stream_init(stream_t *stream, get_func get, void *data)135{136stream->get = get;137stream->data = data;138stream->buffer[0] = '\0';139stream->buffer_pos = 0;140141stream->state = STREAM_STATE_OK;142stream->line = 1;143stream->column = 
0;144stream->position = 0;145}146147static int stream_get(stream_t *stream, json_error_t *error)148{149int c;150151if(stream->state != STREAM_STATE_OK)152return stream->state;153154if(!stream->buffer[stream->buffer_pos])155{156c = stream->get(stream->data);157if(c == EOF) {158stream->state = STREAM_STATE_EOF;159return STREAM_STATE_EOF;160}161162stream->buffer[0] = c;163stream->buffer_pos = 0;164165if(0x80 <= c && c <= 0xFF)166{167/* multi-byte UTF-8 sequence */168int i, count;169170count = utf8_check_first(c);171if(!count)172goto out;173174assert(count >= 2);175176for(i = 1; i < count; i++)177stream->buffer[i] = stream->get(stream->data);178179if(!utf8_check_full(stream->buffer, count, NULL))180goto out;181182stream->buffer[count] = '\0';183}184else185stream->buffer[1] = '\0';186}187188c = stream->buffer[stream->buffer_pos++];189190stream->position++;191if(c == '\n') {192stream->line++;193stream->last_column = stream->column;194stream->column = 0;195}196else if(utf8_check_first(c)) {197/* track the Unicode character column, so increment only if198this is the first character of a UTF-8 sequence */199stream->column++;200}201202return c;203204out:205stream->state = STREAM_STATE_ERROR;206error_set(error, stream_to_lex(stream), "unable to decode byte 0x%x", c);207return STREAM_STATE_ERROR;208}209210static void stream_unget(stream_t *stream, int c)211{212if(c == STREAM_STATE_EOF || c == STREAM_STATE_ERROR)213return;214215stream->position--;216if(c == '\n') {217stream->line--;218stream->column = stream->last_column;219}220else if(utf8_check_first(c))221stream->column--;222223assert(stream->buffer_pos > 0);224stream->buffer_pos--;225assert(stream->buffer[stream->buffer_pos] == c);226}227228229static int lex_get(lex_t *lex, json_error_t *error)230{231return stream_get(&lex->stream, error);232}233234static void lex_save(lex_t *lex, int c)235{236strbuffer_append_byte(&lex->saved_text, c);237}238239static int lex_get_save(lex_t *lex, json_error_t *error)240{241int c = 
stream_get(&lex->stream, error);242if(c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR)243lex_save(lex, c);244return c;245}246247static void lex_unget(lex_t *lex, int c)248{249stream_unget(&lex->stream, c);250}251252static void lex_unget_unsave(lex_t *lex, int c)253{254if(c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR) {255/* Since we treat warnings as errors, when assertions are turned256* off the "d" variable would be set but never used. Which is257* treated as an error by GCC.258*/259#ifndef NDEBUG260char d;261#endif262stream_unget(&lex->stream, c);263#ifndef NDEBUG264d =265#endif266strbuffer_pop(&lex->saved_text);267assert(c == d);268}269}270271static void lex_save_cached(lex_t *lex)272{273while(lex->stream.buffer[lex->stream.buffer_pos] != '\0')274{275lex_save(lex, lex->stream.buffer[lex->stream.buffer_pos]);276lex->stream.buffer_pos++;277lex->stream.position++;278}279}280281/* assumes that str points to 'u' plus at least 4 valid hex digits */282static int32_t decode_unicode_escape(const char *str)283{284int i;285int32_t value = 0;286287assert(str[0] == 'u');288289for(i = 1; i <= 4; i++) {290char c = str[i];291value <<= 4;292if(l_isdigit(c))293value += c - '0';294else if(l_islower(c))295value += c - 'a' + 10;296else if(l_isupper(c))297value += c - 'A' + 10;298else299assert(0);300}301302return value;303}304305static void lex_scan_string(lex_t *lex, json_error_t *error)306{307int c;308const char *p;309char *t;310int i;311312lex->value.string = NULL;313lex->token = TOKEN_INVALID;314315c = lex_get_save(lex, error);316317while(c != '"') {318if(c == STREAM_STATE_ERROR)319goto out;320321else if(c == STREAM_STATE_EOF) {322error_set(error, lex, "premature end of input");323goto out;324}325326else if(0 <= c && c <= 0x1F) {327/* control character */328lex_unget_unsave(lex, c);329if(c == '\n')330error_set(error, lex, "unexpected newline", c);331else332error_set(error, lex, "control character 0x%x", c);333goto out;334}335336else if(c == '\\') {337c = lex_get_save(lex, 
error);338if(c == 'u') {339c = lex_get_save(lex, error);340for(i = 0; i < 4; i++) {341if(!l_isxdigit(c)) {342error_set(error, lex, "invalid escape");343goto out;344}345c = lex_get_save(lex, error);346}347}348else if(c == '"' || c == '\\' || c == '/' || c == 'b' ||349c == 'f' || c == 'n' || c == 'r' || c == 't')350c = lex_get_save(lex, error);351else {352error_set(error, lex, "invalid escape");353goto out;354}355}356else357c = lex_get_save(lex, error);358}359360/* the actual value is at most of the same length as the source361string, because:362- shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte363- a single \uXXXX escape (length 6) is converted to at most 3 bytes364- two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair365are converted to 4 bytes366*/367lex->value.string = jsonp_malloc(lex->saved_text.length + 1);368if(!lex->value.string) {369/* this is not very nice, since TOKEN_INVALID is returned */370goto out;371}372373/* the target */374t = lex->value.string;375376/* + 1 to skip the " */377p = strbuffer_value(&lex->saved_text) + 1;378379while(*p != '"') {380if(*p == '\\') {381p++;382if(*p == 'u') {383char buffer[4];384int length;385int32_t value;386387value = decode_unicode_escape(p);388p += 5;389390if(0xD800 <= value && value <= 0xDBFF) {391/* surrogate pair */392if(*p == '\\' && *(p + 1) == 'u') {393int32_t value2 = decode_unicode_escape(++p);394p += 5;395396if(0xDC00 <= value2 && value2 <= 0xDFFF) {397/* valid second surrogate */398value =399((value - 0xD800) << 10) +400(value2 - 0xDC00) +4010x10000;402}403else {404/* invalid second surrogate */405error_set(error, lex,406"invalid Unicode '\\u%04X\\u%04X'",407value, value2);408goto out;409}410}411else {412/* no second surrogate */413error_set(error, lex, "invalid Unicode '\\u%04X'",414value);415goto out;416}417}418else if(0xDC00 <= value && value <= 0xDFFF) {419error_set(error, lex, "invalid Unicode '\\u%04X'", value);420goto out;421}422else if(value == 0)423{424error_set(error, lex, 
"\\u0000 is not allowed");425goto out;426}427428if(utf8_encode(value, buffer, &length))429assert(0);430431memcpy(t, buffer, length);432t += length;433}434else {435switch(*p) {436case '"': case '\\': case '/':437*t = *p; break;438case 'b': *t = '\b'; break;439case 'f': *t = '\f'; break;440case 'n': *t = '\n'; break;441case 'r': *t = '\r'; break;442case 't': *t = '\t'; break;443default: assert(0);444}445t++;446p++;447}448}449else450*(t++) = *(p++);451}452*t = '\0';453lex->token = TOKEN_STRING;454return;455456out:457jsonp_free(lex->value.string);458}459460#ifndef JANSSON_USING_CMAKE /* disabled if using cmake */461#if JSON_INTEGER_IS_LONG_LONG462#ifdef _MSC_VER /* Microsoft Visual Studio */463#define json_strtoint _strtoi64464#else465#define json_strtoint strtoll466#endif467#else468#define json_strtoint strtol469#endif470#endif471472static int lex_scan_number(lex_t *lex, int c, json_error_t *error)473{474const char *saved_text;475char *end;476double value;477478lex->token = TOKEN_INVALID;479480if(c == '-')481c = lex_get_save(lex, error);482483if(c == '0') {484c = lex_get_save(lex, error);485if(l_isdigit(c)) {486lex_unget_unsave(lex, c);487goto out;488}489}490else if(l_isdigit(c)) {491c = lex_get_save(lex, error);492while(l_isdigit(c))493c = lex_get_save(lex, error);494}495else {496lex_unget_unsave(lex, c);497goto out;498}499500if(c != '.' 
&& c != 'E' && c != 'e') {501json_int_t value;502503lex_unget_unsave(lex, c);504505saved_text = strbuffer_value(&lex->saved_text);506507errno = 0;508value = json_strtoint(saved_text, &end, 10);509if(errno == ERANGE) {510if(value < 0)511error_set(error, lex, "too big negative integer");512else513error_set(error, lex, "too big integer");514goto out;515}516517assert(end == saved_text + lex->saved_text.length);518519lex->token = TOKEN_INTEGER;520lex->value.integer = value;521return 0;522}523524if(c == '.') {525c = lex_get(lex, error);526if(!l_isdigit(c)) {527lex_unget(lex, c);528goto out;529}530lex_save(lex, c);531532c = lex_get_save(lex, error);533while(l_isdigit(c))534c = lex_get_save(lex, error);535}536537if(c == 'E' || c == 'e') {538c = lex_get_save(lex, error);539if(c == '+' || c == '-')540c = lex_get_save(lex, error);541542if(!l_isdigit(c)) {543lex_unget_unsave(lex, c);544goto out;545}546547c = lex_get_save(lex, error);548while(l_isdigit(c))549c = lex_get_save(lex, error);550}551552lex_unget_unsave(lex, c);553554if(jsonp_strtod(&lex->saved_text, &value)) {555error_set(error, lex, "real number overflow");556goto out;557}558559lex->token = TOKEN_REAL;560lex->value.real = value;561return 0;562563out:564return -1;565}566567static int lex_scan(lex_t *lex, json_error_t *error)568{569int c;570571strbuffer_clear(&lex->saved_text);572573if(lex->token == TOKEN_STRING) {574jsonp_free(lex->value.string);575lex->value.string = NULL;576}577578c = lex_get(lex, error);579while(c == ' ' || c == '\t' || c == '\n' || c == '\r')580c = lex_get(lex, error);581582if(c == STREAM_STATE_EOF) {583lex->token = TOKEN_EOF;584goto out;585}586587if(c == STREAM_STATE_ERROR) {588lex->token = TOKEN_INVALID;589goto out;590}591592lex_save(lex, c);593594if(c == '{' || c == '}' || c == '[' || c == ']' || c == ':' || c == ',')595lex->token = c;596597else if(c == '"')598lex_scan_string(lex, error);599600else if(l_isdigit(c) || c == '-') {601if(lex_scan_number(lex, c, error))602goto out;603}604605else 
if(l_isalpha(c)) {606/* eat up the whole identifier for clearer error messages */607const char *saved_text;608609c = lex_get_save(lex, error);610while(l_isalpha(c))611c = lex_get_save(lex, error);612lex_unget_unsave(lex, c);613614saved_text = strbuffer_value(&lex->saved_text);615616if(strcmp(saved_text, "true") == 0)617lex->token = TOKEN_TRUE;618else if(strcmp(saved_text, "false") == 0)619lex->token = TOKEN_FALSE;620else if(strcmp(saved_text, "null") == 0)621lex->token = TOKEN_NULL;622else623lex->token = TOKEN_INVALID;624}625626else {627/* save the rest of the input UTF-8 sequence to get an error628message of valid UTF-8 */629lex_save_cached(lex);630lex->token = TOKEN_INVALID;631}632633out:634return lex->token;635}636637static char *lex_steal_string(lex_t *lex)638{639char *result = NULL;640if(lex->token == TOKEN_STRING)641{642result = lex->value.string;643lex->value.string = NULL;644}645return result;646}647648static int lex_init(lex_t *lex, get_func get, void *data)649{650stream_init(&lex->stream, get, data);651if(strbuffer_init(&lex->saved_text))652return -1;653654lex->token = TOKEN_INVALID;655return 0;656}657658static void lex_close(lex_t *lex)659{660if(lex->token == TOKEN_STRING)661jsonp_free(lex->value.string);662strbuffer_close(&lex->saved_text);663}664665666/*** parser ***/667668static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error);669670static json_t *parse_object(lex_t *lex, size_t flags, json_error_t *error)671{672json_t *object = json_object();673if(!object)674return NULL;675676lex_scan(lex, error);677if(lex->token == '}')678return object;679680while(1) {681char *key;682json_t *value;683684if(lex->token != TOKEN_STRING) {685error_set(error, lex, "string or '}' expected");686goto error;687}688689key = lex_steal_string(lex);690if(!key)691return NULL;692693if(flags & JSON_REJECT_DUPLICATES) {694if(json_object_get(object, key)) {695jsonp_free(key);696error_set(error, lex, "duplicate object key");697goto error;698}699}700701lex_scan(lex, 
error);702if(lex->token != ':') {703jsonp_free(key);704error_set(error, lex, "':' expected");705goto error;706}707708lex_scan(lex, error);709value = parse_value(lex, flags, error);710if(!value) {711jsonp_free(key);712goto error;713}714715if(json_object_set_nocheck(object, key, value)) {716jsonp_free(key);717json_decref(value);718goto error;719}720721json_decref(value);722jsonp_free(key);723724lex_scan(lex, error);725if(lex->token != ',')726break;727728lex_scan(lex, error);729}730731if(lex->token != '}') {732error_set(error, lex, "'}' expected");733goto error;734}735736return object;737738error:739json_decref(object);740return NULL;741}742743static json_t *parse_array(lex_t *lex, size_t flags, json_error_t *error)744{745json_t *array = json_array();746if(!array)747return NULL;748749lex_scan(lex, error);750if(lex->token == ']')751return array;752753while(lex->token) {754json_t *elem = parse_value(lex, flags, error);755if(!elem)756goto error;757758if(json_array_append(array, elem)) {759json_decref(elem);760goto error;761}762json_decref(elem);763764lex_scan(lex, error);765if(lex->token != ',')766break;767768lex_scan(lex, error);769}770771if(lex->token != ']') {772error_set(error, lex, "']' expected");773goto error;774}775776return array;777778error:779json_decref(array);780return NULL;781}782783static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error)784{785json_t *json;786double value;787788switch(lex->token) {789case TOKEN_STRING: {790json = json_string_nocheck(lex->value.string);791break;792}793794case TOKEN_INTEGER: {795if (flags & JSON_DECODE_INT_AS_REAL) {796if(jsonp_strtod(&lex->saved_text, &value)) {797error_set(error, lex, "real number overflow");798return NULL;799}800json = json_real(value);801} else {802json = json_integer(lex->value.integer);803}804break;805}806807case TOKEN_REAL: {808json = json_real(lex->value.real);809break;810}811812case TOKEN_TRUE:813json = json_true();814break;815816case TOKEN_FALSE:817json = 
json_false();818break;819820case TOKEN_NULL:821json = json_null();822break;823824case '{':825json = parse_object(lex, flags, error);826break;827828case '[':829json = parse_array(lex, flags, error);830break;831832case TOKEN_INVALID:833error_set(error, lex, "invalid token");834return NULL;835836default:837error_set(error, lex, "unexpected token");838return NULL;839}840841if(!json)842return NULL;843844return json;845}846847static json_t *parse_json(lex_t *lex, size_t flags, json_error_t *error)848{849json_t *result;850851lex_scan(lex, error);852if(!(flags & JSON_DECODE_ANY)) {853if(lex->token != '[' && lex->token != '{') {854error_set(error, lex, "'[' or '{' expected");855return NULL;856}857}858859result = parse_value(lex, flags, error);860if(!result)861return NULL;862863if(!(flags & JSON_DISABLE_EOF_CHECK)) {864lex_scan(lex, error);865if(lex->token != TOKEN_EOF) {866error_set(error, lex, "end of file expected");867json_decref(result);868return NULL;869}870}871872if(error) {873/* Save the position even though there was no error */874error->position = lex->stream.position;875}876877return result;878}879880typedef struct881{882const char *data;883int pos;884} string_data_t;885886static int string_get(void *data)887{888char c;889string_data_t *stream = (string_data_t *)data;890c = stream->data[stream->pos];891if(c == '\0')892return EOF;893else894{895stream->pos++;896return (unsigned char)c;897}898}899900json_t *json_loads(const char *string, size_t flags, json_error_t *error)901{902lex_t lex;903json_t *result;904string_data_t stream_data;905906jsonp_error_init(error, "<string>");907908if (string == NULL) {909error_set(error, NULL, "wrong arguments");910return NULL;911}912913stream_data.data = string;914stream_data.pos = 0;915916if(lex_init(&lex, string_get, (void *)&stream_data))917return NULL;918919result = parse_json(&lex, flags, error);920921lex_close(&lex);922return result;923}924925typedef struct926{927const char *data;928size_t len;929size_t pos;930} 
buffer_data_t;931932static int buffer_get(void *data)933{934char c;935buffer_data_t *stream = data;936if(stream->pos >= stream->len)937return EOF;938939c = stream->data[stream->pos];940stream->pos++;941return (unsigned char)c;942}943944json_t *json_loadb(const char *buffer, size_t buflen, size_t flags, json_error_t *error)945{946lex_t lex;947json_t *result;948buffer_data_t stream_data;949950jsonp_error_init(error, "<buffer>");951952if (buffer == NULL) {953error_set(error, NULL, "wrong arguments");954return NULL;955}956957stream_data.data = buffer;958stream_data.pos = 0;959stream_data.len = buflen;960961if(lex_init(&lex, buffer_get, (void *)&stream_data))962return NULL;963964result = parse_json(&lex, flags, error);965966lex_close(&lex);967return result;968}969970json_t *json_loadf(FILE *input, size_t flags, json_error_t *error)971{972lex_t lex;973const char *source;974json_t *result;975976if(input == stdin)977source = "<stdin>";978else979source = "<stream>";980981jsonp_error_init(error, source);982983if (input == NULL) {984error_set(error, NULL, "wrong arguments");985return NULL;986}987988if(lex_init(&lex, (get_func)fgetc, input))989return NULL;990991result = parse_json(&lex, flags, error);992993lex_close(&lex);994return result;995}996997json_t *json_load_file(const char *path, size_t flags, json_error_t *error)998{999json_t *result;1000FILE *fp;10011002jsonp_error_init(error, path);10031004if (path == NULL) {1005error_set(error, NULL, "wrong arguments");1006return NULL;1007}10081009fp = fopen(path, "rb");1010if(!fp)1011{1012error_set(error, NULL, "unable to open %s: %s",1013path, strerror(errno));1014return NULL;1015}10161017result = json_loadf(fp, flags, error);10181019fclose(fp);1020return result;1021}10221023#define MAX_BUF_LEN 102410241025typedef struct1026{1027char data[MAX_BUF_LEN];1028size_t len;1029size_t pos;1030json_load_callback_t callback;1031void *arg;1032} callback_data_t;10331034static int callback_get(void *data)1035{1036char c;1037callback_data_t 
*stream = data;10381039if(stream->pos >= stream->len) {1040stream->pos = 0;1041stream->len = stream->callback(stream->data, MAX_BUF_LEN, stream->arg);1042if(stream->len == 0 || stream->len == (size_t)-1)1043return EOF;1044}10451046c = stream->data[stream->pos];1047stream->pos++;1048return (unsigned char)c;1049}10501051json_t *json_load_callback(json_load_callback_t callback, void *arg, size_t flags, json_error_t *error)1052{1053lex_t lex;1054json_t *result;10551056callback_data_t stream_data;10571058memset(&stream_data, 0, sizeof(stream_data));1059stream_data.callback = callback;1060stream_data.arg = arg;10611062jsonp_error_init(error, "<callback>");10631064if (callback == NULL) {1065error_set(error, NULL, "wrong arguments");1066return NULL;1067}10681069if(lex_init(&lex, (get_func)callback_get, &stream_data))1070return NULL;10711072result = parse_json(&lex, flags, error);10731074lex_close(&lex);1075return result;1076}107710781079