Path: blob/trunk/third_party/closure/goog/net/streams/jsonstreamparser.js
1865 views
// Copyright 2015 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * @fileoverview the default JSON stream parser.
 *
 * The default JSON parser decodes the input stream (string) under the
 * following rules:
 * 1. The stream represents a valid JSON array (must start with a "[" and close
 *    with the corresponding "]"). Each element of this array is assumed to be
 *    either an array or an object, and will be decoded as a JS object and
 *    delivered. Compact array format that is not valid JSON is also supported,
 *    e.g. [1,,2].
 * 2. All JSON elements in the buffer will be decoded and delivered in a batch.
 * 3. If a high-level API does not support batch delivery (e.g. grpc), then
 *    a wrapper is expected to deliver individual elements separately
 *    and in order.
 * 4. The parser is expected to drop any data (without breaking the
 *    specified MIME format) that is not visible to the client: e.g. new lines
 *    for pretty printing; no-op data for keep-alive support.
 * 5. Fail-fast: any invalid content should abort the stream by setting the
 *    state of the parser to "invalid".
 *
 * The parser is a streamed JSON parser and is optimized in such a way
 * that it only scans the message boundary and the actual decoding of JSON
 * strings and construction of JS object are done by JSON.parse (native
 * code).
 */

goog.provide('goog.net.streams.JsonStreamParser');
goog.provide('goog.net.streams.JsonStreamParser.Options');

goog.require('goog.asserts');
goog.require('goog.json');
goog.require('goog.net.streams.StreamParser');
goog.require('goog.net.streams.utils');


goog.scope(function() {


var utils = goog.module.get('goog.net.streams.utils');


/**
 * The default JSON stream parser.
 *
 * @param {!goog.net.streams.JsonStreamParser.Options=} opt_options
 *     Configuration for the new JsonStreamParser instance.
 * @constructor
 * @struct
 * @implements {goog.net.streams.StreamParser}
 * @final
 * @package
 */
goog.net.streams.JsonStreamParser = function(opt_options) {
  /**
   * The current error message, if any.
   * @private {?string}
   */
  this.errorMessage_ = null;

  /**
   * The currently buffered result (parsed JSON objects).
   * @private {!Array<string|!Object>}
   */
  this.result_ = [];

  /**
   * The currently buffered input.
   * @private {string}
   */
  this.buffer_ = '';

  /**
   * The current stack.
   * @private {!Array<!Parser.State_>}
   */
  this.stack_ = [];

  /**
   * The current depth of the nested JSON structure.
   * @private {number}
   */
  this.depth_ = 0;

  /**
   * The current position in the streamed data.
   * @private {number}
   */
  this.pos_ = 0;

  /**
   * The current state of whether the parser is decoding a '\' escaped string.
   * @private {boolean}
   */
  this.slashed_ = false;

  /**
   * The current unicode char count. 0 means no unicode, 1-4 otherwise.
   * @private {number}
   */
  this.unicodeCount_ = 0;

  /**
   * The regexp for parsing string input. It is a global regexp: parse() sets
   * its lastIndex before every exec() call to search from the current index.
   * @private {!RegExp}
   */
  this.stringInputPattern_ = /[\\"]/g;

  /**
   * The current stream state.
   * @private {goog.net.streams.JsonStreamParser.StreamState_}
   */
  this.streamState_ = Parser.StreamState_.INIT;

  /**
   * The current parser state.
   * @private {goog.net.streams.JsonStreamParser.State_}
   */
  this.state_ = Parser.State_.INIT;

  /**
   * Whether to allow the compact JSON array format, e.g. "[1, ,2]".
   * @private {boolean}
   */
  this.allowCompactJsonArrayFormat_ =
      !!(opt_options && opt_options.allowCompactJsonArrayFormat);

  /**
   * Whether to deliver the raw message string without decoding into JS object.
   * @private {boolean}
   */
  this.deliverMessageAsRawString_ =
      !!(opt_options && opt_options.deliverMessageAsRawString);
};


/**
 * Configuration spec for newly created JSON stream parser:
 *
 * allowCompactJsonArrayFormat: whether to allow the compact JSON array format,
 *     where null is represented as empty string, e.g. "[1, ,2]".
 *
 * deliverMessageAsRawString: whether to deliver the raw message string without
 *     decoding into JS object. Semantically insignificant whitespaces in the
 *     input may be kept or ignored.
 *
 * @typedef {{
 *   allowCompactJsonArrayFormat: (boolean|undefined),
 *   deliverMessageAsRawString: (boolean|undefined),
 * }}
 */
goog.net.streams.JsonStreamParser.Options;


var Parser = goog.net.streams.JsonStreamParser;


/**
 * The stream state.
 * @private @enum {number}
 */
Parser.StreamState_ = {
  INIT: 0,
  ARRAY_OPEN: 1,
  ARRAY_END: 2,
  INVALID: 3
};


/**
 * The parser state.
 * @private @enum {number}
 */
Parser.State_ = {
  INIT: 0,
  VALUE: 1,
  OBJECT_OPEN: 2,
  OBJECT_END: 3,
  ARRAY_OPEN: 4,
  ARRAY_END: 5,
  STRING: 6,
  KEY_START: 7,
  KEY_END: 8,
  TRUE1: 9,  // T and expecting RUE ...
  TRUE2: 10,
  TRUE3: 11,
  FALSE1: 12,  // F and expecting ALSE ...
  FALSE2: 13,
  FALSE3: 14,
  FALSE4: 15,
  NULL1: 16,  // N and expecting ULL ...
  NULL2: 17,
  NULL3: 18,
  NUM_DECIMAL_POINT: 19,
  NUM_DIGIT: 20
};


/**
 * @override
 */
Parser.prototype.isInputValid = function() {
  return this.streamState_ !== Parser.StreamState_.INVALID;
};


/**
 * @override
 */
Parser.prototype.getErrorMessage = function() {
  return this.errorMessage_;
};


/**
 * @return {boolean} Whether the parser has reached the end of the stream
 *
 * TODO(updogliu): move this API to the base type.
 */
Parser.prototype.done = function() {
  return this.streamState_ === Parser.StreamState_.ARRAY_END;
};


/**
 * Get the part of input that is after the end of the stream. Call this only
 * when {@code this.done()} is true.
 *
 * @return {string} The extra input
 *
 * TODO(updogliu): move this API to the base type.
 */
Parser.prototype.getExtraInput = function() {
  return this.buffer_;
};


/**
 * Marks the stream as invalid, records the error message and throws.
 *
 * @param {string|!ArrayBuffer|!Array<number>} input
 *     The current input string (always)
 * @param {number} pos The position in the current input that triggers the error
 * @throws {!Error} Throws an error indicating where the stream is broken
 * @private
 */
Parser.prototype.error_ = function(input, pos) {
  this.streamState_ = Parser.StreamState_.INVALID;
  this.errorMessage_ = 'The stream is broken @' + this.pos_ + '/' + pos +
      '. With input:\n' + input;
  throw new Error(this.errorMessage_);
};


/**
 * Parses the next chunk of the stream.
 *
 * Returns the messages completed by this chunk (raw strings when
 * deliverMessageAsRawString was set, decoded values otherwise), or null when
 * the chunk completed no message. Input that belongs to a message still in
 * progress is kept in the parser's buffer for the next call.
 *
 * @throws {Error} Throws an error message if the input is invalid.
 * @override
 */
Parser.prototype.parse = function(input) {
  goog.asserts.assertString(input);

  // captures
  var parser = this;
  var stack = parser.stack_;
  var pattern = parser.stringInputPattern_;
  var State = Parser.State_;  // enums

  var num = input.length;

  // Index in this chunk right after the stream's opening '[' when that
  // bracket is seen during this call; 0 otherwise.
  var streamStart = 0;

  // Index in this chunk where the pending message starts. -1 means no message
  // boundary has been seen in this chunk yet, in which case the beginning of
  // the pending message (if any) lives in parser.buffer_.
  var msgStart = -1;

  var i = 0;

  while (i < num) {
    switch (parser.streamState_) {
      case Parser.StreamState_.INVALID:
        parser.error_(input, i);
        return null;

      case Parser.StreamState_.ARRAY_END:
        // Only whitespace is tolerated after the closing ']'.
        if (readMore()) {
          parser.error_(input, i);
        }
        return null;

      case Parser.StreamState_.INIT:
        if (readMore()) {
          var current = input[i++];
          parser.pos_++;

          if (current === '[') {
            parser.streamState_ = Parser.StreamState_.ARRAY_OPEN;

            streamStart = i;
            parser.state_ = State.ARRAY_OPEN;

            continue;
          } else {
            parser.error_(input, i);
          }
        }
        return null;

      case Parser.StreamState_.ARRAY_OPEN:
        parseData();

        if (parser.depth_ === 0 && parser.state_ === State.ARRAY_END) {
          // The stream array has been closed; keep whatever follows it.
          parser.streamState_ = Parser.StreamState_.ARRAY_END;
          parser.buffer_ = input.substring(i);
        } else {
          if (msgStart === -1) {
            // No message boundary in this chunk: the whole chunk extends the
            // pending message.
            parser.buffer_ += input.substring(streamStart);
          } else {
            parser.buffer_ = input.substring(msgStart);
          }
        }

        if (parser.result_.length > 0) {
          var msgs = parser.result_;
          parser.result_ = [];
          return msgs;
        }
        return null;
    }
  }

  return null;

  /**
   * @return {boolean} true if the parser needs parse more data
   */
  function readMore() {
    skipWhitespace();
    return i < num;
  }

  /**
   * Skip as many whitespaces as possible, and increments current index of
   * stream to next available char.
   */
  function skipWhitespace() {
    while (i < input.length) {
      if (utils.isJsonWhitespace(input[i])) {
        i++;
        parser.pos_++;
        continue;
      }
      break;
    }
  }

  /**
   * Parse the input JSON elements with a streamed state machine.
   *
   * Consumes characters from the current chunk until it is exhausted or the
   * stream array is closed. Inside the switch below `current` is always a
   * one-character string, because the loop exits before the switch when the
   * chunk has no more characters.
   */
  function parseData() {
    var current;

    while (true) {
      current = input[i++];
      if (!current) {
        break;
      }

      parser.pos_++;

      switch (parser.state_) {
        case State.INIT:
          if (current === '{') {
            parser.state_ = State.OBJECT_OPEN;
          } else if (current === '[') {
            parser.state_ = State.ARRAY_OPEN;
          } else if (!utils.isJsonWhitespace(current)) {
            parser.error_(input, i);
          }
          continue;

        case State.KEY_START:
        case State.OBJECT_OPEN:
          if (utils.isJsonWhitespace(current)) {
            continue;
          }
          if (parser.state_ === State.KEY_START) {
            stack.push(State.KEY_END);
          } else {
            if (current === '}') {
              addMessage('{}');
              parser.state_ = nextState();
              continue;
            } else {
              stack.push(State.OBJECT_END);
            }
          }
          if (current === '"') {
            parser.state_ = State.STRING;
          } else {
            parser.error_(input, i);
          }
          continue;


        case State.KEY_END:
        case State.OBJECT_END:
          if (utils.isJsonWhitespace(current)) {
            continue;
          }
          if (current === ':') {
            if (parser.state_ === State.OBJECT_END) {
              stack.push(State.OBJECT_END);
              parser.depth_++;
            }
            parser.state_ = State.VALUE;
          } else if (current === '}') {
            parser.depth_--;
            addMessage();
            parser.state_ = nextState();
          } else if (current === ',') {
            if (parser.state_ === State.OBJECT_END) {
              stack.push(State.OBJECT_END);
            }
            parser.state_ = State.KEY_START;
          } else {
            parser.error_(input, i);
          }
          continue;

        case State.ARRAY_OPEN:
        case State.VALUE:
          if (utils.isJsonWhitespace(current)) {
            continue;
          }
          if (parser.state_ === State.ARRAY_OPEN) {
            parser.depth_++;
            parser.state_ = State.VALUE;
            if (current === ']') {
              parser.depth_--;
              if (parser.depth_ === 0) {
                // Empty stream array: "[]".
                parser.state_ = State.ARRAY_END;
                return;
              }

              addMessage('[]');

              parser.state_ = nextState();
              continue;
            } else {
              stack.push(State.ARRAY_END);
            }
          }
          if (current === '"') {
            parser.state_ = State.STRING;
          } else if (current === '{') {
            parser.state_ = State.OBJECT_OPEN;
          } else if (current === '[') {
            parser.state_ = State.ARRAY_OPEN;
          } else if (current === 't') {
            parser.state_ = State.TRUE1;
          } else if (current === 'f') {
            parser.state_ = State.FALSE1;
          } else if (current === 'n') {
            parser.state_ = State.NULL1;
          } else if (current === '-') {
            // continue: stay in the VALUE state; the next character is
            // handled by this same case.
          } else if ('0123456789'.indexOf(current) !== -1) {
            parser.state_ = State.NUM_DIGIT;
          } else if (current === ',' && parser.allowCompactJsonArrayFormat_) {
            parser.state_ = State.VALUE;
          } else if (current === ']' && parser.allowCompactJsonArrayFormat_) {
            // Push the ']' back so that it is handled again in the state
            // popped from the stack.
            i--;
            parser.pos_--;
            parser.state_ = nextState();
          } else {
            parser.error_(input, i);
          }
          continue;

        case State.ARRAY_END:
          if (current === ',') {
            stack.push(State.ARRAY_END);
            parser.state_ = State.VALUE;

            if (parser.depth_ === 1) {
              msgStart = i;  // skip ',', including a leading one
            }
          } else if (current === ']') {
            parser.depth_--;
            if (parser.depth_ === 0) {
              // The stream array itself has been closed.
              return;
            }

            addMessage();
            parser.state_ = nextState();
          } else if (utils.isJsonWhitespace(current)) {
            continue;
          } else {
            parser.error_(input, i);
          }
          continue;

        case State.STRING:
          // pos_ was already advanced for `current`; whatever the loop below
          // consumes beyond that is added to pos_ afterwards.
          var old = i;

          STRING_LOOP: while (true) {
            // Consume the hex digits of a pending \uXXXX escape. The count
            // lives on the parser, so an escape can span chunks.
            while (parser.unicodeCount_ > 0) {
              current = input[i++];
              if (parser.unicodeCount_ === 4) {
                parser.unicodeCount_ = 0;
              } else {
                parser.unicodeCount_++;
              }
              if (!current) {
                break STRING_LOOP;
              }
            }

            if (current === '"' && !parser.slashed_) {
              parser.state_ = nextState();
              break;
            }
            if (current === '\\' && !parser.slashed_) {
              parser.slashed_ = true;
              current = input[i++];
              if (!current) {
                break;
              }
            }
            if (parser.slashed_) {
              parser.slashed_ = false;
              if (current === 'u') {
                parser.unicodeCount_ = 1;
              }
              current = input[i++];
              if (!current) {
                break;
              } else {
                continue;
              }
            }

            // Jump straight to the next '"' or '\' in the chunk.
            pattern.lastIndex = i;
            var patternResult = pattern.exec(input);
            if (!patternResult) {
              // The string continues past the end of this chunk. One past
              // the length mirrors the `i++` of a failed read.
              i = input.length + 1;
              break;
            }
            i = patternResult.index + 1;
            current = input[patternResult.index];
            if (!current) {
              break;
            }
          }

          parser.pos_ += (i - old);

          continue;

        case State.TRUE1:
          if (current === 'r') {
            parser.state_ = State.TRUE2;
          } else {
            parser.error_(input, i);
          }
          continue;

        case State.TRUE2:
          if (current === 'u') {
            parser.state_ = State.TRUE3;
          } else {
            parser.error_(input, i);
          }
          continue;

        case State.TRUE3:
          if (current === 'e') {
            parser.state_ = nextState();
          } else {
            parser.error_(input, i);
          }
          continue;

        case State.FALSE1:
          if (current === 'a') {
            parser.state_ = State.FALSE2;
          } else {
            parser.error_(input, i);
          }
          continue;

        case State.FALSE2:
          if (current === 'l') {
            parser.state_ = State.FALSE3;
          } else {
            parser.error_(input, i);
          }
          continue;

        case State.FALSE3:
          if (current === 's') {
            parser.state_ = State.FALSE4;
          } else {
            parser.error_(input, i);
          }
          continue;

        case State.FALSE4:
          if (current === 'e') {
            parser.state_ = nextState();
          } else {
            parser.error_(input, i);
          }
          continue;

        case State.NULL1:
          if (current === 'u') {
            parser.state_ = State.NULL2;
          } else {
            parser.error_(input, i);
          }
          continue;

        case State.NULL2:
          if (current === 'l') {
            parser.state_ = State.NULL3;
          } else {
            parser.error_(input, i);
          }
          continue;

        case State.NULL3:
          if (current === 'l') {
            parser.state_ = nextState();
          } else {
            parser.error_(input, i);
          }
          continue;

        case State.NUM_DECIMAL_POINT:
          if (current === '.') {
            parser.state_ = State.NUM_DIGIT;
          } else {
            parser.error_(input, i);
          }
          continue;

        case State.NUM_DIGIT:  // no need for a full validation here
          if ('0123456789.eE+-'.indexOf(current) !== -1) {
            continue;
          } else {
            // Not part of the number: push the character back so that it is
            // handled again in the state popped from the stack.
            i--;
            parser.pos_--;
            parser.state_ = nextState();
          }
          continue;

        default:
          parser.error_(input, i);
      }
    }
  }

  /**
   * @return {!goog.net.streams.JsonStreamParser.State_} the next state
   *    from the stack, or the general VALUE state.
   */
  function nextState() {
    var state = stack.pop();
    if (state != null) {
      return state;
    } else {
      return State.VALUE;
    }
  }

  /**
   * Records a completed message. Does nothing while depth_ is greater than 1.
   * When no data is given, the message text is taken from the buffered input
   * and/or the current chunk, ending at the current index.
   *
   * @param {(string)=} opt_data The message to add
   */
  function addMessage(opt_data) {
    if (parser.depth_ > 1) {
      return;
    }

    goog.asserts.assert(opt_data !== '');  // '' not possible

    if (!opt_data) {
      if (msgStart === -1) {
        opt_data = parser.buffer_ + input.substring(streamStart, i);
      } else {
        opt_data = input.substring(msgStart, i);
      }
    }

    if (parser.deliverMessageAsRawString_) {
      parser.result_.push(opt_data);
    } else {
      parser.result_.push(
          goog.asserts.assertInstanceof(goog.json.parse(opt_data), Object));
    }
    msgStart = i;
  }
};

});  // goog.scope