GitHub Repository: aos/grafana-agent
Path: blob/main/pkg/river/parser/internal.go

package parser

import (
	"fmt"
	"strings"

	"github.com/grafana/agent/pkg/river/ast"
	"github.com/grafana/agent/pkg/river/diag"
	"github.com/grafana/agent/pkg/river/scanner"
	"github.com/grafana/agent/pkg/river/token"
)

// parser implements the River parser.
//
// It is only safe for callers to use exported methods as entrypoints for
// parsing.
//
// Each Parse* and parse* method will describe the EBNF grammar being used for
// parsing that non-terminal. The EBNF grammar will be written as LL(1) and
// should directly represent the code.
//
// The parser will continue on encountering errors to allow a more complete
// list of errors to be returned to the user. The resulting AST should be
// discarded if errors were encountered during parsing.
type parser struct {
	file     *token.File
	diags    diag.Diagnostics
	scanner  *scanner.Scanner
	comments []ast.CommentGroup

	pos token.Pos   // Current token position
	tok token.Token // Current token
	lit string      // Current token literal

	// Position of the last error written. Subsequent parse errors on the same
	// line are ignored.
	lastError token.Position
}

// newParser creates a new parser which will parse the provided src.
func newParser(filename string, src []byte) *parser {
	file := token.NewFile(filename)

	p := &parser{
		file: file,
	}

	p.scanner = scanner.New(file, src, func(pos token.Pos, msg string) {
		p.diags.Add(diag.Diagnostic{
			Severity: diag.SeverityLevelError,
			StartPos: file.PositionFor(pos),
			Message:  msg,
		})
	}, scanner.IncludeComments)

	p.next()
	return p
}
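
// A minimal sketch of in-package use (illustrative only, not part of the
// original source): construct a parser over a source buffer, parse the file,
// and then inspect the collected diagnostics. The filename is an arbitrary
// example value.
//
//	p := newParser("config.river", src)
//	file := p.ParseFile()
//	// p.diags now holds any parse errors; per the doc comment above, the
//	// returned AST should be discarded when diagnostics were recorded.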

// next advances the parser to the next non-comment token.
func (p *parser) next() {
	p.next0()

	for p.tok == token.COMMENT {
		p.consumeCommentGroup()
	}
}

// next0 advances the parser to the next token. next0 should not be used
// directly by parse methods; call next instead.
func (p *parser) next0() { p.pos, p.tok, p.lit = p.scanner.Scan() }

// consumeCommentGroup consumes a group of adjacent comments, adding it to p's
// comment list.
func (p *parser) consumeCommentGroup() {
	var list []*ast.Comment

	endline := p.pos.Position().Line
	for p.tok == token.COMMENT && p.pos.Position().Line <= endline+1 {
		var comment *ast.Comment
		comment, endline = p.consumeComment()
		list = append(list, comment)
	}

	p.comments = append(p.comments, ast.CommentGroup(list))
}

// consumeComment consumes a comment and returns it with the line number it
// ends on.
func (p *parser) consumeComment() (comment *ast.Comment, endline int) {
	endline = p.pos.Position().Line

	if p.lit[1] == '*' {
		// Block comments may end on a different line than where they start. Scan
		// the comment for newlines and adjust endline accordingly.
		//
		// NOTE: don't use range here, since range will unnecessarily decode
		// Unicode code points and slow down the parser.
		for i := 0; i < len(p.lit); i++ {
			if p.lit[i] == '\n' {
				endline++
			}
		}
	}

	comment = &ast.Comment{StartPos: p.pos, Text: p.lit}
	p.next0()
	return
}

// advance consumes tokens up to (but not including) the specified token.
// advance will stop consuming tokens if EOF is reached before to.
func (p *parser) advance(to token.Token) {
	for p.tok != token.EOF {
		if p.tok == to {
			return
		}
		p.next()
	}
}

// advanceAny consumes tokens up to (but not including) any of the tokens in
// the to set.
func (p *parser) advanceAny(to map[token.Token]struct{}) {
	for p.tok != token.EOF {
		if _, inSet := to[p.tok]; inSet {
			return
		}
		p.next()
	}
}

// expect consumes the next token. It records an error if the consumed token
// was not t.
func (p *parser) expect(t token.Token) (pos token.Pos, tok token.Token, lit string) {
	pos, tok, lit = p.pos, p.tok, p.lit
	if tok != t {
		p.addErrorf("expected %s, got %s", t, p.tok)
	}
	p.next()
	return
}

func (p *parser) addErrorf(format string, args ...interface{}) {
	pos := p.file.PositionFor(p.pos)

	// Ignore errors which occur on the same line as the last reported error.
	if p.lastError.Line == pos.Line {
		return
	}
	p.lastError = pos

	p.diags.Add(diag.Diagnostic{
		Severity: diag.SeverityLevelError,
		StartPos: pos,
		Message:  fmt.Sprintf(format, args...),
	})
}

// ParseFile parses an entire file.
//
//	File = Body
func (p *parser) ParseFile() *ast.File {
	body := p.parseBody(token.EOF)

	return &ast.File{
		Name:     p.file.Name(),
		Body:     body,
		Comments: p.comments,
	}
}

// parseBody parses a series of statements up to (but not consuming) the
// "until" token, which terminates the body.
//
//	Body = [ Statement { terminator Statement } ]
func (p *parser) parseBody(until token.Token) ast.Body {
	var body ast.Body

	for p.tok != until && p.tok != token.EOF {
		stmt := p.parseStatement()
		if stmt != nil {
			body = append(body, stmt)
		}

		if p.tok == until {
			break
		}

		if p.tok != token.TERMINATOR {
			p.addErrorf("expected %s, got %s", token.TERMINATOR, p.tok)
			p.consumeStatement()
		}
		p.next()
	}

	return body
}

// consumeStatement consumes tokens for the remainder of a statement (i.e., up
// to but not including a terminator). consumeStatement will keep track of the
// number of {}, [], and () pairs, only returning after the count of pairs is
// <= 0.
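//
// For example (illustrative): when recovering inside a statement such as
// `foo { bar = 1 }`, a terminator that appears between the "{" and its
// matching "}" does not end recovery; consumeStatement keeps consuming until
// the pair counts fall back to zero or below.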
func (p *parser) consumeStatement() {
	var curlyPairs, brackPairs, parenPairs int

	for p.tok != token.EOF {
		switch p.tok {
		case token.LCURLY:
			curlyPairs++
		case token.RCURLY:
			curlyPairs--
		case token.LBRACK:
			brackPairs++
		case token.RBRACK:
			brackPairs--
		case token.LPAREN:
			parenPairs++
		case token.RPAREN:
			parenPairs--
		}

		if p.tok == token.TERMINATOR {
			// Only return after we've consumed all pairs. It's possible for pairs to
			// be less than zero if our statement started in a surrounding pair.
			if curlyPairs <= 0 && brackPairs <= 0 && parenPairs <= 0 {
				return
			}
		}

		p.next()
	}
}

// parseStatement parses an individual statement within a body.
//
//	Statement = Attribute | Block
//	Attribute = identifier "=" Expression
//	Block = BlockName "{" Body "}"
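//
// For example (illustrative River syntax matching the grammar above), an
// attribute looks like
//
//	log_level = "debug"
//
// while a block looks like
//
//	logging {
//		level = "debug"
//	}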
func (p *parser) parseStatement() ast.Stmt {
	blockName := p.parseBlockName()
	if blockName == nil {
		// parseBlockName failed; skip to the next identifier which would start a
		// new Statement.
		p.advance(token.IDENT)
		return nil
	}

	// p.tok is now the first token after the identifier in the attribute or
	// block name.
	switch p.tok {
	case token.ASSIGN: // Attribute
		p.next() // Consume "="

		if len(blockName.Fragments) != 1 {
			attrName := strings.Join(blockName.Fragments, ".")
			p.diags.Add(diag.Diagnostic{
				Severity: diag.SeverityLevelError,
				StartPos: blockName.Start.Position(),
				EndPos:   blockName.Start.Add(len(attrName) - 1).Position(),
				Message:  `attribute names may only consist of a single identifier with no "."`,
			})
		} else if blockName.LabelPos != token.NoPos {
			p.diags.Add(diag.Diagnostic{
				Severity: diag.SeverityLevelError,
				StartPos: blockName.LabelPos.Position(),
				// Add 1 to the label length to account for the closing quote, which is
				// stripped from the label value.
				EndPos:  blockName.LabelPos.Add(len(blockName.Label) + 1).Position(),
				Message: `attribute names may not have labels`,
			})
		}

		return &ast.AttributeStmt{
			Name: &ast.Ident{
				Name:    blockName.Fragments[0],
				NamePos: blockName.Start,
			},
			Value: p.ParseExpression(),
		}

	case token.LCURLY: // Block
		block := &ast.BlockStmt{
			Name:     blockName.Fragments,
			NamePos:  blockName.Start,
			Label:    blockName.Label,
			LabelPos: blockName.LabelPos,
		}

		block.LCurlyPos, _, _ = p.expect(token.LCURLY)
		block.Body = p.parseBody(token.RCURLY)
		block.RCurlyPos, _, _ = p.expect(token.RCURLY)

		return block

	default:
		if blockName.ValidAttribute() {
			// The block name could be used for an attribute or a block (no label,
			// only one name fragment), so inform the user of both cases.
			p.addErrorf("expected attribute assignment or block body, got %s", p.tok)
		} else {
			p.addErrorf("expected block body, got %s", p.tok)
		}

		// Give up on this statement and skip to the next identifier.
		p.advance(token.IDENT)
		return nil
	}
}

// parseBlockName parses the name used for a block.
//
//	BlockName = identifier { "." identifier } [ string ]
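//
// For example (illustrative): the block header
//
//	prometheus.remote_write "primary" { ... }
//
// yields the fragments ["prometheus", "remote_write"] and the label
// "primary".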
func (p *parser) parseBlockName() *blockName {
	if p.tok != token.IDENT {
		p.addErrorf("expected identifier, got %s", p.tok)
		return nil
	}

	var bn blockName

	bn.Fragments = append(bn.Fragments, p.lit) // Append first identifier
	bn.Start = p.pos
	p.next()

	// { "." identifier }
	for p.tok == token.DOT {
		p.next() // consume "."

		if p.tok != token.IDENT {
			p.addErrorf("expected identifier, got %s", p.tok)

			// Continue here to parse as much as possible, even though the block name
			// will be malformed.
		}

		bn.Fragments = append(bn.Fragments, p.lit)
		p.next()
	}

	// [ string ]
	if p.tok != token.ASSIGN && p.tok != token.LCURLY {
		if p.tok == token.STRING {
			// Strip the quotes if it's non-empty. We then require any non-empty
			// label to be a valid identifier.
			if len(p.lit) > 2 {
				bn.Label = p.lit[1 : len(p.lit)-1]
				if !isValidIdentifier(bn.Label) {
					p.addErrorf("expected block label to be a valid identifier")
				}
			}
			bn.LabelPos = p.pos
		} else {
			p.addErrorf("expected block label, got %s", p.tok)
		}
		p.next()
	}

	return &bn
}

type blockName struct {
	Fragments []string // Name fragments (i.e., `a.b.c`)
	Label     string   // Optional user label

	Start    token.Pos
	LabelPos token.Pos
}

// ValidAttribute returns true if the blockName can be used as an attribute
// name.
func (n blockName) ValidAttribute() bool {
	return len(n.Fragments) == 1 && n.Label == ""
}

// ParseExpression parses a single expression.
//
//	Expression = BinOpExpr
func (p *parser) ParseExpression() ast.Expr {
	return p.parseBinOp(1)
}

// parseBinOp is the entrypoint for binary expressions. If there are no binary
// expressions in the current state, a single operand will be returned instead.
//
//	BinOpExpr = OrExpr
//	OrExpr = AndExpr { "||" AndExpr }
//	AndExpr = CmpExpr { "&&" CmpExpr }
//	CmpExpr = AddExpr { cmp_op AddExpr }
//	AddExpr = MulExpr { add_op MulExpr }
//	MulExpr = PowExpr { mul_op PowExpr }
//
// parseBinOp avoids the need for multiple non-terminal functions by providing
// context for operator precedence in recursive calls. inPrec specifies the
// incoming operator precedence. On the first call to parseBinOp, inPrec should
// be 1.
//
// parseBinOp can only handle left-associative operators, so PowExpr is handled
// by parsePowExpr.
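//
// For example (illustrative): because MulExpr binds tighter than AddExpr in
// the grammar above, an input such as
//
//	1 + 2 * 3
//
// is parsed as 1 + (2 * 3).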
399
func (p *parser) parseBinOp(inPrec int) ast.Expr {
400
// The EBNF documented by the function can be generalized into:
401
//
402
// CurPrecExpr = NextPrecExpr { cur_prec_ops NextPrecExpr }
403
//
404
// The code below implements this specific grammar, continually collecting
405
// everything at the same precedence level into the LHS of the expression
406
// while recursively calling parseBinOp for higher-precedence operations.
407
408
lhs := p.parsePowExpr()
409
410
for {
411
tok, pos, prec := p.tok, p.pos, p.tok.BinaryPrecedence()
412
if prec < inPrec {
413
// The next operator is lower precedence; drop up a level in our call
414
// stack.
415
return lhs
416
}
417
p.next() // Consume the operator
418
419
// Recurse with a higher precedence level, which ensures that operators at
420
// the same precedence level don't get handled in the recursive call.
421
rhs := p.parseBinOp(prec + 1)
422
423
lhs = &ast.BinaryExpr{
424
Left: lhs,
425
Kind: tok,
426
KindPos: pos,
427
Right: rhs,
428
}
429
}
430
}

// parsePowExpr is like parseBinOp but handles the right-associative pow
// operator.
//
//	PowExpr = UnaryExpr [ "^" PowExpr ]
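//
// For example (illustrative): because "^" is right-associative,
//
//	2 ^ 3 ^ 2
//
// is parsed as 2 ^ (3 ^ 2).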
func (p *parser) parsePowExpr() ast.Expr {
	lhs := p.parseUnaryExpr()

	if p.tok == token.POW {
		pos := p.pos
		p.next() // Consume ^

		return &ast.BinaryExpr{
			Left:    lhs,
			Kind:    token.POW,
			KindPos: pos,
			Right:   p.parsePowExpr(),
		}
	}

	return lhs
}

// parseUnaryExpr parses a unary expression.
//
//	UnaryExpr = OperExpr | unary_op UnaryExpr
//
//	OperExpr = PrimaryExpr { AccessExpr | IndexExpr | CallExpr }
//	AccessExpr = "." identifier
//	IndexExpr = "[" Expression "]"
//	CallExpr = "(" [ ExpressionList ] ")"
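//
// For example (illustrative): the OperExpr suffixes may be chained, so
//
//	targets[0].address
//
// is parsed as an IndexExpr over the identifier targets followed by an
// AccessExpr, and env("HOME") is a CallExpr over the identifier env.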
func (p *parser) parseUnaryExpr() ast.Expr {
	if isUnaryOp(p.tok) {
		op, pos := p.tok, p.pos
		p.next() // Consume op

		return &ast.UnaryExpr{
			Kind:    op,
			KindPos: pos,
			Value:   p.parseUnaryExpr(),
		}
	}

	primary := p.parsePrimaryExpr()

NextOper:
	for {
		switch p.tok {
		case token.DOT: // AccessExpr
			p.next()
			namePos, _, name := p.expect(token.IDENT)

			primary = &ast.AccessExpr{
				Value: primary,
				Name: &ast.Ident{
					Name:    name,
					NamePos: namePos,
				},
			}

		case token.LBRACK: // IndexExpr
			lBrack, _, _ := p.expect(token.LBRACK)
			index := p.ParseExpression()
			rBrack, _, _ := p.expect(token.RBRACK)

			primary = &ast.IndexExpr{
				Value:     primary,
				LBrackPos: lBrack,
				Index:     index,
				RBrackPos: rBrack,
			}

		case token.LPAREN: // CallExpr
			var args []ast.Expr

			lParen, _, _ := p.expect(token.LPAREN)
			if p.tok != token.RPAREN {
				args = p.parseExpressionList(token.RPAREN)
			}
			rParen, _, _ := p.expect(token.RPAREN)

			primary = &ast.CallExpr{
				Value:     primary,
				LParenPos: lParen,
				Args:      args,
				RParenPos: rParen,
			}

		case token.STRING, token.LCURLY:
			// A user might be trying to assign a block to an attribute. Let's
			// attempt to parse the remainder as a block to tell them something is
			// wrong.
			//
			// If we can't parse the remainder of the expression as a block, we give
			// up and parse the remainder of the entire statement.
			if p.tok == token.STRING {
				p.next()
			}
			if _, tok, _ := p.expect(token.LCURLY); tok != token.LCURLY {
				p.consumeStatement()
				return primary
			}
			p.parseBody(token.RCURLY)

			end, tok, _ := p.expect(token.RCURLY)
			if tok != token.RCURLY {
				p.consumeStatement()
				return primary
			}

			p.diags.Add(diag.Diagnostic{
				Severity: diag.SeverityLevelError,
				StartPos: ast.StartPos(primary).Position(),
				EndPos:   end.Position(),
				Message:  "cannot use a block as an expression",
			})

		default:
			break NextOper
		}
	}

	return primary
}

func isUnaryOp(tok token.Token) bool {
	switch tok {
	case token.NOT, token.SUB:
		return true
	default:
		return false
	}
}

// parsePrimaryExpr parses a primary expression.
//
//	PrimaryExpr = LiteralValue | ArrayExpr | ObjectExpr
//
//	LiteralValue = identifier | string | number | float | bool | null |
//	               "(" Expression ")"
//
//	ArrayExpr = "[" [ ExpressionList ] "]"
//	ObjectExpr = "{" [ FieldList ] "}"
func (p *parser) parsePrimaryExpr() ast.Expr {
	switch p.tok {
	case token.IDENT:
		res := &ast.IdentifierExpr{
			Ident: &ast.Ident{
				Name:    p.lit,
				NamePos: p.pos,
			},
		}
		p.next()
		return res

	case token.STRING, token.NUMBER, token.FLOAT, token.BOOL, token.NULL:
		res := &ast.LiteralExpr{
			Kind:     p.tok,
			Value:    p.lit,
			ValuePos: p.pos,
		}
		p.next()
		return res

	case token.LPAREN:
		lParen, _, _ := p.expect(token.LPAREN)
		expr := p.ParseExpression()
		rParen, _, _ := p.expect(token.RPAREN)

		return &ast.ParenExpr{
			LParenPos: lParen,
			Inner:     expr,
			RParenPos: rParen,
		}

	case token.LBRACK:
		var res ast.ArrayExpr

		res.LBrackPos, _, _ = p.expect(token.LBRACK)
		if p.tok != token.RBRACK {
			res.Elements = p.parseExpressionList(token.RBRACK)
		}
		res.RBrackPos, _, _ = p.expect(token.RBRACK)
		return &res

	case token.LCURLY:
		var res ast.ObjectExpr

		res.LCurlyPos, _, _ = p.expect(token.LCURLY)
		if p.tok != token.RCURLY {
			res.Fields = p.parseFieldList(token.RCURLY)
		}
		res.RCurlyPos, _, _ = p.expect(token.RCURLY)
		return &res
	}

	p.addErrorf("expected expression, got %s", p.tok)
	res := &ast.LiteralExpr{Kind: token.NULL, Value: "null", ValuePos: p.pos}
	p.advanceAny(statementEnd) // Eat up the rest of the line
	return res
}

var statementEnd = map[token.Token]struct{}{
	token.TERMINATOR: {},
	token.RPAREN:     {},
	token.RCURLY:     {},
	token.RBRACK:     {},
	token.COMMA:      {},
}

// parseExpressionList parses a list of expressions.
//
//	ExpressionList = Expression { "," Expression } [ "," ]
func (p *parser) parseExpressionList(until token.Token) []ast.Expr {
	var exprs []ast.Expr

	for p.tok != until && p.tok != token.EOF {
		exprs = append(exprs, p.ParseExpression())

		if p.tok == until {
			break
		}
		if p.tok != token.COMMA {
			p.addErrorf("missing ',' in expression list")
		}
		p.next()
	}

	return exprs
}

// parseFieldList parses a list of fields in an object.
//
//	FieldList = Field { "," Field } [ "," ]
func (p *parser) parseFieldList(until token.Token) []*ast.ObjectField {
	var fields []*ast.ObjectField

	for p.tok != until && p.tok != token.EOF {
		fields = append(fields, p.parseField())

		if p.tok == until {
			break
		}
		if p.tok != token.COMMA {
			p.addErrorf("missing ',' in field list")
		}
		p.next()
	}

	return fields
}

// parseField parses a field in an object.
//
//	Field = ( string | identifier ) "=" Expression
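//
// For example (illustrative): in the object literal
//
//	{ app = "agent", "instance id" = 1 }
//
// the first field name is an identifier and the second is a quoted string,
// which is unwrapped and marked as Quoted in the resulting ObjectField.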
686
func (p *parser) parseField() *ast.ObjectField {
687
var field ast.ObjectField
688
689
if p.tok == token.STRING || p.tok == token.IDENT {
690
field.Name = &ast.Ident{
691
Name: p.lit,
692
NamePos: p.pos,
693
}
694
if p.tok == token.STRING && len(p.lit) > 2 {
695
// The field name is a string literal; unwrap the quotes.
696
field.Name.Name = p.lit[1 : len(p.lit)-1]
697
field.Quoted = true
698
}
699
p.next() // Consume field name
700
} else {
701
p.addErrorf("expected field name (string or identifier), got %s", p.tok)
702
p.advance(token.ASSIGN)
703
}
704
705
p.expect(token.ASSIGN)
706
707
field.Value = p.ParseExpression()
708
return &field
709
}

func isValidIdentifier(in string) bool {
	s := scanner.New(nil, []byte(in), nil, 0)
	_, tok, lit := s.Scan()
	return tok == token.IDENT && lit == in
}