GitHub Repository: aos/grafana-agent
Path: blob/main/pkg/river/parser/internal.go

package parser

import (
	"fmt"
	"strings"

	"github.com/grafana/agent/pkg/river/ast"
	"github.com/grafana/agent/pkg/river/diag"
	"github.com/grafana/agent/pkg/river/scanner"
	"github.com/grafana/agent/pkg/river/token"
)

// parser implements the River parser.
//
// It is only safe for callers to use exported methods as entrypoints for
// parsing.
//
// Each Parse* and parse* method will describe the EBNF grammar being used for
// parsing that non-terminal. The EBNF grammar will be written as LL(1) and
// should directly represent the code.
//
// The parser will continue on encountering errors to allow a more complete
// list of errors to be returned to the user. The resulting AST should be
// discarded if errors were encountered during parsing.
type parser struct {
	file     *token.File
	diags    diag.Diagnostics
	scanner  *scanner.Scanner
	comments []ast.CommentGroup

	pos token.Pos   // Current token position
	tok token.Token // Current token
	lit string      // Current token literal

	// Position of the last error written. Subsequent parse errors on the same
	// line are ignored.
	lastError token.Position
}

// newParser creates a new parser which will parse the provided src.
func newParser(filename string, src []byte) *parser {
	file := token.NewFile(filename)

	p := &parser{
		file: file,
	}

	p.scanner = scanner.New(file, src, func(pos token.Pos, msg string) {
		p.diags.Add(diag.Diagnostic{
			Severity: diag.SeverityLevelError,
			StartPos: file.PositionFor(pos),
			Message:  msg,
		})
	}, scanner.IncludeComments)

	p.next()
	return p
}
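
// A minimal sketch of in-package use (illustrative only, not part of the
// original source): construct a parser over a source buffer, parse the file,
// and then inspect the collected diagnostics. The filename is an arbitrary
// example value.
//
//	p := newParser("config.river", src)
//	file := p.ParseFile()
//	// p.diags now holds any parse errors; per the doc comment above, the
//	// returned AST should be discarded when diagnostics were recorded.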

// next advances the parser to the next non-comment token.
func (p *parser) next() {
	p.next0()

	for p.tok == token.COMMENT {
		p.consumeCommentGroup()
	}
}

// next0 advances the parser to the next token. next0 should not be used
// directly by parse methods; call next instead.
func (p *parser) next0() { p.pos, p.tok, p.lit = p.scanner.Scan() }

// consumeCommentGroup consumes a group of adjacent comments, adding it to p's
// comment list.
func (p *parser) consumeCommentGroup() {
	var list []*ast.Comment

	endline := p.pos.Position().Line
	for p.tok == token.COMMENT && p.pos.Position().Line <= endline+1 {
		var comment *ast.Comment
		comment, endline = p.consumeComment()
		list = append(list, comment)
	}

	p.comments = append(p.comments, ast.CommentGroup(list))
}

// consumeComment consumes a comment and returns it with the line number it
// ends on.
func (p *parser) consumeComment() (comment *ast.Comment, endline int) {
	endline = p.pos.Position().Line

	if p.lit[1] == '*' {
		// Block comments may end on a different line than where they start. Scan
		// the comment for newlines and adjust endline accordingly.
		//
		// NOTE: don't use range here, since range will unnecessarily decode
		// Unicode code points and slow down the parser.
		for i := 0; i < len(p.lit); i++ {
			if p.lit[i] == '\n' {
				endline++
			}
		}
	}

	comment = &ast.Comment{StartPos: p.pos, Text: p.lit}
	p.next0()
	return
}

// advance consumes tokens up to (but not including) the specified token.
// advance will stop consuming tokens if EOF is reached before to.
func (p *parser) advance(to token.Token) {
	for p.tok != token.EOF {
		if p.tok == to {
			return
		}
		p.next()
	}
}

// advanceAny consumes tokens up to (but not including) any of the tokens in
// the to set.
func (p *parser) advanceAny(to map[token.Token]struct{}) {
	for p.tok != token.EOF {
		if _, inSet := to[p.tok]; inSet {
			return
		}
		p.next()
	}
}

// expect consumes the next token. It records an error if the consumed token
// was not t.
func (p *parser) expect(t token.Token) (pos token.Pos, tok token.Token, lit string) {
	pos, tok, lit = p.pos, p.tok, p.lit
	if tok != t {
		p.addErrorf("expected %s, got %s", t, p.tok)
	}
	p.next()
	return
}

func (p *parser) addErrorf(format string, args ...interface{}) {
	pos := p.file.PositionFor(p.pos)

	// Ignore errors which occur on the same line as the last reported error.
	if p.lastError.Line == pos.Line {
		return
	}
	p.lastError = pos

	p.diags.Add(diag.Diagnostic{
		Severity: diag.SeverityLevelError,
		StartPos: pos,
		Message:  fmt.Sprintf(format, args...),
	})
}

// ParseFile parses an entire file.
//
//	File = Body
func (p *parser) ParseFile() *ast.File {
	body := p.parseBody(token.EOF)

	return &ast.File{
		Name:     p.file.Name(),
		Body:     body,
		Comments: p.comments,
	}
}

// parseBody parses a series of statements up to (but not consuming) the
// "until" token, which terminates the body.
//
//	Body = [ Statement { terminator Statement } ]
func (p *parser) parseBody(until token.Token) ast.Body {
	var body ast.Body

	for p.tok != until && p.tok != token.EOF {
		stmt := p.parseStatement()
		if stmt != nil {
			body = append(body, stmt)
		}

		if p.tok == until {
			break
		}

		if p.tok != token.TERMINATOR {
			p.addErrorf("expected %s, got %s", token.TERMINATOR, p.tok)
			p.consumeStatement()
		}
		p.next()
	}

	return body
}

// consumeStatement consumes tokens for the remainder of a statement (i.e., up
// to but not including a terminator). consumeStatement will keep track of the
// number of {}, [], and () pairs, only returning after the count of pairs is
// <= 0.
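//
// For example (illustrative): when recovering inside a statement such as
// `foo { bar = 1 }`, a terminator that appears between the "{" and its
// matching "}" does not end recovery; consumeStatement keeps consuming until
// the pair counts fall back to zero or below.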
func (p *parser) consumeStatement() {
	var curlyPairs, brackPairs, parenPairs int

	for p.tok != token.EOF {
		switch p.tok {
		case token.LCURLY:
			curlyPairs++
		case token.RCURLY:
			curlyPairs--
		case token.LBRACK:
			brackPairs++
		case token.RBRACK:
			brackPairs--
		case token.LPAREN:
			parenPairs++
		case token.RPAREN:
			parenPairs--
		}

		if p.tok == token.TERMINATOR {
			// Only return after we've consumed all pairs. It's possible for pairs to
			// be less than zero if our statement started in a surrounding pair.
			if curlyPairs <= 0 && brackPairs <= 0 && parenPairs <= 0 {
				return
			}
		}

		p.next()
	}
}

// parseStatement parses an individual statement within a body.
//
//	Statement = Attribute | Block
//	Attribute = identifier "=" Expression
//	Block = BlockName "{" Body "}"
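//
// For example (illustrative River syntax matching the grammar above), an
// attribute looks like
//
//	log_level = "debug"
//
// while a block looks like
//
//	logging {
//		level = "debug"
//	}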
func (p *parser) parseStatement() ast.Stmt {
	blockName := p.parseBlockName()
	if blockName == nil {
		// parseBlockName failed; skip to the next identifier which would start a
		// new Statement.
		p.advance(token.IDENT)
		return nil
	}

	// p.tok is now the first token after the identifier in the attribute or
	// block name.
	switch p.tok {
	case token.ASSIGN: // Attribute
		p.next() // Consume "="

		if len(blockName.Fragments) != 1 {
			attrName := strings.Join(blockName.Fragments, ".")
			p.diags.Add(diag.Diagnostic{
				Severity: diag.SeverityLevelError,
				StartPos: blockName.Start.Position(),
				EndPos:   blockName.Start.Add(len(attrName) - 1).Position(),
				Message:  `attribute names may only consist of a single identifier with no "."`,
			})
		} else if blockName.LabelPos != token.NoPos {
			p.diags.Add(diag.Diagnostic{
				Severity: diag.SeverityLevelError,
				StartPos: blockName.LabelPos.Position(),
				// Add 1 to the label length to account for the closing quote, which is
				// stripped from the label value.
				EndPos:  blockName.LabelPos.Add(len(blockName.Label) + 1).Position(),
				Message: `attribute names may not have labels`,
			})
		}

		return &ast.AttributeStmt{
			Name: &ast.Ident{
				Name:    blockName.Fragments[0],
				NamePos: blockName.Start,
			},
			Value: p.ParseExpression(),
		}

	case token.LCURLY: // Block
		block := &ast.BlockStmt{
			Name:     blockName.Fragments,
			NamePos:  blockName.Start,
			Label:    blockName.Label,
			LabelPos: blockName.LabelPos,
		}

		block.LCurlyPos, _, _ = p.expect(token.LCURLY)
		block.Body = p.parseBody(token.RCURLY)
		block.RCurlyPos, _, _ = p.expect(token.RCURLY)

		return block

	default:
		if blockName.ValidAttribute() {
			// The block name could be used for an attribute or a block (no label,
			// only one name fragment), so inform the user of both cases.
			p.addErrorf("expected attribute assignment or block body, got %s", p.tok)
		} else {
			p.addErrorf("expected block body, got %s", p.tok)
		}

		// Give up on this statement and skip to the next identifier.
		p.advance(token.IDENT)
		return nil
	}
}

// parseBlockName parses the name used for a block.
//
//	BlockName = identifier { "." identifier } [ string ]
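//
// For example (illustrative): the block header
//
//	prometheus.remote_write "primary" { ... }
//
// yields the fragments ["prometheus", "remote_write"] and the label
// "primary".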
func (p *parser) parseBlockName() *blockName {
	if p.tok != token.IDENT {
		p.addErrorf("expected identifier, got %s", p.tok)
		return nil
	}

	var bn blockName

	bn.Fragments = append(bn.Fragments, p.lit) // Append first identifier
	bn.Start = p.pos
	p.next()

	// { "." identifier }
	for p.tok == token.DOT {
		p.next() // consume "."

		if p.tok != token.IDENT {
			p.addErrorf("expected identifier, got %s", p.tok)

			// Continue here to parse as much as possible, even though the block name
			// will be malformed.
		}

		bn.Fragments = append(bn.Fragments, p.lit)
		p.next()
	}

	// [ string ]
	if p.tok != token.ASSIGN && p.tok != token.LCURLY {
		if p.tok == token.STRING {
			// Strip the quotes if it's non-empty. We then require any non-empty
			// label to be a valid identifier.
			if len(p.lit) > 2 {
				bn.Label = p.lit[1 : len(p.lit)-1]
				if !isValidIdentifier(bn.Label) {
					p.addErrorf("expected block label to be a valid identifier")
				}
			}
			bn.LabelPos = p.pos
		} else {
			p.addErrorf("expected block label, got %s", p.tok)
		}
		p.next()
	}

	return &bn
}

type blockName struct {
	Fragments []string // Name fragments (i.e., `a.b.c`)
	Label     string   // Optional user label

	Start    token.Pos
	LabelPos token.Pos
}

// ValidAttribute returns true if the blockName can be used as an attribute
// name.
func (n blockName) ValidAttribute() bool {
	return len(n.Fragments) == 1 && n.Label == ""
}

// ParseExpression parses a single expression.
//
//	Expression = BinOpExpr
func (p *parser) ParseExpression() ast.Expr {
	return p.parseBinOp(1)
}

// parseBinOp is the entrypoint for binary expressions. If there are no binary
// expressions in the current state, a single operand will be returned instead.
//
//	BinOpExpr = OrExpr
//	OrExpr = AndExpr { "||" AndExpr }
//	AndExpr = CmpExpr { "&&" CmpExpr }
//	CmpExpr = AddExpr { cmp_op AddExpr }
//	AddExpr = MulExpr { add_op MulExpr }
//	MulExpr = PowExpr { mul_op PowExpr }
//
// parseBinOp avoids the need for multiple non-terminal functions by providing
// context for operator precedence in recursive calls. inPrec specifies the
// incoming operator precedence. On the first call to parseBinOp, inPrec should
// be 1.
//
// parseBinOp can only handle left-associative operators, so PowExpr is handled
// by parsePowExpr.
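//
// For example (illustrative): because MulExpr binds tighter than AddExpr in
// the grammar above, an input such as
//
//	1 + 2 * 3
//
// is parsed as 1 + (2 * 3).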
399
func (p *parser) parseBinOp(inPrec int) ast.Expr {
400
// The EBNF documented by the function can be generalized into:
401
//
402
// CurPrecExpr = NextPrecExpr { cur_prec_ops NextPrecExpr }
403
//
404
// The code below implements this specific grammar, continually collecting
405
// everything at the same precedence level into the LHS of the expression
406
// while recursively calling parseBinOp for higher-precedence operations.
407
408
lhs := p.parsePowExpr()
409
410
for {
411
tok, pos, prec := p.tok, p.pos, p.tok.BinaryPrecedence()
412
if prec < inPrec {
413
// The next operator is lower precedence; drop up a level in our call
414
// stack.
415
return lhs
416
}
417
p.next() // Consume the operator
418
419
// Recurse with a higher precedence level, which ensures that operators at
420
// the same precedence level don't get handled in the recursive call.
421
rhs := p.parseBinOp(prec + 1)
422
423
lhs = &ast.BinaryExpr{
424
Left: lhs,
425
Kind: tok,
426
KindPos: pos,
427
Right: rhs,
428
}
429
}
430
}

// parsePowExpr is like parseBinOp but handles the right-associative pow
// operator.
//
//	PowExpr = UnaryExpr [ "^" PowExpr ]
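//
// For example (illustrative): because "^" is right-associative,
//
//	2 ^ 3 ^ 2
//
// is parsed as 2 ^ (3 ^ 2).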
func (p *parser) parsePowExpr() ast.Expr {
	lhs := p.parseUnaryExpr()

	if p.tok == token.POW {
		pos := p.pos
		p.next() // Consume ^

		return &ast.BinaryExpr{
			Left:    lhs,
			Kind:    token.POW,
			KindPos: pos,
			Right:   p.parsePowExpr(),
		}
	}

	return lhs
}

// parseUnaryExpr parses a unary expression.
//
//	UnaryExpr = OperExpr | unary_op UnaryExpr
//
//	OperExpr = PrimaryExpr { AccessExpr | IndexExpr | CallExpr }
//	AccessExpr = "." identifier
//	IndexExpr = "[" Expression "]"
//	CallExpr = "(" [ ExpressionList ] ")"
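//
// For example (illustrative): the OperExpr suffixes may be chained, so
//
//	targets[0].address
//
// is parsed as an IndexExpr over the identifier targets followed by an
// AccessExpr, and env("HOME") is a CallExpr over the identifier env.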
func (p *parser) parseUnaryExpr() ast.Expr {
	if isUnaryOp(p.tok) {
		op, pos := p.tok, p.pos
		p.next() // Consume op

		return &ast.UnaryExpr{
			Kind:    op,
			KindPos: pos,
			Value:   p.parseUnaryExpr(),
		}
	}

	primary := p.parsePrimaryExpr()

NextOper:
	for {
		switch p.tok {
		case token.DOT: // AccessExpr
			p.next()
			namePos, _, name := p.expect(token.IDENT)

			primary = &ast.AccessExpr{
				Value: primary,
				Name: &ast.Ident{
					Name:    name,
					NamePos: namePos,
				},
			}

		case token.LBRACK: // IndexExpr
			lBrack, _, _ := p.expect(token.LBRACK)
			index := p.ParseExpression()
			rBrack, _, _ := p.expect(token.RBRACK)

			primary = &ast.IndexExpr{
				Value:     primary,
				LBrackPos: lBrack,
				Index:     index,
				RBrackPos: rBrack,
			}

		case token.LPAREN: // CallExpr
			var args []ast.Expr

			lParen, _, _ := p.expect(token.LPAREN)
			if p.tok != token.RPAREN {
				args = p.parseExpressionList(token.RPAREN)
			}
			rParen, _, _ := p.expect(token.RPAREN)

			primary = &ast.CallExpr{
				Value:     primary,
				LParenPos: lParen,
				Args:      args,
				RParenPos: rParen,
			}

		case token.STRING, token.LCURLY:
			// A user might be trying to assign a block to an attribute. Let's
			// attempt to parse the remainder as a block to tell them something is
			// wrong.
			//
			// If we can't parse the remainder of the expression as a block, we give
			// up and parse the remainder of the entire statement.
			if p.tok == token.STRING {
				p.next()
			}
			if _, tok, _ := p.expect(token.LCURLY); tok != token.LCURLY {
				p.consumeStatement()
				return primary
			}
			p.parseBody(token.RCURLY)

			end, tok, _ := p.expect(token.RCURLY)
			if tok != token.RCURLY {
				p.consumeStatement()
				return primary
			}

			p.diags.Add(diag.Diagnostic{
				Severity: diag.SeverityLevelError,
				StartPos: ast.StartPos(primary).Position(),
				EndPos:   end.Position(),
				Message:  "cannot use a block as an expression",
			})

		default:
			break NextOper
		}
	}

	return primary
}

func isUnaryOp(tok token.Token) bool {
	switch tok {
	case token.NOT, token.SUB:
		return true
	default:
		return false
	}
}

// parsePrimaryExpr parses a primary expression.
//
//	PrimaryExpr = LiteralValue | ArrayExpr | ObjectExpr
//
//	LiteralValue = identifier | string | number | float | bool | null |
//	               "(" Expression ")"
//
//	ArrayExpr = "[" [ ExpressionList ] "]"
//	ObjectExpr = "{" [ FieldList ] "}"
func (p *parser) parsePrimaryExpr() ast.Expr {
	switch p.tok {
	case token.IDENT:
		res := &ast.IdentifierExpr{
			Ident: &ast.Ident{
				Name:    p.lit,
				NamePos: p.pos,
			},
		}
		p.next()
		return res

	case token.STRING, token.NUMBER, token.FLOAT, token.BOOL, token.NULL:
		res := &ast.LiteralExpr{
			Kind:     p.tok,
			Value:    p.lit,
			ValuePos: p.pos,
		}
		p.next()
		return res

	case token.LPAREN:
		lParen, _, _ := p.expect(token.LPAREN)
		expr := p.ParseExpression()
		rParen, _, _ := p.expect(token.RPAREN)

		return &ast.ParenExpr{
			LParenPos: lParen,
			Inner:     expr,
			RParenPos: rParen,
		}

	case token.LBRACK:
		var res ast.ArrayExpr

		res.LBrackPos, _, _ = p.expect(token.LBRACK)
		if p.tok != token.RBRACK {
			res.Elements = p.parseExpressionList(token.RBRACK)
		}
		res.RBrackPos, _, _ = p.expect(token.RBRACK)
		return &res

	case token.LCURLY:
		var res ast.ObjectExpr

		res.LCurlyPos, _, _ = p.expect(token.LCURLY)
		if p.tok != token.RCURLY {
			res.Fields = p.parseFieldList(token.RCURLY)
		}
		res.RCurlyPos, _, _ = p.expect(token.RCURLY)
		return &res
	}

	p.addErrorf("expected expression, got %s", p.tok)
	res := &ast.LiteralExpr{Kind: token.NULL, Value: "null", ValuePos: p.pos}
	p.advanceAny(statementEnd) // Eat up the rest of the line
	return res
}

var statementEnd = map[token.Token]struct{}{
	token.TERMINATOR: {},
	token.RPAREN:     {},
	token.RCURLY:     {},
	token.RBRACK:     {},
	token.COMMA:      {},
}

// parseExpressionList parses a list of expressions.
//
//	ExpressionList = Expression { "," Expression } [ "," ]
func (p *parser) parseExpressionList(until token.Token) []ast.Expr {
	var exprs []ast.Expr

	for p.tok != until && p.tok != token.EOF {
		exprs = append(exprs, p.ParseExpression())

		if p.tok == until {
			break
		}
		if p.tok != token.COMMA {
			p.addErrorf("missing ',' in expression list")
		}
		p.next()
	}

	return exprs
}

// parseFieldList parses a list of fields in an object.
//
//	FieldList = Field { "," Field } [ "," ]
func (p *parser) parseFieldList(until token.Token) []*ast.ObjectField {
	var fields []*ast.ObjectField

	for p.tok != until && p.tok != token.EOF {
		fields = append(fields, p.parseField())

		if p.tok == until {
			break
		}
		if p.tok != token.COMMA {
			p.addErrorf("missing ',' in field list")
		}
		p.next()
	}

	return fields
}

// parseField parses a field in an object.
//
//	Field = ( string | identifier ) "=" Expression
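//
// For example (illustrative): in the object literal
//
//	{ app = "agent", "instance id" = 1 }
//
// the first field name is an identifier and the second is a quoted string,
// which is unwrapped and marked as Quoted in the resulting ObjectField.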
686
func (p *parser) parseField() *ast.ObjectField {
687
var field ast.ObjectField
688
689
if p.tok == token.STRING || p.tok == token.IDENT {
690
field.Name = &ast.Ident{
691
Name: p.lit,
692
NamePos: p.pos,
693
}
694
if p.tok == token.STRING && len(p.lit) > 2 {
695
// The field name is a string literal; unwrap the quotes.
696
field.Name.Name = p.lit[1 : len(p.lit)-1]
697
field.Quoted = true
698
}
699
p.next() // Consume field name
700
} else {
701
p.addErrorf("expected field name (string or identifier), got %s", p.tok)
702
p.advance(token.ASSIGN)
703
}
704
705
p.expect(token.ASSIGN)
706
707
field.Value = p.ParseExpression()
708
return &field
709
}

func isValidIdentifier(in string) bool {
	s := scanner.New(nil, []byte(in), nil, 0)
	_, tok, lit := s.Scan()
	return tok == token.IDENT && lit == in
}