// BUILD file parser. // This is a yacc grammar. Its lexer is in lex.go. // // For a good introduction to writing yacc grammars, see // Kernighan and Pike's book The Unix Programming Environment. // // The definitive yacc manual is // Stephen C. Johnson and Ravi Sethi, "Yacc: A Parser Generator", // online at http://plan9.bell-labs.com/sys/doc/yacc.pdf. %{ package build %} // The generated parser puts these fields in a struct named yySymType. // (The name %union is historical, but it is inaccurate for Go.) %union { // input tokens tok string // raw input syntax str string // decoding of quoted string pos Position // position of token triple bool // was string triple quoted? // partial syntax trees expr Expr exprs []Expr forc *ForClause ifs []*IfClause forifs *ForClauseWithIfClausesOpt forsifs []*ForClauseWithIfClausesOpt string *StringExpr strings []*StringExpr block CodeBlock // supporting information comma Position // position of trailing comma in list, if present lastRule Expr // most recent rule, to attach line comments to } // These declarations set the type for a $ reference ($$, $1, $2, ...) // based on the kind of symbol it refers to. Other fields can be referred // to explicitly, as in $1. // // %token is for input tokens generated by the lexer. // %type is for higher-level grammar rules defined here. // // It is possible to put multiple tokens per line, but it is easier to // keep ordered using a sparser one-per-line list. %token '%' %token '(' %token ')' %token '*' %token '+' %token ',' %token '-' %token '.' %token '/' %token ':' %token '<' %token '=' %token '>' %token '[' %token ']' %token '{' %token '}' // By convention, yacc token names are all caps. // However, we do not want to export them from the Go package // we are creating, so prefix them all with underscores. %token _AUGM // augmented assignment %token _AND // keyword and %token _COMMENT // top-level # comment %token _EOF // end of file %token _EQ // operator == %token _FOR // keyword for %token _GE // operator >= %token _IDENT // non-keyword identifier or number %token _IF // keyword if %token _ELSE // keyword else %token _ELIF // keyword elif %token _IN // keyword in %token _IS // keyword is %token _LAMBDA // keyword lambda %token _LOAD // keyword load %token _LE // operator <= %token _NE // operator != %token _NOT // keyword not %token _OR // keyword or %token _PYTHON // uninterpreted Python block %token _STRING // quoted string %token _DEF // keyword def %token _RETURN // keyword return %token _INDENT // indentation %token _UNINDENT // unindentation %type comma_opt %type expr %type expr_opt %type primary_expr %type exprs %type exprs_opt %type primary_exprs %type for_clause %type for_clause_with_if_clauses_opt %type for_clauses_with_if_clauses_opt %type ident %type if_clauses_opt %type stmts %type stmt // a simple_stmt or a for/if/def block %type block_stmt // a single for/if/def statement %type if_else_block // a single if-else statement %type simple_stmt // One or many small_stmts on one line, e.g. 'a = f(x); return str(a)' %type small_stmt // A single statement, e.g. 'a = f(x)' %type small_stmts_continuation // A sequence of `';' small_stmt` %type keyvalue %type keyvalues %type keyvalues_no_comma %type string %type strings %type suite // Operator precedence. // Operators listed lower in the table bind tighter. // We tag rules with this fake, low precedence to indicate // that when the rule is involved in a shift/reduce // conflict, we prefer that the parser shift (try for a longer parse). // Shifting is the default resolution anyway, but stating it explicitly // silences yacc's warning for that specific case. %left ShiftInstead %left '\n' %left _ASSERT // '=' and augmented assignments have the lowest precedence // e.g. "x = a if c > 0 else 'bar'" // followed by // 'if' and 'else' which have lower precedence than all other operators. // e.g. "a, b if c > 0 else 'foo'" is either a tuple of (a,b) or 'foo' // and not a tuple of "(a, (b if ... ))" %left '=' _AUGM %left _IF _ELSE _ELIF %left ',' %left ':' %left _IN _NOT _IS %left _OR %left _AND %left '<' '>' _EQ _NE _LE _GE %left '+' '-' %left '*' '/' '%' %left '.' '[' '(' %right _UNARY %left _STRING %% // Grammar rules. // // A note on names: if foo is a rule, then foos is a sequence of foos // (with interleaved commas or other syntax as appropriate) // and foo_opt is an optional foo. file: stmts _EOF { yylex.(*input).file = &File{Stmt: $1} return 0 } suite: '\n' _INDENT stmts _UNINDENT { $$ = CodeBlock{ Start: $2, Statements: $3, End: End{Pos: $4}, } } | simple_stmt { // simple_stmt is never empty start, _ := $1[0].Span() _, end := $1[len($1)-1].Span() $$ = CodeBlock{ Start: start, Statements: $1, End: End{Pos: end}, } } stmts: { $$ = nil $$ = nil } | stmts stmt { // If this statement follows a comment block, // attach the comments to the statement. if cb, ok := $1.(*CommentBlock); ok { $$ = append($1[:len($1)-1], $2...) $2[0].Comment().Before = cb.After $$ = $2[len($2)-1] break } // Otherwise add to list. $$ = append($1, $2...) $$ = $2[len($2)-1] // Consider this input: // // foo() // # bar // baz() // // If we've just parsed baz(), the # bar is attached to // foo() as an After comment. Make it a Before comment // for baz() instead. if x := $1; x != nil { com := x.Comment() // stmt is never empty $2[0].Comment().Before = com.After com.After = nil } } | stmts '\n' { // Blank line; sever last rule from future comments. $$ = $1 $$ = nil } | stmts _COMMENT '\n' { $$ = $1 $$ = $1 if $$ == nil { cb := &CommentBlock{Start: $2} $$ = append($$, cb) $$ = cb } com := $$.Comment() com.After = append(com.After, Comment{Start: $2, Token: $2}) } stmt: simple_stmt { $$ = $1 } | block_stmt { $$ = []Expr{$1} } block_stmt: _DEF _IDENT '(' exprs_opt ')' ':' suite { $$ = &FuncDef{ Start: $1, Name: $2, ListStart: $3, Args: $4, Body: $7, End: $7.End, ForceCompact: forceCompact($3, $4, $5), ForceMultiLine: forceMultiLine($3, $4, $5), } } | _FOR primary_exprs _IN expr ':' suite { $$ = &ForLoop{ Start: $1, LoopVars: $2, Iterable: $4, Body: $6, End: $6.End, } } | if_else_block { $$ = $1 } if_else_block: _IF expr ':' suite { $$ = &IfElse{ Start: $1, Conditions: []Condition{ Condition{ If: $2, Then: $4, }, }, End: $4.End, } } | if_else_block elif expr ':' suite { block := $1.(*IfElse) block.Conditions = append(block.Conditions, Condition{ If: $3, Then: $5, }) block.End = $5.End $$ = block } | if_else_block _ELSE ':' suite { block := $1.(*IfElse) block.Conditions = append(block.Conditions, Condition{ Then: $4, }) block.End = $4.End $$ = block } elif: _ELSE _IF | _ELIF simple_stmt: small_stmt small_stmts_continuation semi_opt '\n' { $$ = append([]Expr{$1}, $2...) $$ = $$[len($$)-1] } small_stmts_continuation: { $$ = []Expr{} } | small_stmts_continuation ';' small_stmt { $$ = append($1, $3) } small_stmt: expr %prec ShiftInstead | _RETURN expr { _, end := $2.Span() $$ = &ReturnExpr{ X: $2, End: end, } } | _RETURN { $$ = &ReturnExpr{End: $1} } | _PYTHON { $$ = &PythonBlock{Start: $1, Token: $1} } semi_opt: | ';' primary_expr: ident | primary_expr '.' _IDENT { $$ = &DotExpr{ X: $1, Dot: $2, NamePos: $3, Name: $3, } } | _LOAD '(' exprs_opt ')' { $$ = &CallExpr{ X: &LiteralExpr{Start: $1, Token: "load"}, ListStart: $2, List: $3, End: End{Pos: $4}, ForceCompact: forceCompact($2, $3, $4), ForceMultiLine: forceMultiLine($2, $3, $4), } } | primary_expr '(' exprs_opt ')' { $$ = &CallExpr{ X: $1, ListStart: $2, List: $3, End: End{Pos: $4}, ForceCompact: forceCompact($2, $3, $4), ForceMultiLine: forceMultiLine($2, $3, $4), } } | primary_expr '[' expr ']' { $$ = &IndexExpr{ X: $1, IndexStart: $2, Y: $3, End: $4, } } | primary_expr '[' expr_opt ':' expr_opt ']' { $$ = &SliceExpr{ X: $1, SliceStart: $2, From: $3, FirstColon: $4, To: $5, End: $6, } } | primary_expr '[' expr_opt ':' expr_opt ':' expr_opt ']' { $$ = &SliceExpr{ X: $1, SliceStart: $2, From: $3, FirstColon: $4, To: $5, SecondColon: $6, Step: $7, End: $8, } } | primary_expr '(' expr for_clauses_with_if_clauses_opt ')' { $$ = &CallExpr{ X: $1, ListStart: $2, List: []Expr{ &ListForExpr{ Brack: "", Start: $2, X: $3, For: $4, End: End{Pos: $5}, }, }, End: End{Pos: $5}, } } | strings %prec ShiftInstead { if len($1) == 1 { $$ = $1[0] break } $$ = $1[0] for _, x := range $1[1:] { _, end := $$.Span() $$ = binary($$, end, "+", x) } } | '[' exprs_opt ']' { $$ = &ListExpr{ Start: $1, List: $2, Comma: $2, End: End{Pos: $3}, ForceMultiLine: forceMultiLine($1, $2, $3), } } | '[' expr for_clauses_with_if_clauses_opt ']' { exprStart, _ := $2.Span() $$ = &ListForExpr{ Brack: "[]", Start: $1, X: $2, For: $3, End: End{Pos: $4}, ForceMultiLine: $1.Line != exprStart.Line, } } | '(' expr for_clauses_with_if_clauses_opt ')' { exprStart, _ := $2.Span() $$ = &ListForExpr{ Brack: "()", Start: $1, X: $2, For: $3, End: End{Pos: $4}, ForceMultiLine: $1.Line != exprStart.Line, } } | '{' keyvalue for_clauses_with_if_clauses_opt '}' { exprStart, _ := $2.Span() $$ = &ListForExpr{ Brack: "{}", Start: $1, X: $2, For: $3, End: End{Pos: $4}, ForceMultiLine: $1.Line != exprStart.Line, } } | '{' keyvalues '}' { $$ = &DictExpr{ Start: $1, List: $2, Comma: $2, End: End{Pos: $3}, ForceMultiLine: forceMultiLine($1, $2, $3), } } | '{' exprs_opt '}' { $$ = &SetExpr{ Start: $1, List: $2, Comma: $2, End: End{Pos: $3}, ForceMultiLine: forceMultiLine($1, $2, $3), } } | '(' exprs_opt ')' { if len($2) == 1 && $2.Line == 0 { // Just a parenthesized expression, not a tuple. $$ = &ParenExpr{ Start: $1, X: $2[0], End: End{Pos: $3}, ForceMultiLine: forceMultiLine($1, $2, $3), } } else { $$ = &TupleExpr{ Start: $1, List: $2, Comma: $2, End: End{Pos: $3}, ForceCompact: forceCompact($1, $2, $3), ForceMultiLine: forceMultiLine($1, $2, $3), } } } | '-' primary_expr %prec _UNARY { $$ = unary($1, $1, $2) } expr: primary_expr | _LAMBDA exprs ':' expr { $$ = &LambdaExpr{ Lambda: $1, Var: $2, Colon: $3, Expr: $4, } } | _NOT expr %prec _UNARY { $$ = unary($1, $1, $2) } | '*' expr %prec _UNARY { $$ = unary($1, $1, $2) } | expr '*' expr { $$ = binary($1, $2, $2, $3) } | expr '%' expr { $$ = binary($1, $2, $2, $3) } | expr '/' expr { $$ = binary($1, $2, $2, $3) } | expr '+' expr { $$ = binary($1, $2, $2, $3) } | expr '-' expr { $$ = binary($1, $2, $2, $3) } | expr '<' expr { $$ = binary($1, $2, $2, $3) } | expr '>' expr { $$ = binary($1, $2, $2, $3) } | expr _EQ expr { $$ = binary($1, $2, $2, $3) } | expr _LE expr { $$ = binary($1, $2, $2, $3) } | expr _NE expr { $$ = binary($1, $2, $2, $3) } | expr _GE expr { $$ = binary($1, $2, $2, $3) } | expr '=' expr { $$ = binary($1, $2, $2, $3) } | expr _AUGM expr { $$ = binary($1, $2, $2, $3) } | expr _IN expr { $$ = binary($1, $2, $2, $3) } | expr _NOT _IN expr { $$ = binary($1, $2, "not in", $4) } | expr _OR expr { $$ = binary($1, $2, $2, $3) } | expr _AND expr { $$ = binary($1, $2, $2, $3) } | expr _IS expr { if b, ok := $3.(*UnaryExpr); ok && b.Op == "not" { $$ = binary($1, $2, "is not", b.X) } else { $$ = binary($1, $2, $2, $3) } } | expr _IF expr _ELSE expr { $$ = &ConditionalExpr{ Then: $1, IfStart: $2, Test: $3, ElseStart: $4, Else: $5, } } expr_opt: { $$ = nil } | expr // comma_opt is an optional comma. If the comma is present, // the rule's value is the position of the comma. Otherwise // the rule's value is the zero position. Tracking this // lets us distinguish (x) and (x,). comma_opt: { $$ = Position{} } | ',' keyvalue: expr ':' expr { $$ = &KeyValueExpr{ Key: $1, Colon: $2, Value: $3, } } keyvalues_no_comma: keyvalue { $$ = []Expr{$1} } | keyvalues_no_comma ',' keyvalue { $$ = append($1, $3) } keyvalues: keyvalues_no_comma { $$ = $1 } | keyvalues_no_comma ',' { $$ = $1 } exprs: expr { $$ = []Expr{$1} } | exprs ',' expr { $$ = append($1, $3) } exprs_opt: { $$, $$ = nil, Position{} } | exprs comma_opt { $$, $$ = $1, $2 } primary_exprs: primary_expr { $$ = []Expr{$1} } | primary_exprs ',' primary_expr { $$ = append($1, $3) } string: _STRING { $$ = &StringExpr{ Start: $1, Value: $1, TripleQuote: $1, End: $1.add($1), Token: $1, } } strings: string { $$ = []*StringExpr{$1} } | strings string { $$ = append($1, $2) } ident: _IDENT { $$ = &LiteralExpr{Start: $1, Token: $1} } for_clause: _FOR primary_exprs _IN expr { $$ = &ForClause{ For: $1, Var: $2, In: $3, Expr: $4, } } for_clause_with_if_clauses_opt: for_clause if_clauses_opt { $$ = &ForClauseWithIfClausesOpt{ For: $1, Ifs: $2, } } for_clauses_with_if_clauses_opt: for_clause_with_if_clauses_opt { $$ = []*ForClauseWithIfClausesOpt{$1} } | for_clauses_with_if_clauses_opt for_clause_with_if_clauses_opt { $$ = append($1, $2) } if_clauses_opt: { $$ = nil } | if_clauses_opt _IF expr { $$ = append($1, &IfClause{ If: $2, Cond: $3, }) } %% // Go helper code. // unary returns a unary expression with the given // position, operator, and subexpression. func unary(pos Position, op string, x Expr) Expr { return &UnaryExpr{ OpStart: pos, Op: op, X: x, } } // binary returns a binary expression with the given // operands, position, and operator. func binary(x Expr, pos Position, op string, y Expr) Expr { _, xend := x.Span() ystart, _ := y.Span() return &BinaryExpr{ X: x, OpStart: pos, Op: op, LineBreak: xend.Line < ystart.Line, Y: y, } } // isSimpleExpression returns whether an expression is simple and allowed to exist in // compact forms of sequences. // The formal criteria are the following: an expression is considered simple if it's // a literal (variable, string or a number), a literal with a unary operator or an empty sequence. func isSimpleExpression(expr *Expr) bool { switch x := (*expr).(type) { case *LiteralExpr, *StringExpr: return true case *UnaryExpr: _, ok := x.X.(*LiteralExpr) return ok case *ListExpr: return len(x.List) == 0 case *TupleExpr: return len(x.List) == 0 case *DictExpr: return len(x.List) == 0 case *SetExpr: return len(x.List) == 0 default: return false } } // forceCompact returns the setting for the ForceCompact field for a call or tuple. // // NOTE 1: The field is called ForceCompact, not ForceSingleLine, // because it only affects the formatting associated with the call or tuple syntax, // not the formatting of the arguments. For example: // // call([ // 1, // 2, // 3, // ]) // // is still a compact call even though it runs on multiple lines. // // In contrast the multiline form puts a linebreak after the (. // // call( // [ // 1, // 2, // 3, // ], // ) // // NOTE 2: Because of NOTE 1, we cannot use start and end on the // same line as a signal for compact mode: the formatting of an // embedded list might move the end to a different line, which would // then look different on rereading and cause buildifier not to be // idempotent. Instead, we have to look at properties guaranteed // to be preserved by the reformatting, namely that the opening // paren and the first expression are on the same line and that // each subsequent expression begins on the same line as the last // one ended (no line breaks after comma). func forceCompact(start Position, list []Expr, end Position) bool { if len(list) <= 1 { // The call or tuple will probably be compact anyway; don't force it. return false } // If there are any named arguments or non-string, non-literal // arguments, cannot force compact mode. line := start.Line for _, x := range list { start, end := x.Span() if start.Line != line { return false } line = end.Line if !isSimpleExpression(&x) { return false } } return end.Line == line } // forceMultiLine returns the setting for the ForceMultiLine field. func forceMultiLine(start Position, list []Expr, end Position) bool { if len(list) > 1 { // The call will be multiline anyway, because it has multiple elements. Don't force it. return false } if len(list) == 0 { // Empty list: use position of brackets. return start.Line != end.Line } // Single-element list. // Check whether opening bracket is on different line than beginning of // element, or closing bracket is on different line than end of element. elemStart, elemEnd := list[0].Span() return start.Line != elemStart.Line || end.Line != elemEnd.Line }