12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870 |
- package build
- import (
- "bytes"
- "fmt"
- "strings"
- "unicode/utf8"
- "github.com/bazelbuild/buildtools/tables"
- )
- func Parse(filename string, data []byte) (*File, error) {
- in := newInput(filename, data)
- return in.parse()
- }
- type input struct {
-
- filename string
- complete []byte
- remaining []byte
- token []byte
- lastToken string
- pos Position
- lineComments []Comment
- suffixComments []Comment
- endStmt int
- depth int
- cleanLine bool
- indent int
- indents []int
-
- file *File
- parseError error
-
- pre []Expr
- post []Expr
- }
- func newInput(filename string, data []byte) *input {
-
-
-
- data = append(data, '\n')
- return &input{
- filename: filename,
- complete: data,
- remaining: data,
- pos: Position{Line: 1, LineRune: 1, Byte: 0},
- cleanLine: true,
- indents: []int{0},
- endStmt: -1,
- }
- }
- func (in *input) currentIndent() int {
- return in.indents[len(in.indents)-1]
- }
- func (in *input) parse() (f *File, err error) {
-
-
-
-
- defer func() {
- if e := recover(); e != nil {
- if e == in.parseError {
- err = in.parseError
- } else {
- err = fmt.Errorf("%s:%d:%d: internal error: %v", in.filename, in.pos.Line, in.pos.LineRune, e)
- }
- }
- }()
-
- yyParse(in)
- if in.parseError != nil {
- return nil, in.parseError
- }
- in.file.Path = in.filename
-
- in.assignComments()
- return in.file, nil
- }
- func (in *input) Error(s string) {
- if s == "syntax error" && in.lastToken != "" {
- s += " near " + in.lastToken
- }
- in.parseError = fmt.Errorf("%s:%d:%d: %v", in.filename, in.pos.Line, in.pos.LineRune, s)
- panic(in.parseError)
- }
- func (in *input) eof() bool {
- return len(in.remaining) == 0
- }
- func (in *input) peekRune() int {
- if len(in.remaining) == 0 {
- return 0
- }
- r, _ := utf8.DecodeRune(in.remaining)
- return int(r)
- }
- func (in *input) readRune() int {
- if len(in.remaining) == 0 {
- in.Error("internal lexer error: readRune at EOF")
- }
- r, size := utf8.DecodeRune(in.remaining)
- in.remaining = in.remaining[size:]
- if r == '\n' {
- in.pos.Line++
- in.pos.LineRune = 1
- } else {
- in.pos.LineRune++
- }
- in.pos.Byte += size
- return int(r)
- }
- func (in *input) startToken(val *yySymType) {
- in.token = in.remaining
- val.tok = ""
- val.pos = in.pos
- }
- func (in *input) endToken(val *yySymType) {
- if val.tok == "" {
- tok := string(in.token[:len(in.token)-len(in.remaining)])
- val.tok = tok
- in.lastToken = val.tok
- }
- }
- func (in *input) Lex(val *yySymType) int {
- // Lex returns the next token, storing its text and position in val. First skip spaces, line continuations and comments.
- countNL := 0
- for !in.eof() {
- // endStmt is the value len(in.remaining) will have at the end of the
- // statement currently being lexed, or -1 when no statement is in progress.
- // It is computed with skipStmt when the first token of a statement is
- // produced (see below).
- //
- // While a statement is in progress, newlines are skipped instead of being
- // returned, comments are saved instead of being returned, and no
- // _INDENT/_UNINDENT tokens are produced, so a statement may be split
- // across lines with arbitrary indentation.
- //
- // Once the input has been consumed up to the recorded end point, the
- // statement is over and endStmt is reset.
-
-
-
- if in.endStmt != -1 && len(in.remaining) == in.endStmt {
- in.endStmt = -1
- }
-
- // Skip whitespace. Newlines skipped inside a statement are counted so that
- // a blank line before a saved comment can be recorded (countNL > 1 below).
- c := in.peekRune()
- if c == ' ' || c == '\t' || c == '\r' || c == '\n' {
- if c == '\n' {
- in.indent = 0
- in.cleanLine = true
- if in.endStmt == -1 {
- // Not inside a statement: hand the newline to the parser as a token.
- in.startToken(val)
- in.readRune()
- in.endToken(val)
- return '\n'
- }
- countNL++
- } else if c == ' ' && in.cleanLine {
- in.indent++
- }
- in.readRune()
- continue
- }
-
- if c == '#' {
- // A comment runs to the end of the line. The indentation counted so far
- // on this line is discarded, so it never triggers _INDENT/_UNINDENT.
- in.indent = 0
- in.cleanLine = true
-
- // Classify the comment by the text between the previous newline and
- // the '#'. If that text is empty after trimming spaces, or ends with an
- // opening bracket, it is a line comment; otherwise it is a suffix
- // comment belonging to the code before it on the same line.
-
- i := bytes.LastIndex(in.complete[:in.pos.Byte], []byte("\n"))
- prefix := bytes.TrimSpace(in.complete[i+1 : in.pos.Byte])
- isSuffix := true
- if len(prefix) == 0 ||
- prefix[len(prefix)-1] == '[' ||
- prefix[len(prefix)-1] == '(' ||
- prefix[len(prefix)-1] == '{' {
- isSuffix = false
- }
-
- in.startToken(val)
- for len(in.remaining) > 0 && in.peekRune() != '\n' {
- in.readRune()
- }
- in.endToken(val)
- val.tok = strings.TrimRight(val.tok, "\n")
- in.lastToken = "comment"
-
- // The terminating newline has not been consumed; it is handled by the
- // next iteration or the next call.
- if in.endStmt == -1 {
- // Not inside a statement: the parser receives the comment as a token.
- return _COMMENT
- }
- // Inside a statement: save the comment for assignComments. An empty Comment records a preceding blank line.
- if countNL > 1 {
- in.lineComments = append(in.lineComments, Comment{val.pos, ""})
- }
- if isSuffix {
- in.suffixComments = append(in.suffixComments, Comment{val.pos, val.tok})
- } else {
- in.lineComments = append(in.lineComments, Comment{val.pos, val.tok})
- }
- countNL = 0
- continue
- }
- if c == '\\' && len(in.remaining) >= 2 && in.remaining[1] == '\n' {
- // We can ignore a trailing \ at end of line together with the \n.
- in.readRune()
- in.readRune()
- continue
- }
- // Found non-space non-comment.
- break
- }
- // Check for changes in indentation
- // Skip if --format_bzl is set to false, if we're inside a statement, or if there were non-space
- // characters before on the current line (cleanLine is false).
- if tables.FormatBzlFiles && in.endStmt == -1 && in.cleanLine {
- if in.indent > in.currentIndent() {
- // Deeper than the innermost block: a new block starts.
- in.indents = append(in.indents, in.indent)
- in.lastToken = "indent"
- in.cleanLine = false
- return _INDENT
- } else if in.indent < in.currentIndent() {
- // Shallower than the innermost block: that block ends.
- in.indents = in.indents[:len(in.indents)-1]
- // After popping one level the line must not be deeper than the new top
- // of the stack. cleanLine stays true, so if more levels have to be
- // closed the following calls pop them one at a time.
- if in.indent > in.currentIndent() {
- in.pos = val.pos
- in.Error("unexpected indentation")
- }
- in.lastToken = "unindent"
- return _UNINDENT
- }
- }
- in.cleanLine = false
- // At end of input, close the blocks that are still open, one _UNINDENT per call.
- if in.eof() && in.currentIndent() > 0 {
- in.indents = in.indents[:len(in.indents)-1]
- in.lastToken = "unindent"
- return _UNINDENT
- }
- // Begin the token. endToken runs on return and fills val.tok unless it has been set already.
- in.startToken(val)
- defer in.endToken(val)
- // End of input.
- if in.eof() {
- in.lastToken = "EOF"
- return _EOF
- }
- // First token of a new statement: record where the statement ends.
- if in.endStmt == -1 {
- in.endStmt = len(in.skipStmt(in.remaining))
- }
-
- switch c := in.peekRune(); c {
- case '[', '(', '{':
- in.depth++
- in.readRune()
- return c
- case ']', ')', '}':
- in.depth--
- in.readRune()
- return c
- case '.', ':', ';', ',':
- in.readRune()
- return c
- case '<', '>', '=', '!', '+', '-', '*', '/', '%':
- in.readRune()
- if c == '/' && in.peekRune() == '/' {
- // Consume the second '/' so that "//" is lexed as one token.
- in.readRune()
- }
- if in.peekRune() == '=' {
- in.readRune()
- switch c {
- case '<':
- return _LE
- case '>':
- return _GE
- case '=':
- return _EQ
- case '!':
- return _NE
- default:
- return _AUGM
- }
- }
- return c
- case 'r':
- if len(in.remaining) < 2 || in.remaining[1] != '"' && in.remaining[1] != '\'' {
- break
- }
- in.readRune()
- c = in.peekRune()
- fallthrough
- case '"', '\'':
- quote := c
- if len(in.remaining) >= 3 && in.remaining[0] == byte(quote) && in.remaining[1] == byte(quote) && in.remaining[2] == byte(quote) {
- // Triple-quoted string: read until three consecutive quote runes; a backslash skips the rune after it.
- in.readRune()
- in.readRune()
- in.readRune()
- var c1, c2, c3 int
- for {
- if in.eof() {
- in.pos = val.pos
- in.Error("unexpected EOF in string")
- }
- c1, c2, c3 = c2, c3, in.readRune()
- if c1 == quote && c2 == quote && c3 == quote {
- break
- }
- if c3 == '\\' {
- if in.eof() {
- in.pos = val.pos
- in.Error("unexpected EOF in string")
- }
- in.readRune()
- }
- }
- } else {
- in.readRune()
- for {
- if in.eof() {
- in.pos = val.pos
- in.Error("unexpected EOF in string")
- }
- if in.peekRune() == '\n' {
- in.Error("unexpected newline in string")
- }
- c := in.readRune()
- if c == quote {
- break
- }
- if c == '\\' {
- if in.eof() {
- in.pos = val.pos
- in.Error("unexpected EOF in string")
- }
- in.readRune()
- }
- }
- }
- in.endToken(val)
- s, triple, err := unquote(val.tok)
- if err != nil {
- in.Error(fmt.Sprint(err))
- }
- val.str = s
- val.triple = triple
- return _STRING
- }
- // Checked all punctuation. Must be identifier token.
- if c := in.peekRune(); !isIdent(c) {
- in.Error(fmt.Sprintf("unexpected input character %#q", c))
- }
- if !tables.FormatBzlFiles {
- // Look for raw Python block (class, def, if, etc at beginning of line) and pass through.
- if in.depth == 0 && in.pos.LineRune == 1 && hasPythonPrefix(in.remaining) {
- // Find end of Python block and advance input beyond it.
- // Have to loop calling readRune in order to maintain line number info.
- rest := in.skipStmt(in.remaining)
- for len(in.remaining) > len(rest) {
- in.readRune()
- }
- return _PYTHON
- }
- }
- // Scan over alphanumeric identifier.
- for {
- c := in.peekRune()
- if !isIdent(c) {
- break
- }
- in.readRune()
- }
- // Call endToken to set val.tok to identifier we just scanned,
- // so we can look to see if val.tok is a keyword.
- in.endToken(val)
- if k := keywordToken[val.tok]; k != 0 {
- return k
- }
- return _IDENT
- }
// isIdent reports whether c may appear in an identifier: an ASCII letter,
// an ASCII digit, an underscore, or any rune outside the ASCII range.
func isIdent(c int) bool {
	switch {
	case c >= 0x80:
		// Every non-ASCII rune counts as an identifier rune.
		return true
	case c == '_':
		return true
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
		return true
	}
	return false
}
- // keywordToken records the special tokens for
- // strings that should not be treated as ordinary identifiers.
- var keywordToken = map[string]int{
- "and": _AND,
- "for": _FOR,
- "if": _IF,
- "else": _ELSE,
- "elif": _ELIF,
- "in": _IN,
- "is": _IS,
- "lambda": _LAMBDA,
- "load": _LOAD,
- "not": _NOT,
- "or": _OR,
- "def": _DEF,
- "return": _RETURN,
- }
- // Python scanning.
- // About 1% of BUILD files embed arbitrary Python into the file.
- // We do not attempt to parse it. Instead, we lex just enough to scan
- // beyond it, treating the Python block as an uninterpreted blob.
- // hasPythonPrefix reports whether p begins with a keyword that would
- // introduce an uninterpreted Python block.
- func hasPythonPrefix(p []byte) bool {
- if tables.FormatBzlFiles {
- return false
- }
- for _, pre := range prefixes {
- if hasPrefixSpace(p, pre) {
- return true
- }
- }
- return false
- }
// prefixes lists the keywords that, at the start of a line, introduce an
// uninterpreted Python block (see hasPythonPrefix).
var prefixes = []string{
	"assert", "class", "def", "del", "for",
	"if", "try", "else", "elif", "except",
}
// hasPrefixSpace reports whether p begins with pre immediately followed by
// a space, a tab or a colon. A p that is exactly pre, with nothing after
// it, does not match.
func hasPrefixSpace(p []byte, pre string) bool {
	n := len(pre)
	if len(p) <= n {
		return false
	}
	// Check the delimiter first; it is the cheaper test.
	switch p[n] {
	case ' ', '\t', ':':
	default:
		return false
	}
	return string(p[:n]) == pre
}
// isOutsideBlock is a helper for the legacy formatter. It reports whether
// the first code in b, after skipping any preceding blank lines and
// comments, starts in the first column of its line, i.e. is a top-level
// statement. It also reports true when b contains no code at all.
func isOutsideBlock(b []byte) bool {
	atLineStart := true // nothing has been seen on the current line yet
	inComment := false  // the rest of the current line is a comment
	for i := 0; i < len(b); i++ {
		switch b[i] {
		case '\n':
			atLineStart, inComment = true, false
		case '#':
			atLineStart, inComment = false, true
		case ' ', '\t', '\r':
			atLineStart = false
		default:
			// First byte of code: it is top-level only in column one.
			if !inComment {
				return atLineStart
			}
		}
	}
	return true
}
- // skipStmt returns the data remaining after the statement beginning at p,
- // or nil if no end of statement is found. It does not advance the input position;
- // the receiver is used only to report an unterminated string through in.Error.
- func (in *input) skipStmt(p []byte) []byte {
- quote := byte(0) // if non-zero, the kind of quote we're in
- tripleQuote := false
- depth := 0
- var rest []byte
- defer func() {
- if quote != 0 {
- in.Error("EOF scanning Python quoted string")
- }
- }()
- // Scan p one byte at a time, tracking whether we are inside a quoted
- // string and how deeply brackets are nested. A newline outside strings and
- // brackets that is not preceded by a backslash is a candidate end of statement.
- for i := 0; i < len(p); i++ {
- c := p[i]
- if quote != 0 && c == quote && !tripleQuote {
- quote = 0
- continue
- }
- if quote != 0 && c == quote && tripleQuote && i+2 < len(p) && p[i+1] == quote && p[i+2] == quote {
- i += 2
- quote = 0
- tripleQuote = false
- continue
- }
- if quote != 0 {
- if c == '\\' {
- i++ // skip escaped char
- }
- continue
- }
- if c == '\'' || c == '"' {
- if i+2 < len(p) && p[i+1] == c && p[i+2] == c {
- quote = c
- tripleQuote = true
- i += 2
- continue
- }
- quote = c
- continue
- }
- if depth == 0 && i > 0 && p[i] == '\n' && p[i-1] != '\\' {
- // Possible stopping point. Save the earliest one we find.
- if rest == nil {
- rest = p[i:]
- }
- if tables.FormatBzlFiles {
- // In the bzl files mode we only care about the end of the statement, we've found it.
- return rest
- }
- // In the legacy mode we need to find where the current block ends
- if isOutsideBlock(p[i+1:]) {
- return rest
- }
- // Not a stopping point after all.
- rest = nil
- }
- switch c {
- case '#':
- // Skip comment.
- for i < len(p) && p[i] != '\n' {
- i++
- }
- // Rewind 1 position back because \n should be handled at the next iteration
- i--
- case '(', '[', '{':
- depth++
- case ')', ']', '}':
- depth--
- }
- }
- return rest
- }
- // Comment assignment.
- // We build two lists of all subexpressions, preorder and postorder.
- // The preorder list is ordered by start location, with outer expressions first.
- // The postorder list is ordered by end location, with outer expressions last.
- // We use the preorder list to assign each whole-line comment to the syntax
- // immediately following it, and we use the postorder list to assign each
- // end-of-line comment to the syntax immediately preceding it.
- // order walks the expression adding it and its subexpressions to the
- // preorder and postorder lists.
- func (in *input) order(v Expr) {
- if v != nil {
- in.pre = append(in.pre, v)
- }
- switch v := v.(type) {
- default:
- panic(fmt.Errorf("order: unexpected type %T", v))
- case nil:
- // nothing
- case *End:
- // nothing
- case *File:
- for _, stmt := range v.Stmt {
- in.order(stmt)
- }
- case *CommentBlock:
- // nothing
- case *CallExpr:
- in.order(v.X)
- for _, x := range v.List {
- in.order(x)
- }
- in.order(&v.End)
- case *PythonBlock:
- // nothing
- case *LiteralExpr:
- // nothing
- case *StringExpr:
- // nothing
- case *DotExpr:
- in.order(v.X)
- case *ListExpr:
- for _, x := range v.List {
- in.order(x)
- }
- in.order(&v.End)
- case *ListForExpr:
- in.order(v.X)
- for _, c := range v.For {
- in.order(c)
- }
- in.order(&v.End)
- case *SetExpr:
- for _, x := range v.List {
- in.order(x)
- }
- in.order(&v.End)
- case *ForClauseWithIfClausesOpt:
- in.order(v.For)
- for _, c := range v.Ifs {
- in.order(c)
- }
- case *ForClause:
- for _, name := range v.Var {
- in.order(name)
- }
- in.order(v.Expr)
- case *IfClause:
- in.order(v.Cond)
- case *KeyValueExpr:
- in.order(v.Key)
- in.order(v.Value)
- case *DictExpr:
- for _, x := range v.List {
- in.order(x)
- }
- in.order(&v.End)
- case *TupleExpr:
- for _, x := range v.List {
- in.order(x)
- }
- in.order(&v.End)
- case *UnaryExpr:
- in.order(v.X)
- case *BinaryExpr:
- in.order(v.X)
- in.order(v.Y)
- case *ConditionalExpr:
- in.order(v.Then)
- in.order(v.Test)
- in.order(v.Else)
- case *ParenExpr:
- in.order(v.X)
- in.order(&v.End)
- case *SliceExpr:
- in.order(v.X)
- in.order(v.From)
- in.order(v.To)
- in.order(v.Step)
- case *IndexExpr:
- in.order(v.X)
- in.order(v.Y)
- case *LambdaExpr:
- for _, name := range v.Var {
- in.order(name)
- }
- in.order(v.Expr)
- case *ReturnExpr:
- if v.X != nil {
- in.order(v.X)
- }
- case *FuncDef:
- for _, x := range v.Args {
- in.order(x)
- }
- for _, x := range v.Body.Statements {
- in.order(x)
- }
- case *ForLoop:
- for _, x := range v.LoopVars {
- in.order(x)
- }
- in.order(v.Iterable)
- for _, x := range v.Body.Statements {
- in.order(x)
- }
- case *IfElse:
- for _, condition := range v.Conditions {
- in.order(condition.If)
- for _, x := range condition.Then.Statements {
- in.order(x)
- }
- }
- }
- if v != nil {
- in.post = append(in.post, v)
- }
- }
- // assignComments attaches comments to nearby syntax.
- func (in *input) assignComments() {
- // Generate preorder and postorder lists.
- in.order(in.file)
- // Assign line comments to syntax immediately following.
- line := in.lineComments
- for _, x := range in.pre {
- start, _ := x.Span()
- xcom := x.Comment()
- for len(line) > 0 && start.Byte >= line[0].Start.Byte {
- xcom.Before = append(xcom.Before, line[0])
- line = line[1:]
- }
- }
- // Remaining line comments go at end of file.
- in.file.After = append(in.file.After, line...)
- // Assign suffix comments to syntax immediately before.
- suffix := in.suffixComments
- for i := len(in.post) - 1; i >= 0; i-- {
- x := in.post[i]
- // Do not assign suffix comments to file
- switch x.(type) {
- case *File:
- continue
- }
- _, end := x.Span()
- xcom := x.Comment()
- for len(suffix) > 0 && end.Byte <= suffix[len(suffix)-1].Start.Byte {
- xcom.Suffix = append(xcom.Suffix, suffix[len(suffix)-1])
- suffix = suffix[:len(suffix)-1]
- }
- }
- // We assigned suffix comments in reverse.
- // If multiple suffix comments were appended to the same
- // expression node, they are now in reverse. Fix that.
- for _, x := range in.post {
- reverseComments(x.Comment().Suffix)
- }
- // Remaining suffix comments go at beginning of file.
- in.file.Before = append(in.file.Before, suffix...)
- }
- // reverseComments reverses the []Comment list.
- func reverseComments(list []Comment) {
- for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 {
- list[i], list[j] = list[j], list[i]
- }
- }
|