lex.go 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870
/*
Copyright 2016 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
  13. // Lexical scanning for BUILD file parser.
  14. package build
  15. import (
  16. "bytes"
  17. "fmt"
  18. "strings"
  19. "unicode/utf8"
  20. "github.com/bazelbuild/buildtools/tables"
  21. )
  22. // Parse parses the input data and returns the corresponding parse tree.
  23. //
  24. // The filename is used only for generating error messages.
  25. func Parse(filename string, data []byte) (*File, error) {
  26. in := newInput(filename, data)
  27. return in.parse()
  28. }
// An input represents a single input file being parsed.
type input struct {
	// Lexing state.
	filename       string    // name of input file, for errors
	complete       []byte    // entire input
	remaining      []byte    // remaining input
	token          []byte    // token being scanned (slice of remaining, started by startToken)
	lastToken      string    // most recently returned token, for error messages
	pos            Position  // current input position
	lineComments   []Comment // accumulated line comments, attached to syntax later
	suffixComments []Comment // accumulated suffix (end-of-line) comments
	endStmt        int       // len(remaining) at the end of the current statement; -1 when not inside a statement
	depth          int       // nesting of [ ] { } ( )
	cleanLine      bool      // true if the current line only contains whitespace before the current position
	indent         int       // current line indentation in spaces
	indents        []int     // stack of indentation levels in spaces

	// Parser state.
	file       *File // returned top-level syntax tree
	parseError error // error encountered during parsing

	// Comment assignment state.
	pre  []Expr // all expressions, in preorder traversal
	post []Expr // all expressions, in postorder traversal
}
  52. func newInput(filename string, data []byte) *input {
  53. // The syntax requires that each simple statement ends with '\n', however it's optional at EOF.
  54. // If `data` doesn't end with '\n' we add it here to keep parser simple.
  55. // It shouldn't affect neither the parsed tree nor its formatting.
  56. data = append(data, '\n')
  57. return &input{
  58. filename: filename,
  59. complete: data,
  60. remaining: data,
  61. pos: Position{Line: 1, LineRune: 1, Byte: 0},
  62. cleanLine: true,
  63. indents: []int{0},
  64. endStmt: -1, // -1 denotes it's not inside a statement
  65. }
  66. }
  67. func (in *input) currentIndent() int {
  68. return in.indents[len(in.indents)-1]
  69. }
// parse parses the input file and returns the resulting syntax tree.
func (in *input) parse() (f *File, err error) {
	// The parser panics for both routine errors like syntax errors
	// and for programmer bugs like array index errors.
	// Turn both into error returns. Catching bug panics is
	// especially important when processing many files.
	defer func() {
		if e := recover(); e != nil {
			if e == in.parseError {
				// Routine parse error reported through in.Error.
				err = in.parseError
			} else {
				// Unexpected panic: report it as an internal error at the current position.
				err = fmt.Errorf("%s:%d:%d: internal error: %v", in.filename, in.pos.Line, in.pos.LineRune, e)
			}
		}
	}()

	// Invoke the parser generated from parse.y.
	yyParse(in)
	if in.parseError != nil {
		return nil, in.parseError
	}
	in.file.Path = in.filename

	// Assign comments to nearby syntax.
	in.assignComments()

	return in.file, nil
}
  95. // Error is called to report an error.
  96. // When called by the generated code s is always "syntax error".
  97. // Error does not return: it panics.
  98. func (in *input) Error(s string) {
  99. if s == "syntax error" && in.lastToken != "" {
  100. s += " near " + in.lastToken
  101. }
  102. in.parseError = fmt.Errorf("%s:%d:%d: %v", in.filename, in.pos.Line, in.pos.LineRune, s)
  103. panic(in.parseError)
  104. }
  105. // eof reports whether the input has reached end of file.
  106. func (in *input) eof() bool {
  107. return len(in.remaining) == 0
  108. }
  109. // peekRune returns the next rune in the input without consuming it.
  110. func (in *input) peekRune() int {
  111. if len(in.remaining) == 0 {
  112. return 0
  113. }
  114. r, _ := utf8.DecodeRune(in.remaining)
  115. return int(r)
  116. }
  117. // readRune consumes and returns the next rune in the input.
  118. func (in *input) readRune() int {
  119. if len(in.remaining) == 0 {
  120. in.Error("internal lexer error: readRune at EOF")
  121. }
  122. r, size := utf8.DecodeRune(in.remaining)
  123. in.remaining = in.remaining[size:]
  124. if r == '\n' {
  125. in.pos.Line++
  126. in.pos.LineRune = 1
  127. } else {
  128. in.pos.LineRune++
  129. }
  130. in.pos.Byte += size
  131. return int(r)
  132. }
  133. // startToken marks the beginning of the next input token.
  134. // It must be followed by a call to endToken, once the token has
  135. // been consumed using readRune.
  136. func (in *input) startToken(val *yySymType) {
  137. in.token = in.remaining
  138. val.tok = ""
  139. val.pos = in.pos
  140. }
  141. // yySymType (used in the next few functions) is defined by the
  142. // generated parser. It is a struct containing all the fields listed
  143. // in parse.y's %union [sic] section.
  144. // endToken marks the end of an input token.
  145. // It records the actual token string in val.tok if the caller
  146. // has not done that already.
  147. func (in *input) endToken(val *yySymType) {
  148. if val.tok == "" {
  149. tok := string(in.token[:len(in.token)-len(in.remaining)])
  150. val.tok = tok
  151. in.lastToken = val.tok
  152. }
  153. }
// Lex is called from the generated parser to obtain the next input token.
// It returns the token value (either a rune like '+' or a symbolic token _FOR)
// and sets val to the data associated with the token.
//
// For all our input tokens, the associated data is
// val.Pos (the position where the token begins)
// and val.Token (the input string corresponding to the token).
func (in *input) Lex(val *yySymType) int {
	// Skip past spaces, stopping at non-space or EOF.
	countNL := 0 // number of newlines we've skipped past
	for !in.eof() {
		// If a single statement is split into multiple lines, we don't need
		// to track indentations and unindentations within these lines. For example:
		//
		// def f(
		//     # This indentation should be ignored
		//     x):
		//   # This unindentation should be ignored
		//   # Actual indentation is from 0 to 2 spaces here
		//   return x
		//
		// To handle this case, when we reach the beginning of a statement we scan forward to see where
		// it should end and record the number of input bytes remaining at that endpoint.
		//
		// If --format_bzl is set to false, top level blocks (e.g. an entire function definition)
		// is considered as a single statement.
		if in.endStmt != -1 && len(in.remaining) == in.endStmt {
			// Reached the recorded end of the current statement.
			in.endStmt = -1
		}

		// Skip over spaces. Count newlines so we can give the parser
		// information about where top-level blank lines are,
		// for top-level comment assignment.
		c := in.peekRune()
		if c == ' ' || c == '\t' || c == '\r' || c == '\n' {
			if c == '\n' {
				in.indent = 0
				in.cleanLine = true
				if in.endStmt == -1 {
					// Not in a statement. Tell parser about top-level blank line.
					in.startToken(val)
					in.readRune()
					in.endToken(val)
					return '\n'
				}
				countNL++
			} else if c == ' ' && in.cleanLine {
				// Leading space on a fresh line: counts toward indentation.
				in.indent++
			}
			in.readRune()
			continue
		}

		// Comment runs to end of line.
		if c == '#' {
			// If a line contains just a comment its indentation level doesn't matter.
			// Reset it to zero.
			in.indent = 0
			in.cleanLine = true

			// Is this comment the only thing on its line?
			// Find the last \n before this # and see if it's all
			// spaces from there to here.
			// If it's a suffix comment but the last non-space symbol before
			// it is one of (, [, or {, treat it as a line comment that should be
			// put inside the corresponding block.
			i := bytes.LastIndex(in.complete[:in.pos.Byte], []byte("\n"))
			prefix := bytes.TrimSpace(in.complete[i+1 : in.pos.Byte])
			isSuffix := true
			if len(prefix) == 0 ||
				prefix[len(prefix)-1] == '[' ||
				prefix[len(prefix)-1] == '(' ||
				prefix[len(prefix)-1] == '{' {
				isSuffix = false
			}

			// Consume comment without the \n it ends with.
			in.startToken(val)
			for len(in.remaining) > 0 && in.peekRune() != '\n' {
				in.readRune()
			}
			in.endToken(val)
			val.tok = strings.TrimRight(val.tok, "\n")
			in.lastToken = "comment"

			// If we are at top level (not in a rule), hand the comment to
			// the parser as a _COMMENT token. The grammar is written
			// to handle top-level comments itself.
			if in.endStmt == -1 {
				// Not in a statement. Tell parser about top-level comment.
				return _COMMENT
			}

			// Otherwise, save comment for later attachment to syntax tree.
			if countNL > 1 {
				// More than one blank line before this comment: record a
				// sentinel empty comment to preserve the blank-line break.
				in.lineComments = append(in.lineComments, Comment{val.pos, ""})
			}
			if isSuffix {
				in.suffixComments = append(in.suffixComments, Comment{val.pos, val.tok})
			} else {
				in.lineComments = append(in.lineComments, Comment{val.pos, val.tok})
			}
			countNL = 0
			continue
		}

		if c == '\\' && len(in.remaining) >= 2 && in.remaining[1] == '\n' {
			// We can ignore a trailing \ at end of line together with the \n.
			in.readRune()
			in.readRune()
			continue
		}

		// Found non-space non-comment.
		break
	}

	// Check for changes in indentation.
	// Skip if --format_bzl is set to false, if we're inside a statement, or if there were non-space
	// characters before in the current line.
	if tables.FormatBzlFiles && in.endStmt == -1 && in.cleanLine {
		if in.indent > in.currentIndent() {
			// A new indentation block starts.
			in.indents = append(in.indents, in.indent)
			in.lastToken = "indent"
			in.cleanLine = false
			return _INDENT
		} else if in.indent < in.currentIndent() {
			// An indentation block ends.
			in.indents = in.indents[:len(in.indents)-1]

			// It's a syntax error if the current line indentation level is now greater than
			// currentIndent(), should be either equal (a parent block continues) or still less
			// (need to unindent more).
			if in.indent > in.currentIndent() {
				in.pos = val.pos
				in.Error("unexpected indentation")
			}
			in.lastToken = "unindent"
			return _UNINDENT
		}
	}
	in.cleanLine = false

	// If the file ends with an indented block, return the corresponding amounts of unindents.
	if in.eof() && in.currentIndent() > 0 {
		in.indents = in.indents[:len(in.indents)-1]
		in.lastToken = "unindent"
		return _UNINDENT
	}

	// Found the beginning of the next token.
	in.startToken(val)
	defer in.endToken(val)

	// End of file.
	if in.eof() {
		in.lastToken = "EOF"
		return _EOF
	}

	// If endStmt is -1, we need to recompute where the end of the next statement is.
	if in.endStmt == -1 {
		in.endStmt = len(in.skipStmt(in.remaining))
	}

	// Punctuation tokens.
	switch c := in.peekRune(); c {
	case '[', '(', '{':
		in.depth++
		in.readRune()
		return c

	case ']', ')', '}':
		in.depth--
		in.readRune()
		return c

	case '.', ':', ';', ',': // single-char tokens
		in.readRune()
		return c

	case '<', '>', '=', '!', '+', '-', '*', '/', '%': // possibly followed by =
		in.readRune()
		if c == '/' && in.peekRune() == '/' {
			// integer division
			in.readRune()
		}
		if in.peekRune() == '=' {
			in.readRune()
			switch c {
			case '<':
				return _LE
			case '>':
				return _GE
			case '=':
				return _EQ
			case '!':
				return _NE
			default:
				return _AUGM
			}
		}
		return c

	case 'r': // possible beginning of raw quoted string
		if len(in.remaining) < 2 || in.remaining[1] != '"' && in.remaining[1] != '\'' {
			// Just an identifier starting with r; handled below.
			break
		}
		in.readRune()
		c = in.peekRune()
		fallthrough

	case '"', '\'': // quoted string
		quote := c
		if len(in.remaining) >= 3 && in.remaining[0] == byte(quote) && in.remaining[1] == byte(quote) && in.remaining[2] == byte(quote) {
			// Triple-quoted string.
			in.readRune()
			in.readRune()
			in.readRune()
			var c1, c2, c3 int
			for {
				if in.eof() {
					in.pos = val.pos
					in.Error("unexpected EOF in string")
				}
				// Keep a sliding window of the last three runes to spot the closing quotes.
				c1, c2, c3 = c2, c3, in.readRune()
				if c1 == quote && c2 == quote && c3 == quote {
					break
				}
				if c3 == '\\' {
					if in.eof() {
						in.pos = val.pos
						in.Error("unexpected EOF in string")
					}
					// Consume the escaped rune so it cannot terminate the string.
					in.readRune()
				}
			}
		} else {
			// Single-quoted string: runs to the matching quote on the same line.
			in.readRune()
			for {
				if in.eof() {
					in.pos = val.pos
					in.Error("unexpected EOF in string")
				}
				if in.peekRune() == '\n' {
					in.Error("unexpected newline in string")
				}
				c := in.readRune()
				if c == quote {
					break
				}
				if c == '\\' {
					if in.eof() {
						in.pos = val.pos
						in.Error("unexpected EOF in string")
					}
					// Consume the escaped rune so it cannot terminate the string.
					in.readRune()
				}
			}
		}
		in.endToken(val)
		s, triple, err := unquote(val.tok)
		if err != nil {
			in.Error(fmt.Sprint(err))
		}
		val.str = s
		val.triple = triple
		return _STRING
	}

	// Checked all punctuation. Must be identifier token.
	if c := in.peekRune(); !isIdent(c) {
		in.Error(fmt.Sprintf("unexpected input character %#q", c))
	}

	if !tables.FormatBzlFiles {
		// Look for raw Python block (class, def, if, etc at beginning of line) and pass through.
		if in.depth == 0 && in.pos.LineRune == 1 && hasPythonPrefix(in.remaining) {
			// Find end of Python block and advance input beyond it.
			// Have to loop calling readRune in order to maintain line number info.
			rest := in.skipStmt(in.remaining)
			for len(in.remaining) > len(rest) {
				in.readRune()
			}
			return _PYTHON
		}
	}

	// Scan over alphanumeric identifier.
	for {
		c := in.peekRune()
		if !isIdent(c) {
			break
		}
		in.readRune()
	}

	// Call endToken to set val.tok to identifier we just scanned,
	// so we can look to see if val.tok is a keyword.
	in.endToken(val)
	if k := keywordToken[val.tok]; k != 0 {
		return k
	}
	return _IDENT
}
  436. // isIdent reports whether c is an identifier rune.
  437. // We treat all non-ASCII runes as identifier runes.
  438. func isIdent(c int) bool {
  439. return '0' <= c && c <= '9' ||
  440. 'A' <= c && c <= 'Z' ||
  441. 'a' <= c && c <= 'z' ||
  442. c == '_' ||
  443. c >= 0x80
  444. }
// keywordToken records the special tokens for
// strings that should not be treated as ordinary identifiers.
// Lex consults this map after scanning each identifier; a zero
// lookup result means the identifier is not a keyword.
var keywordToken = map[string]int{
	"and":    _AND,
	"for":    _FOR,
	"if":     _IF,
	"else":   _ELSE,
	"elif":   _ELIF,
	"in":     _IN,
	"is":     _IS,
	"lambda": _LAMBDA,
	"load":   _LOAD,
	"not":    _NOT,
	"or":     _OR,
	"def":    _DEF,
	"return": _RETURN,
}
  462. // Python scanning.
  463. // About 1% of BUILD files embed arbitrary Python into the file.
  464. // We do not attempt to parse it. Instead, we lex just enough to scan
  465. // beyond it, treating the Python block as an unintepreted blob.
  466. // hasPythonPrefix reports whether p begins with a keyword that would
  467. // introduce an uninterpreted Python block.
  468. func hasPythonPrefix(p []byte) bool {
  469. if tables.FormatBzlFiles {
  470. return false
  471. }
  472. for _, pre := range prefixes {
  473. if hasPrefixSpace(p, pre) {
  474. return true
  475. }
  476. }
  477. return false
  478. }
// These keywords introduce uninterpreted Python blocks.
// hasPythonPrefix checks the start of each top-level line against this list.
var prefixes = []string{
	"assert",
	"class",
	"def",
	"del",
	"for",
	"if",
	"try",
	"else",
	"elif",
	"except",
}
  492. // hasPrefixSpace reports whether p begins with pre followed by a space or colon.
  493. func hasPrefixSpace(p []byte, pre string) bool {
  494. if len(p) <= len(pre) || p[len(pre)] != ' ' && p[len(pre)] != '\t' && p[len(pre)] != ':' {
  495. return false
  496. }
  497. for i := range pre {
  498. if p[i] != pre[i] {
  499. return false
  500. }
  501. }
  502. return true
  503. }
  504. // A utility function for the legacy formatter.
  505. // Returns whether a given code starts with a top-level statement (maybe with some preceeding
  506. // comments and blank lines)
  507. func isOutsideBlock(b []byte) bool {
  508. isBlankLine := true
  509. isComment := false
  510. for _, c := range b {
  511. switch {
  512. case c == ' ' || c == '\t' || c == '\r':
  513. isBlankLine = false
  514. case c == '#':
  515. isBlankLine = false
  516. isComment = true
  517. case c == '\n':
  518. isBlankLine = true
  519. isComment = false
  520. default:
  521. if !isComment {
  522. return isBlankLine
  523. }
  524. }
  525. }
  526. return true
  527. }
// skipStmt returns the data remaining after the statement beginning at p.
// It does not advance the input position.
// (The only reason for the input receiver is to be able to call in.Error.)
func (in *input) skipStmt(p []byte) []byte {
	quote := byte(0)     // if non-zero, the kind of quote we're in
	tripleQuote := false // if true, the quote is a triple quote
	depth := 0           // nesting depth for ( ) [ ] { }
	var rest []byte      // data after the Python block
	defer func() {
		// Reaching EOF with an open string is a hard error.
		if quote != 0 {
			in.Error("EOF scanning Python quoted string")
		}
	}()

	// Scan over input one byte at a time until we find
	// an unindented, non-blank, non-comment line
	// outside quoted strings and brackets.
	for i := 0; i < len(p); i++ {
		c := p[i]
		if quote != 0 && c == quote && !tripleQuote {
			// Closing quote of a single-quoted string.
			quote = 0
			continue
		}
		if quote != 0 && c == quote && tripleQuote && i+2 < len(p) && p[i+1] == quote && p[i+2] == quote {
			// Closing sequence of a triple-quoted string.
			i += 2
			quote = 0
			tripleQuote = false
			continue
		}
		if quote != 0 {
			if c == '\\' {
				i++ // skip escaped char
			}
			continue
		}
		if c == '\'' || c == '"' {
			// Opening quote; check whether it is a triple quote.
			if i+2 < len(p) && p[i+1] == c && p[i+2] == c {
				quote = c
				tripleQuote = true
				i += 2
				continue
			}
			quote = c
			continue
		}
		if depth == 0 && i > 0 && p[i] == '\n' && p[i-1] != '\\' {
			// Possible stopping point. Save the earliest one we find.
			if rest == nil {
				rest = p[i:]
			}
			if tables.FormatBzlFiles {
				// In the bzl files mode we only care about the end of the statement, we've found it.
				return rest
			}
			// In the legacy mode we need to find where the current block ends.
			if isOutsideBlock(p[i+1:]) {
				return rest
			}
			// Not a stopping point after all.
			rest = nil
		}
		switch c {
		case '#':
			// Skip comment.
			for i < len(p) && p[i] != '\n' {
				i++
			}
			// Rewind 1 position back because \n should be handled at the next iteration.
			i--
		case '(', '[', '{':
			depth++
		case ')', ']', '}':
			depth--
		}
	}
	// EOF reached without an unindented stopping line.
	return rest
}
// Comment assignment.
// We build two lists of all subexpressions, preorder and postorder.
// The preorder list is ordered by start location, with outer expressions first.
// The postorder list is ordered by end location, with outer expressions last.
// We use the preorder list to assign each whole-line comment to the syntax
// immediately following it, and we use the postorder list to assign each
// end-of-line comment to the syntax immediately preceding it.

// order walks the expression adding it and its subexpressions to the
// preorder and postorder lists.
func (in *input) order(v Expr) {
	if v != nil {
		in.pre = append(in.pre, v)
	}
	switch v := v.(type) {
	default:
		// Any AST node type not listed below is a programmer error.
		panic(fmt.Errorf("order: unexpected type %T", v))
	case nil:
		// nothing
	case *End:
		// nothing
	case *File:
		for _, stmt := range v.Stmt {
			in.order(stmt)
		}
	case *CommentBlock:
		// nothing
	case *CallExpr:
		in.order(v.X)
		for _, x := range v.List {
			in.order(x)
		}
		in.order(&v.End)
	case *PythonBlock:
		// nothing
	case *LiteralExpr:
		// nothing
	case *StringExpr:
		// nothing
	case *DotExpr:
		in.order(v.X)
	case *ListExpr:
		for _, x := range v.List {
			in.order(x)
		}
		in.order(&v.End)
	case *ListForExpr:
		in.order(v.X)
		for _, c := range v.For {
			in.order(c)
		}
		in.order(&v.End)
	case *SetExpr:
		for _, x := range v.List {
			in.order(x)
		}
		in.order(&v.End)
	case *ForClauseWithIfClausesOpt:
		in.order(v.For)
		for _, c := range v.Ifs {
			in.order(c)
		}
	case *ForClause:
		for _, name := range v.Var {
			in.order(name)
		}
		in.order(v.Expr)
	case *IfClause:
		in.order(v.Cond)
	case *KeyValueExpr:
		in.order(v.Key)
		in.order(v.Value)
	case *DictExpr:
		for _, x := range v.List {
			in.order(x)
		}
		in.order(&v.End)
	case *TupleExpr:
		for _, x := range v.List {
			in.order(x)
		}
		in.order(&v.End)
	case *UnaryExpr:
		in.order(v.X)
	case *BinaryExpr:
		in.order(v.X)
		in.order(v.Y)
	case *ConditionalExpr:
		// Visit in source order: then-value, condition, else-value.
		in.order(v.Then)
		in.order(v.Test)
		in.order(v.Else)
	case *ParenExpr:
		in.order(v.X)
		in.order(&v.End)
	case *SliceExpr:
		in.order(v.X)
		in.order(v.From)
		in.order(v.To)
		in.order(v.Step)
	case *IndexExpr:
		in.order(v.X)
		in.order(v.Y)
	case *LambdaExpr:
		for _, name := range v.Var {
			in.order(name)
		}
		in.order(v.Expr)
	case *ReturnExpr:
		if v.X != nil {
			in.order(v.X)
		}
	case *FuncDef:
		for _, x := range v.Args {
			in.order(x)
		}
		for _, x := range v.Body.Statements {
			in.order(x)
		}
	case *ForLoop:
		for _, x := range v.LoopVars {
			in.order(x)
		}
		in.order(v.Iterable)
		for _, x := range v.Body.Statements {
			in.order(x)
		}
	case *IfElse:
		for _, condition := range v.Conditions {
			in.order(condition.If)
			for _, x := range condition.Then.Statements {
				in.order(x)
			}
		}
	}
	if v != nil {
		in.post = append(in.post, v)
	}
}
// assignComments attaches comments to nearby syntax.
func (in *input) assignComments() {
	// Generate preorder and postorder lists.
	in.order(in.file)

	// Assign line comments to syntax immediately following.
	line := in.lineComments
	for _, x := range in.pre {
		start, _ := x.Span()
		xcom := x.Comment()
		// Every comment that begins before x starts becomes a Before comment of x.
		for len(line) > 0 && start.Byte >= line[0].Start.Byte {
			xcom.Before = append(xcom.Before, line[0])
			line = line[1:]
		}
	}

	// Remaining line comments go at end of file.
	in.file.After = append(in.file.After, line...)

	// Assign suffix comments to syntax immediately before.
	// Walk postorder backwards so inner/later expressions get first pick.
	suffix := in.suffixComments
	for i := len(in.post) - 1; i >= 0; i-- {
		x := in.post[i]

		// Do not assign suffix comments to file
		switch x.(type) {
		case *File:
			continue
		}

		_, end := x.Span()
		xcom := x.Comment()
		// Every suffix comment that begins at or after x's end attaches to x.
		for len(suffix) > 0 && end.Byte <= suffix[len(suffix)-1].Start.Byte {
			xcom.Suffix = append(xcom.Suffix, suffix[len(suffix)-1])
			suffix = suffix[:len(suffix)-1]
		}
	}

	// We assigned suffix comments in reverse.
	// If multiple suffix comments were appended to the same
	// expression node, they are now in reverse. Fix that.
	for _, x := range in.post {
		reverseComments(x.Comment().Suffix)
	}

	// Remaining suffix comments go at beginning of file.
	in.file.Before = append(in.file.Before, suffix...)
}
  782. // reverseComments reverses the []Comment list.
  783. func reverseComments(list []Comment) {
  784. for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 {
  785. list[i], list[j] = list[j], list[i]
  786. }
  787. }