parser.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. // TOML Parser.
  2. package toml
  3. import (
  4. "errors"
  5. "fmt"
  6. "math"
  7. "reflect"
  8. "regexp"
  9. "strconv"
  10. "strings"
  11. "time"
  12. )
  13. type tomlParser struct {
  14. flowIdx int
  15. flow []token
  16. tree *Tree
  17. currentTable []string
  18. seenTableKeys []string
  19. }
  20. type tomlParserStateFn func() tomlParserStateFn
  21. // Formats and panics an error message based on a token
  22. func (p *tomlParser) raiseError(tok *token, msg string, args ...interface{}) {
  23. panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...))
  24. }
  25. func (p *tomlParser) run() {
  26. for state := p.parseStart; state != nil; {
  27. state = state()
  28. }
  29. }
  30. func (p *tomlParser) peek() *token {
  31. if p.flowIdx >= len(p.flow) {
  32. return nil
  33. }
  34. return &p.flow[p.flowIdx]
  35. }
  36. func (p *tomlParser) assume(typ tokenType) {
  37. tok := p.getToken()
  38. if tok == nil {
  39. p.raiseError(tok, "was expecting token %s, but token stream is empty", tok)
  40. }
  41. if tok.typ != typ {
  42. p.raiseError(tok, "was expecting token %s, but got %s instead", typ, tok)
  43. }
  44. }
  45. func (p *tomlParser) getToken() *token {
  46. tok := p.peek()
  47. if tok == nil {
  48. return nil
  49. }
  50. p.flowIdx++
  51. return tok
  52. }
  53. func (p *tomlParser) parseStart() tomlParserStateFn {
  54. tok := p.peek()
  55. // end of stream, parsing is finished
  56. if tok == nil {
  57. return nil
  58. }
  59. switch tok.typ {
  60. case tokenDoubleLeftBracket:
  61. return p.parseGroupArray
  62. case tokenLeftBracket:
  63. return p.parseGroup
  64. case tokenKey:
  65. return p.parseAssign
  66. case tokenEOF:
  67. return nil
  68. default:
  69. p.raiseError(tok, "unexpected token")
  70. }
  71. return nil
  72. }
  73. func (p *tomlParser) parseGroupArray() tomlParserStateFn {
  74. startToken := p.getToken() // discard the [[
  75. key := p.getToken()
  76. if key.typ != tokenKeyGroupArray {
  77. p.raiseError(key, "unexpected token %s, was expecting a table array key", key)
  78. }
  79. // get or create table array element at the indicated part in the path
  80. keys, err := parseKey(key.val)
  81. if err != nil {
  82. p.raiseError(key, "invalid table array key: %s", err)
  83. }
  84. p.tree.createSubTree(keys[:len(keys)-1], startToken.Position) // create parent entries
  85. destTree := p.tree.GetPath(keys)
  86. var array []*Tree
  87. if destTree == nil {
  88. array = make([]*Tree, 0)
  89. } else if target, ok := destTree.([]*Tree); ok && target != nil {
  90. array = destTree.([]*Tree)
  91. } else {
  92. p.raiseError(key, "key %s is already assigned and not of type table array", key)
  93. }
  94. p.currentTable = keys
  95. // add a new tree to the end of the table array
  96. newTree := newTree()
  97. newTree.position = startToken.Position
  98. array = append(array, newTree)
  99. p.tree.SetPath(p.currentTable, array)
  100. // remove all keys that were children of this table array
  101. prefix := key.val + "."
  102. found := false
  103. for ii := 0; ii < len(p.seenTableKeys); {
  104. tableKey := p.seenTableKeys[ii]
  105. if strings.HasPrefix(tableKey, prefix) {
  106. p.seenTableKeys = append(p.seenTableKeys[:ii], p.seenTableKeys[ii+1:]...)
  107. } else {
  108. found = (tableKey == key.val)
  109. ii++
  110. }
  111. }
  112. // keep this key name from use by other kinds of assignments
  113. if !found {
  114. p.seenTableKeys = append(p.seenTableKeys, key.val)
  115. }
  116. // move to next parser state
  117. p.assume(tokenDoubleRightBracket)
  118. return p.parseStart
  119. }
  120. func (p *tomlParser) parseGroup() tomlParserStateFn {
  121. startToken := p.getToken() // discard the [
  122. key := p.getToken()
  123. if key.typ != tokenKeyGroup {
  124. p.raiseError(key, "unexpected token %s, was expecting a table key", key)
  125. }
  126. for _, item := range p.seenTableKeys {
  127. if item == key.val {
  128. p.raiseError(key, "duplicated tables")
  129. }
  130. }
  131. p.seenTableKeys = append(p.seenTableKeys, key.val)
  132. keys, err := parseKey(key.val)
  133. if err != nil {
  134. p.raiseError(key, "invalid table array key: %s", err)
  135. }
  136. if err := p.tree.createSubTree(keys, startToken.Position); err != nil {
  137. p.raiseError(key, "%s", err)
  138. }
  139. p.assume(tokenRightBracket)
  140. p.currentTable = keys
  141. return p.parseStart
  142. }
  143. func (p *tomlParser) parseAssign() tomlParserStateFn {
  144. key := p.getToken()
  145. p.assume(tokenEqual)
  146. value := p.parseRvalue()
  147. var tableKey []string
  148. if len(p.currentTable) > 0 {
  149. tableKey = p.currentTable
  150. } else {
  151. tableKey = []string{}
  152. }
  153. // find the table to assign, looking out for arrays of tables
  154. var targetNode *Tree
  155. switch node := p.tree.GetPath(tableKey).(type) {
  156. case []*Tree:
  157. targetNode = node[len(node)-1]
  158. case *Tree:
  159. targetNode = node
  160. default:
  161. p.raiseError(key, "Unknown table type for path: %s",
  162. strings.Join(tableKey, "."))
  163. }
  164. // assign value to the found table
  165. keyVals := []string{key.val}
  166. if len(keyVals) != 1 {
  167. p.raiseError(key, "Invalid key")
  168. }
  169. keyVal := keyVals[0]
  170. localKey := []string{keyVal}
  171. finalKey := append(tableKey, keyVal)
  172. if targetNode.GetPath(localKey) != nil {
  173. p.raiseError(key, "The following key was defined twice: %s",
  174. strings.Join(finalKey, "."))
  175. }
  176. var toInsert interface{}
  177. switch value.(type) {
  178. case *Tree, []*Tree:
  179. toInsert = value
  180. default:
  181. toInsert = &tomlValue{value: value, position: key.Position}
  182. }
  183. targetNode.values[keyVal] = toInsert
  184. return p.parseStart
  185. }
  // Patterns that match a misplaced underscore in a numeric literal. Both are
  // declared here without a value and assigned in this file's init function.
  var (
  	// numberUnderscoreInvalidRegexp is the pattern consulted by
  	// numberContainsInvalidUnderscore.
  	numberUnderscoreInvalidRegexp *regexp.Regexp
  	// hexNumberUnderscoreInvalidRegexp is the pattern consulted by
  	// hexNumberContainsInvalidUnderscore.
  	hexNumberUnderscoreInvalidRegexp *regexp.Regexp
  )
  188. func numberContainsInvalidUnderscore(value string) error {
  189. if numberUnderscoreInvalidRegexp.MatchString(value) {
  190. return errors.New("invalid use of _ in number")
  191. }
  192. return nil
  193. }
  194. func hexNumberContainsInvalidUnderscore(value string) error {
  195. if hexNumberUnderscoreInvalidRegexp.MatchString(value) {
  196. return errors.New("invalid use of _ in hex number")
  197. }
  198. return nil
  199. }
  // cleanupNumberToken returns value with every underscore removed, so the
  // result can be handed to the strconv parsers.
  func cleanupNumberToken(value string) string {
  	return strings.Replace(value, "_", "", -1)
  }
  204. func (p *tomlParser) parseRvalue() interface{} {
  205. tok := p.getToken()
  206. if tok == nil || tok.typ == tokenEOF {
  207. p.raiseError(tok, "expecting a value")
  208. }
  209. switch tok.typ {
  210. case tokenString:
  211. return tok.val
  212. case tokenTrue:
  213. return true
  214. case tokenFalse:
  215. return false
  216. case tokenInf:
  217. if tok.val[0] == '-' {
  218. return math.Inf(-1)
  219. }
  220. return math.Inf(1)
  221. case tokenNan:
  222. return math.NaN()
  223. case tokenInteger:
  224. cleanedVal := cleanupNumberToken(tok.val)
  225. var err error
  226. var val int64
  227. if len(cleanedVal) >= 3 && cleanedVal[0] == '0' {
  228. switch cleanedVal[1] {
  229. case 'x':
  230. err = hexNumberContainsInvalidUnderscore(tok.val)
  231. if err != nil {
  232. p.raiseError(tok, "%s", err)
  233. }
  234. val, err = strconv.ParseInt(cleanedVal[2:], 16, 64)
  235. case 'o':
  236. err = numberContainsInvalidUnderscore(tok.val)
  237. if err != nil {
  238. p.raiseError(tok, "%s", err)
  239. }
  240. val, err = strconv.ParseInt(cleanedVal[2:], 8, 64)
  241. case 'b':
  242. err = numberContainsInvalidUnderscore(tok.val)
  243. if err != nil {
  244. p.raiseError(tok, "%s", err)
  245. }
  246. val, err = strconv.ParseInt(cleanedVal[2:], 2, 64)
  247. default:
  248. panic("invalid base") // the lexer should catch this first
  249. }
  250. } else {
  251. err = numberContainsInvalidUnderscore(tok.val)
  252. if err != nil {
  253. p.raiseError(tok, "%s", err)
  254. }
  255. val, err = strconv.ParseInt(cleanedVal, 10, 64)
  256. }
  257. if err != nil {
  258. p.raiseError(tok, "%s", err)
  259. }
  260. return val
  261. case tokenFloat:
  262. err := numberContainsInvalidUnderscore(tok.val)
  263. if err != nil {
  264. p.raiseError(tok, "%s", err)
  265. }
  266. cleanedVal := cleanupNumberToken(tok.val)
  267. val, err := strconv.ParseFloat(cleanedVal, 64)
  268. if err != nil {
  269. p.raiseError(tok, "%s", err)
  270. }
  271. return val
  272. case tokenDate:
  273. val, err := time.ParseInLocation(time.RFC3339Nano, tok.val, time.UTC)
  274. if err != nil {
  275. p.raiseError(tok, "%s", err)
  276. }
  277. return val
  278. case tokenLeftBracket:
  279. return p.parseArray()
  280. case tokenLeftCurlyBrace:
  281. return p.parseInlineTable()
  282. case tokenEqual:
  283. p.raiseError(tok, "cannot have multiple equals for the same key")
  284. case tokenError:
  285. p.raiseError(tok, "%s", tok)
  286. }
  287. p.raiseError(tok, "never reached")
  288. return nil
  289. }
  290. func tokenIsComma(t *token) bool {
  291. return t != nil && t.typ == tokenComma
  292. }
  293. func (p *tomlParser) parseInlineTable() *Tree {
  294. tree := newTree()
  295. var previous *token
  296. Loop:
  297. for {
  298. follow := p.peek()
  299. if follow == nil || follow.typ == tokenEOF {
  300. p.raiseError(follow, "unterminated inline table")
  301. }
  302. switch follow.typ {
  303. case tokenRightCurlyBrace:
  304. p.getToken()
  305. break Loop
  306. case tokenKey:
  307. if !tokenIsComma(previous) && previous != nil {
  308. p.raiseError(follow, "comma expected between fields in inline table")
  309. }
  310. key := p.getToken()
  311. p.assume(tokenEqual)
  312. value := p.parseRvalue()
  313. tree.Set(key.val, value)
  314. case tokenComma:
  315. if previous == nil {
  316. p.raiseError(follow, "inline table cannot start with a comma")
  317. }
  318. if tokenIsComma(previous) {
  319. p.raiseError(follow, "need field between two commas in inline table")
  320. }
  321. p.getToken()
  322. default:
  323. p.raiseError(follow, "unexpected token type in inline table: %s", follow.String())
  324. }
  325. previous = follow
  326. }
  327. if tokenIsComma(previous) {
  328. p.raiseError(previous, "trailing comma at the end of inline table")
  329. }
  330. return tree
  331. }
  332. func (p *tomlParser) parseArray() interface{} {
  333. var array []interface{}
  334. arrayType := reflect.TypeOf(nil)
  335. for {
  336. follow := p.peek()
  337. if follow == nil || follow.typ == tokenEOF {
  338. p.raiseError(follow, "unterminated array")
  339. }
  340. if follow.typ == tokenRightBracket {
  341. p.getToken()
  342. break
  343. }
  344. val := p.parseRvalue()
  345. if arrayType == nil {
  346. arrayType = reflect.TypeOf(val)
  347. }
  348. if reflect.TypeOf(val) != arrayType {
  349. p.raiseError(follow, "mixed types in array")
  350. }
  351. array = append(array, val)
  352. follow = p.peek()
  353. if follow == nil || follow.typ == tokenEOF {
  354. p.raiseError(follow, "unterminated array")
  355. }
  356. if follow.typ != tokenRightBracket && follow.typ != tokenComma {
  357. p.raiseError(follow, "missing comma")
  358. }
  359. if follow.typ == tokenComma {
  360. p.getToken()
  361. }
  362. }
  363. // An array of Trees is actually an array of inline
  364. // tables, which is a shorthand for a table array. If the
  365. // array was not converted from []interface{} to []*Tree,
  366. // the two notations would not be equivalent.
  367. if arrayType == reflect.TypeOf(newTree()) {
  368. tomlArray := make([]*Tree, len(array))
  369. for i, v := range array {
  370. tomlArray[i] = v.(*Tree)
  371. }
  372. return tomlArray
  373. }
  374. return array
  375. }
  376. func parseToml(flow []token) *Tree {
  377. result := newTree()
  378. result.position = Position{1, 1}
  379. parser := &tomlParser{
  380. flowIdx: 0,
  381. flow: flow,
  382. tree: result,
  383. currentTable: make([]string, 0),
  384. seenTableKeys: make([]string, 0),
  385. }
  386. parser.run()
  387. return result
  388. }
  389. func init() {
  390. numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d])|_$|^_`)
  391. hexNumberUnderscoreInvalidRegexp = regexp.MustCompile(`(^0x_)|([^\da-f]_|_[^\da-f])|_$|^_`)
  392. }