parse.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592
  1. package toml
  2. import (
  3. "fmt"
  4. "strconv"
  5. "strings"
  6. "time"
  7. "unicode"
  8. "unicode/utf8"
  9. )
  10. type parser struct {
  11. mapping map[string]interface{}
  12. types map[string]tomlType
  13. lx *lexer
  14. // A list of keys in the order that they appear in the TOML data.
  15. ordered []Key
  16. // the full key for the current hash in scope
  17. context Key
  18. // the base key name for everything except hashes
  19. currentKey string
  20. // rough approximation of line number
  21. approxLine int
  22. // A map of 'key.group.names' to whether they were created implicitly.
  23. implicits map[string]bool
  24. }
  25. type parseError string
  26. func (pe parseError) Error() string {
  27. return string(pe)
  28. }
  29. func parse(data string) (p *parser, err error) {
  30. defer func() {
  31. if r := recover(); r != nil {
  32. var ok bool
  33. if err, ok = r.(parseError); ok {
  34. return
  35. }
  36. panic(r)
  37. }
  38. }()
  39. p = &parser{
  40. mapping: make(map[string]interface{}),
  41. types: make(map[string]tomlType),
  42. lx: lex(data),
  43. ordered: make([]Key, 0),
  44. implicits: make(map[string]bool),
  45. }
  46. for {
  47. item := p.next()
  48. if item.typ == itemEOF {
  49. break
  50. }
  51. p.topLevel(item)
  52. }
  53. return p, nil
  54. }
  55. func (p *parser) panicf(format string, v ...interface{}) {
  56. msg := fmt.Sprintf("Near line %d (last key parsed '%s'): %s",
  57. p.approxLine, p.current(), fmt.Sprintf(format, v...))
  58. panic(parseError(msg))
  59. }
  60. func (p *parser) next() item {
  61. it := p.lx.nextItem()
  62. if it.typ == itemError {
  63. p.panicf("%s", it.val)
  64. }
  65. return it
  66. }
  67. func (p *parser) bug(format string, v ...interface{}) {
  68. panic(fmt.Sprintf("BUG: "+format+"\n\n", v...))
  69. }
  70. func (p *parser) expect(typ itemType) item {
  71. it := p.next()
  72. p.assertEqual(typ, it.typ)
  73. return it
  74. }
  75. func (p *parser) assertEqual(expected, got itemType) {
  76. if expected != got {
  77. p.bug("Expected '%s' but got '%s'.", expected, got)
  78. }
  79. }
  80. func (p *parser) topLevel(item item) {
  81. switch item.typ {
  82. case itemCommentStart:
  83. p.approxLine = item.line
  84. p.expect(itemText)
  85. case itemTableStart:
  86. kg := p.next()
  87. p.approxLine = kg.line
  88. var key Key
  89. for ; kg.typ != itemTableEnd && kg.typ != itemEOF; kg = p.next() {
  90. key = append(key, p.keyString(kg))
  91. }
  92. p.assertEqual(itemTableEnd, kg.typ)
  93. p.establishContext(key, false)
  94. p.setType("", tomlHash)
  95. p.ordered = append(p.ordered, key)
  96. case itemArrayTableStart:
  97. kg := p.next()
  98. p.approxLine = kg.line
  99. var key Key
  100. for ; kg.typ != itemArrayTableEnd && kg.typ != itemEOF; kg = p.next() {
  101. key = append(key, p.keyString(kg))
  102. }
  103. p.assertEqual(itemArrayTableEnd, kg.typ)
  104. p.establishContext(key, true)
  105. p.setType("", tomlArrayHash)
  106. p.ordered = append(p.ordered, key)
  107. case itemKeyStart:
  108. kname := p.next()
  109. p.approxLine = kname.line
  110. p.currentKey = p.keyString(kname)
  111. val, typ := p.value(p.next())
  112. p.setValue(p.currentKey, val)
  113. p.setType(p.currentKey, typ)
  114. p.ordered = append(p.ordered, p.context.add(p.currentKey))
  115. p.currentKey = ""
  116. default:
  117. p.bug("Unexpected type at top level: %s", item.typ)
  118. }
  119. }
  120. // Gets a string for a key (or part of a key in a table name).
  121. func (p *parser) keyString(it item) string {
  122. switch it.typ {
  123. case itemText:
  124. return it.val
  125. case itemString, itemMultilineString,
  126. itemRawString, itemRawMultilineString:
  127. s, _ := p.value(it)
  128. return s.(string)
  129. default:
  130. p.bug("Unexpected key type: %s", it.typ)
  131. panic("unreachable")
  132. }
  133. }
  134. // value translates an expected value from the lexer into a Go value wrapped
  135. // as an empty interface.
  136. func (p *parser) value(it item) (interface{}, tomlType) {
  137. switch it.typ {
  138. case itemString:
  139. return p.replaceEscapes(it.val), p.typeOfPrimitive(it)
  140. case itemMultilineString:
  141. trimmed := stripFirstNewline(stripEscapedWhitespace(it.val))
  142. return p.replaceEscapes(trimmed), p.typeOfPrimitive(it)
  143. case itemRawString:
  144. return it.val, p.typeOfPrimitive(it)
  145. case itemRawMultilineString:
  146. return stripFirstNewline(it.val), p.typeOfPrimitive(it)
  147. case itemBool:
  148. switch it.val {
  149. case "true":
  150. return true, p.typeOfPrimitive(it)
  151. case "false":
  152. return false, p.typeOfPrimitive(it)
  153. }
  154. p.bug("Expected boolean value, but got '%s'.", it.val)
  155. case itemInteger:
  156. if !numUnderscoresOK(it.val) {
  157. p.panicf("Invalid integer %q: underscores must be surrounded by digits",
  158. it.val)
  159. }
  160. val := strings.Replace(it.val, "_", "", -1)
  161. num, err := strconv.ParseInt(val, 10, 64)
  162. if err != nil {
  163. // Distinguish integer values. Normally, it'd be a bug if the lexer
  164. // provides an invalid integer, but it's possible that the number is
  165. // out of range of valid values (which the lexer cannot determine).
  166. // So mark the former as a bug but the latter as a legitimate user
  167. // error.
  168. if e, ok := err.(*strconv.NumError); ok &&
  169. e.Err == strconv.ErrRange {
  170. p.panicf("Integer '%s' is out of the range of 64-bit "+
  171. "signed integers.", it.val)
  172. } else {
  173. p.bug("Expected integer value, but got '%s'.", it.val)
  174. }
  175. }
  176. return num, p.typeOfPrimitive(it)
  177. case itemFloat:
  178. parts := strings.FieldsFunc(it.val, func(r rune) bool {
  179. switch r {
  180. case '.', 'e', 'E':
  181. return true
  182. }
  183. return false
  184. })
  185. for _, part := range parts {
  186. if !numUnderscoresOK(part) {
  187. p.panicf("Invalid float %q: underscores must be "+
  188. "surrounded by digits", it.val)
  189. }
  190. }
  191. if !numPeriodsOK(it.val) {
  192. // As a special case, numbers like '123.' or '1.e2',
  193. // which are valid as far as Go/strconv are concerned,
  194. // must be rejected because TOML says that a fractional
  195. // part consists of '.' followed by 1+ digits.
  196. p.panicf("Invalid float %q: '.' must be followed "+
  197. "by one or more digits", it.val)
  198. }
  199. val := strings.Replace(it.val, "_", "", -1)
  200. num, err := strconv.ParseFloat(val, 64)
  201. if err != nil {
  202. if e, ok := err.(*strconv.NumError); ok &&
  203. e.Err == strconv.ErrRange {
  204. p.panicf("Float '%s' is out of the range of 64-bit "+
  205. "IEEE-754 floating-point numbers.", it.val)
  206. } else {
  207. p.panicf("Invalid float value: %q", it.val)
  208. }
  209. }
  210. return num, p.typeOfPrimitive(it)
  211. case itemDatetime:
  212. var t time.Time
  213. var ok bool
  214. var err error
  215. for _, format := range []string{
  216. "2006-01-02T15:04:05Z07:00",
  217. "2006-01-02T15:04:05",
  218. "2006-01-02",
  219. } {
  220. t, err = time.ParseInLocation(format, it.val, time.Local)
  221. if err == nil {
  222. ok = true
  223. break
  224. }
  225. }
  226. if !ok {
  227. p.panicf("Invalid TOML Datetime: %q.", it.val)
  228. }
  229. return t, p.typeOfPrimitive(it)
  230. case itemArray:
  231. array := make([]interface{}, 0)
  232. types := make([]tomlType, 0)
  233. for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
  234. if it.typ == itemCommentStart {
  235. p.expect(itemText)
  236. continue
  237. }
  238. val, typ := p.value(it)
  239. array = append(array, val)
  240. types = append(types, typ)
  241. }
  242. return array, p.typeOfArray(types)
  243. case itemInlineTableStart:
  244. var (
  245. hash = make(map[string]interface{})
  246. outerContext = p.context
  247. outerKey = p.currentKey
  248. )
  249. p.context = append(p.context, p.currentKey)
  250. p.currentKey = ""
  251. for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() {
  252. if it.typ != itemKeyStart {
  253. p.bug("Expected key start but instead found %q, around line %d",
  254. it.val, p.approxLine)
  255. }
  256. if it.typ == itemCommentStart {
  257. p.expect(itemText)
  258. continue
  259. }
  260. // retrieve key
  261. k := p.next()
  262. p.approxLine = k.line
  263. kname := p.keyString(k)
  264. // retrieve value
  265. p.currentKey = kname
  266. val, typ := p.value(p.next())
  267. // make sure we keep metadata up to date
  268. p.setType(kname, typ)
  269. p.ordered = append(p.ordered, p.context.add(p.currentKey))
  270. hash[kname] = val
  271. }
  272. p.context = outerContext
  273. p.currentKey = outerKey
  274. return hash, tomlHash
  275. }
  276. p.bug("Unexpected value type: %s", it.typ)
  277. panic("unreachable")
  278. }
  279. // numUnderscoresOK checks whether each underscore in s is surrounded by
  280. // characters that are not underscores.
  281. func numUnderscoresOK(s string) bool {
  282. accept := false
  283. for _, r := range s {
  284. if r == '_' {
  285. if !accept {
  286. return false
  287. }
  288. accept = false
  289. continue
  290. }
  291. accept = true
  292. }
  293. return accept
  294. }
  295. // numPeriodsOK checks whether every period in s is followed by a digit.
  296. func numPeriodsOK(s string) bool {
  297. period := false
  298. for _, r := range s {
  299. if period && !isDigit(r) {
  300. return false
  301. }
  302. period = r == '.'
  303. }
  304. return !period
  305. }
  306. // establishContext sets the current context of the parser,
  307. // where the context is either a hash or an array of hashes. Which one is
  308. // set depends on the value of the `array` parameter.
  309. //
  310. // Establishing the context also makes sure that the key isn't a duplicate, and
  311. // will create implicit hashes automatically.
  312. func (p *parser) establishContext(key Key, array bool) {
  313. var ok bool
  314. // Always start at the top level and drill down for our context.
  315. hashContext := p.mapping
  316. keyContext := make(Key, 0)
  317. // We only need implicit hashes for key[0:-1]
  318. for _, k := range key[0 : len(key)-1] {
  319. _, ok = hashContext[k]
  320. keyContext = append(keyContext, k)
  321. // No key? Make an implicit hash and move on.
  322. if !ok {
  323. p.addImplicit(keyContext)
  324. hashContext[k] = make(map[string]interface{})
  325. }
  326. // If the hash context is actually an array of tables, then set
  327. // the hash context to the last element in that array.
  328. //
  329. // Otherwise, it better be a table, since this MUST be a key group (by
  330. // virtue of it not being the last element in a key).
  331. switch t := hashContext[k].(type) {
  332. case []map[string]interface{}:
  333. hashContext = t[len(t)-1]
  334. case map[string]interface{}:
  335. hashContext = t
  336. default:
  337. p.panicf("Key '%s' was already created as a hash.", keyContext)
  338. }
  339. }
  340. p.context = keyContext
  341. if array {
  342. // If this is the first element for this array, then allocate a new
  343. // list of tables for it.
  344. k := key[len(key)-1]
  345. if _, ok := hashContext[k]; !ok {
  346. hashContext[k] = make([]map[string]interface{}, 0, 5)
  347. }
  348. // Add a new table. But make sure the key hasn't already been used
  349. // for something else.
  350. if hash, ok := hashContext[k].([]map[string]interface{}); ok {
  351. hashContext[k] = append(hash, make(map[string]interface{}))
  352. } else {
  353. p.panicf("Key '%s' was already created and cannot be used as "+
  354. "an array.", keyContext)
  355. }
  356. } else {
  357. p.setValue(key[len(key)-1], make(map[string]interface{}))
  358. }
  359. p.context = append(p.context, key[len(key)-1])
  360. }
  361. // setValue sets the given key to the given value in the current context.
  362. // It will make sure that the key hasn't already been defined, account for
  363. // implicit key groups.
  364. func (p *parser) setValue(key string, value interface{}) {
  365. var tmpHash interface{}
  366. var ok bool
  367. hash := p.mapping
  368. keyContext := make(Key, 0)
  369. for _, k := range p.context {
  370. keyContext = append(keyContext, k)
  371. if tmpHash, ok = hash[k]; !ok {
  372. p.bug("Context for key '%s' has not been established.", keyContext)
  373. }
  374. switch t := tmpHash.(type) {
  375. case []map[string]interface{}:
  376. // The context is a table of hashes. Pick the most recent table
  377. // defined as the current hash.
  378. hash = t[len(t)-1]
  379. case map[string]interface{}:
  380. hash = t
  381. default:
  382. p.bug("Expected hash to have type 'map[string]interface{}', but "+
  383. "it has '%T' instead.", tmpHash)
  384. }
  385. }
  386. keyContext = append(keyContext, key)
  387. if _, ok := hash[key]; ok {
  388. // Typically, if the given key has already been set, then we have
  389. // to raise an error since duplicate keys are disallowed. However,
  390. // it's possible that a key was previously defined implicitly. In this
  391. // case, it is allowed to be redefined concretely. (See the
  392. // `tests/valid/implicit-and-explicit-after.toml` test in `toml-test`.)
  393. //
  394. // But we have to make sure to stop marking it as an implicit. (So that
  395. // another redefinition provokes an error.)
  396. //
  397. // Note that since it has already been defined (as a hash), we don't
  398. // want to overwrite it. So our business is done.
  399. if p.isImplicit(keyContext) {
  400. p.removeImplicit(keyContext)
  401. return
  402. }
  403. // Otherwise, we have a concrete key trying to override a previous
  404. // key, which is *always* wrong.
  405. p.panicf("Key '%s' has already been defined.", keyContext)
  406. }
  407. hash[key] = value
  408. }
  409. // setType sets the type of a particular value at a given key.
  410. // It should be called immediately AFTER setValue.
  411. //
  412. // Note that if `key` is empty, then the type given will be applied to the
  413. // current context (which is either a table or an array of tables).
  414. func (p *parser) setType(key string, typ tomlType) {
  415. keyContext := make(Key, 0, len(p.context)+1)
  416. for _, k := range p.context {
  417. keyContext = append(keyContext, k)
  418. }
  419. if len(key) > 0 { // allow type setting for hashes
  420. keyContext = append(keyContext, key)
  421. }
  422. p.types[keyContext.String()] = typ
  423. }
  424. // addImplicit sets the given Key as having been created implicitly.
  425. func (p *parser) addImplicit(key Key) {
  426. p.implicits[key.String()] = true
  427. }
  428. // removeImplicit stops tagging the given key as having been implicitly
  429. // created.
  430. func (p *parser) removeImplicit(key Key) {
  431. p.implicits[key.String()] = false
  432. }
  433. // isImplicit returns true if the key group pointed to by the key was created
  434. // implicitly.
  435. func (p *parser) isImplicit(key Key) bool {
  436. return p.implicits[key.String()]
  437. }
  438. // current returns the full key name of the current context.
  439. func (p *parser) current() string {
  440. if len(p.currentKey) == 0 {
  441. return p.context.String()
  442. }
  443. if len(p.context) == 0 {
  444. return p.currentKey
  445. }
  446. return fmt.Sprintf("%s.%s", p.context, p.currentKey)
  447. }
  448. func stripFirstNewline(s string) string {
  449. if len(s) == 0 || s[0] != '\n' {
  450. return s
  451. }
  452. return s[1:]
  453. }
  454. func stripEscapedWhitespace(s string) string {
  455. esc := strings.Split(s, "\\\n")
  456. if len(esc) > 1 {
  457. for i := 1; i < len(esc); i++ {
  458. esc[i] = strings.TrimLeftFunc(esc[i], unicode.IsSpace)
  459. }
  460. }
  461. return strings.Join(esc, "")
  462. }
  463. func (p *parser) replaceEscapes(str string) string {
  464. var replaced []rune
  465. s := []byte(str)
  466. r := 0
  467. for r < len(s) {
  468. if s[r] != '\\' {
  469. c, size := utf8.DecodeRune(s[r:])
  470. r += size
  471. replaced = append(replaced, c)
  472. continue
  473. }
  474. r += 1
  475. if r >= len(s) {
  476. p.bug("Escape sequence at end of string.")
  477. return ""
  478. }
  479. switch s[r] {
  480. default:
  481. p.bug("Expected valid escape code after \\, but got %q.", s[r])
  482. return ""
  483. case 'b':
  484. replaced = append(replaced, rune(0x0008))
  485. r += 1
  486. case 't':
  487. replaced = append(replaced, rune(0x0009))
  488. r += 1
  489. case 'n':
  490. replaced = append(replaced, rune(0x000A))
  491. r += 1
  492. case 'f':
  493. replaced = append(replaced, rune(0x000C))
  494. r += 1
  495. case 'r':
  496. replaced = append(replaced, rune(0x000D))
  497. r += 1
  498. case '"':
  499. replaced = append(replaced, rune(0x0022))
  500. r += 1
  501. case '\\':
  502. replaced = append(replaced, rune(0x005C))
  503. r += 1
  504. case 'u':
  505. // At this point, we know we have a Unicode escape of the form
  506. // `uXXXX` at [r, r+5). (Because the lexer guarantees this
  507. // for us.)
  508. escaped := p.asciiEscapeToUnicode(s[r+1 : r+5])
  509. replaced = append(replaced, escaped)
  510. r += 5
  511. case 'U':
  512. // At this point, we know we have a Unicode escape of the form
  513. // `uXXXX` at [r, r+9). (Because the lexer guarantees this
  514. // for us.)
  515. escaped := p.asciiEscapeToUnicode(s[r+1 : r+9])
  516. replaced = append(replaced, escaped)
  517. r += 9
  518. }
  519. }
  520. return string(replaced)
  521. }
  522. func (p *parser) asciiEscapeToUnicode(bs []byte) rune {
  523. s := string(bs)
  524. hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
  525. if err != nil {
  526. p.bug("Could not parse '%s' as a hexadecimal number, but the "+
  527. "lexer claims it's OK: %s", s, err)
  528. }
  529. if !utf8.ValidRune(rune(hex)) {
  530. p.panicf("Escaped character '\\u%s' is not valid UTF-8.", s)
  531. }
  532. return rune(hex)
  533. }
  534. func isStringType(ty itemType) bool {
  535. return ty == itemString || ty == itemMultilineString ||
  536. ty == itemRawString || ty == itemRawMultilineString
  537. }