// text_parser.go (10 KB)
  1. package plist
  2. import (
  3. "encoding/binary"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "runtime"
  9. "strings"
  10. "time"
  11. "unicode/utf16"
  12. "unicode/utf8"
  13. )
  14. type textPlistParser struct {
  15. reader io.Reader
  16. format int
  17. input string
  18. start int
  19. pos int
  20. width int
  21. }
  22. func convertU16(buffer []byte, bo binary.ByteOrder) (string, error) {
  23. if len(buffer)%2 != 0 {
  24. return "", errors.New("truncated utf16")
  25. }
  26. tmp := make([]uint16, len(buffer)/2)
  27. for i := 0; i < len(buffer); i += 2 {
  28. tmp[i/2] = bo.Uint16(buffer[i : i+2])
  29. }
  30. return string(utf16.Decode(tmp)), nil
  31. }
  32. func guessEncodingAndConvert(buffer []byte) (string, error) {
  33. if len(buffer) >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF {
  34. // UTF-8 BOM
  35. return zeroCopy8BitString(buffer, 3, len(buffer)-3), nil
  36. } else if len(buffer) >= 2 {
  37. // UTF-16 guesses
  38. switch {
  39. // stream is big-endian (BOM is FE FF or head is 00 XX)
  40. case (buffer[0] == 0xFE && buffer[1] == 0xFF):
  41. return convertU16(buffer[2:], binary.BigEndian)
  42. case (buffer[0] == 0 && buffer[1] != 0):
  43. return convertU16(buffer, binary.BigEndian)
  44. // stream is little-endian (BOM is FE FF or head is XX 00)
  45. case (buffer[0] == 0xFF && buffer[1] == 0xFE):
  46. return convertU16(buffer[2:], binary.LittleEndian)
  47. case (buffer[0] != 0 && buffer[1] == 0):
  48. return convertU16(buffer, binary.LittleEndian)
  49. }
  50. }
  51. // fallback: assume ASCII (not great!)
  52. return zeroCopy8BitString(buffer, 0, len(buffer)), nil
  53. }
  54. func (p *textPlistParser) parseDocument() (pval cfValue, parseError error) {
  55. defer func() {
  56. if r := recover(); r != nil {
  57. if _, ok := r.(runtime.Error); ok {
  58. panic(r)
  59. }
  60. // Wrap all non-invalid-plist errors.
  61. parseError = plistParseError{"text", r.(error)}
  62. }
  63. }()
  64. buffer, err := ioutil.ReadAll(p.reader)
  65. if err != nil {
  66. panic(err)
  67. }
  68. p.input, err = guessEncodingAndConvert(buffer)
  69. if err != nil {
  70. panic(err)
  71. }
  72. val := p.parsePlistValue()
  73. p.skipWhitespaceAndComments()
  74. if p.peek() != eof {
  75. if _, ok := val.(cfString); !ok {
  76. p.error("garbage after end of document")
  77. }
  78. p.start = 0
  79. p.pos = 0
  80. val = p.parseDictionary(true)
  81. }
  82. pval = val
  83. return
  84. }
  85. const eof rune = -1
  86. func (p *textPlistParser) error(e string, args ...interface{}) {
  87. line := strings.Count(p.input[:p.pos], "\n")
  88. char := p.pos - strings.LastIndex(p.input[:p.pos], "\n") - 1
  89. panic(fmt.Errorf("%s at line %d character %d", fmt.Sprintf(e, args...), line, char))
  90. }
  91. func (p *textPlistParser) next() rune {
  92. if int(p.pos) >= len(p.input) {
  93. p.width = 0
  94. return eof
  95. }
  96. r, w := utf8.DecodeRuneInString(p.input[p.pos:])
  97. p.width = w
  98. p.pos += p.width
  99. return r
  100. }
  101. func (p *textPlistParser) backup() {
  102. p.pos -= p.width
  103. }
  104. func (p *textPlistParser) peek() rune {
  105. r := p.next()
  106. p.backup()
  107. return r
  108. }
  109. func (p *textPlistParser) emit() string {
  110. s := p.input[p.start:p.pos]
  111. p.start = p.pos
  112. return s
  113. }
  114. func (p *textPlistParser) ignore() {
  115. p.start = p.pos
  116. }
  117. func (p *textPlistParser) empty() bool {
  118. return p.start == p.pos
  119. }
  120. func (p *textPlistParser) scanUntil(ch rune) {
  121. if x := strings.IndexRune(p.input[p.pos:], ch); x >= 0 {
  122. p.pos += x
  123. return
  124. }
  125. p.pos = len(p.input)
  126. }
  127. func (p *textPlistParser) scanUntilAny(chs string) {
  128. if x := strings.IndexAny(p.input[p.pos:], chs); x >= 0 {
  129. p.pos += x
  130. return
  131. }
  132. p.pos = len(p.input)
  133. }
  134. func (p *textPlistParser) scanCharactersInSet(ch *characterSet) {
  135. for ch.Contains(p.next()) {
  136. }
  137. p.backup()
  138. }
  139. func (p *textPlistParser) scanCharactersNotInSet(ch *characterSet) {
  140. var r rune
  141. for {
  142. r = p.next()
  143. if r == eof || ch.Contains(r) {
  144. break
  145. }
  146. }
  147. p.backup()
  148. }
  149. func (p *textPlistParser) skipWhitespaceAndComments() {
  150. for {
  151. p.scanCharactersInSet(&whitespace)
  152. if strings.HasPrefix(p.input[p.pos:], "//") {
  153. p.scanCharactersNotInSet(&newlineCharacterSet)
  154. } else if strings.HasPrefix(p.input[p.pos:], "/*") {
  155. if x := strings.Index(p.input[p.pos:], "*/"); x >= 0 {
  156. p.pos += x + 2 // skip the */ as well
  157. continue // consume more whitespace
  158. } else {
  159. p.error("unexpected eof in block comment")
  160. }
  161. } else {
  162. break
  163. }
  164. }
  165. p.ignore()
  166. }
  167. func (p *textPlistParser) parseOctalDigits(max int) uint64 {
  168. var val uint64
  169. for i := 0; i < max; i++ {
  170. r := p.next()
  171. if r >= '0' && r <= '7' {
  172. val <<= 3
  173. val |= uint64((r - '0'))
  174. } else {
  175. p.backup()
  176. break
  177. }
  178. }
  179. return val
  180. }
  181. func (p *textPlistParser) parseHexDigits(max int) uint64 {
  182. var val uint64
  183. for i := 0; i < max; i++ {
  184. r := p.next()
  185. if r >= 'a' && r <= 'f' {
  186. val <<= 4
  187. val |= 10 + uint64((r - 'a'))
  188. } else if r >= 'A' && r <= 'F' {
  189. val <<= 4
  190. val |= 10 + uint64((r - 'A'))
  191. } else if r >= '0' && r <= '9' {
  192. val <<= 4
  193. val |= uint64((r - '0'))
  194. } else {
  195. p.backup()
  196. break
  197. }
  198. }
  199. return val
  200. }
  201. // the \ has already been consumed
  202. func (p *textPlistParser) parseEscape() string {
  203. var s string
  204. switch p.next() {
  205. case 'a':
  206. s = "\a"
  207. case 'b':
  208. s = "\b"
  209. case 'v':
  210. s = "\v"
  211. case 'f':
  212. s = "\f"
  213. case 't':
  214. s = "\t"
  215. case 'r':
  216. s = "\r"
  217. case 'n':
  218. s = "\n"
  219. case '\\':
  220. s = `\`
  221. case '"':
  222. s = `"`
  223. case 'x':
  224. s = string(rune(p.parseHexDigits(2)))
  225. case 'u', 'U':
  226. s = string(rune(p.parseHexDigits(4)))
  227. case '0', '1', '2', '3', '4', '5', '6', '7':
  228. p.backup() // we've already consumed one of the digits
  229. s = string(rune(p.parseOctalDigits(3)))
  230. default:
  231. p.backup() // everything else should be accepted
  232. }
  233. p.ignore() // skip the entire escape sequence
  234. return s
  235. }
  236. // the " has already been consumed
  237. func (p *textPlistParser) parseQuotedString() cfString {
  238. p.ignore() // ignore the "
  239. slowPath := false
  240. s := ""
  241. for {
  242. p.scanUntilAny(`"\`)
  243. switch p.peek() {
  244. case eof:
  245. p.error("unexpected eof in quoted string")
  246. case '"':
  247. section := p.emit()
  248. p.pos++ // skip "
  249. if !slowPath {
  250. return cfString(section)
  251. }
  252. s += section
  253. return cfString(s)
  254. case '\\':
  255. slowPath = true
  256. s += p.emit()
  257. p.next() // consume \
  258. s += p.parseEscape()
  259. }
  260. }
  261. }
  262. func (p *textPlistParser) parseUnquotedString() cfString {
  263. p.scanCharactersNotInSet(&gsQuotable)
  264. s := p.emit()
  265. if s == "" {
  266. p.error("invalid unquoted string (found an unquoted character that should be quoted?)")
  267. }
  268. return cfString(s)
  269. }
  270. // the { has already been consumed
  271. func (p *textPlistParser) parseDictionary(ignoreEOF bool) *cfDictionary {
  272. //p.ignore() // ignore the {
  273. var keypv cfValue
  274. keys := make([]string, 0, 32)
  275. values := make([]cfValue, 0, 32)
  276. outer:
  277. for {
  278. p.skipWhitespaceAndComments()
  279. switch p.next() {
  280. case eof:
  281. if !ignoreEOF {
  282. p.error("unexpected eof in dictionary")
  283. }
  284. fallthrough
  285. case '}':
  286. break outer
  287. case '"':
  288. keypv = p.parseQuotedString()
  289. default:
  290. p.backup()
  291. keypv = p.parseUnquotedString()
  292. }
  293. // INVARIANT: key can't be nil; parseQuoted and parseUnquoted
  294. // will panic out before they return nil.
  295. p.skipWhitespaceAndComments()
  296. var val cfValue
  297. n := p.next()
  298. if n == ';' {
  299. val = keypv
  300. } else if n == '=' {
  301. // whitespace is consumed within
  302. val = p.parsePlistValue()
  303. p.skipWhitespaceAndComments()
  304. if p.next() != ';' {
  305. p.error("missing ; in dictionary")
  306. }
  307. } else {
  308. p.error("missing = in dictionary")
  309. }
  310. keys = append(keys, string(keypv.(cfString)))
  311. values = append(values, val)
  312. }
  313. return &cfDictionary{keys: keys, values: values}
  314. }
  315. // the ( has already been consumed
  316. func (p *textPlistParser) parseArray() *cfArray {
  317. //p.ignore() // ignore the (
  318. values := make([]cfValue, 0, 32)
  319. outer:
  320. for {
  321. p.skipWhitespaceAndComments()
  322. switch p.next() {
  323. case eof:
  324. p.error("unexpected eof in array")
  325. case ')':
  326. break outer // done here
  327. case ',':
  328. continue // restart; ,) is valid and we don't want to blow it
  329. default:
  330. p.backup()
  331. }
  332. pval := p.parsePlistValue() // whitespace is consumed within
  333. if str, ok := pval.(cfString); ok && string(str) == "" {
  334. // Empty strings in arrays are apparently skipped?
  335. // TODO: Figure out why this was implemented.
  336. continue
  337. }
  338. values = append(values, pval)
  339. }
  340. return &cfArray{values}
  341. }
  342. // the <* have already been consumed
  343. func (p *textPlistParser) parseGNUStepValue() cfValue {
  344. typ := p.next()
  345. p.ignore()
  346. p.scanUntil('>')
  347. if typ == eof || typ == '>' || p.empty() || p.peek() == eof {
  348. p.error("invalid GNUStep extended value")
  349. }
  350. v := p.emit()
  351. p.next() // consume the >
  352. switch typ {
  353. case 'I':
  354. if v[0] == '-' {
  355. n := mustParseInt(v, 10, 64)
  356. return &cfNumber{signed: true, value: uint64(n)}
  357. }
  358. n := mustParseUint(v, 10, 64)
  359. return &cfNumber{signed: false, value: n}
  360. case 'R':
  361. n := mustParseFloat(v, 64)
  362. return &cfReal{wide: true, value: n} // TODO(DH) 32/64
  363. case 'B':
  364. b := v[0] == 'Y'
  365. return cfBoolean(b)
  366. case 'D':
  367. t, err := time.Parse(textPlistTimeLayout, v)
  368. if err != nil {
  369. p.error(err.Error())
  370. }
  371. return cfDate(t.In(time.UTC))
  372. }
  373. p.error("invalid GNUStep type " + string(typ))
  374. return nil
  375. }
  376. // The < has already been consumed
  377. func (p *textPlistParser) parseHexData() cfData {
  378. buf := make([]byte, 256)
  379. i := 0
  380. c := 0
  381. for {
  382. r := p.next()
  383. switch r {
  384. case eof:
  385. p.error("unexpected eof in data")
  386. case '>':
  387. if c&1 == 1 {
  388. p.error("uneven number of hex digits in data")
  389. }
  390. p.ignore()
  391. return cfData(buf[:i])
  392. case ' ', '\t', '\n', '\r', '\u2028', '\u2029': // more lax than apple here: skip spaces
  393. continue
  394. }
  395. buf[i] <<= 4
  396. if r >= 'a' && r <= 'f' {
  397. buf[i] |= 10 + byte((r - 'a'))
  398. } else if r >= 'A' && r <= 'F' {
  399. buf[i] |= 10 + byte((r - 'A'))
  400. } else if r >= '0' && r <= '9' {
  401. buf[i] |= byte((r - '0'))
  402. } else {
  403. p.error("unexpected hex digit `%c'", r)
  404. }
  405. c++
  406. if c&1 == 0 {
  407. i++
  408. if i >= len(buf) {
  409. realloc := make([]byte, len(buf)*2)
  410. copy(realloc, buf)
  411. buf = realloc
  412. }
  413. }
  414. }
  415. }
  416. func (p *textPlistParser) parsePlistValue() cfValue {
  417. for {
  418. p.skipWhitespaceAndComments()
  419. switch p.next() {
  420. case eof:
  421. return &cfDictionary{}
  422. case '<':
  423. if p.next() == '*' {
  424. p.format = GNUStepFormat
  425. return p.parseGNUStepValue()
  426. }
  427. p.backup()
  428. return p.parseHexData()
  429. case '"':
  430. return p.parseQuotedString()
  431. case '{':
  432. return p.parseDictionary(false)
  433. case '(':
  434. return p.parseArray()
  435. default:
  436. p.backup()
  437. return p.parseUnquotedString()
  438. }
  439. }
  440. }
  441. func newTextPlistParser(r io.Reader) *textPlistParser {
  442. return &textPlistParser{
  443. reader: r,
  444. format: OpenStepFormat,
  445. }
  446. }