bplist_parser.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353
  1. package plist
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "io/ioutil"
  9. "math"
  10. "runtime"
  11. "time"
  12. "unicode/utf16"
  13. )
  14. const (
  15. signedHighBits = 0xFFFFFFFFFFFFFFFF
  16. )
  17. type offset uint64
  18. type bplistParser struct {
  19. buffer []byte
  20. reader io.ReadSeeker
  21. version int
  22. objects []cfValue // object ID to object
  23. trailer bplistTrailer
  24. trailerOffset uint64
  25. containerStack []offset // slice of object offsets; manipulated during container deserialization
  26. }
  27. func (p *bplistParser) validateDocumentTrailer() {
  28. if p.trailer.OffsetTableOffset >= p.trailerOffset {
  29. panic(fmt.Errorf("offset table beyond beginning of trailer (0x%x, trailer@0x%x)", p.trailer.OffsetTableOffset, p.trailerOffset))
  30. }
  31. if p.trailer.OffsetTableOffset < 9 {
  32. panic(fmt.Errorf("offset table begins inside header (0x%x)", p.trailer.OffsetTableOffset))
  33. }
  34. if p.trailerOffset > (p.trailer.NumObjects*uint64(p.trailer.OffsetIntSize))+p.trailer.OffsetTableOffset {
  35. panic(errors.New("garbage between offset table and trailer"))
  36. }
  37. if p.trailer.OffsetTableOffset+(uint64(p.trailer.OffsetIntSize)*p.trailer.NumObjects) > p.trailerOffset {
  38. panic(errors.New("offset table isn't long enough to address every object"))
  39. }
  40. maxObjectRef := uint64(1) << (8 * p.trailer.ObjectRefSize)
  41. if p.trailer.NumObjects > maxObjectRef {
  42. panic(fmt.Errorf("more objects (%v) than object ref size (%v bytes) can support", p.trailer.NumObjects, p.trailer.ObjectRefSize))
  43. }
  44. if p.trailer.OffsetIntSize < uint8(8) && (uint64(1)<<(8*p.trailer.OffsetIntSize)) <= p.trailer.OffsetTableOffset {
  45. panic(errors.New("offset size isn't big enough to address entire file"))
  46. }
  47. if p.trailer.TopObject >= p.trailer.NumObjects {
  48. panic(fmt.Errorf("top object #%d is out of range (only %d exist)", p.trailer.TopObject, p.trailer.NumObjects))
  49. }
  50. }
  51. func (p *bplistParser) parseDocument() (pval cfValue, parseError error) {
  52. defer func() {
  53. if r := recover(); r != nil {
  54. if _, ok := r.(runtime.Error); ok {
  55. panic(r)
  56. }
  57. parseError = plistParseError{"binary", r.(error)}
  58. }
  59. }()
  60. p.buffer, _ = ioutil.ReadAll(p.reader)
  61. l := len(p.buffer)
  62. if l < 40 {
  63. panic(errors.New("not enough data"))
  64. }
  65. if !bytes.Equal(p.buffer[0:6], []byte{'b', 'p', 'l', 'i', 's', 't'}) {
  66. panic(errors.New("incomprehensible magic"))
  67. }
  68. p.version = int(((p.buffer[6] - '0') * 10) + (p.buffer[7] - '0'))
  69. if p.version > 1 {
  70. panic(fmt.Errorf("unexpected version %d", p.version))
  71. }
  72. p.trailerOffset = uint64(l - 32)
  73. p.trailer = bplistTrailer{
  74. SortVersion: p.buffer[p.trailerOffset+5],
  75. OffsetIntSize: p.buffer[p.trailerOffset+6],
  76. ObjectRefSize: p.buffer[p.trailerOffset+7],
  77. NumObjects: binary.BigEndian.Uint64(p.buffer[p.trailerOffset+8:]),
  78. TopObject: binary.BigEndian.Uint64(p.buffer[p.trailerOffset+16:]),
  79. OffsetTableOffset: binary.BigEndian.Uint64(p.buffer[p.trailerOffset+24:]),
  80. }
  81. p.validateDocumentTrailer()
  82. // INVARIANTS:
  83. // - Entire offset table is before trailer
  84. // - Offset table begins after header
  85. // - Offset table can address entire document
  86. // - Object IDs are big enough to support the number of objects in this plist
  87. // - Top object is in range
  88. p.objects = make([]cfValue, p.trailer.NumObjects)
  89. pval = p.objectAtIndex(p.trailer.TopObject)
  90. return
  91. }
  92. // parseSizedInteger returns a 128-bit integer as low64, high64
  93. func (p *bplistParser) parseSizedInteger(off offset, nbytes int) (lo uint64, hi uint64, newOffset offset) {
  94. // Per comments in CoreFoundation, format version 00 requires that all
  95. // 1, 2 or 4-byte integers be interpreted as unsigned. 8-byte integers are
  96. // signed (always?) and therefore must be sign extended here.
  97. // negative 1, 2, or 4-byte integers are always emitted as 64-bit.
  98. switch nbytes {
  99. case 1:
  100. lo, hi = uint64(p.buffer[off]), 0
  101. case 2:
  102. lo, hi = uint64(binary.BigEndian.Uint16(p.buffer[off:])), 0
  103. case 4:
  104. lo, hi = uint64(binary.BigEndian.Uint32(p.buffer[off:])), 0
  105. case 8:
  106. lo = binary.BigEndian.Uint64(p.buffer[off:])
  107. if p.buffer[off]&0x80 != 0 {
  108. // sign extend if lo is signed
  109. hi = signedHighBits
  110. }
  111. case 16:
  112. lo, hi = binary.BigEndian.Uint64(p.buffer[off+8:]), binary.BigEndian.Uint64(p.buffer[off:])
  113. default:
  114. panic(errors.New("illegal integer size"))
  115. }
  116. newOffset = off + offset(nbytes)
  117. return
  118. }
  119. func (p *bplistParser) parseObjectRefAtOffset(off offset) (uint64, offset) {
  120. oid, _, next := p.parseSizedInteger(off, int(p.trailer.ObjectRefSize))
  121. return oid, next
  122. }
  123. func (p *bplistParser) parseOffsetAtOffset(off offset) (offset, offset) {
  124. parsedOffset, _, next := p.parseSizedInteger(off, int(p.trailer.OffsetIntSize))
  125. return offset(parsedOffset), next
  126. }
  127. func (p *bplistParser) objectAtIndex(index uint64) cfValue {
  128. if index >= p.trailer.NumObjects {
  129. panic(fmt.Errorf("invalid object#%d (max %d)", index, p.trailer.NumObjects))
  130. }
  131. if pval := p.objects[index]; pval != nil {
  132. return pval
  133. }
  134. off, _ := p.parseOffsetAtOffset(offset(p.trailer.OffsetTableOffset + (index * uint64(p.trailer.OffsetIntSize))))
  135. if off > offset(p.trailer.OffsetTableOffset-1) {
  136. panic(fmt.Errorf("object#%d starts beyond beginning of object table (0x%x, table@0x%x)", index, off, p.trailer.OffsetTableOffset))
  137. }
  138. pval := p.parseTagAtOffset(off)
  139. p.objects[index] = pval
  140. return pval
  141. }
  142. func (p *bplistParser) pushNestedObject(off offset) {
  143. for _, v := range p.containerStack {
  144. if v == off {
  145. p.panicNestedObject(off)
  146. }
  147. }
  148. p.containerStack = append(p.containerStack, off)
  149. }
  150. func (p *bplistParser) panicNestedObject(off offset) {
  151. ids := ""
  152. for _, v := range p.containerStack {
  153. ids += fmt.Sprintf("0x%x > ", v)
  154. }
  155. // %s0x%d: ids above ends with " > "
  156. panic(fmt.Errorf("self-referential collection@0x%x (%s0x%x) cannot be deserialized", off, ids, off))
  157. }
  158. func (p *bplistParser) popNestedObject() {
  159. p.containerStack = p.containerStack[:len(p.containerStack)-1]
  160. }
  161. func (p *bplistParser) parseTagAtOffset(off offset) cfValue {
  162. tag := p.buffer[off]
  163. switch tag & 0xF0 {
  164. case bpTagNull:
  165. switch tag & 0x0F {
  166. case bpTagBoolTrue, bpTagBoolFalse:
  167. return cfBoolean(tag == bpTagBoolTrue)
  168. }
  169. case bpTagInteger:
  170. lo, hi, _ := p.parseIntegerAtOffset(off)
  171. return &cfNumber{
  172. signed: hi == signedHighBits, // a signed integer is stored as a 128-bit integer with the top 64 bits set
  173. value: lo,
  174. }
  175. case bpTagReal:
  176. nbytes := 1 << (tag & 0x0F)
  177. switch nbytes {
  178. case 4:
  179. bits := binary.BigEndian.Uint32(p.buffer[off+1:])
  180. return &cfReal{wide: false, value: float64(math.Float32frombits(bits))}
  181. case 8:
  182. bits := binary.BigEndian.Uint64(p.buffer[off+1:])
  183. return &cfReal{wide: true, value: math.Float64frombits(bits)}
  184. }
  185. panic(errors.New("illegal float size"))
  186. case bpTagDate:
  187. bits := binary.BigEndian.Uint64(p.buffer[off+1:])
  188. val := math.Float64frombits(bits)
  189. // Apple Epoch is 20110101000000Z
  190. // Adjust for UNIX Time
  191. val += 978307200
  192. sec, fsec := math.Modf(val)
  193. time := time.Unix(int64(sec), int64(fsec*float64(time.Second))).In(time.UTC)
  194. return cfDate(time)
  195. case bpTagData:
  196. data := p.parseDataAtOffset(off)
  197. return cfData(data)
  198. case bpTagASCIIString:
  199. str := p.parseASCIIStringAtOffset(off)
  200. return cfString(str)
  201. case bpTagUTF16String:
  202. str := p.parseUTF16StringAtOffset(off)
  203. return cfString(str)
  204. case bpTagUID: // Somehow different than int: low half is nbytes - 1 instead of log2(nbytes)
  205. lo, _, _ := p.parseSizedInteger(off+1, int(tag&0xF)+1)
  206. return cfUID(lo)
  207. case bpTagDictionary:
  208. return p.parseDictionaryAtOffset(off)
  209. case bpTagArray:
  210. return p.parseArrayAtOffset(off)
  211. }
  212. panic(fmt.Errorf("unexpected atom 0x%2.02x at offset 0x%x", tag, off))
  213. }
  214. func (p *bplistParser) parseIntegerAtOffset(off offset) (uint64, uint64, offset) {
  215. tag := p.buffer[off]
  216. return p.parseSizedInteger(off+1, 1<<(tag&0xF))
  217. }
  218. func (p *bplistParser) countForTagAtOffset(off offset) (uint64, offset) {
  219. tag := p.buffer[off]
  220. cnt := uint64(tag & 0x0F)
  221. if cnt == 0xF {
  222. cnt, _, off = p.parseIntegerAtOffset(off + 1)
  223. return cnt, off
  224. }
  225. return cnt, off + 1
  226. }
  227. func (p *bplistParser) parseDataAtOffset(off offset) []byte {
  228. len, start := p.countForTagAtOffset(off)
  229. if start+offset(len) > offset(p.trailer.OffsetTableOffset) {
  230. panic(fmt.Errorf("data@0x%x too long (%v bytes, max is %v)", off, len, p.trailer.OffsetTableOffset-uint64(start)))
  231. }
  232. return p.buffer[start : start+offset(len)]
  233. }
  234. func (p *bplistParser) parseASCIIStringAtOffset(off offset) string {
  235. len, start := p.countForTagAtOffset(off)
  236. if start+offset(len) > offset(p.trailer.OffsetTableOffset) {
  237. panic(fmt.Errorf("ascii string@0x%x too long (%v bytes, max is %v)", off, len, p.trailer.OffsetTableOffset-uint64(start)))
  238. }
  239. return zeroCopy8BitString(p.buffer, int(start), int(len))
  240. }
  241. func (p *bplistParser) parseUTF16StringAtOffset(off offset) string {
  242. len, start := p.countForTagAtOffset(off)
  243. bytes := len * 2
  244. if start+offset(bytes) > offset(p.trailer.OffsetTableOffset) {
  245. panic(fmt.Errorf("utf16 string@0x%x too long (%v bytes, max is %v)", off, bytes, p.trailer.OffsetTableOffset-uint64(start)))
  246. }
  247. u16s := make([]uint16, len)
  248. for i := offset(0); i < offset(len); i++ {
  249. u16s[i] = binary.BigEndian.Uint16(p.buffer[start+(i*2):])
  250. }
  251. runes := utf16.Decode(u16s)
  252. return string(runes)
  253. }
  254. func (p *bplistParser) parseObjectListAtOffset(off offset, count uint64) []cfValue {
  255. if off+offset(count*uint64(p.trailer.ObjectRefSize)) > offset(p.trailer.OffsetTableOffset) {
  256. panic(fmt.Errorf("list@0x%x length (%v) puts its end beyond the offset table at 0x%x", off, count, p.trailer.OffsetTableOffset))
  257. }
  258. objects := make([]cfValue, count)
  259. next := off
  260. var oid uint64
  261. for i := uint64(0); i < count; i++ {
  262. oid, next = p.parseObjectRefAtOffset(next)
  263. objects[i] = p.objectAtIndex(oid)
  264. }
  265. return objects
  266. }
  267. func (p *bplistParser) parseDictionaryAtOffset(off offset) *cfDictionary {
  268. p.pushNestedObject(off)
  269. defer p.popNestedObject()
  270. // a dictionary is an object list of [key key key val val val]
  271. cnt, start := p.countForTagAtOffset(off)
  272. objects := p.parseObjectListAtOffset(start, cnt*2)
  273. keys := make([]string, cnt)
  274. for i := uint64(0); i < cnt; i++ {
  275. if str, ok := objects[i].(cfString); ok {
  276. keys[i] = string(str)
  277. } else {
  278. panic(fmt.Errorf("dictionary@0x%x contains non-string key at index %d", off, i))
  279. }
  280. }
  281. return &cfDictionary{
  282. keys: keys,
  283. values: objects[cnt:],
  284. }
  285. }
  286. func (p *bplistParser) parseArrayAtOffset(off offset) *cfArray {
  287. p.pushNestedObject(off)
  288. defer p.popNestedObject()
  289. // an array is just an object list
  290. cnt, start := p.countForTagAtOffset(off)
  291. return &cfArray{p.parseObjectListAtOffset(start, cnt)}
  292. }
  293. func newBplistParser(r io.ReadSeeker) *bplistParser {
  294. return &bplistParser{reader: r}
  295. }