da.go 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. package stringutil
  2. import (
  3. "fmt"
  4. "sort"
  5. "unicode/utf8"
  6. )
  7. const (
  8. terminationCharacter = '#'
  9. )
  10. func mustDoubleArray(da *doubleArray, err error) *doubleArray {
  11. if err != nil {
  12. panic(err)
  13. }
  14. return da
  15. }
  16. func (da *doubleArray) Build(keys []string) error {
  17. records := makeRecords(keys)
  18. if err := da.build(records, 1, 0, make(map[int]struct{})); err != nil {
  19. return err
  20. }
  21. return nil
  22. }
  23. type doubleArray struct {
  24. bc []baseCheck
  25. node []int
  26. }
  27. func newDoubleArray(keys []string) (*doubleArray, error) {
  28. da := &doubleArray{
  29. bc: []baseCheck{0},
  30. node: []int{-1}, // A start index is adjusting to 1 because 0 will be used as a mark of non-existent node.
  31. }
  32. if err := da.Build(keys); err != nil {
  33. return nil, err
  34. }
  35. return da, nil
  36. }
  37. // baseCheck contains BASE, CHECK and Extra flags.
  38. // From the top, 22bits of BASE, 2bits of Extra flags and 8bits of CHECK.
  39. //
  40. // BASE (22bit) | Extra flags (2bit) | CHECK (8bit)
  41. // |----------------------|--|--------|
  42. // 32 10 8 0
  43. type baseCheck uint32
  44. func (bc baseCheck) Base() int {
  45. return int(bc >> 10)
  46. }
  47. func (bc *baseCheck) SetBase(base int) {
  48. *bc |= baseCheck(base) << 10
  49. }
  50. func (bc baseCheck) Check() byte {
  51. return byte(bc)
  52. }
  53. func (bc *baseCheck) SetCheck(check byte) {
  54. *bc |= baseCheck(check)
  55. }
  56. func (bc baseCheck) IsEmpty() bool {
  57. return bc&0xfffffcff == 0
  58. }
  59. func (da *doubleArray) Lookup(path string) (length int) {
  60. idx := 1
  61. tmpIdx := idx
  62. for i := 0; i < len(path); i++ {
  63. c := path[i]
  64. tmpIdx = da.nextIndex(da.bc[tmpIdx].Base(), c)
  65. if tmpIdx >= len(da.bc) || da.bc[tmpIdx].Check() != c {
  66. break
  67. }
  68. idx = tmpIdx
  69. }
  70. if next := da.nextIndex(da.bc[idx].Base(), terminationCharacter); next < len(da.bc) && da.bc[next].Check() == terminationCharacter {
  71. return da.node[da.bc[next].Base()]
  72. }
  73. return -1
  74. }
  75. func (da *doubleArray) LookupByBytes(path []byte) (length int) {
  76. idx := 1
  77. tmpIdx := idx
  78. for i := 0; i < len(path); i++ {
  79. c := path[i]
  80. tmpIdx = da.nextIndex(da.bc[tmpIdx].Base(), c)
  81. if tmpIdx >= len(da.bc) || da.bc[tmpIdx].Check() != c {
  82. break
  83. }
  84. idx = tmpIdx
  85. }
  86. if next := da.nextIndex(da.bc[idx].Base(), terminationCharacter); next < len(da.bc) && da.bc[next].Check() == terminationCharacter {
  87. return da.node[da.bc[next].Base()]
  88. }
  89. return -1
  90. }
  91. func (da *doubleArray) build(srcs []record, idx, depth int, usedBase map[int]struct{}) error {
  92. sort.Stable(recordSlice(srcs))
  93. base, siblings, leaf, err := da.arrange(srcs, idx, depth, usedBase)
  94. if err != nil {
  95. return err
  96. }
  97. if leaf != nil {
  98. da.bc[idx].SetBase(len(da.node))
  99. da.node = append(da.node, leaf.value)
  100. }
  101. for _, sib := range siblings {
  102. da.setCheck(da.nextIndex(base, sib.c), sib.c)
  103. }
  104. for _, sib := range siblings {
  105. if err := da.build(srcs[sib.start:sib.end], da.nextIndex(base, sib.c), depth+1, usedBase); err != nil {
  106. return err
  107. }
  108. }
  109. return nil
  110. }
  111. func (da *doubleArray) setBase(i, base int) {
  112. da.bc[i].SetBase(base)
  113. }
  114. func (da *doubleArray) setCheck(i int, check byte) {
  115. da.bc[i].SetCheck(check)
  116. }
  117. func (da *doubleArray) findEmptyIndex(start int) int {
  118. i := start
  119. for ; i < len(da.bc); i++ {
  120. if da.bc[i].IsEmpty() {
  121. break
  122. }
  123. }
  124. return i
  125. }
  126. // findBase returns good BASE.
  127. func (da *doubleArray) findBase(siblings []sibling, start int, usedBase map[int]struct{}) (base int) {
  128. for idx, firstChar := start+1, siblings[0].c; ; idx = da.findEmptyIndex(idx + 1) {
  129. base = da.nextIndex(idx, firstChar)
  130. if _, used := usedBase[base]; used {
  131. continue
  132. }
  133. i := 0
  134. for ; i < len(siblings); i++ {
  135. next := da.nextIndex(base, siblings[i].c)
  136. if len(da.bc) <= next {
  137. da.bc = append(da.bc, make([]baseCheck, next-len(da.bc)+1)...)
  138. }
  139. if !da.bc[next].IsEmpty() {
  140. break
  141. }
  142. }
  143. if i == len(siblings) {
  144. break
  145. }
  146. }
  147. usedBase[base] = struct{}{}
  148. return base
  149. }
  150. func (da *doubleArray) arrange(records []record, idx, depth int, usedBase map[int]struct{}) (base int, siblings []sibling, leaf *record, err error) {
  151. siblings, leaf, err = makeSiblings(records, depth)
  152. if err != nil {
  153. return -1, nil, nil, err
  154. }
  155. if len(siblings) < 1 {
  156. return -1, nil, leaf, nil
  157. }
  158. base = da.findBase(siblings, idx, usedBase)
  159. da.setBase(idx, base)
  160. return base, siblings, leaf, err
  161. }
  162. type sibling struct {
  163. start int
  164. end int
  165. c byte
  166. }
  167. func (da *doubleArray) nextIndex(base int, c byte) int {
  168. return base ^ int(c)
  169. }
  170. func makeSiblings(records []record, depth int) (sib []sibling, leaf *record, err error) {
  171. var (
  172. pc byte
  173. n int
  174. )
  175. for i, r := range records {
  176. if len(r.key) <= depth {
  177. leaf = &r
  178. continue
  179. }
  180. c := r.key[depth]
  181. switch {
  182. case pc < c:
  183. sib = append(sib, sibling{start: i, c: c})
  184. case pc == c:
  185. continue
  186. default:
  187. return nil, nil, fmt.Errorf("stringutil: BUG: records hasn't been sorted")
  188. }
  189. if n > 0 {
  190. sib[n-1].end = i
  191. }
  192. pc = c
  193. n++
  194. }
  195. if n == 0 {
  196. return nil, leaf, nil
  197. }
  198. sib[n-1].end = len(records)
  199. return sib, leaf, nil
  200. }
  201. type record struct {
  202. key string
  203. value int
  204. }
  205. func makeRecords(srcs []string) (records []record) {
  206. termChar := string(terminationCharacter)
  207. for _, s := range srcs {
  208. records = append(records, record{
  209. key: string(s + termChar),
  210. value: utf8.RuneCountInString(s),
  211. })
  212. }
  213. return records
  214. }
  215. type recordSlice []record
  216. func (rs recordSlice) Len() int {
  217. return len(rs)
  218. }
  219. func (rs recordSlice) Less(i, j int) bool {
  220. return rs[i].key < rs[j].key
  221. }
  222. func (rs recordSlice) Swap(i, j int) {
  223. rs[i], rs[j] = rs[j], rs[i]
  224. }