123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253 |
- package stringutil
- import (
- "fmt"
- "sort"
- "unicode/utf8"
- )
- const (
- terminationCharacter = '#'
- )
- func mustDoubleArray(da *doubleArray, err error) *doubleArray {
- if err != nil {
- panic(err)
- }
- return da
- }
- func (da *doubleArray) Build(keys []string) error {
- records := makeRecords(keys)
- if err := da.build(records, 1, 0, make(map[int]struct{})); err != nil {
- return err
- }
- return nil
- }
- type doubleArray struct {
- bc []baseCheck
- node []int
- }
- func newDoubleArray(keys []string) (*doubleArray, error) {
- da := &doubleArray{
- bc: []baseCheck{0},
- node: []int{-1}, // A start index is adjusting to 1 because 0 will be used as a mark of non-existent node.
- }
- if err := da.Build(keys); err != nil {
- return nil, err
- }
- return da, nil
- }
- // baseCheck contains BASE, CHECK and Extra flags.
- // From the top, 22bits of BASE, 2bits of Extra flags and 8bits of CHECK.
- //
- // BASE (22bit) | Extra flags (2bit) | CHECK (8bit)
- // |----------------------|--|--------|
- // 32 10 8 0
- type baseCheck uint32
- func (bc baseCheck) Base() int {
- return int(bc >> 10)
- }
- func (bc *baseCheck) SetBase(base int) {
- *bc |= baseCheck(base) << 10
- }
- func (bc baseCheck) Check() byte {
- return byte(bc)
- }
- func (bc *baseCheck) SetCheck(check byte) {
- *bc |= baseCheck(check)
- }
- func (bc baseCheck) IsEmpty() bool {
- return bc&0xfffffcff == 0
- }
- func (da *doubleArray) Lookup(path string) (length int) {
- idx := 1
- tmpIdx := idx
- for i := 0; i < len(path); i++ {
- c := path[i]
- tmpIdx = da.nextIndex(da.bc[tmpIdx].Base(), c)
- if tmpIdx >= len(da.bc) || da.bc[tmpIdx].Check() != c {
- break
- }
- idx = tmpIdx
- }
- if next := da.nextIndex(da.bc[idx].Base(), terminationCharacter); next < len(da.bc) && da.bc[next].Check() == terminationCharacter {
- return da.node[da.bc[next].Base()]
- }
- return -1
- }
- func (da *doubleArray) LookupByBytes(path []byte) (length int) {
- idx := 1
- tmpIdx := idx
- for i := 0; i < len(path); i++ {
- c := path[i]
- tmpIdx = da.nextIndex(da.bc[tmpIdx].Base(), c)
- if tmpIdx >= len(da.bc) || da.bc[tmpIdx].Check() != c {
- break
- }
- idx = tmpIdx
- }
- if next := da.nextIndex(da.bc[idx].Base(), terminationCharacter); next < len(da.bc) && da.bc[next].Check() == terminationCharacter {
- return da.node[da.bc[next].Base()]
- }
- return -1
- }
- func (da *doubleArray) build(srcs []record, idx, depth int, usedBase map[int]struct{}) error {
- sort.Stable(recordSlice(srcs))
- base, siblings, leaf, err := da.arrange(srcs, idx, depth, usedBase)
- if err != nil {
- return err
- }
- if leaf != nil {
- da.bc[idx].SetBase(len(da.node))
- da.node = append(da.node, leaf.value)
- }
- for _, sib := range siblings {
- da.setCheck(da.nextIndex(base, sib.c), sib.c)
- }
- for _, sib := range siblings {
- if err := da.build(srcs[sib.start:sib.end], da.nextIndex(base, sib.c), depth+1, usedBase); err != nil {
- return err
- }
- }
- return nil
- }
- func (da *doubleArray) setBase(i, base int) {
- da.bc[i].SetBase(base)
- }
- func (da *doubleArray) setCheck(i int, check byte) {
- da.bc[i].SetCheck(check)
- }
- func (da *doubleArray) findEmptyIndex(start int) int {
- i := start
- for ; i < len(da.bc); i++ {
- if da.bc[i].IsEmpty() {
- break
- }
- }
- return i
- }
- // findBase returns good BASE.
- func (da *doubleArray) findBase(siblings []sibling, start int, usedBase map[int]struct{}) (base int) {
- for idx, firstChar := start+1, siblings[0].c; ; idx = da.findEmptyIndex(idx + 1) {
- base = da.nextIndex(idx, firstChar)
- if _, used := usedBase[base]; used {
- continue
- }
- i := 0
- for ; i < len(siblings); i++ {
- next := da.nextIndex(base, siblings[i].c)
- if len(da.bc) <= next {
- da.bc = append(da.bc, make([]baseCheck, next-len(da.bc)+1)...)
- }
- if !da.bc[next].IsEmpty() {
- break
- }
- }
- if i == len(siblings) {
- break
- }
- }
- usedBase[base] = struct{}{}
- return base
- }
- func (da *doubleArray) arrange(records []record, idx, depth int, usedBase map[int]struct{}) (base int, siblings []sibling, leaf *record, err error) {
- siblings, leaf, err = makeSiblings(records, depth)
- if err != nil {
- return -1, nil, nil, err
- }
- if len(siblings) < 1 {
- return -1, nil, leaf, nil
- }
- base = da.findBase(siblings, idx, usedBase)
- da.setBase(idx, base)
- return base, siblings, leaf, err
- }
- type sibling struct {
- start int
- end int
- c byte
- }
- func (da *doubleArray) nextIndex(base int, c byte) int {
- return base ^ int(c)
- }
- func makeSiblings(records []record, depth int) (sib []sibling, leaf *record, err error) {
- var (
- pc byte
- n int
- )
- for i, r := range records {
- if len(r.key) <= depth {
- leaf = &r
- continue
- }
- c := r.key[depth]
- switch {
- case pc < c:
- sib = append(sib, sibling{start: i, c: c})
- case pc == c:
- continue
- default:
- return nil, nil, fmt.Errorf("stringutil: BUG: records hasn't been sorted")
- }
- if n > 0 {
- sib[n-1].end = i
- }
- pc = c
- n++
- }
- if n == 0 {
- return nil, leaf, nil
- }
- sib[n-1].end = len(records)
- return sib, leaf, nil
- }
- type record struct {
- key string
- value int
- }
- func makeRecords(srcs []string) (records []record) {
- termChar := string(terminationCharacter)
- for _, s := range srcs {
- records = append(records, record{
- key: string(s + termChar),
- value: utf8.RuneCountInString(s),
- })
- }
- return records
- }
- type recordSlice []record
- func (rs recordSlice) Len() int {
- return len(rs)
- }
- func (rs recordSlice) Less(i, j int) bool {
- return rs[i].key < rs[j].key
- }
- func (rs recordSlice) Swap(i, j int) {
- rs[i], rs[j] = rs[j], rs[i]
- }
|