triegen.go 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. // Copyright 2011 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // +build ignore
  5. // Trie table generator.
  6. // Used by make*tables tools to generate a go file with trie data structures
  7. // for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte
  8. // sequence are used to lookup offsets in the index table to be used for the
  9. // next byte. The last byte is used to index into a table with 16-bit values.
  10. package main
  11. import (
  12. "fmt"
  13. "io"
  14. )
  15. const maxSparseEntries = 16
  16. type normCompacter struct {
  17. sparseBlocks [][]uint64
  18. sparseOffset []uint16
  19. sparseCount int
  20. name string
  21. }
  22. func mostFrequentStride(a []uint64) int {
  23. counts := make(map[int]int)
  24. var v int
  25. for _, x := range a {
  26. if stride := int(x) - v; v != 0 && stride >= 0 {
  27. counts[stride]++
  28. }
  29. v = int(x)
  30. }
  31. var maxs, maxc int
  32. for stride, cnt := range counts {
  33. if cnt > maxc || (cnt == maxc && stride < maxs) {
  34. maxs, maxc = stride, cnt
  35. }
  36. }
  37. return maxs
  38. }
  39. func countSparseEntries(a []uint64) int {
  40. stride := mostFrequentStride(a)
  41. var v, count int
  42. for _, tv := range a {
  43. if int(tv)-v != stride {
  44. if tv != 0 {
  45. count++
  46. }
  47. }
  48. v = int(tv)
  49. }
  50. return count
  51. }
  52. func (c *normCompacter) Size(v []uint64) (sz int, ok bool) {
  53. if n := countSparseEntries(v); n <= maxSparseEntries {
  54. return (n+1)*4 + 2, true
  55. }
  56. return 0, false
  57. }
  58. func (c *normCompacter) Store(v []uint64) uint32 {
  59. h := uint32(len(c.sparseOffset))
  60. c.sparseBlocks = append(c.sparseBlocks, v)
  61. c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount))
  62. c.sparseCount += countSparseEntries(v) + 1
  63. return h
  64. }
  65. func (c *normCompacter) Handler() string {
  66. return c.name + "Sparse.lookup"
  67. }
  68. func (c *normCompacter) Print(w io.Writer) (retErr error) {
  69. p := func(f string, x ...interface{}) {
  70. if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil {
  71. retErr = err
  72. }
  73. }
  74. ls := len(c.sparseBlocks)
  75. p("// %sSparseOffset: %d entries, %d bytes\n", c.name, ls, ls*2)
  76. p("var %sSparseOffset = %#v\n\n", c.name, c.sparseOffset)
  77. ns := c.sparseCount
  78. p("// %sSparseValues: %d entries, %d bytes\n", c.name, ns, ns*4)
  79. p("var %sSparseValues = [%d]valueRange {", c.name, ns)
  80. for i, b := range c.sparseBlocks {
  81. p("\n// Block %#x, offset %#x", i, c.sparseOffset[i])
  82. var v int
  83. stride := mostFrequentStride(b)
  84. n := countSparseEntries(b)
  85. p("\n{value:%#04x,lo:%#02x},", stride, uint8(n))
  86. for i, nv := range b {
  87. if int(nv)-v != stride {
  88. if v != 0 {
  89. p(",hi:%#02x},", 0x80+i-1)
  90. }
  91. if nv != 0 {
  92. p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
  93. }
  94. }
  95. v = int(nv)
  96. }
  97. if v != 0 {
  98. p(",hi:%#02x},", 0x80+len(b)-1)
  99. }
  100. }
  101. p("\n}\n\n")
  102. return
  103. }