123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117 |
- // Copyright 2011 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- // +build ignore
- // Trie table generator.
- // Used by make*tables tools to generate a go file with trie data structures
- // for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte
- // sequence are used to lookup offsets in the index table to be used for the
- // next byte. The last byte is used to index into a table with 16-bit values.
- package main
- import (
- "fmt"
- "io"
- )
- const maxSparseEntries = 16
- type normCompacter struct {
- sparseBlocks [][]uint64
- sparseOffset []uint16
- sparseCount int
- name string
- }
- func mostFrequentStride(a []uint64) int {
- counts := make(map[int]int)
- var v int
- for _, x := range a {
- if stride := int(x) - v; v != 0 && stride >= 0 {
- counts[stride]++
- }
- v = int(x)
- }
- var maxs, maxc int
- for stride, cnt := range counts {
- if cnt > maxc || (cnt == maxc && stride < maxs) {
- maxs, maxc = stride, cnt
- }
- }
- return maxs
- }
- func countSparseEntries(a []uint64) int {
- stride := mostFrequentStride(a)
- var v, count int
- for _, tv := range a {
- if int(tv)-v != stride {
- if tv != 0 {
- count++
- }
- }
- v = int(tv)
- }
- return count
- }
- func (c *normCompacter) Size(v []uint64) (sz int, ok bool) {
- if n := countSparseEntries(v); n <= maxSparseEntries {
- return (n+1)*4 + 2, true
- }
- return 0, false
- }
- func (c *normCompacter) Store(v []uint64) uint32 {
- h := uint32(len(c.sparseOffset))
- c.sparseBlocks = append(c.sparseBlocks, v)
- c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount))
- c.sparseCount += countSparseEntries(v) + 1
- return h
- }
- func (c *normCompacter) Handler() string {
- return c.name + "Sparse.lookup"
- }
- func (c *normCompacter) Print(w io.Writer) (retErr error) {
- p := func(f string, x ...interface{}) {
- if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil {
- retErr = err
- }
- }
- ls := len(c.sparseBlocks)
- p("// %sSparseOffset: %d entries, %d bytes\n", c.name, ls, ls*2)
- p("var %sSparseOffset = %#v\n\n", c.name, c.sparseOffset)
- ns := c.sparseCount
- p("// %sSparseValues: %d entries, %d bytes\n", c.name, ns, ns*4)
- p("var %sSparseValues = [%d]valueRange {", c.name, ns)
- for i, b := range c.sparseBlocks {
- p("\n// Block %#x, offset %#x", i, c.sparseOffset[i])
- var v int
- stride := mostFrequentStride(b)
- n := countSparseEntries(b)
- p("\n{value:%#04x,lo:%#02x},", stride, uint8(n))
- for i, nv := range b {
- if int(nv)-v != stride {
- if v != 0 {
- p(",hi:%#02x},", 0x80+i-1)
- }
- if nv != 0 {
- p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
- }
- }
- v = int(nv)
- }
- if v != 0 {
- p(",hi:%#02x},", 0x80+len(b)-1)
- }
- }
- p("\n}\n\n")
- return
- }
|