gen.go 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. // Copyright 2017 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // +build ignore
  5. package main
  6. import (
  7. "encoding/xml"
  8. "fmt"
  9. "io"
  10. "log"
  11. "sort"
  12. "strconv"
  13. "strings"
  14. "golang.org/x/text/encoding/internal/identifier"
  15. "golang.org/x/text/internal/gen"
  16. )
  17. type registry struct {
  18. XMLName xml.Name `xml:"registry"`
  19. Updated string `xml:"updated"`
  20. Registry []struct {
  21. ID string `xml:"id,attr"`
  22. Record []struct {
  23. Name string `xml:"name"`
  24. Xref []struct {
  25. Type string `xml:"type,attr"`
  26. Data string `xml:"data,attr"`
  27. } `xml:"xref"`
  28. Desc struct {
  29. Data string `xml:",innerxml"`
  30. } `xml:"description,"`
  31. MIB string `xml:"value"`
  32. Alias []string `xml:"alias"`
  33. MIME string `xml:"preferred_alias"`
  34. } `xml:"record"`
  35. } `xml:"registry"`
  36. }
  37. func main() {
  38. r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
  39. reg := &registry{}
  40. if err := xml.NewDecoder(r).Decode(&reg); err != nil && err != io.EOF {
  41. log.Fatalf("Error decoding charset registry: %v", err)
  42. }
  43. if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" {
  44. log.Fatalf("Unexpected ID %s", reg.Registry[0].ID)
  45. }
  46. x := &indexInfo{}
  47. for _, rec := range reg.Registry[0].Record {
  48. mib := identifier.MIB(parseInt(rec.MIB))
  49. x.addEntry(mib, rec.Name)
  50. for _, a := range rec.Alias {
  51. a = strings.Split(a, " ")[0] // strip comments.
  52. x.addAlias(a, mib)
  53. // MIB name aliases are prefixed with a "cs" (character set) in the
  54. // registry to identify them as display names and to ensure that
  55. // the name starts with a lowercase letter in case it is used as
  56. // an identifier. We remove it to be left with a nice clean name.
  57. if strings.HasPrefix(a, "cs") {
  58. x.setName(2, a[2:])
  59. }
  60. }
  61. if rec.MIME != "" {
  62. x.addAlias(rec.MIME, mib)
  63. x.setName(1, rec.MIME)
  64. }
  65. }
  66. w := gen.NewCodeWriter()
  67. fmt.Fprintln(w, `import "golang.org/x/text/encoding/internal/identifier"`)
  68. writeIndex(w, x)
  69. w.WriteGoFile("tables.go", "ianaindex")
  70. }
  71. type alias struct {
  72. name string
  73. mib identifier.MIB
  74. }
  75. type indexInfo struct {
  76. // compacted index from code to MIB
  77. codeToMIB []identifier.MIB
  78. alias []alias
  79. names [][3]string
  80. }
  81. func (ii *indexInfo) Len() int {
  82. return len(ii.codeToMIB)
  83. }
  84. func (ii *indexInfo) Less(a, b int) bool {
  85. return ii.codeToMIB[a] < ii.codeToMIB[b]
  86. }
  87. func (ii *indexInfo) Swap(a, b int) {
  88. ii.codeToMIB[a], ii.codeToMIB[b] = ii.codeToMIB[b], ii.codeToMIB[a]
  89. // Co-sort the names.
  90. ii.names[a], ii.names[b] = ii.names[b], ii.names[a]
  91. }
  92. func (ii *indexInfo) setName(i int, name string) {
  93. ii.names[len(ii.names)-1][i] = name
  94. }
  95. func (ii *indexInfo) addEntry(mib identifier.MIB, name string) {
  96. ii.names = append(ii.names, [3]string{name, name, name})
  97. ii.addAlias(name, mib)
  98. ii.codeToMIB = append(ii.codeToMIB, mib)
  99. }
  100. func (ii *indexInfo) addAlias(name string, mib identifier.MIB) {
  101. // Don't add duplicates for the same mib. Adding duplicate aliases for
  102. // different MIBs will cause the compiler to barf on an invalid map: great!.
  103. for i := len(ii.alias) - 1; i >= 0 && ii.alias[i].mib == mib; i-- {
  104. if ii.alias[i].name == name {
  105. return
  106. }
  107. }
  108. ii.alias = append(ii.alias, alias{name, mib})
  109. lower := strings.ToLower(name)
  110. if lower != name {
  111. ii.addAlias(lower, mib)
  112. }
  113. }
  114. const maxMIMENameLen = '0' - 1 // officially 40, but we leave some buffer.
  115. func writeIndex(w *gen.CodeWriter, x *indexInfo) {
  116. sort.Stable(x)
  117. // Write constants.
  118. fmt.Fprintln(w, "const (")
  119. for i, m := range x.codeToMIB {
  120. if i == 0 {
  121. fmt.Fprintf(w, "enc%d = iota\n", m)
  122. } else {
  123. fmt.Fprintf(w, "enc%d\n", m)
  124. }
  125. }
  126. fmt.Fprintln(w, "numIANA")
  127. fmt.Fprintln(w, ")")
  128. w.WriteVar("ianaToMIB", x.codeToMIB)
  129. var ianaNames, mibNames []string
  130. for _, names := range x.names {
  131. n := names[0]
  132. if names[0] != names[1] {
  133. // MIME names are mostly identical to IANA names. We share the
  134. // tables by setting the first byte of the string to an index into
  135. // the string itself (< maxMIMENameLen) to the IANA name. The MIME
  136. // name immediately follows the index.
  137. x := len(names[1]) + 1
  138. if x > maxMIMENameLen {
  139. log.Fatalf("MIME name length (%d) > %d", x, maxMIMENameLen)
  140. }
  141. n = string(x) + names[1] + names[0]
  142. }
  143. ianaNames = append(ianaNames, n)
  144. mibNames = append(mibNames, names[2])
  145. }
  146. w.WriteVar("ianaNames", ianaNames)
  147. w.WriteVar("mibNames", mibNames)
  148. w.WriteComment(`
  149. TODO: Instead of using a map, we could use binary search strings doing
  150. on-the fly lower-casing per character. This allows to always avoid
  151. allocation and will be considerably more compact.`)
  152. fmt.Fprintln(w, "var ianaAliases = map[string]int{")
  153. for _, a := range x.alias {
  154. fmt.Fprintf(w, "%q: enc%d,\n", a.name, a.mib)
  155. }
  156. fmt.Fprintln(w, "}")
  157. }
  158. func parseInt(s string) int {
  159. x, err := strconv.ParseInt(s, 10, 64)
  160. if err != nil {
  161. log.Fatalf("Could not parse integer: %v", err)
  162. }
  163. return int(x)
  164. }