opencc.go 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. package chinese
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "strings"
  7. "go-common/library/log"
  8. )
  9. var (
  10. defaultConversion = "s2twp"
  11. )
  12. // Group holds a sequence of dicts
  13. type Group struct {
  14. Files []string
  15. Dicts []*dict
  16. }
  17. func (g *Group) String() string {
  18. return fmt.Sprintf("%+v", g.Files)
  19. }
  20. // OpenCC contains the converter
  21. type openCC struct {
  22. Conversion string
  23. Description string
  24. DictGroup []*Group
  25. }
  26. var conversions = map[string]*openCC{
  27. "s2twp": {Conversion: s2twp},
  28. // "hk2s": {Conversion: hk2s}, "s2hk": {Conversion: s2hk}, "s2t": {Conversion: s2t},
  29. // "s2tw": {Conversion: s2tw}, "t2hk": {Conversion: t2hk},
  30. // "t2s": {Conversion: t2s}, "t2tw": {Conversion: t2tw},
  31. // "tw2s": {Conversion: tw2s}, "tw2sp": {Conversion: tw2sp},
  32. }
  33. // Init construct an instance of OpenCC.
  34. func Init() {
  35. for k, v := range conversions {
  36. if err := v.dict(k); err != nil {
  37. panic(err)
  38. }
  39. }
  40. }
  41. // Converts .
  42. func Converts(ctx context.Context, in ...string) (out map[string]string) {
  43. var err error
  44. out = make(map[string]string, len(in))
  45. for _, v := range in {
  46. if out[v], err = convert(v, defaultConversion); err != nil {
  47. log.Error("convert(%s),err:%+v", in, err)
  48. out[v] = v
  49. }
  50. }
  51. return
  52. }
  53. // Convert string from Simplified Chinese to Traditional Chinese .
  54. func Convert(ctx context.Context, in string) (out string) {
  55. var err error
  56. if out, err = convert(in, defaultConversion); err != nil {
  57. log.Error("convert(%s),err:%+v", in, err)
  58. }
  59. return
  60. }
  61. func (cc *openCC) dict(conversion string) error {
  62. var m interface{}
  63. json.Unmarshal([]byte(cc.Conversion), &m)
  64. config := m.(map[string]interface{})
  65. cc.Description = config["name"].(string)
  66. dictChain, ok := config["conversion_chain"].([]interface{})
  67. if !ok {
  68. return fmt.Errorf("format %+v not correct", config)
  69. }
  70. for _, v := range dictChain {
  71. d, ok := v.(map[string]interface{})
  72. if !ok {
  73. return fmt.Errorf("should be map inside conversion_chain")
  74. }
  75. dictMap, ok := d["dict"]
  76. if !ok {
  77. return fmt.Errorf("should have dict inside conversion_chain")
  78. }
  79. if dict, ok := dictMap.(map[string]interface{}); ok {
  80. group, err := cc.group(dict)
  81. if err != nil {
  82. return err
  83. }
  84. cc.DictGroup = append(cc.DictGroup, group)
  85. }
  86. }
  87. return nil
  88. }
  89. func (cc *openCC) group(d map[string]interface{}) (*Group, error) {
  90. typ, ok := d["type"].(string)
  91. if !ok {
  92. return nil, fmt.Errorf("type should be string")
  93. }
  94. res := &Group{}
  95. switch typ {
  96. case "group":
  97. dicts, ok := d["dicts"].([]interface{})
  98. if !ok {
  99. return nil, fmt.Errorf("dicts field invalid")
  100. }
  101. for _, dict := range dicts {
  102. d, ok := dict.(map[string]interface{})
  103. if !ok {
  104. return nil, fmt.Errorf("dicts items invalid")
  105. }
  106. group, err := cc.group(d)
  107. if err != nil {
  108. return nil, err
  109. }
  110. res.Files = append(res.Files, group.Files...)
  111. res.Dicts = append(res.Dicts, group.Dicts...)
  112. }
  113. case "txt":
  114. file, ok := d["file"]
  115. if !ok {
  116. return nil, fmt.Errorf("no file field found")
  117. }
  118. daDict, err := buildFromFile(file.(string))
  119. if err != nil {
  120. return nil, err
  121. }
  122. res.Files = append(res.Files, file.(string))
  123. res.Dicts = append(res.Dicts, daDict)
  124. default:
  125. return nil, fmt.Errorf("type should be txt or group")
  126. }
  127. return res, nil
  128. }
  129. // convert string from Simplified Chinese to Traditional Chinese or vice versa
  130. func convert(in, conversion string) (string, error) {
  131. if conversion == "" {
  132. conversion = defaultConversion
  133. }
  134. for _, group := range conversions[conversion].DictGroup {
  135. r := []rune(in)
  136. var tokens []string
  137. for i := 0; i < len(r); {
  138. s := r[i:]
  139. var token string
  140. max := 0
  141. for _, dict := range group.Dicts {
  142. ret, err := dict.prefixMatch(string(s))
  143. if err != nil {
  144. return "", err
  145. }
  146. if len(ret) > 0 {
  147. o := ""
  148. for k, v := range ret {
  149. if len(k) > max {
  150. max = len(k)
  151. token = v[0]
  152. o = k
  153. }
  154. }
  155. i += len([]rune(o))
  156. break
  157. }
  158. }
  159. if max == 0 { //no match
  160. token = string(r[i])
  161. i++
  162. }
  163. tokens = append(tokens, token)
  164. }
  165. in = strings.Join(tokens, "")
  166. }
  167. return in, nil
  168. }