strings.go 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320
  1. package stringutil
  2. import (
  3. "sync"
  4. "unicode"
  5. "unicode/utf8"
  6. )
  7. var (
  8. mu sync.Mutex
  9. // Based on https://github.com/golang/lint/blob/32a87160691b3c96046c0c678fe57c5bef761456/lint.go#L702
  10. commonInitialismMap = map[string]struct{}{
  11. "API": struct{}{},
  12. "ASCII": struct{}{},
  13. "CPU": struct{}{},
  14. "CSRF": struct{}{},
  15. "CSS": struct{}{},
  16. "DNS": struct{}{},
  17. "EOF": struct{}{},
  18. "GUID": struct{}{},
  19. "HTML": struct{}{},
  20. "HTTP": struct{}{},
  21. "HTTPS": struct{}{},
  22. "ID": struct{}{},
  23. "IP": struct{}{},
  24. "JSON": struct{}{},
  25. "LHS": struct{}{},
  26. "QPS": struct{}{},
  27. "RAM": struct{}{},
  28. "RHS": struct{}{},
  29. "RPC": struct{}{},
  30. "SLA": struct{}{},
  31. "SMTP": struct{}{},
  32. "SQL": struct{}{},
  33. "SSH": struct{}{},
  34. "TCP": struct{}{},
  35. "TLS": struct{}{},
  36. "TTL": struct{}{},
  37. "UDP": struct{}{},
  38. "UI": struct{}{},
  39. "UID": struct{}{},
  40. "UUID": struct{}{},
  41. "URI": struct{}{},
  42. "URL": struct{}{},
  43. "UTF8": struct{}{},
  44. "VM": struct{}{},
  45. "XML": struct{}{},
  46. "XSRF": struct{}{},
  47. "XSS": struct{}{},
  48. }
  49. commonInitialisms = keys(commonInitialismMap)
  50. commonInitialism = mustDoubleArray(newDoubleArray(commonInitialisms))
  51. longestLen = longestLength(commonInitialisms)
  52. shortestLen = shortestLength(commonInitialisms, longestLen)
  53. )
  54. // ToUpperCamelCase returns a copy of the string s with all Unicode letters mapped to their camel case.
  55. // It will convert to upper case previous letter of '_' and first letter, and remove letter of '_'.
  56. func ToUpperCamelCase(s string) string {
  57. if s == "" {
  58. return ""
  59. }
  60. upper := true
  61. start := 0
  62. result := make([]byte, 0, len(s))
  63. var runeBuf [utf8.UTFMax]byte
  64. var initialism []byte
  65. for _, c := range s {
  66. if c == '_' {
  67. upper = true
  68. candidate := string(result[start:])
  69. initialism = initialism[:0]
  70. for _, r := range candidate {
  71. if r < utf8.RuneSelf {
  72. initialism = append(initialism, toUpperASCII(byte(r)))
  73. } else {
  74. n := utf8.EncodeRune(runeBuf[:], unicode.ToUpper(r))
  75. initialism = append(initialism, runeBuf[:n]...)
  76. }
  77. }
  78. if length := commonInitialism.LookupByBytes(initialism); length > 0 {
  79. result = append(result[:start], initialism...)
  80. }
  81. start = len(result)
  82. continue
  83. }
  84. if upper {
  85. if c < utf8.RuneSelf {
  86. result = append(result, toUpperASCII(byte(c)))
  87. } else {
  88. n := utf8.EncodeRune(runeBuf[:], unicode.ToUpper(c))
  89. result = append(result, runeBuf[:n]...)
  90. }
  91. upper = false
  92. continue
  93. }
  94. if c < utf8.RuneSelf {
  95. result = append(result, byte(c))
  96. } else {
  97. n := utf8.EncodeRune(runeBuf[:], c)
  98. result = append(result, runeBuf[:n]...)
  99. }
  100. }
  101. candidate := string(result[start:])
  102. initialism = initialism[:0]
  103. for _, r := range candidate {
  104. if r < utf8.RuneSelf {
  105. initialism = append(initialism, toUpperASCII(byte(r)))
  106. } else {
  107. n := utf8.EncodeRune(runeBuf[:], unicode.ToUpper(r))
  108. initialism = append(initialism, runeBuf[:n]...)
  109. }
  110. }
  111. if length := commonInitialism.LookupByBytes(initialism); length > 0 {
  112. result = append(result[:start], initialism...)
  113. }
  114. return string(result)
  115. }
  116. // ToUpperCamelCaseASCII is similar to ToUpperCamelCase, but optimized for
  117. // only the ASCII characters.
  118. // ToUpperCamelCaseASCII is faster than ToUpperCamelCase, but doesn't work if
  119. // contains non-ASCII characters.
  120. func ToUpperCamelCaseASCII(s string) string {
  121. if s == "" {
  122. return ""
  123. }
  124. upper := true
  125. start := 0
  126. result := make([]byte, 0, len(s))
  127. var initialism []byte
  128. for i := 0; i < len(s); i++ {
  129. c := s[i]
  130. if c == '_' {
  131. upper = true
  132. candidate := result[start:]
  133. initialism = initialism[:0]
  134. for _, b := range candidate {
  135. initialism = append(initialism, toUpperASCII(b))
  136. }
  137. if length := commonInitialism.LookupByBytes(initialism); length > 0 {
  138. result = append(result[:start], initialism...)
  139. }
  140. start = len(result)
  141. continue
  142. }
  143. if upper {
  144. result = append(result, toUpperASCII(c))
  145. upper = false
  146. continue
  147. }
  148. result = append(result, c)
  149. }
  150. candidate := result[start:]
  151. initialism = initialism[:0]
  152. for _, b := range candidate {
  153. initialism = append(initialism, toUpperASCII(b))
  154. }
  155. if length := commonInitialism.LookupByBytes(initialism); length > 0 {
  156. result = append(result[:start], initialism...)
  157. }
  158. return string(result)
  159. }
  160. // ToSnakeCase returns a copy of the string s with all Unicode letters mapped to their snake case.
  161. // It will insert letter of '_' at position of previous letter of uppercase and all
  162. // letters convert to lower case.
  163. // ToSnakeCase does not insert '_' letter into a common initialism word like ID, URL and so on.
  164. func ToSnakeCase(s string) string {
  165. if s == "" {
  166. return ""
  167. }
  168. result := make([]byte, 0, len(s))
  169. var runeBuf [utf8.UTFMax]byte
  170. var j, skipCount int
  171. for i, c := range s {
  172. if i < skipCount {
  173. continue
  174. }
  175. if unicode.IsUpper(c) {
  176. if i != 0 {
  177. result = append(result, '_')
  178. }
  179. next := nextIndex(j, len(s))
  180. if length := commonInitialism.Lookup(s[j:next]); length > 0 {
  181. for _, r := range s[j : j+length] {
  182. if r < utf8.RuneSelf {
  183. result = append(result, toLowerASCII(byte(r)))
  184. } else {
  185. n := utf8.EncodeRune(runeBuf[:], unicode.ToLower(r))
  186. result = append(result, runeBuf[:n]...)
  187. }
  188. }
  189. j += length - 1
  190. skipCount = i + length
  191. continue
  192. }
  193. }
  194. if c < utf8.RuneSelf {
  195. result = append(result, toLowerASCII(byte(c)))
  196. } else {
  197. n := utf8.EncodeRune(runeBuf[:], unicode.ToLower(c))
  198. result = append(result, runeBuf[:n]...)
  199. }
  200. j++
  201. }
  202. return string(result)
  203. }
  204. // ToSnakeCaseASCII is similar to ToSnakeCase, but optimized for only the ASCII
  205. // characters.
  206. // ToSnakeCaseASCII is faster than ToSnakeCase, but doesn't work correctly if
  207. // contains non-ASCII characters.
  208. func ToSnakeCaseASCII(s string) string {
  209. if s == "" {
  210. return ""
  211. }
  212. result := make([]byte, 0, len(s))
  213. for i := 0; i < len(s); i++ {
  214. c := s[i]
  215. if isUpperASCII(c) {
  216. if i != 0 {
  217. result = append(result, '_')
  218. }
  219. if k := i + shortestLen - 1; k < len(s) && isUpperASCII(s[k]) {
  220. if length := commonInitialism.Lookup(s[i:nextIndex(i, len(s))]); length > 0 {
  221. for j, buf := 0, s[i:i+length]; j < len(buf); j++ {
  222. result = append(result, toLowerASCII(buf[j]))
  223. }
  224. i += length - 1
  225. continue
  226. }
  227. }
  228. }
  229. result = append(result, toLowerASCII(c))
  230. }
  231. return string(result)
  232. }
  233. // AddCommonInitialism adds ss to list of common initialisms.
  234. func AddCommonInitialism(ss ...string) {
  235. mu.Lock()
  236. defer mu.Unlock()
  237. for _, s := range ss {
  238. commonInitialismMap[s] = struct{}{}
  239. }
  240. commonInitialisms = keys(commonInitialismMap)
  241. commonInitialism = mustDoubleArray(newDoubleArray(commonInitialisms))
  242. longestLen = longestLength(commonInitialisms)
  243. shortestLen = shortestLength(commonInitialisms, longestLen)
  244. }
  245. // DelCommonInitialism deletes ss from list of common initialisms.
  246. func DelCommonInitialism(ss ...string) {
  247. mu.Lock()
  248. defer mu.Unlock()
  249. for _, s := range ss {
  250. delete(commonInitialismMap, s)
  251. }
  252. commonInitialisms = keys(commonInitialismMap)
  253. commonInitialism = mustDoubleArray(newDoubleArray(commonInitialisms))
  254. longestLen = longestLength(commonInitialisms)
  255. shortestLen = shortestLength(commonInitialisms, longestLen)
  256. }
  257. func isUpperASCII(c byte) bool {
  258. return 'A' <= c && c <= 'Z'
  259. }
  260. func isLowerASCII(c byte) bool {
  261. return 'a' <= c && c <= 'z'
  262. }
  263. func toUpperASCII(c byte) byte {
  264. if isLowerASCII(c) {
  265. return c - ('a' - 'A')
  266. }
  267. return c
  268. }
  269. func toLowerASCII(c byte) byte {
  270. if isUpperASCII(c) {
  271. return c + 'a' - 'A'
  272. }
  273. return c
  274. }
  275. func nextIndex(i, maxlen int) int {
  276. if n := i + longestLen; n < maxlen {
  277. return n
  278. }
  279. return maxlen
  280. }
  281. func keys(m map[string]struct{}) []string {
  282. result := make([]string, 0, len(m))
  283. for k := range m {
  284. result = append(result, k)
  285. }
  286. return result
  287. }
  288. func shortestLength(strs []string, shortest int) int {
  289. for _, s := range strs {
  290. if candidate := utf8.RuneCountInString(s); candidate < shortest {
  291. shortest = candidate
  292. }
  293. }
  294. return shortest
  295. }
  296. func longestLength(strs []string) (longest int) {
  297. for _, s := range strs {
  298. if candidate := utf8.RuneCountInString(s); candidate > longest {
  299. longest = candidate
  300. }
  301. }
  302. return longest
  303. }