123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320 |
- package stringutil
- import (
- "sync"
- "unicode"
- "unicode/utf8"
- )
- var (
- mu sync.Mutex
- // Based on https://github.com/golang/lint/blob/32a87160691b3c96046c0c678fe57c5bef761456/lint.go#L702
- commonInitialismMap = map[string]struct{}{
- "API": struct{}{},
- "ASCII": struct{}{},
- "CPU": struct{}{},
- "CSRF": struct{}{},
- "CSS": struct{}{},
- "DNS": struct{}{},
- "EOF": struct{}{},
- "GUID": struct{}{},
- "HTML": struct{}{},
- "HTTP": struct{}{},
- "HTTPS": struct{}{},
- "ID": struct{}{},
- "IP": struct{}{},
- "JSON": struct{}{},
- "LHS": struct{}{},
- "QPS": struct{}{},
- "RAM": struct{}{},
- "RHS": struct{}{},
- "RPC": struct{}{},
- "SLA": struct{}{},
- "SMTP": struct{}{},
- "SQL": struct{}{},
- "SSH": struct{}{},
- "TCP": struct{}{},
- "TLS": struct{}{},
- "TTL": struct{}{},
- "UDP": struct{}{},
- "UI": struct{}{},
- "UID": struct{}{},
- "UUID": struct{}{},
- "URI": struct{}{},
- "URL": struct{}{},
- "UTF8": struct{}{},
- "VM": struct{}{},
- "XML": struct{}{},
- "XSRF": struct{}{},
- "XSS": struct{}{},
- }
- commonInitialisms = keys(commonInitialismMap)
- commonInitialism = mustDoubleArray(newDoubleArray(commonInitialisms))
- longestLen = longestLength(commonInitialisms)
- shortestLen = shortestLength(commonInitialisms, longestLen)
- )
- // ToUpperCamelCase returns a copy of the string s with all Unicode letters mapped to their camel case.
- // It will convert to upper case previous letter of '_' and first letter, and remove letter of '_'.
- func ToUpperCamelCase(s string) string {
- if s == "" {
- return ""
- }
- upper := true
- start := 0
- result := make([]byte, 0, len(s))
- var runeBuf [utf8.UTFMax]byte
- var initialism []byte
- for _, c := range s {
- if c == '_' {
- upper = true
- candidate := string(result[start:])
- initialism = initialism[:0]
- for _, r := range candidate {
- if r < utf8.RuneSelf {
- initialism = append(initialism, toUpperASCII(byte(r)))
- } else {
- n := utf8.EncodeRune(runeBuf[:], unicode.ToUpper(r))
- initialism = append(initialism, runeBuf[:n]...)
- }
- }
- if length := commonInitialism.LookupByBytes(initialism); length > 0 {
- result = append(result[:start], initialism...)
- }
- start = len(result)
- continue
- }
- if upper {
- if c < utf8.RuneSelf {
- result = append(result, toUpperASCII(byte(c)))
- } else {
- n := utf8.EncodeRune(runeBuf[:], unicode.ToUpper(c))
- result = append(result, runeBuf[:n]...)
- }
- upper = false
- continue
- }
- if c < utf8.RuneSelf {
- result = append(result, byte(c))
- } else {
- n := utf8.EncodeRune(runeBuf[:], c)
- result = append(result, runeBuf[:n]...)
- }
- }
- candidate := string(result[start:])
- initialism = initialism[:0]
- for _, r := range candidate {
- if r < utf8.RuneSelf {
- initialism = append(initialism, toUpperASCII(byte(r)))
- } else {
- n := utf8.EncodeRune(runeBuf[:], unicode.ToUpper(r))
- initialism = append(initialism, runeBuf[:n]...)
- }
- }
- if length := commonInitialism.LookupByBytes(initialism); length > 0 {
- result = append(result[:start], initialism...)
- }
- return string(result)
- }
- // ToUpperCamelCaseASCII is similar to ToUpperCamelCase, but optimized for
- // only the ASCII characters.
- // ToUpperCamelCaseASCII is faster than ToUpperCamelCase, but doesn't work if
- // contains non-ASCII characters.
- func ToUpperCamelCaseASCII(s string) string {
- if s == "" {
- return ""
- }
- upper := true
- start := 0
- result := make([]byte, 0, len(s))
- var initialism []byte
- for i := 0; i < len(s); i++ {
- c := s[i]
- if c == '_' {
- upper = true
- candidate := result[start:]
- initialism = initialism[:0]
- for _, b := range candidate {
- initialism = append(initialism, toUpperASCII(b))
- }
- if length := commonInitialism.LookupByBytes(initialism); length > 0 {
- result = append(result[:start], initialism...)
- }
- start = len(result)
- continue
- }
- if upper {
- result = append(result, toUpperASCII(c))
- upper = false
- continue
- }
- result = append(result, c)
- }
- candidate := result[start:]
- initialism = initialism[:0]
- for _, b := range candidate {
- initialism = append(initialism, toUpperASCII(b))
- }
- if length := commonInitialism.LookupByBytes(initialism); length > 0 {
- result = append(result[:start], initialism...)
- }
- return string(result)
- }
- // ToSnakeCase returns a copy of the string s with all Unicode letters mapped to their snake case.
- // It will insert letter of '_' at position of previous letter of uppercase and all
- // letters convert to lower case.
- // ToSnakeCase does not insert '_' letter into a common initialism word like ID, URL and so on.
- func ToSnakeCase(s string) string {
- if s == "" {
- return ""
- }
- result := make([]byte, 0, len(s))
- var runeBuf [utf8.UTFMax]byte
- var j, skipCount int
- for i, c := range s {
- if i < skipCount {
- continue
- }
- if unicode.IsUpper(c) {
- if i != 0 {
- result = append(result, '_')
- }
- next := nextIndex(j, len(s))
- if length := commonInitialism.Lookup(s[j:next]); length > 0 {
- for _, r := range s[j : j+length] {
- if r < utf8.RuneSelf {
- result = append(result, toLowerASCII(byte(r)))
- } else {
- n := utf8.EncodeRune(runeBuf[:], unicode.ToLower(r))
- result = append(result, runeBuf[:n]...)
- }
- }
- j += length - 1
- skipCount = i + length
- continue
- }
- }
- if c < utf8.RuneSelf {
- result = append(result, toLowerASCII(byte(c)))
- } else {
- n := utf8.EncodeRune(runeBuf[:], unicode.ToLower(c))
- result = append(result, runeBuf[:n]...)
- }
- j++
- }
- return string(result)
- }
- // ToSnakeCaseASCII is similar to ToSnakeCase, but optimized for only the ASCII
- // characters.
- // ToSnakeCaseASCII is faster than ToSnakeCase, but doesn't work correctly if
- // contains non-ASCII characters.
- func ToSnakeCaseASCII(s string) string {
- if s == "" {
- return ""
- }
- result := make([]byte, 0, len(s))
- for i := 0; i < len(s); i++ {
- c := s[i]
- if isUpperASCII(c) {
- if i != 0 {
- result = append(result, '_')
- }
- if k := i + shortestLen - 1; k < len(s) && isUpperASCII(s[k]) {
- if length := commonInitialism.Lookup(s[i:nextIndex(i, len(s))]); length > 0 {
- for j, buf := 0, s[i:i+length]; j < len(buf); j++ {
- result = append(result, toLowerASCII(buf[j]))
- }
- i += length - 1
- continue
- }
- }
- }
- result = append(result, toLowerASCII(c))
- }
- return string(result)
- }
- // AddCommonInitialism adds ss to list of common initialisms.
- func AddCommonInitialism(ss ...string) {
- mu.Lock()
- defer mu.Unlock()
- for _, s := range ss {
- commonInitialismMap[s] = struct{}{}
- }
- commonInitialisms = keys(commonInitialismMap)
- commonInitialism = mustDoubleArray(newDoubleArray(commonInitialisms))
- longestLen = longestLength(commonInitialisms)
- shortestLen = shortestLength(commonInitialisms, longestLen)
- }
- // DelCommonInitialism deletes ss from list of common initialisms.
- func DelCommonInitialism(ss ...string) {
- mu.Lock()
- defer mu.Unlock()
- for _, s := range ss {
- delete(commonInitialismMap, s)
- }
- commonInitialisms = keys(commonInitialismMap)
- commonInitialism = mustDoubleArray(newDoubleArray(commonInitialisms))
- longestLen = longestLength(commonInitialisms)
- shortestLen = shortestLength(commonInitialisms, longestLen)
- }
// isUpperASCII reports whether c is an ASCII upper-case letter ('A'..'Z').
func isUpperASCII(c byte) bool {
	// Byte subtraction wraps around, so anything below 'A' becomes a large
	// value and fails the single comparison.
	return c-'A' <= 'Z'-'A'
}
// isLowerASCII reports whether c is an ASCII lower-case letter ('a'..'z').
func isLowerASCII(c byte) bool {
	// Byte subtraction wraps around, so anything below 'a' becomes a large
	// value and fails the single comparison.
	return c-'a' <= 'z'-'a'
}
- func toUpperASCII(c byte) byte {
- if isLowerASCII(c) {
- return c - ('a' - 'A')
- }
- return c
- }
- func toLowerASCII(c byte) byte {
- if isUpperASCII(c) {
- return c + 'a' - 'A'
- }
- return c
- }
- func nextIndex(i, maxlen int) int {
- if n := i + longestLen; n < maxlen {
- return n
- }
- return maxlen
- }
// keys returns the keys of m as a new slice. The order follows map
// iteration and is therefore unspecified.
func keys(m map[string]struct{}) []string {
	out := make([]string, len(m))
	i := 0
	for name := range m {
		out[i] = name
		i++
	}
	return out
}
// shortestLength returns the smallest rune count among strs, or the given
// shortest when no element has fewer runes than that starting value.
func shortestLength(strs []string, shortest int) int {
	for i := range strs {
		n := utf8.RuneCountInString(strs[i])
		if n >= shortest {
			continue
		}
		shortest = n
	}
	return shortest
}
// longestLength returns the largest rune count among strs, or 0 when strs
// is empty.
func longestLength(strs []string) (longest int) {
	for i := range strs {
		n := utf8.RuneCountInString(strs[i])
		if n <= longest {
			continue
		}
		longest = n
	}
	return longest
}
|