quote.go 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
/*
Copyright 2016 Google Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
  13. // Python quoted strings.
  14. package build
  15. import (
  16. "bytes"
  17. "fmt"
  18. "strconv"
  19. "strings"
  20. )
  21. // unesc maps single-letter chars following \ to their actual values.
  22. var unesc = [256]byte{
  23. 'a': '\a',
  24. 'b': '\b',
  25. 'f': '\f',
  26. 'n': '\n',
  27. 'r': '\r',
  28. 't': '\t',
  29. 'v': '\v',
  30. '\\': '\\',
  31. '\'': '\'',
  32. '"': '"',
  33. }
  34. // esc maps escape-worthy bytes to the char that should follow \.
  35. var esc = [256]byte{
  36. '\a': 'a',
  37. '\b': 'b',
  38. '\f': 'f',
  39. '\n': 'n',
  40. '\r': 'r',
  41. '\t': 't',
  42. '\v': 'v',
  43. '\\': '\\',
  44. '\'': '\'',
  45. '"': '"',
  46. }
  47. // notEsc is a list of characters that can follow a \ in a string value
  48. // without having to escape the \. That is, since ( is in this list, we
  49. // quote the Go string "foo\\(bar" as the Python literal "foo\(bar".
  50. // This really does happen in BUILD files, especially in strings
  51. // being used as shell arguments containing regular expressions.
  52. const notEsc = " !#$%&()*+,-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~"
  53. // unquote unquotes the quoted string, returning the actual
  54. // string value, whether the original was triple-quoted, and
  55. // an error describing invalid input.
  56. func unquote(quoted string) (s string, triple bool, err error) {
  57. // Check for raw prefix: means don't interpret the inner \.
  58. raw := false
  59. if strings.HasPrefix(quoted, "r") {
  60. raw = true
  61. quoted = quoted[1:]
  62. }
  63. if len(quoted) < 2 {
  64. err = fmt.Errorf("string literal too short")
  65. return
  66. }
  67. if quoted[0] != '"' && quoted[0] != '\'' || quoted[0] != quoted[len(quoted)-1] {
  68. err = fmt.Errorf("string literal has invalid quotes")
  69. }
  70. // Check for triple quoted string.
  71. quote := quoted[0]
  72. if len(quoted) >= 6 && quoted[1] == quote && quoted[2] == quote && quoted[:3] == quoted[len(quoted)-3:] {
  73. triple = true
  74. quoted = quoted[3 : len(quoted)-3]
  75. } else {
  76. quoted = quoted[1 : len(quoted)-1]
  77. }
  78. // Now quoted is the quoted data, but no quotes.
  79. // If we're in raw mode or there are no escapes, we're done.
  80. if raw || !strings.Contains(quoted, `\`) {
  81. s = quoted
  82. return
  83. }
  84. // Otherwise process quoted string.
  85. // Each iteration processes one escape sequence along with the
  86. // plain text leading up to it.
  87. var buf bytes.Buffer
  88. for {
  89. // Remove prefix before escape sequence.
  90. i := strings.Index(quoted, `\`)
  91. if i < 0 {
  92. i = len(quoted)
  93. }
  94. buf.WriteString(quoted[:i])
  95. quoted = quoted[i:]
  96. if len(quoted) == 0 {
  97. break
  98. }
  99. // Process escape sequence.
  100. if len(quoted) == 1 {
  101. err = fmt.Errorf(`truncated escape sequence \`)
  102. return
  103. }
  104. switch quoted[1] {
  105. default:
  106. // In Python, if \z (for some byte z) is not a known escape sequence
  107. // then it appears as literal text in the string.
  108. buf.WriteString(quoted[:2])
  109. quoted = quoted[2:]
  110. case '\n':
  111. // Ignore the escape and the line break.
  112. quoted = quoted[2:]
  113. case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '\'', '"':
  114. // One-char escape
  115. buf.WriteByte(unesc[quoted[1]])
  116. quoted = quoted[2:]
  117. case '0', '1', '2', '3', '4', '5', '6', '7':
  118. // Octal escape, up to 3 digits.
  119. n := int(quoted[1] - '0')
  120. quoted = quoted[2:]
  121. for i := 1; i < 3; i++ {
  122. if len(quoted) == 0 || quoted[0] < '0' || '7' < quoted[0] {
  123. break
  124. }
  125. n = n*8 + int(quoted[0]-'0')
  126. quoted = quoted[1:]
  127. }
  128. if n >= 256 {
  129. // NOTE: Python silently discards the high bit,
  130. // so that '\541' == '\141' == 'a'.
  131. // Let's see if we can avoid doing that in BUILD files.
  132. err = fmt.Errorf(`invalid escape sequence \%03o`, n)
  133. return
  134. }
  135. buf.WriteByte(byte(n))
  136. case 'x':
  137. // Hexadecimal escape, exactly 2 digits.
  138. if len(quoted) < 4 {
  139. err = fmt.Errorf(`truncated escape sequence %s`, quoted)
  140. return
  141. }
  142. n, err1 := strconv.ParseInt(quoted[2:4], 16, 0)
  143. if err1 != nil {
  144. err = fmt.Errorf(`invalid escape sequence %s`, quoted[:4])
  145. return
  146. }
  147. buf.WriteByte(byte(n))
  148. quoted = quoted[4:]
  149. }
  150. }
  151. s = buf.String()
  152. return
  153. }
  154. // indexByte returns the index of the first instance of b in s, or else -1.
  155. func indexByte(s string, b byte) int {
  156. for i := 0; i < len(s); i++ {
  157. if s[i] == b {
  158. return i
  159. }
  160. }
  161. return -1
  162. }
  163. // hex is a list of the hexadecimal digits, for use in quoting.
  164. // We always print lower-case hexadecimal.
  165. const hex = "0123456789abcdef"
  166. // quote returns the quoted form of the string value "x".
  167. // If triple is true, quote uses the triple-quoted form """x""".
  168. func quote(unquoted string, triple bool) string {
  169. q := `"`
  170. if triple {
  171. q = `"""`
  172. }
  173. var buf bytes.Buffer
  174. buf.WriteString(q)
  175. for i := 0; i < len(unquoted); i++ {
  176. c := unquoted[i]
  177. if c == '"' && triple && (i+1 < len(unquoted) && unquoted[i+1] != '"' || i+2 < len(unquoted) && unquoted[i+2] != '"') {
  178. // Can pass up to two quotes through, because they are followed by a non-quote byte.
  179. buf.WriteByte(c)
  180. if i+1 < len(unquoted) && unquoted[i+1] == '"' {
  181. buf.WriteByte(c)
  182. i++
  183. }
  184. continue
  185. }
  186. if triple && c == '\n' {
  187. // Can allow newline in triple-quoted string.
  188. buf.WriteByte(c)
  189. continue
  190. }
  191. if c == '\'' {
  192. // Can allow ' since we always use ".
  193. buf.WriteByte(c)
  194. continue
  195. }
  196. if c == '\\' {
  197. if i+1 < len(unquoted) && indexByte(notEsc, unquoted[i+1]) >= 0 {
  198. // Can pass \ through when followed by a byte that
  199. // known not to be a valid escape sequence and also
  200. // that does not trigger an escape sequence of its own.
  201. // Use this, because various BUILD files do.
  202. buf.WriteByte('\\')
  203. buf.WriteByte(unquoted[i+1])
  204. i++
  205. continue
  206. }
  207. }
  208. if esc[c] != 0 {
  209. buf.WriteByte('\\')
  210. buf.WriteByte(esc[c])
  211. continue
  212. }
  213. if c < 0x20 || c >= 0x80 {
  214. // BUILD files are supposed to be Latin-1, so escape all control and high bytes.
  215. // I'd prefer to use \x here, but Blaze does not implement
  216. // \x in quoted strings (b/7272572).
  217. buf.WriteByte('\\')
  218. buf.WriteByte(hex[c>>6]) // actually octal but reusing hex digits 0-7.
  219. buf.WriteByte(hex[(c>>3)&7])
  220. buf.WriteByte(hex[c&7])
  221. /*
  222. buf.WriteByte('\\')
  223. buf.WriteByte('x')
  224. buf.WriteByte(hex[c>>4])
  225. buf.WriteByte(hex[c&0xF])
  226. */
  227. continue
  228. }
  229. buf.WriteByte(c)
  230. continue
  231. }
  232. buf.WriteString(q)
  233. return buf.String()
  234. }