123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262 |
- /*
- Copyright 2016 Google Inc. All Rights Reserved.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
- // Python quoted strings.
- package build
- import (
- "bytes"
- "fmt"
- "strconv"
- "strings"
- )
// unesc is indexed by the byte that follows a backslash in a quoted
// string and holds the byte that the two-character escape stands for.
// Every entry not listed here is zero.
var unesc = [256]byte{
	// The backslash and both quote characters stand for themselves.
	'\\': '\\',
	'"':  '"',
	'\'': '\'',

	// Letter escapes for control characters.
	't': '\t',
	'n': '\n',
	'r': '\r',
	'a': '\a',
	'b': '\b',
	'f': '\f',
	'v': '\v',
}
// esc is indexed by a byte that must be written as a backslash escape
// and holds the character to emit after the backslash.
// Every entry not listed here is zero.
var esc = [256]byte{
	// The backslash and both quote characters are escaped as themselves.
	'\\': '\\',
	'"':  '"',
	'\'': '\'',

	// Control characters that have a letter escape.
	'\t': 't',
	'\n': 'n',
	'\r': 'r',
	'\a': 'a',
	'\b': 'b',
	'\f': 'f',
	'\v': 'v',
}
// notEsc lists the bytes that may directly follow a \ in a string value
// without the \ itself needing to be escaped. For example, ( is listed,
// so the Go string "foo\\(bar" is written as the Python literal "foo\(bar".
// BUILD files really do contain such strings, especially shell arguments
// that hold regular expressions.
const notEsc = " !#$%&()*+,-./:;<=>?@" + // space and leading punctuation
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ" + // upper-case letters
	"{|}~" // trailing punctuation
- // unquote unquotes the quoted string, returning the actual
- // string value, whether the original was triple-quoted, and
- // an error describing invalid input.
- func unquote(quoted string) (s string, triple bool, err error) {
- // Check for raw prefix: means don't interpret the inner \.
- raw := false
- if strings.HasPrefix(quoted, "r") {
- raw = true
- quoted = quoted[1:]
- }
- if len(quoted) < 2 {
- err = fmt.Errorf("string literal too short")
- return
- }
- if quoted[0] != '"' && quoted[0] != '\'' || quoted[0] != quoted[len(quoted)-1] {
- err = fmt.Errorf("string literal has invalid quotes")
- }
- // Check for triple quoted string.
- quote := quoted[0]
- if len(quoted) >= 6 && quoted[1] == quote && quoted[2] == quote && quoted[:3] == quoted[len(quoted)-3:] {
- triple = true
- quoted = quoted[3 : len(quoted)-3]
- } else {
- quoted = quoted[1 : len(quoted)-1]
- }
- // Now quoted is the quoted data, but no quotes.
- // If we're in raw mode or there are no escapes, we're done.
- if raw || !strings.Contains(quoted, `\`) {
- s = quoted
- return
- }
- // Otherwise process quoted string.
- // Each iteration processes one escape sequence along with the
- // plain text leading up to it.
- var buf bytes.Buffer
- for {
- // Remove prefix before escape sequence.
- i := strings.Index(quoted, `\`)
- if i < 0 {
- i = len(quoted)
- }
- buf.WriteString(quoted[:i])
- quoted = quoted[i:]
- if len(quoted) == 0 {
- break
- }
- // Process escape sequence.
- if len(quoted) == 1 {
- err = fmt.Errorf(`truncated escape sequence \`)
- return
- }
- switch quoted[1] {
- default:
- // In Python, if \z (for some byte z) is not a known escape sequence
- // then it appears as literal text in the string.
- buf.WriteString(quoted[:2])
- quoted = quoted[2:]
- case '\n':
- // Ignore the escape and the line break.
- quoted = quoted[2:]
- case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '\'', '"':
- // One-char escape
- buf.WriteByte(unesc[quoted[1]])
- quoted = quoted[2:]
- case '0', '1', '2', '3', '4', '5', '6', '7':
- // Octal escape, up to 3 digits.
- n := int(quoted[1] - '0')
- quoted = quoted[2:]
- for i := 1; i < 3; i++ {
- if len(quoted) == 0 || quoted[0] < '0' || '7' < quoted[0] {
- break
- }
- n = n*8 + int(quoted[0]-'0')
- quoted = quoted[1:]
- }
- if n >= 256 {
- // NOTE: Python silently discards the high bit,
- // so that '\541' == '\141' == 'a'.
- // Let's see if we can avoid doing that in BUILD files.
- err = fmt.Errorf(`invalid escape sequence \%03o`, n)
- return
- }
- buf.WriteByte(byte(n))
- case 'x':
- // Hexadecimal escape, exactly 2 digits.
- if len(quoted) < 4 {
- err = fmt.Errorf(`truncated escape sequence %s`, quoted)
- return
- }
- n, err1 := strconv.ParseInt(quoted[2:4], 16, 0)
- if err1 != nil {
- err = fmt.Errorf(`invalid escape sequence %s`, quoted[:4])
- return
- }
- buf.WriteByte(byte(n))
- quoted = quoted[4:]
- }
- }
- s = buf.String()
- return
- }
// indexByte reports the position of the first occurrence of b in s.
// It returns -1 when b does not occur in s.
func indexByte(s string, b byte) int {
	for pos, ch := range []byte(s) {
		if ch == b {
			return pos
		}
	}
	return -1
}
// hex holds the sixteen hexadecimal digits, indexed by digit value,
// for use when quoting. The letters are lower-case, so any output
// produced from this table is lower-case hexadecimal.
const hex = "0123456789" + "abcdef"
- // quote returns the quoted form of the string value "x".
- // If triple is true, quote uses the triple-quoted form """x""".
- func quote(unquoted string, triple bool) string {
- q := `"`
- if triple {
- q = `"""`
- }
- var buf bytes.Buffer
- buf.WriteString(q)
- for i := 0; i < len(unquoted); i++ {
- c := unquoted[i]
- if c == '"' && triple && (i+1 < len(unquoted) && unquoted[i+1] != '"' || i+2 < len(unquoted) && unquoted[i+2] != '"') {
- // Can pass up to two quotes through, because they are followed by a non-quote byte.
- buf.WriteByte(c)
- if i+1 < len(unquoted) && unquoted[i+1] == '"' {
- buf.WriteByte(c)
- i++
- }
- continue
- }
- if triple && c == '\n' {
- // Can allow newline in triple-quoted string.
- buf.WriteByte(c)
- continue
- }
- if c == '\'' {
- // Can allow ' since we always use ".
- buf.WriteByte(c)
- continue
- }
- if c == '\\' {
- if i+1 < len(unquoted) && indexByte(notEsc, unquoted[i+1]) >= 0 {
- // Can pass \ through when followed by a byte that
- // known not to be a valid escape sequence and also
- // that does not trigger an escape sequence of its own.
- // Use this, because various BUILD files do.
- buf.WriteByte('\\')
- buf.WriteByte(unquoted[i+1])
- i++
- continue
- }
- }
- if esc[c] != 0 {
- buf.WriteByte('\\')
- buf.WriteByte(esc[c])
- continue
- }
- if c < 0x20 || c >= 0x80 {
- // BUILD files are supposed to be Latin-1, so escape all control and high bytes.
- // I'd prefer to use \x here, but Blaze does not implement
- // \x in quoted strings (b/7272572).
- buf.WriteByte('\\')
- buf.WriteByte(hex[c>>6]) // actually octal but reusing hex digits 0-7.
- buf.WriteByte(hex[(c>>3)&7])
- buf.WriteByte(hex[c&7])
- /*
- buf.WriteByte('\\')
- buf.WriteByte('x')
- buf.WriteByte(hex[c>>4])
- buf.WriteByte(hex[c&0xF])
- */
- continue
- }
- buf.WriteByte(c)
- continue
- }
- buf.WriteString(q)
- return buf.String()
- }
|