// Copyright 2020 CUE Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package literal import ( "strconv" "strings" "unicode/utf8" ) // Form defines how to quote a string or bytes literal. type Form struct { hashCount int quote byte multiline bool auto bool exact bool asciiOnly bool graphicOnly bool indent string tripleQuote string } // TODO: // - Fixed or max level of escape modifiers (#""#). // - Option to fall back to bytes if value cannot be represented as string. // E.g. ExactString. // - QuoteExact that fails with an error if a string cannot be represented // without loss. // - Handle auto-breaking for long lines (Swift-style, \-terminated lines). // This is not supported yet in CUE, but may, and should be considered as // a possibility in API design. // - Other possible convenience forms: Blob (auto-break bytes), String (bytes // or string), Label. // WithTabIndent returns a new Form with indentation set to the given number // of tabs. The result will be a multiline string. func (f Form) WithTabIndent(n int) Form { f.indent = tabs(n) f.multiline = true return f } const tabIndent = "\t\t\t\t\t\t\t\t\t\t\t\t" func tabs(n int) string { if n < len(tabIndent) { return tabIndent[:n] } return strings.Repeat("\t", n) } // WithOptionalIndent is like WithTabIndent, but only returns a multiline // strings if it doesn't contain any newline characters. func (f Form) WithOptionalTabIndent(tabs int) Form { if tabs < len(tabIndent) { f.indent = tabIndent[:tabs] } else { f.indent = strings.Repeat("\t", tabs) } f.auto = true return f } // WithASCIIOnly ensures the quoted strings consists solely of valid ASCII // characters. func (f Form) WithASCIIOnly() Form { f.asciiOnly = true return f } // WithGraphicOnly ensures the quoted strings consists solely of printable // characters. func (f Form) WithGraphicOnly() Form { f.graphicOnly = true return f } var ( // String defines the format of a CUE string. Conversions may be lossy. String Form = stringForm // TODO: ExactString: quotes to bytes type if the string cannot be // represented without loss of accuracy. // Label is like Text, but optimized for labels. Label Form = stringForm // Bytes defines the format of bytes literal. Bytes Form = bytesForm stringForm = Form{ quote: '"', tripleQuote: `"""`, } bytesForm = Form{ quote: '\'', tripleQuote: `'''`, exact: true, } ) // Quote returns CUE string literal representing s. The returned string uses CUE // escape sequences (\t, \n, \u00FF, \u0100) for control characters and // non-printable characters as defined by strconv.IsPrint. // // It reports an error if the string cannot be converted to the desired form. func (f Form) Quote(s string) string { return string(f.Append(make([]byte, 0, 3*len(s)/2), s)) } const ( lowerhex = "0123456789abcdef" ) // Append appends a CUE string literal representing s, as generated by Quote, to // buf and returns the extended buffer. func (f Form) Append(buf []byte, s string) []byte { if f.auto && strings.ContainsRune(s, '\n') { f.multiline = true } if f.multiline { f.hashCount = f.requiredHashCount(s) } // Often called with big strings, so preallocate. If there's quoting, // this is conservative but still helps a lot. if cap(buf)-len(buf) < len(s) { nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1) copy(nBuf, buf) buf = nBuf } for i := 0; i < f.hashCount; i++ { buf = append(buf, '#') } if f.multiline { buf = append(buf, f.quote, f.quote, f.quote, '\n') if s == "" { buf = append(buf, f.indent...) buf = append(buf, f.quote, f.quote, f.quote) return buf } if len(s) > 0 && s[0] != '\n' { buf = append(buf, f.indent...) } } else { buf = append(buf, f.quote) } buf = f.appendEscaped(buf, s) if f.multiline { buf = append(buf, '\n') buf = append(buf, f.indent...) buf = append(buf, f.quote, f.quote, f.quote) } else { buf = append(buf, f.quote) } for i := 0; i < f.hashCount; i++ { buf = append(buf, '#') } return buf } // AppendEscaped appends a CUE string literal representing s, as generated by // Quote but without the quotes, to buf and returns the extended buffer. // // It does not include the last indentation. func (f Form) AppendEscaped(buf []byte, s string) []byte { if f.auto && strings.ContainsRune(s, '\n') { f.multiline = true } // Often called with big strings, so preallocate. If there's quoting, // this is conservative but still helps a lot. if cap(buf)-len(buf) < len(s) { nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1) copy(nBuf, buf) buf = nBuf } buf = f.appendEscaped(buf, s) return buf } func (f Form) appendEscaped(buf []byte, s string) []byte { for width := 0; len(s) > 0; s = s[width:] { r := rune(s[0]) width = 1 if r >= utf8.RuneSelf { r, width = utf8.DecodeRuneInString(s) } if f.exact && width == 1 && r == utf8.RuneError { buf = append(buf, `\x`...) buf = append(buf, lowerhex[s[0]>>4]) buf = append(buf, lowerhex[s[0]&0xF]) continue } if f.multiline && r == '\n' { buf = append(buf, '\n') if len(s) > 1 && s[1] != '\n' { buf = append(buf, f.indent...) } continue } buf = f.appendEscapedRune(buf, r) } return buf } func (f *Form) appendEscapedRune(buf []byte, r rune) []byte { if (!f.multiline && r == rune(f.quote)) || r == '\\' { // always backslashed buf = f.appendEscape(buf) buf = append(buf, byte(r)) return buf } if f.asciiOnly { if r < utf8.RuneSelf && strconv.IsPrint(r) { buf = append(buf, byte(r)) return buf } } else if strconv.IsPrint(r) || f.graphicOnly && isInGraphicList(r) { buf = utf8.AppendRune(buf, r) return buf } buf = f.appendEscape(buf) switch r { case '\a': buf = append(buf, 'a') case '\b': buf = append(buf, 'b') case '\f': buf = append(buf, 'f') case '\n': buf = append(buf, 'n') case '\r': buf = append(buf, 'r') case '\t': buf = append(buf, 't') case '\v': buf = append(buf, 'v') default: switch { case r < ' ' && f.exact: buf = append(buf, 'x') buf = append(buf, lowerhex[byte(r)>>4]) buf = append(buf, lowerhex[byte(r)&0xF]) case r > utf8.MaxRune: r = 0xFFFD fallthrough case r < 0x10000: buf = append(buf, 'u') for s := 12; s >= 0; s -= 4 { buf = append(buf, lowerhex[r>>uint(s)&0xF]) } default: buf = append(buf, 'U') for s := 28; s >= 0; s -= 4 { buf = append(buf, lowerhex[r>>uint(s)&0xF]) } } } return buf } func (f *Form) appendEscape(buf []byte) []byte { buf = append(buf, '\\') for i := 0; i < f.hashCount; i++ { buf = append(buf, '#') } return buf } // requiredHashCount returns the number of # characters // that are required to quote the multiline string s. func (f *Form) requiredHashCount(s string) int { hashCount := 0 i := 0 // Find all occurrences of the triple-quote and count // the maximum number of succeeding # characters. for { j := strings.Index(s[i:], f.tripleQuote) if j == -1 { break } i += j + 3 // Absorb all extra quotes, so we // get to the end of the sequence. for ; i < len(s); i++ { if s[i] != f.quote { break } } e := i - 1 // Count succeeding # characters. for ; i < len(s); i++ { if s[i] != '#' { break } } if nhash := i - e; nhash > hashCount { hashCount = nhash } } return hashCount } // isInGraphicList reports whether the rune is in the isGraphic list. This separation // from IsGraphic allows quoteWith to avoid two calls to IsPrint. // Should be called only if IsPrint fails. func isInGraphicList(r rune) bool { // We know r must fit in 16 bits - see makeisprint.go. if r > 0xFFFF { return false } rr := uint16(r) i := bsearch16(isGraphic, rr) return i < len(isGraphic) && rr == isGraphic[i] } // bsearch16 returns the smallest i such that a[i] >= x. // If there is no such i, bsearch16 returns len(a). func bsearch16(a []uint16, x uint16) int { i, j := 0, len(a) for i < j { h := i + (j-i)/2 if a[h] < x { i = h + 1 } else { j = h } } return i } // isGraphic lists the graphic runes not matched by IsPrint. var isGraphic = []uint16{ 0x00a0, 0x1680, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 0x202f, 0x205f, 0x3000, }