...

Source file src/cuelang.org/go/cue/literal/quote.go

Documentation: cuelang.org/go/cue/literal

     1  // Copyright 2020 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package literal
    16  
    17  import (
    18  	"strconv"
    19  	"strings"
    20  	"unicode/utf8"
    21  )
    22  
    23  // Form defines how to quote a string or bytes literal.
    24  type Form struct {
    25  	hashCount   int
    26  	quote       byte
    27  	multiline   bool
    28  	auto        bool
    29  	exact       bool
    30  	asciiOnly   bool
    31  	graphicOnly bool
    32  	indent      string
    33  	tripleQuote string
    34  }
    35  
    36  // TODO:
    37  // - Fixed or max level of escape modifiers (#""#).
    38  // - Option to fall back to bytes if value cannot be represented as string.
    39  //   E.g. ExactString.
    40  // - QuoteExact that fails with an error if a string cannot be represented
    41  //   without loss.
    42  // - Handle auto-breaking for long lines (Swift-style, \-terminated lines).
    43  //   This is not supported yet in CUE, but may, and should be considered as
    44  //   a possibility in API design.
    45  // - Other possible convenience forms: Blob (auto-break bytes), String (bytes
    46  //   or string), Label.
    47  
    48  // WithTabIndent returns a new Form with indentation set to the given number
    49  // of tabs. The result will be a multiline string.
    50  func (f Form) WithTabIndent(n int) Form {
    51  	f.indent = tabs(n)
    52  	f.multiline = true
    53  	return f
    54  }
    55  
    56  const tabIndent = "\t\t\t\t\t\t\t\t\t\t\t\t"
    57  
    58  func tabs(n int) string {
    59  	if n < len(tabIndent) {
    60  		return tabIndent[:n]
    61  	}
    62  	return strings.Repeat("\t", n)
    63  }
    64  
    65  // WithOptionalIndent is like WithTabIndent, but only returns a multiline
    66  // strings if it doesn't contain any newline characters.
    67  func (f Form) WithOptionalTabIndent(tabs int) Form {
    68  	if tabs < len(tabIndent) {
    69  		f.indent = tabIndent[:tabs]
    70  	} else {
    71  		f.indent = strings.Repeat("\t", tabs)
    72  	}
    73  	f.auto = true
    74  	return f
    75  }
    76  
    77  // WithASCIIOnly ensures the quoted strings consists solely of valid ASCII
    78  // characters.
    79  func (f Form) WithASCIIOnly() Form {
    80  	f.asciiOnly = true
    81  	return f
    82  }
    83  
    84  // WithGraphicOnly ensures the quoted strings consists solely of printable
    85  // characters.
    86  func (f Form) WithGraphicOnly() Form {
    87  	f.graphicOnly = true
    88  	return f
    89  }
    90  
    91  var (
    92  	// String defines the format of a CUE string. Conversions may be lossy.
    93  	String Form = stringForm
    94  
    95  	// TODO: ExactString: quotes to bytes type if the string cannot be
    96  	// represented without loss of accuracy.
    97  
    98  	// Label is like Text, but optimized for labels.
    99  	Label Form = stringForm
   100  
   101  	// Bytes defines the format of bytes literal.
   102  	Bytes Form = bytesForm
   103  
   104  	stringForm = Form{
   105  		quote:       '"',
   106  		tripleQuote: `"""`,
   107  	}
   108  	bytesForm = Form{
   109  		quote:       '\'',
   110  		tripleQuote: `'''`,
   111  		exact:       true,
   112  	}
   113  )
   114  
   115  // Quote returns CUE string literal representing s. The returned string uses CUE
   116  // escape sequences (\t, \n, \u00FF, \u0100) for control characters and
   117  // non-printable characters as defined by strconv.IsPrint.
   118  //
   119  // It reports an error if the string cannot be converted to the desired form.
   120  func (f Form) Quote(s string) string {
   121  	return string(f.Append(make([]byte, 0, 3*len(s)/2), s))
   122  }
   123  
   124  const (
   125  	lowerhex = "0123456789abcdef"
   126  )
   127  
   128  // Append appends a CUE string literal representing s, as generated by Quote, to
   129  // buf and returns the extended buffer.
   130  func (f Form) Append(buf []byte, s string) []byte {
   131  	if f.auto && strings.ContainsRune(s, '\n') {
   132  		f.multiline = true
   133  	}
   134  	if f.multiline {
   135  		f.hashCount = f.requiredHashCount(s)
   136  	}
   137  
   138  	// Often called with big strings, so preallocate. If there's quoting,
   139  	// this is conservative but still helps a lot.
   140  	if cap(buf)-len(buf) < len(s) {
   141  		nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1)
   142  		copy(nBuf, buf)
   143  		buf = nBuf
   144  	}
   145  	for i := 0; i < f.hashCount; i++ {
   146  		buf = append(buf, '#')
   147  	}
   148  	if f.multiline {
   149  		buf = append(buf, f.quote, f.quote, f.quote, '\n')
   150  		if s == "" {
   151  			buf = append(buf, f.indent...)
   152  			buf = append(buf, f.quote, f.quote, f.quote)
   153  			return buf
   154  		}
   155  		if len(s) > 0 && s[0] != '\n' {
   156  			buf = append(buf, f.indent...)
   157  		}
   158  	} else {
   159  		buf = append(buf, f.quote)
   160  	}
   161  
   162  	buf = f.appendEscaped(buf, s)
   163  
   164  	if f.multiline {
   165  		buf = append(buf, '\n')
   166  		buf = append(buf, f.indent...)
   167  		buf = append(buf, f.quote, f.quote, f.quote)
   168  	} else {
   169  		buf = append(buf, f.quote)
   170  	}
   171  	for i := 0; i < f.hashCount; i++ {
   172  		buf = append(buf, '#')
   173  	}
   174  
   175  	return buf
   176  }
   177  
   178  // AppendEscaped appends a CUE string literal representing s, as generated by
   179  // Quote but without the quotes, to buf and returns the extended buffer.
   180  //
   181  // It does not include the last indentation.
   182  func (f Form) AppendEscaped(buf []byte, s string) []byte {
   183  	if f.auto && strings.ContainsRune(s, '\n') {
   184  		f.multiline = true
   185  	}
   186  
   187  	// Often called with big strings, so preallocate. If there's quoting,
   188  	// this is conservative but still helps a lot.
   189  	if cap(buf)-len(buf) < len(s) {
   190  		nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1)
   191  		copy(nBuf, buf)
   192  		buf = nBuf
   193  	}
   194  
   195  	buf = f.appendEscaped(buf, s)
   196  
   197  	return buf
   198  }
   199  
   200  func (f Form) appendEscaped(buf []byte, s string) []byte {
   201  	for width := 0; len(s) > 0; s = s[width:] {
   202  		r := rune(s[0])
   203  		width = 1
   204  		if r >= utf8.RuneSelf {
   205  			r, width = utf8.DecodeRuneInString(s)
   206  		}
   207  		if f.exact && width == 1 && r == utf8.RuneError {
   208  			buf = append(buf, `\x`...)
   209  			buf = append(buf, lowerhex[s[0]>>4])
   210  			buf = append(buf, lowerhex[s[0]&0xF])
   211  			continue
   212  		}
   213  		if f.multiline && r == '\n' {
   214  			buf = append(buf, '\n')
   215  			if len(s) > 1 && s[1] != '\n' {
   216  				buf = append(buf, f.indent...)
   217  			}
   218  			continue
   219  		}
   220  		buf = f.appendEscapedRune(buf, r)
   221  	}
   222  	return buf
   223  }
   224  
   225  func (f *Form) appendEscapedRune(buf []byte, r rune) []byte {
   226  	if (!f.multiline && r == rune(f.quote)) || r == '\\' { // always backslashed
   227  		buf = f.appendEscape(buf)
   228  		buf = append(buf, byte(r))
   229  		return buf
   230  	}
   231  	if f.asciiOnly {
   232  		if r < utf8.RuneSelf && strconv.IsPrint(r) {
   233  			buf = append(buf, byte(r))
   234  			return buf
   235  		}
   236  	} else if strconv.IsPrint(r) || f.graphicOnly && isInGraphicList(r) {
   237  		buf = utf8.AppendRune(buf, r)
   238  		return buf
   239  	}
   240  	buf = f.appendEscape(buf)
   241  	switch r {
   242  	case '\a':
   243  		buf = append(buf, 'a')
   244  	case '\b':
   245  		buf = append(buf, 'b')
   246  	case '\f':
   247  		buf = append(buf, 'f')
   248  	case '\n':
   249  		buf = append(buf, 'n')
   250  	case '\r':
   251  		buf = append(buf, 'r')
   252  	case '\t':
   253  		buf = append(buf, 't')
   254  	case '\v':
   255  		buf = append(buf, 'v')
   256  	default:
   257  		switch {
   258  		case r < ' ' && f.exact:
   259  			buf = append(buf, 'x')
   260  			buf = append(buf, lowerhex[byte(r)>>4])
   261  			buf = append(buf, lowerhex[byte(r)&0xF])
   262  		case r > utf8.MaxRune:
   263  			r = 0xFFFD
   264  			fallthrough
   265  		case r < 0x10000:
   266  			buf = append(buf, 'u')
   267  			for s := 12; s >= 0; s -= 4 {
   268  				buf = append(buf, lowerhex[r>>uint(s)&0xF])
   269  			}
   270  		default:
   271  			buf = append(buf, 'U')
   272  			for s := 28; s >= 0; s -= 4 {
   273  				buf = append(buf, lowerhex[r>>uint(s)&0xF])
   274  			}
   275  		}
   276  	}
   277  	return buf
   278  }
   279  
   280  func (f *Form) appendEscape(buf []byte) []byte {
   281  	buf = append(buf, '\\')
   282  	for i := 0; i < f.hashCount; i++ {
   283  		buf = append(buf, '#')
   284  	}
   285  	return buf
   286  }
   287  
   288  // requiredHashCount returns the number of # characters
   289  // that are required to quote the multiline string s.
   290  func (f *Form) requiredHashCount(s string) int {
   291  	hashCount := 0
   292  	i := 0
   293  	// Find all occurrences of the triple-quote and count
   294  	// the maximum number of succeeding # characters.
   295  	for {
   296  		j := strings.Index(s[i:], f.tripleQuote)
   297  		if j == -1 {
   298  			break
   299  		}
   300  		i += j + 3
   301  		// Absorb all extra quotes, so we
   302  		// get to the end of the sequence.
   303  		for ; i < len(s); i++ {
   304  			if s[i] != f.quote {
   305  				break
   306  			}
   307  		}
   308  		e := i - 1
   309  		// Count succeeding # characters.
   310  		for ; i < len(s); i++ {
   311  			if s[i] != '#' {
   312  				break
   313  			}
   314  		}
   315  		if nhash := i - e; nhash > hashCount {
   316  			hashCount = nhash
   317  		}
   318  	}
   319  	return hashCount
   320  }
   321  
   322  // isInGraphicList reports whether the rune is in the isGraphic list. This separation
   323  // from IsGraphic allows quoteWith to avoid two calls to IsPrint.
   324  // Should be called only if IsPrint fails.
   325  func isInGraphicList(r rune) bool {
   326  	// We know r must fit in 16 bits - see makeisprint.go.
   327  	if r > 0xFFFF {
   328  		return false
   329  	}
   330  	rr := uint16(r)
   331  	i := bsearch16(isGraphic, rr)
   332  	return i < len(isGraphic) && rr == isGraphic[i]
   333  }
   334  
   335  // bsearch16 returns the smallest i such that a[i] >= x.
   336  // If there is no such i, bsearch16 returns len(a).
   337  func bsearch16(a []uint16, x uint16) int {
   338  	i, j := 0, len(a)
   339  	for i < j {
   340  		h := i + (j-i)/2
   341  		if a[h] < x {
   342  			i = h + 1
   343  		} else {
   344  			j = h
   345  		}
   346  	}
   347  	return i
   348  }
   349  
   350  // isGraphic lists the graphic runes not matched by IsPrint.
   351  var isGraphic = []uint16{
   352  	0x00a0,
   353  	0x1680,
   354  	0x2000,
   355  	0x2001,
   356  	0x2002,
   357  	0x2003,
   358  	0x2004,
   359  	0x2005,
   360  	0x2006,
   361  	0x2007,
   362  	0x2008,
   363  	0x2009,
   364  	0x200a,
   365  	0x202f,
   366  	0x205f,
   367  	0x3000,
   368  }
   369  

View as plain text