...

Source file src/github.com/aws/smithy-go/encoding/xml/escape.go

Documentation: github.com/aws/smithy-go/encoding/xml

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Copied and modified from Go 1.14 stdlib's encoding/xml
     6  
     7  package xml
     8  
     9  import (
    10  	"unicode/utf8"
    11  )
    12  
    // Escape sequences written in place of characters that must not appear
    // literally in the encoded XML output.
    //
    // Copied from Go 1.14 stdlib's encoding/xml
    var (
    	escQuot = []byte("&#34;") // shorter than "&quot;"
    	escApos = []byte("&#39;") // shorter than "&apos;"
    	escAmp  = []byte("&amp;")
    	escLT   = []byte("&lt;")
    	escGT   = []byte("&gt;")
    	escTab  = []byte("&#x9;")
    	escNL   = []byte("&#xA;")
    	escCR   = []byte("&#xD;")
    	escFFFD = []byte("\uFFFD") // Unicode replacement character

    	// Additional Escapes
    	escNextLine = []byte("&#x85;")   // U+0085
    	escLS       = []byte("&#x2028;") // U+2028
    )
    29  
    // isInCharacterRange reports whether r is a legal XML character according
    // to the Char production in Section 2.2 (Characters) of
    // https://www.xml.com/axml/testaxml.htm: tab, line feed, carriage return,
    // or a code point in [0x20, 0xD7FF], [0xE000, 0xFFFD] or
    // [0x10000, 0x10FFFF].
    func isInCharacterRange(r rune) (inrange bool) {
    	switch {
    	case r == 0x09, r == 0x0A, r == 0x0D:
    		inrange = true
    	case 0x20 <= r && r <= 0xD7FF:
    		inrange = true
    	case 0xE000 <= r && r <= 0xFFFD:
    		inrange = true
    	case 0x10000 <= r && r <= 0x10FFFF:
    		inrange = true
    	}
    	return inrange
    }
    41  
    42  // TODO: When do we need to escape the string?
    43  // Based on encoding/xml escapeString from the Go Standard Library.
    44  // https://golang.org/src/encoding/xml/xml.go
    45  func escapeString(e writer, s string) {
    46  	var esc []byte
    47  	last := 0
    48  	for i := 0; i < len(s); {
    49  		r, width := utf8.DecodeRuneInString(s[i:])
    50  		i += width
    51  		switch r {
    52  		case '"':
    53  			esc = escQuot
    54  		case '\'':
    55  			esc = escApos
    56  		case '&':
    57  			esc = escAmp
    58  		case '<':
    59  			esc = escLT
    60  		case '>':
    61  			esc = escGT
    62  		case '\t':
    63  			esc = escTab
    64  		case '\n':
    65  			esc = escNL
    66  		case '\r':
    67  			esc = escCR
    68  		case '\u0085':
    69  			// Not escaped by stdlib
    70  			esc = escNextLine
    71  		case '\u2028':
    72  			// Not escaped by stdlib
    73  			esc = escLS
    74  		default:
    75  			if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
    76  				esc = escFFFD
    77  				break
    78  			}
    79  			continue
    80  		}
    81  		e.WriteString(s[last : i-width])
    82  		e.Write(esc)
    83  		last = i
    84  	}
    85  	e.WriteString(s[last:])
    86  }
    87  
    88  // escapeText writes to w the properly escaped XML equivalent
    89  // of the plain text data s. If escapeNewline is true, newline
    90  // characters will be escaped.
    91  //
    92  // Based on encoding/xml escapeText from the Go Standard Library.
    93  // https://golang.org/src/encoding/xml/xml.go
    94  func escapeText(e writer, s []byte) {
    95  	var esc []byte
    96  	last := 0
    97  	for i := 0; i < len(s); {
    98  		r, width := utf8.DecodeRune(s[i:])
    99  		i += width
   100  		switch r {
   101  		case '"':
   102  			esc = escQuot
   103  		case '\'':
   104  			esc = escApos
   105  		case '&':
   106  			esc = escAmp
   107  		case '<':
   108  			esc = escLT
   109  		case '>':
   110  			esc = escGT
   111  		case '\t':
   112  			esc = escTab
   113  		case '\n':
   114  			// This always escapes newline, which is different than stdlib's optional
   115  			// escape of new line.
   116  			esc = escNL
   117  		case '\r':
   118  			esc = escCR
   119  		case '\u0085':
   120  			// Not escaped by stdlib
   121  			esc = escNextLine
   122  		case '\u2028':
   123  			// Not escaped by stdlib
   124  			esc = escLS
   125  		default:
   126  			if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
   127  				esc = escFFFD
   128  				break
   129  			}
   130  			continue
   131  		}
   132  		e.Write(s[last : i-width])
   133  		e.Write(esc)
   134  		last = i
   135  	}
   136  	e.Write(s[last:])
   137  }
   138  

View as plain text