...

Source file src/github.com/cli/go-gh/v2/pkg/asciisanitizer/sanitizer.go

Documentation: github.com/cli/go-gh/v2/pkg/asciisanitizer

     1  // Package asciisanitizer implements an ASCII control character sanitizer for UTF-8 strings.
     2  // It will transform ASCII control codes into equivalent inert characters that are safe for display in the terminal.
     3  // Without sanitization these ASCII control characters will be interpreted by the terminal.
     4  // This behaviour can be used maliciously as an attack vector, especially the ASCII control characters \x1B and \x9B.
     5  package asciisanitizer
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"strings"
    11  	"unicode"
    12  	"unicode/utf8"
    13  
    14  	"golang.org/x/text/transform"
    15  )
    16  
    17  // Sanitizer implements transform.Transformer interface.
    18  type Sanitizer struct {
    19  	// JSON tells the Sanitizer to replace strings that will be transformed
    20  	// into control characters when the string is marshaled to JSON. Set to
    21  	// true if the string being sanitized represents JSON formatted data.
    22  	JSON      bool
    23  	addEscape bool
    24  }
    25  
    26  // Transform uses a sliding window algorithm to detect C0 and C1 control characters as they are read and replaces
    27  // them with equivalent inert characters. Bytes that are not part of a control character are not modified.
    28  func (t *Sanitizer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
    29  	transfer := func(write, read []byte) error {
    30  		readLength := len(read)
    31  		writeLength := len(write)
    32  		if writeLength > len(dst) {
    33  			return transform.ErrShortDst
    34  		}
    35  		copy(dst, write)
    36  		nDst += writeLength
    37  		dst = dst[writeLength:]
    38  		nSrc += readLength
    39  		src = src[readLength:]
    40  		return nil
    41  	}
    42  
    43  	for len(src) > 0 {
    44  		// When sanitizing JSON strings make sure that we have 6 bytes if available.
    45  		if t.JSON && len(src) < 6 && !atEOF {
    46  			err = transform.ErrShortSrc
    47  			return
    48  		}
    49  		r, size := utf8.DecodeRune(src)
    50  		if r == utf8.RuneError && size < 2 {
    51  			if !atEOF {
    52  				err = transform.ErrShortSrc
    53  				return
    54  			} else {
    55  				err = errors.New("invalid UTF-8 string")
    56  				return
    57  			}
    58  		}
    59  		// Replace C0 and C1 control characters.
    60  		if unicode.IsControl(r) {
    61  			if repl, found := mapControlToCaret(r); found {
    62  				err = transfer(repl, src[:size])
    63  				if err != nil {
    64  					return
    65  				}
    66  				continue
    67  			}
    68  		}
    69  		// Replace JSON C0 and C1 control characters.
    70  		if t.JSON && len(src) >= 6 {
    71  			if repl, found := mapJSONControlToCaret(src[:6]); found {
    72  				if t.addEscape {
    73  					// Add an escape character when necessary to prevent creating
    74  					// invalid JSON with our replacements.
    75  					repl = append([]byte{'\\'}, repl...)
    76  					t.addEscape = false
    77  				}
    78  				err = transfer(repl, src[:6])
    79  				if err != nil {
    80  					return
    81  				}
    82  				continue
    83  			}
    84  		}
    85  		err = transfer(src[:size], src[:size])
    86  		if err != nil {
    87  			return
    88  		}
    89  		if t.JSON {
    90  			if r == '\\' {
    91  				t.addEscape = !t.addEscape
    92  			} else {
    93  				t.addEscape = false
    94  			}
    95  		}
    96  	}
    97  	return
    98  }
    99  
   100  // Reset resets the state and allows the Sanitizer to be reused.
   101  func (t *Sanitizer) Reset() {
   102  	t.addEscape = false
   103  }
   104  
   105  // mapControlToCaret maps C0 and C1 control characters to their caret notation.
   106  func mapControlToCaret(r rune) ([]byte, bool) {
   107  	//\t (09), \n (10), \v (11), \r (13) are safe C0 characters and are not sanitized.
   108  	m := map[rune]string{
   109  		0:   `^@`,
   110  		1:   `^A`,
   111  		2:   `^B`,
   112  		3:   `^C`,
   113  		4:   `^D`,
   114  		5:   `^E`,
   115  		6:   `^F`,
   116  		7:   `^G`,
   117  		8:   `^H`,
   118  		12:  `^L`,
   119  		14:  `^N`,
   120  		15:  `^O`,
   121  		16:  `^P`,
   122  		17:  `^Q`,
   123  		18:  `^R`,
   124  		19:  `^S`,
   125  		20:  `^T`,
   126  		21:  `^U`,
   127  		22:  `^V`,
   128  		23:  `^W`,
   129  		24:  `^X`,
   130  		25:  `^Y`,
   131  		26:  `^Z`,
   132  		27:  `^[`,
   133  		28:  `^\\`,
   134  		29:  `^]`,
   135  		30:  `^^`,
   136  		31:  `^_`,
   137  		128: `^@`,
   138  		129: `^A`,
   139  		130: `^B`,
   140  		131: `^C`,
   141  		132: `^D`,
   142  		133: `^E`,
   143  		134: `^F`,
   144  		135: `^G`,
   145  		136: `^H`,
   146  		137: `^I`,
   147  		138: `^J`,
   148  		139: `^K`,
   149  		140: `^L`,
   150  		141: `^M`,
   151  		142: `^N`,
   152  		143: `^O`,
   153  		144: `^P`,
   154  		145: `^Q`,
   155  		146: `^R`,
   156  		147: `^S`,
   157  		148: `^T`,
   158  		149: `^U`,
   159  		150: `^V`,
   160  		151: `^W`,
   161  		152: `^X`,
   162  		153: `^Y`,
   163  		154: `^Z`,
   164  		155: `^[`,
   165  		156: `^\\`,
   166  		157: `^]`,
   167  		158: `^^`,
   168  		159: `^_`,
   169  	}
   170  	if c, ok := m[r]; ok {
   171  		return []byte(c), true
   172  	}
   173  	return nil, false
   174  }
   175  
   176  // mapJSONControlToCaret maps JSON C0 and C1 control characters to their caret notation.
   177  // JSON control characters are six byte strings, representing a unicode code point,
   178  // ranging from \u0000 to \u001F and \u0080 to \u009F.
   179  func mapJSONControlToCaret(b []byte) ([]byte, bool) {
   180  	if len(b) != 6 {
   181  		return nil, false
   182  	}
   183  	if !bytes.HasPrefix(b, []byte(`\u00`)) {
   184  		return nil, false
   185  	}
   186  	//\t (\u0009), \n (\u000a), \v (\u000b), \r (\u000d) are safe C0 characters and are not sanitized.
   187  	m := map[string]string{
   188  		`\u0000`: `^@`,
   189  		`\u0001`: `^A`,
   190  		`\u0002`: `^B`,
   191  		`\u0003`: `^C`,
   192  		`\u0004`: `^D`,
   193  		`\u0005`: `^E`,
   194  		`\u0006`: `^F`,
   195  		`\u0007`: `^G`,
   196  		`\u0008`: `^H`,
   197  		`\u000c`: `^L`,
   198  		`\u000e`: `^N`,
   199  		`\u000f`: `^O`,
   200  		`\u0010`: `^P`,
   201  		`\u0011`: `^Q`,
   202  		`\u0012`: `^R`,
   203  		`\u0013`: `^S`,
   204  		`\u0014`: `^T`,
   205  		`\u0015`: `^U`,
   206  		`\u0016`: `^V`,
   207  		`\u0017`: `^W`,
   208  		`\u0018`: `^X`,
   209  		`\u0019`: `^Y`,
   210  		`\u001a`: `^Z`,
   211  		`\u001b`: `^[`,
   212  		`\u001c`: `^\\`,
   213  		`\u001d`: `^]`,
   214  		`\u001e`: `^^`,
   215  		`\u001f`: `^_`,
   216  		`\u0080`: `^@`,
   217  		`\u0081`: `^A`,
   218  		`\u0082`: `^B`,
   219  		`\u0083`: `^C`,
   220  		`\u0084`: `^D`,
   221  		`\u0085`: `^E`,
   222  		`\u0086`: `^F`,
   223  		`\u0087`: `^G`,
   224  		`\u0088`: `^H`,
   225  		`\u0089`: `^I`,
   226  		`\u008a`: `^J`,
   227  		`\u008b`: `^K`,
   228  		`\u008c`: `^L`,
   229  		`\u008d`: `^M`,
   230  		`\u008e`: `^N`,
   231  		`\u008f`: `^O`,
   232  		`\u0090`: `^P`,
   233  		`\u0091`: `^Q`,
   234  		`\u0092`: `^R`,
   235  		`\u0093`: `^S`,
   236  		`\u0094`: `^T`,
   237  		`\u0095`: `^U`,
   238  		`\u0096`: `^V`,
   239  		`\u0097`: `^W`,
   240  		`\u0098`: `^X`,
   241  		`\u0099`: `^Y`,
   242  		`\u009a`: `^Z`,
   243  		`\u009b`: `^[`,
   244  		`\u009c`: `^\\`,
   245  		`\u009d`: `^]`,
   246  		`\u009e`: `^^`,
   247  		`\u009f`: `^_`,
   248  	}
   249  	if c, ok := m[strings.ToLower(string(b))]; ok {
   250  		return []byte(c), true
   251  	}
   252  	return nil, false
   253  }
   254  

View as plain text