scan.go

Documentation: github.com/drone/envsubst/v2/parse

     1  package parse
     2  
     3  import (
     4  	"unicode"
     5  	"unicode/utf8"
     6  )
     7  
     8  // eof rune sent when end of file is reached
     9  var eof = rune(0)
    10  
    11  // token is a lexical token.
    12  type token uint
    13  
    14  // list of lexical tokens.
    15  const (
    16  	// special tokens
    17  	tokenIllegal token = iota
    18  	tokenEOF
    19  
    20  	// identifiers and literals
    21  	tokenIdent
    22  
    23  	// operators and delimiters
    24  	tokenLbrack
    25  	tokenRbrack
    26  	tokenQuote
    27  )
    28  
    29  // predefined mode bits to control recognition of tokens.
    30  const (
    31  	scanIdent byte = 1 << iota
    32  	scanLbrack
    33  	scanRbrack
    34  	scanEscape
    35  )
    36  
    37  // predefined mode bits to control escape tokens.
    38  const (
    39  	dollar byte = 1 << iota
    40  	backslash
    41  	escapeAll = dollar | backslash
    42  )
    43  
    44  // returns true if rune is accepted.
    45  type acceptFunc func(r rune, i int) bool
    46  
    47  // scanner implements a lexical scanner that reads unicode
    48  // characters and tokens from a string buffer.
    49  type scanner struct {
    50  	buf         string
    51  	pos         int
    52  	start       int
    53  	width       int
    54  	mode        byte
    55  	escapeChars byte
    56  
    57  	accept acceptFunc
    58  }
    59  
    60  // init initializes a scanner with a new buffer.
    61  func (s *scanner) init(buf string) {
    62  	s.buf = buf
    63  	s.pos = 0
    64  	s.start = 0
    65  	s.width = 0
    66  	s.accept = nil
    67  }
    68  
    69  // read returns the next unicode character. It returns eof at
    70  // the end of the string buffer.
    71  func (s *scanner) read() rune {
    72  	if s.pos >= len(s.buf) {
    73  		s.width = 0
    74  		return eof
    75  	}
    76  	r, w := utf8.DecodeRuneInString(s.buf[s.pos:])
    77  	s.width = w
    78  	s.pos += s.width
    79  	return r
    80  }
    81  
    82  func (s *scanner) unread() {
    83  	s.pos -= s.width
    84  }
    85  
    86  // skip skips over the curring unicode character in the buffer
    87  // by slicing and removing from the buffer.
    88  func (s *scanner) skip() {
    89  	l := s.buf[:s.pos-1]
    90  	r := s.buf[s.pos:]
    91  	s.buf = l + r
    92  }
    93  
    94  // peek returns the next unicode character in the buffer without
    95  // advancing the scanner. It returns eof if the scanner's position
    96  // is at the last character of the source.
    97  func (s *scanner) peek() rune {
    98  	r := s.read()
    99  	s.unread()
   100  	return r
   101  }
   102  
   103  // string returns the string corresponding to the most recently
   104  // scanned token. Valid after calling scan().
   105  func (s *scanner) string() string {
   106  	return s.buf[s.start:s.pos]
   107  }
   108  
   109  // tests if the bit exists for a given character bit
   110  func (s *scanner) shouldEscape(character byte) bool {
   111  	return s.escapeChars&character != 0
   112  }
   113  
   114  // scan reads the next token or Unicode character from source and
   115  // returns it. It returns EOF at the end of the source.
   116  func (s *scanner) scan() token {
   117  	s.start = s.pos
   118  	r := s.read()
   119  	switch {
   120  	case r == eof:
   121  		return tokenEOF
   122  	case s.scanLbrack(r):
   123  		return tokenLbrack
   124  	case s.scanRbrack(r):
   125  		return tokenRbrack
   126  	case s.scanIdent(r):
   127  		return tokenIdent
   128  	}
   129  	return tokenIllegal
   130  }
   131  
   132  // scanIdent reads the next token or Unicode character from source
   133  // and returns true if the Ident character is accepted.
   134  func (s *scanner) scanIdent(r rune) bool {
   135  	if s.mode&scanIdent == 0 {
   136  		return false
   137  	}
   138  	if s.scanEscaped(r) {
   139  		s.skip()
   140  	} else if !s.accept(r, s.pos-s.start) {
   141  		return false
   142  	}
   143  loop:
   144  	for {
   145  		r := s.read()
   146  		switch {
   147  		case r == eof:
   148  			s.unread()
   149  			break loop
   150  		case s.scanLbrack(r):
   151  			s.unread()
   152  			s.unread()
   153  			break loop
   154  		}
   155  		if s.scanEscaped(r) {
   156  			s.skip()
   157  			continue
   158  		}
   159  		if !s.accept(r, s.pos-s.start) {
   160  			s.unread()
   161  			break loop
   162  		}
   163  	}
   164  	return true
   165  }
   166  
   167  // scanLbrack reads the next token or Unicode character from source
   168  // and returns true if the open bracket is encountered.
   169  func (s *scanner) scanLbrack(r rune) bool {
   170  	if s.mode&scanLbrack == 0 {
   171  		return false
   172  	}
   173  	if r == '$' {
   174  		if s.read() == '{' {
   175  			return true
   176  		}
   177  		s.unread()
   178  	}
   179  	return false
   180  }
   181  
   182  // scanRbrack reads the next token or Unicode character from source
   183  // and returns true if the closing bracket is encountered.
   184  func (s *scanner) scanRbrack(r rune) bool {
   185  	if s.mode&scanRbrack == 0 {
   186  		return false
   187  	}
   188  	return r == '}'
   189  }
   190  
   191  // scanEscaped reads the next token or Unicode character from source
   192  // and returns true if it being escaped and should be skipped.
   193  func (s *scanner) scanEscaped(r rune) bool {
   194  	if s.mode&scanEscape == 0 {
   195  		return false
   196  	}
   197  	if r == '$' && s.shouldEscape(dollar) {
   198  		if s.peek() == '$' {
   199  			return true
   200  		}
   201  	}
   202  	if r == '\\' && s.shouldEscape(backslash) {
   203  		switch s.peek() {
   204  		case '/', '\\':
   205  			return true
   206  		default:
   207  			return false
   208  		}
   209  	}
   210  
   211  	return false
   212  }
   213  
   214  //
   215  // scanner functions accept or reject runes.
   216  //
   217  
   218  func acceptRune(r rune, i int) bool {
   219  	return true
   220  }
   221  
   222  func acceptIdent(r rune, i int) bool {
   223  	return unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_'
   224  }
   225  
   226  func acceptColon(r rune, i int) bool {
   227  	return r == ':'
   228  }
   229  
   230  func acceptOneHash(r rune, i int) bool {
   231  	return r == '#' && i == 1
   232  }
   233  
   234  func acceptNone(r rune, i int) bool {
   235  	return false
   236  }
   237  
   238  func acceptNotClosing(r rune, i int) bool {
   239  	return r != '}'
   240  }
   241  
   242  func acceptHashFunc(r rune, i int) bool {
   243  	return r == '#' && i < 3
   244  }
   245  
   246  func acceptPercentFunc(r rune, i int) bool {
   247  	return r == '%' && i < 3
   248  }
   249  
   250  func acceptDefaultFunc(r rune, i int) bool {
   251  	switch {
   252  	case i == 1 && r == ':':
   253  		return true
   254  	case i == 2 && (r == '=' || r == '-' || r == '?' || r == '+'):
   255  		return true
   256  	default:
   257  		return false
   258  	}
   259  }
   260  
   261  func acceptReplaceFunc(r rune, i int) bool {
   262  	switch {
   263  	case i == 1 && r == '/':
   264  		return true
   265  	case i == 2 && (r == '/' || r == '#' || r == '%'):
   266  		return true
   267  	default:
   268  		return false
   269  	}
   270  }
   271  
   272  func acceptOneEqual(r rune, i int) bool {
   273  	return i == 1 && r == '='
   274  }
   275  
   276  func acceptOneColon(r rune, i int) bool {
   277  	return i == 1 && r == ':'
   278  }
   279  
   280  func rejectColonClose(r rune, i int) bool {
   281  	return r != ':' && r != '}'
   282  }
   283  
   284  func acceptSlash(r rune, i int) bool {
   285  	return r == '/'
   286  }
   287  
   288  func acceptNotSlash(r rune, i int) bool {
   289  	return r != '/'
   290  }
   291  
   292  func acceptCasingFunc(r rune, i int) bool {
   293  	return (r == ',' || r == '^') && i < 3
   294  }
   295
View as plain text