parse.go

Documentation: oss.terrastruct.com/d2/d2parser

     1  package d2parser
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"math/big"
     9  	"strconv"
    10  	"strings"
    11  	"unicode"
    12  	"unicode/utf8"
    13  
    14  	tunicode "golang.org/x/text/encoding/unicode"
    15  	"golang.org/x/text/transform"
    16  
    17  	"oss.terrastruct.com/d2/d2ast"
    18  	"oss.terrastruct.com/util-go/go2"
    19  )
    20  
// ParseOptions configures Parse. Parse treats a nil *ParseOptions like the zero value.
type ParseOptions struct {
	// UTF16Pos would be used with input received from a browser where the browser will send the text as UTF-8 but
	// JavaScript keeps strings in memory as UTF-16 and so needs UTF-16 indexes into the text to line up errors correctly.
	// So you want to read UTF-8 still but adjust the indexes to pretend the input is utf16.
	UTF16Pos bool

	// ParseError, when non-nil, is the collector Parse appends its errors to and returns
	// if it ends up non-empty. When nil, Parse allocates a fresh one.
	ParseError *ParseError
}
    29  
    30  // Parse parses a .d2 Map in r.
    31  //
    32  // The returned Map always represents a valid .d2 file. All encountered errors will be in
    33  // []error.
    34  //
    35  // The map may be compiled via Compile even if there are errors to keep language tooling
    36  // operational. Though autoformat should not run.
    37  //
    38  // If UTF16Pos is true, positions will be recorded in UTF-16 codeunits as required by LSP
    39  // and browser clients. See
    40  // https://microsoft.github.io/language-server-protocol/specifications/specification-current/#textDocuments
    41  // TODO: update godocs
    42  func Parse(path string, r io.Reader, opts *ParseOptions) (*d2ast.Map, error) {
    43  	if opts == nil {
    44  		opts = &ParseOptions{
    45  			UTF16Pos: false,
    46  		}
    47  	}
    48  
    49  	p := &parser{
    50  		path: path,
    51  
    52  		utf16Pos: opts.UTF16Pos,
    53  		err:      opts.ParseError,
    54  	}
    55  	br := bufio.NewReader(r)
    56  	p.reader = br
    57  
    58  	bom, err := br.Peek(2)
    59  	if err == nil {
    60  		// 0xFFFE is invalid UTF-8 so this is safe.
    61  		// Also a different BOM is used for UTF-8.
    62  		// See https://unicode.org/faq/utf_bom.html#bom4
    63  		if bom[0] == 0xFF && bom[1] == 0xFE {
    64  			p.utf16Pos = true
    65  
    66  			buf := make([]byte, br.Buffered())
    67  			io.ReadFull(br, buf)
    68  
    69  			mr := io.MultiReader(bytes.NewBuffer(buf), r)
    70  			tr := transform.NewReader(mr, tunicode.UTF16(tunicode.LittleEndian, tunicode.UseBOM).NewDecoder())
    71  			br.Reset(tr)
    72  		}
    73  	}
    74  
    75  	if p.err == nil {
    76  		p.err = &ParseError{}
    77  	}
    78  
    79  	m := p.parseMap(true)
    80  	if !p.err.Empty() {
    81  		return m, p.err
    82  	}
    83  	return m, nil
    84  }
    85  
    86  func ParseKey(key string) (*d2ast.KeyPath, error) {
    87  	p := &parser{
    88  		reader: strings.NewReader(key),
    89  		err:    &ParseError{},
    90  	}
    91  
    92  	k := p.parseKey()
    93  	if !p.err.Empty() {
    94  		return nil, fmt.Errorf("failed to parse key %q: %w", key, p.err)
    95  	}
    96  	if k == nil {
    97  		return nil, fmt.Errorf("empty key: %q", key)
    98  	}
    99  	return k, nil
   100  }
   101  
   102  func ParseMapKey(mapKey string) (*d2ast.Key, error) {
   103  	p := &parser{
   104  		reader: strings.NewReader(mapKey),
   105  		err:    &ParseError{},
   106  	}
   107  
   108  	mk := p.parseMapKey()
   109  	if !p.err.Empty() {
   110  		return nil, fmt.Errorf("failed to parse map key %q: %w", mapKey, p.err)
   111  	}
   112  	if mk == nil {
   113  		return nil, fmt.Errorf("empty map key: %q", mapKey)
   114  	}
   115  	return mk, nil
   116  }
   117  
   118  func ParseValue(value string) (d2ast.Value, error) {
   119  	p := &parser{
   120  		reader: strings.NewReader(value),
   121  		err:    &ParseError{},
   122  	}
   123  
   124  	v := p.parseValue()
   125  	if !p.err.Empty() {
   126  		return nil, fmt.Errorf("failed to parse value %q: %w", value, p.err)
   127  	}
   128  	if v.Unbox() == nil {
   129  		return nil, fmt.Errorf("empty value: %q", value)
   130  	}
   131  	return v.Unbox(), nil
   132  }
   133  
// parser holds the state of a single parse over a rune stream.
//
// Fields, as used by the methods in this file:
//   - path: copied into the Path of every range the parser creates.
//   - pos: position after the last rune consumed by read or accepted by commit.
//   - utf16Pos: passed to every position computation (Advance/Subtract).
//   - reader: the underlying rune source.
//   - readerPos: set to lookaheadPos right before each read from reader; used as the
//     end position of errors reported at EOF.
//   - readahead: runes that were peeked and then rewound; they are served again before
//     reader is consulted.
//   - lookahead: runes peeked since the last commit or rewind.
//   - lookaheadPos: pos advanced by the runes currently in lookahead.
//   - ioerr: set once reader returns any error, including io.EOF; from then on
//     _readRune reports eof without touching reader.
//   - err: collector that every reported error is appended to.
//   - inEdgeGroup: set while parseEdgeGroup is parsing the inside of (...).
//   - depth: incremented while inside a non-file map or a block comment.
//
// TODO: refactor parser to keep entire file in memory as []rune
//   - trivial to then convert positions
//   - lookahead is gone, just forward back as much as you want :)
//   - streaming parser isn't really helpful.
//   - just read into a string even and decode runes forward/back as needed
//   - the whole file essentially exists within the parser as the AST anyway...
//
// TODO: ast struct that combines map & errors and pass that around
type parser struct {
	path     string
	pos      d2ast.Position
	utf16Pos bool

	reader    io.RuneReader
	readerPos d2ast.Position

	readahead    []rune
	lookahead    []rune
	lookaheadPos d2ast.Position

	ioerr bool
	err   *ParseError

	inEdgeGroup bool

	depth int
}
   161  
// ParseError collects the errors reported during a parse and implements error by
// joining their messages with newlines. The parser appends to Errors only.
//
// TODO: rename to Error and make existing Error a private type errorWithRange
type ParseError struct {
	// Errors from globs need to be deduplicated
	// NOTE(review): nothing in the visible parser code populates ErrorsLookup; it is
	// presumably maintained by other packages — confirm. It is excluded from JSON.
	ErrorsLookup map[d2ast.Error]struct{} `json:"-"`
	Errors       []d2ast.Error            `json:"errs"`
}
   168  
   169  func Errorf(n d2ast.Node, f string, v ...interface{}) error {
   170  	f = "%v: " + f
   171  	v = append([]interface{}{n.GetRange()}, v...)
   172  	return d2ast.Error{
   173  		Range:   n.GetRange(),
   174  		Message: fmt.Sprintf(f, v...),
   175  	}
   176  }
   177  
   178  func (pe *ParseError) Empty() bool {
   179  	if pe == nil {
   180  		return true
   181  	}
   182  	return len(pe.Errors) == 0
   183  }
   184  
   185  func (pe *ParseError) Error() string {
   186  	var sb strings.Builder
   187  	for i, err := range pe.Errors {
   188  		if i > 0 {
   189  			sb.WriteByte('\n')
   190  		}
   191  		sb.WriteString(err.Error())
   192  	}
   193  	return sb.String()
   194  }
   195  
   196  func (p *parser) errorf(start d2ast.Position, end d2ast.Position, f string, v ...interface{}) {
   197  	r := d2ast.Range{
   198  		Path:  p.path,
   199  		Start: start,
   200  		End:   end,
   201  	}
   202  	f = "%v: " + f
   203  	v = append([]interface{}{r}, v...)
   204  	p.err.Errors = append(p.err.Errors, d2ast.Error{
   205  		Range:   r,
   206  		Message: fmt.Sprintf(f, v...),
   207  	})
   208  }
   209  
// _readRune reads the next rune from the underlying reader or from the p.readahead buffer.
//
// It does not touch p.pos or p.lookahead for a successfully read rune; read and peek
// do that bookkeeping. eof is true once the reader has returned any error: io.EOF is
// silent, any other error is recorded as an "io error". On eof every pending peeked
// rune is rewound into p.readahead.
func (p *parser) _readRune() (r rune, eof bool) {
	// Previously rewound runes are served first, oldest first.
	if len(p.readahead) > 0 {
		r = p.readahead[0]
		// Shift the remainder down in place so the backing array is reused.
		p.readahead = append(p.readahead[:0], p.readahead[1:]...)
		return r, false
	}

	// The reader already failed once; never call it again.
	if p.ioerr {
		p.rewind()
		return 0, true
	}

	// Remember where this read starts so EOF errors can point at it.
	p.readerPos = p.lookaheadPos

	r, _, err := p.reader.ReadRune()
	if err != nil {
		p.ioerr = true
		if err != io.EOF {
			p.err.Errors = append(p.err.Errors, d2ast.Error{
				Range: d2ast.Range{
					Path:  p.path,
					Start: p.readerPos,
					End:   p.readerPos,
				},
				Message: fmt.Sprintf("io error: %v", err),
			})
		}
		p.rewind()
		return 0, true
	}
	return r, false
}
   243  
   244  func (p *parser) read() (r rune, eof bool) {
   245  	r, eof = p._readRune()
   246  	if eof {
   247  		return 0, true
   248  	}
   249  	p.pos = p.pos.Advance(r, p.utf16Pos)
   250  	p.lookaheadPos = p.pos
   251  	return r, false
   252  }
   253  
// replay un-consumes r: it moves p.pos back by r, puts r at the front of the lookahead
// buffer and then rewinds, so r followed by anything that was still peeked becomes the
// next input to be read.
func (p *parser) replay(r rune) {
	p.pos = p.pos.Subtract(r, p.utf16Pos)

	// This is more complex than it needs to be to allow reusing the buffer underlying
	// p.lookahead.
	newcap := len(p.lookahead) + 1
	if newcap > cap(p.lookahead) {
		// Not enough room: allocate and leave slot 0 free for r.
		lookahead2 := make([]rune, newcap)
		copy(lookahead2[1:], p.lookahead)
		p.lookahead = lookahead2
	} else {
		// Grow in place and shift the existing runes right by one.
		p.lookahead = p.lookahead[:newcap]
		copy(p.lookahead[1:], p.lookahead)
	}
	p.lookahead[0] = r

	// rewind moves the buffer to p.readahead and resets p.lookaheadPos to p.pos.
	p.rewind()
}
   272  
   273  // peek returns the next rune without advancing the parser.
   274  // You *must* call commit or rewind afterwards.
   275  func (p *parser) peek() (r rune, eof bool) {
   276  	r, eof = p._readRune()
   277  	if eof {
   278  		return 0, true
   279  	}
   280  
   281  	p.lookahead = append(p.lookahead, r)
   282  	p.lookaheadPos = p.lookaheadPos.Advance(r, p.utf16Pos)
   283  	return r, false
   284  }
   285  
   286  // TODO: this can replace multiple peeks i think, just return []rune instead
   287  func (p *parser) peekn(n int) (s string, eof bool) {
   288  	var sb strings.Builder
   289  	for i := 0; i < n; i++ {
   290  		r, eof := p.peek()
   291  		if eof {
   292  			return sb.String(), true
   293  		}
   294  		sb.WriteRune(r)
   295  	}
   296  	return sb.String(), false
   297  }
   298  
   299  func (p *parser) readNotSpace() (r rune, eof bool) {
   300  	for {
   301  		r, eof = p.read()
   302  		if eof {
   303  			return 0, true
   304  		}
   305  		if unicode.IsSpace(r) {
   306  			continue
   307  		}
   308  		return r, false
   309  	}
   310  }
   311  
   312  // peekNotSpace returns the next non space rune without advancing the parser.
   313  //
   314  // newline is set if the next non space character is on a different line
   315  // than the current line.
   316  //
   317  // TODO: everywhere this is used, we support newline escapes and so can just
   318  // add the logic here and it should *just* work
   319  // except line comments iirc
   320  // not entirely sure, maybe i can put it into peek somehow
   321  func (p *parser) peekNotSpace() (r rune, newlines int, eof bool) {
   322  	for {
   323  		r, eof = p.peek()
   324  		if eof {
   325  			return 0, 0, true
   326  		}
   327  		if unicode.IsSpace(r) {
   328  			if r == '\n' {
   329  				newlines++
   330  			}
   331  			continue
   332  		}
   333  		return r, newlines, false
   334  	}
   335  }
   336  
   337  // commit advances p.pos by all peeked bytes and then resets the p.lookahead buffer.
   338  func (p *parser) commit() {
   339  	p.pos = p.lookaheadPos
   340  	p.lookahead = p.lookahead[:0]
   341  }
   342  
// rewind copies p.lookahead to the front of p.readahead and then resets the p.lookahead buffer.
// All peeked runes will again be available via p.read or p.peek, and p.lookaheadPos is
// reset to p.pos. It is a no-op when nothing has been peeked.
//
// TODO:
// peek
// peekn
// peekNotSpace
// commit
// rewind
//
// TODO: make each parse function read its delimiter and return nil if not as expected
// TODO: lookahead *must* always be empty in between parse calls. you either commit or
//
//	rewind in each function. if you don't, you pass a hint.
//
// TODO: omg we don't need two buffers, just a single lookahead and an index...
// TODO: get rid of lookaheadPos or at least never use directly. maybe rename to beforePeekPos?
//
//	or better yet keep positions in the lookahead buffer.
//	ok so plan here is to get rid of lookaheadPos and add a rewindPos that stores
//	the pos to rewind to.
func (p *parser) rewind() {
	if len(p.lookahead) == 0 {
		return
	}

	// This is more complex than it needs to be to allow reusing the buffer underlying
	// p.readahead.
	newcap := len(p.lookahead) + len(p.readahead)
	if cap(p.readahead) < newcap {
		// Not enough room: allocate and place the old readahead after the gap that
		// the lookahead runes will fill.
		readahead2 := make([]rune, newcap)
		copy(readahead2[len(p.lookahead):], p.readahead)
		p.readahead = readahead2
	} else {
		// Grow in place and shift the old readahead right by len(p.lookahead).
		p.readahead = p.readahead[:newcap]
		copy(p.readahead[len(p.lookahead):], p.readahead)
	}
	// The peeked runes go first so they are re-read before older readahead.
	copy(p.readahead, p.lookahead)

	p.lookahead = p.lookahead[:0]
	p.lookaheadPos = p.pos
}
   384  
// parseMap parses map nodes until the map ends and returns the resulting Map.
//
// With isFileMap the map is the whole file: it ends at end of input and a stray '}' is
// reported as an error and skipped. Otherwise the caller has already consumed the
// opening '{' (the range start is moved back over it), the map ends at the matching
// '}' and reaching end of input first is reported as an error.
//
// TODO: remove isFileMap like in printer. can't rn as we have to subtract delim
func (p *parser) parseMap(isFileMap bool) *d2ast.Map {
	m := &d2ast.Map{
		Range: d2ast.Range{
			Path:  p.path,
			Start: p.pos,
		},
	}
	// The end of the range is whatever p.pos is when this function returns.
	defer m.Range.End.From(&p.pos)

	if !isFileMap {
		m.Range.Start = m.Range.Start.Subtract('{', p.utf16Pos)
		p.depth++
		defer dec(&p.depth)
	}

	for {
		r, eof := p.readNotSpace()
		if eof {
			if !isFileMap {
				p.errorf(m.Range.Start, p.readerPos, "maps must be terminated with }")
			}
			return m
		}

		switch r {
		case ';':
			// Empty statement.
			continue
		case '}':
			if isFileMap {
				p.errorf(p.pos.Subtract(r, p.utf16Pos), p.pos, "unexpected map termination character } in file map")
				continue
			}
			return m
		}

		n := p.parseMapNode(r)
		if n.Unbox() != nil {
			m.Nodes = append(m.Nodes, n)
			// TODO: all subsequent not comment characters on the current line (or till ;)
			// need to be considered errors.
			// TODO: add specific msg for each bad rune type
		}

		if n.BlockComment != nil {
			// Anything after a block comment is ok.
			continue
		}

		// Swallow whatever follows the node on the same line, stopping before a
		// newline, ';', '}', '#' or end of input. If anything was swallowed, p.pos
		// has moved past after and the span is reported below.
		after := p.pos
		for {
			r, newlines, eof := p.peekNotSpace()
			if eof || newlines != 0 || r == ';' || r == '}' || r == '#' {
				p.rewind()
				break
			}
			p.commit()
		}

		// TODO: maybe better idea here is to make parseUnquotedString aware of its delimiters
		// better and so it would read technically invalid characters and just complain.
		// TODO: that way broken syntax will be parsed more "intently". would work better with
		// language tooling I think though not sure. yes definitely, eaterr!
		if after != p.pos {
			if n.Unbox() != nil {
				if n.MapKey != nil && n.MapKey.Value.Unbox() != nil {
					ps := ""
					if _, ok := n.MapKey.Value.Unbox().(*d2ast.BlockString); ok {
						ps = ". See https://d2lang.com/tour/text#advanced-block-strings."
					}
					p.errorf(after, p.pos, "unexpected text after %v%s", n.MapKey.Value.Unbox().Type(), ps)
				} else {
					p.errorf(after, p.pos, "unexpected text after %v", n.Unbox().Type())
				}
			} else {
				// No node was produced at all, so the text could not even start a key.
				p.errorf(after, p.pos, "invalid text beginning unquoted key")
			}
		}
	}
}
   465  
// parseMapNode parses one map node whose first rune r has already been consumed.
//
// It dispatches on r: '#' starts a line comment, `"""` a block comment, "...$" a
// substitution spread and "...@" an import spread. Anything else is replayed and parsed
// as a map key. At most one field of the returned box is set; the box is empty when
// parseMapKey yields nothing.
func (p *parser) parseMapNode(r rune) d2ast.MapNodeBox {
	var box d2ast.MapNodeBox

	switch r {
	case '#':
		box.Comment = p.parseComment()
		return box
	case '"':
		// r plus two more quotes opens a block comment.
		s, eof := p.peekn(2)
		if eof {
			// The lookahead has already been rewound by _readRune on eof.
			break
		}
		if s != `""` {
			p.rewind()
			break
		}
		p.commit()
		box.BlockComment = p.parseBlockComment()
		return box
	case '.':
		// r plus two more dots is a spread; the rune after them selects the kind.
		s, eof := p.peekn(2)
		if eof {
			break
		}
		if s != ".." {
			p.rewind()
			break
		}
		// This r shadows the parameter only inside this case; the replay below
		// still sees the original first rune.
		r, eof := p.peek()
		if eof {
			break
		}
		if r == '$' {
			p.commit()
			box.Substitution = p.parseSubstitution(true)
			return box
		}
		if r == '@' {
			p.commit()
			box.Import = p.parseImport(true)
			return box
		}
		p.rewind()
		break
	}

	// Not a special node: push the first rune back and parse a regular map key.
	p.replay(r)
	box.MapKey = p.parseMapKey()
	return box
}
   516  
   517  func (p *parser) parseComment() *d2ast.Comment {
   518  	c := &d2ast.Comment{
   519  		Range: d2ast.Range{
   520  			Path:  p.path,
   521  			Start: p.pos.Subtract('#', p.utf16Pos),
   522  		},
   523  	}
   524  	defer c.Range.End.From(&p.pos)
   525  
   526  	var sb strings.Builder
   527  	defer func() {
   528  		c.Value = sb.String()
   529  	}()
   530  	p.parseCommentLine(c, &sb)
   531  
   532  	for {
   533  		r, newlines, eof := p.peekNotSpace()
   534  		if eof {
   535  			return c
   536  		}
   537  		if r != '#' || newlines >= 2 {
   538  			p.rewind()
   539  			return c
   540  		}
   541  		p.commit()
   542  
   543  		if newlines == 1 {
   544  			sb.WriteByte('\n')
   545  		}
   546  
   547  		p.parseCommentLine(c, &sb)
   548  	}
   549  }
   550  
   551  func (p *parser) parseCommentLine(c *d2ast.Comment, sb *strings.Builder) {
   552  	firstRune := true
   553  	for {
   554  		r, eof := p.peek()
   555  		if eof {
   556  			return
   557  		}
   558  		if r == '\n' {
   559  			p.rewind()
   560  			return
   561  		}
   562  		p.commit()
   563  
   564  		if firstRune {
   565  			firstRune = false
   566  			if r == ' ' {
   567  				continue
   568  			}
   569  		}
   570  		sb.WriteRune(r)
   571  	}
   572  }
   573  
// parseBlockComment parses a block comment whose opening `"""` has already been
// consumed, up to and including the closing `"""`.
//
// Whitespace after the opener, up to and including the first newline, is dropped. The
// remaining text is passed through trimSpaceAfterLastNewline and trimCommonIndent to
// form the Value. Reaching end of input before the closer is reported as an error and
// the partial comment is returned.
func (p *parser) parseBlockComment() *d2ast.BlockComment {
	bc := &d2ast.BlockComment{
		Range: d2ast.Range{
			Path:  p.path,
			Start: p.pos.SubtractString(`"""`, p.utf16Pos),
		},
	}
	defer bc.Range.End.From(&p.pos)

	p.depth++
	defer dec(&p.depth)

	var sb strings.Builder
	defer func() {
		bc.Value = trimSpaceAfterLastNewline(sb.String())
		bc.Value = trimCommonIndent(bc.Value)
	}()

	// Skip whitespace following the opener, stopping after the first newline or
	// before the first non space rune.
	for {
		r, eof := p.peek()
		if eof {
			p.errorf(bc.Range.Start, p.readerPos, `block comments must be terminated with """`)
			return bc
		}

		if !unicode.IsSpace(r) {
			p.rewind()
			break
		}
		p.commit()
		if r == '\n' {
			break
		}
	}

	// Collect the body until a run of three quotes is found.
	for {
		r, eof := p.read()
		if eof {
			p.errorf(bc.Range.Start, p.readerPos, `block comments must be terminated with """`)
			return bc
		}

		if r != '"' {
			sb.WriteRune(r)
			continue
		}

		s, eof := p.peekn(2)
		if eof {
			p.errorf(bc.Range.Start, p.readerPos, `block comments must be terminated with """`)
			return bc
		}
		if s != `""` {
			// A lone quote is part of the text; the two peeked runes are rescanned.
			sb.WriteByte('"')
			p.rewind()
			continue
		}
		p.commit()
		return bc
	}
}
   635  
   636  func trimSpaceAfterLastNewline(s string) string {
   637  	lastNewline := strings.LastIndexByte(s, '\n')
   638  	if lastNewline == -1 {
   639  		return strings.TrimRightFunc(s, unicode.IsSpace)
   640  	}
   641  
   642  	lastLine := s[lastNewline+1:]
   643  	lastLine = strings.TrimRightFunc(lastLine, unicode.IsSpace)
   644  	if len(lastLine) == 0 {
   645  		return s[:lastNewline]
   646  	}
   647  	return s[:lastNewline+1] + lastLine
   648  }
   649  
// parseMapKey parses one map key statement: an optional "&" or "!&" prefix, then
// either an edge group "(...)", or a key optionally followed by an edge group, by
// edges, and/or by a value.
//
// It returns nil when neither a key nor any edge was parsed.
func (p *parser) parseMapKey() (mk *d2ast.Key) {
	mk = &d2ast.Key{
		Range: d2ast.Range{
			Path:  p.path,
			Start: p.pos,
		},
	}
	defer mk.Range.End.From(&p.pos)

	// Runs before the deferred End assignment above and replaces the named result.
	defer func() {
		if mk.Key == nil && len(mk.Edges) == 0 {
			mk = nil
		}
	}()

	// Check for not ampersand/@.
	r, eof := p.peek()
	if eof {
		return mk
	}
	if r == '!' {
		// '!' only counts as a prefix when immediately followed by '&'; otherwise
		// both runes are rewound and parsed as part of the key.
		r, eof := p.peek()
		if eof {
			return mk
		}
		if r == '&' {
			p.commit()
			mk.NotAmpersand = true
		} else {
			p.rewind()
		}
	} else if r == '&' {
		p.commit()
		mk.Ampersand = true
	} else {
		p.rewind()
	}

	// A statement may start directly with an edge group.
	r, eof = p.peek()
	if eof {
		return mk
	}
	if r == '(' {
		p.commit()
		p.parseEdgeGroup(mk)
		return mk
	}
	p.rewind()

	k := p.parseKey()
	if k != nil {
		mk.Key = k
	}

	// Decide what follows the key on the same line.
	r, newlines, eof := p.peekNotSpace()
	if eof {
		return mk
	}
	if newlines > 0 {
		p.rewind()
		return mk
	}
	switch r {
	case '(':
		p.commit()
		p.parseEdgeGroup(mk)
		return mk
	case '<', '>', '-':
		// The key just parsed is the source of an edge, not the map key itself.
		p.rewind()
		mk.Key = nil
		p.parseEdges(mk, k)
		p.parseMapKeyValue(mk)
		return mk
	default:
		p.rewind()
		p.parseMapKeyValue(mk)
		return mk
	}
}
   729  
// parseMapKeyValue parses the optional value part of mk on the current line.
//
// A value is introduced either by ':' or directly by '{'. If the parsed value is a
// scalar and is followed on the same line by '{', the scalar becomes mk.Primary and
// the value starting at '{' becomes mk.Value.
func (p *parser) parseMapKeyValue(mk *d2ast.Key) {
	r, newlines, eof := p.peekNotSpace()
	if eof {
		return
	}
	if newlines > 0 {
		p.rewind()
		return
	}

	switch r {
	case '{':
		// Leave '{' for parseValue; without a key or edge there is nothing to
		// attach a value to.
		p.rewind()
		if mk.Key == nil && len(mk.Edges) == 0 {
			return
		}
	case ':':
		p.commit()
		if mk.Key == nil && len(mk.Edges) == 0 {
			p.errorf(mk.Range.Start, p.pos, "map value without key")
		}
	default:
		p.rewind()
		return
	}
	mk.Value = p.parseValue()
	if mk.Value.Unbox() == nil {
		p.errorf(p.pos.Subtract(':', p.utf16Pos), p.pos, "missing value after colon")
	}

	sb := mk.Value.ScalarBox()
	// If the value is a scalar, then check if it's the primary value.
	if sb.Unbox() != nil {
		r, newlines, eof := p.peekNotSpace()
		if eof || newlines > 0 || r != '{' {
			p.rewind()
			return
		}
		// Next character is on the same line without ; separator so it must mean
		// our current value is the Primary and the next is the Value.
		// commit consumes the skipped whitespace together with '{'; replay then
		// hands the '{' back so that parseValue sees it first.
		p.commit()
		p.replay(r)
		mk.Primary = sb
		mk.Value = p.parseValue()
	}
}
   776  
// parseEdgeGroup parses an edge group whose opening '(' has already been consumed:
// the edges inside the parentheses, the closing ')', an optional "[index]", an
// optional ".key" and finally an optional value. Results are stored on mk.
//
// Everything must be on the current line; a newline or end of input simply ends the
// group, while any other rune in place of ')' is reported as an error.
func (p *parser) parseEdgeGroup(mk *d2ast.Key) {
	// To prevent p.parseUnquotedString from consuming terminating parentheses.
	p.inEdgeGroup = true
	defer func() {
		p.inEdgeGroup = false
	}()

	src := p.parseKey()
	p.parseEdges(mk, src)

	r, newlines, eof := p.peekNotSpace()
	if eof || newlines > 0 {
		p.rewind()
		return
	}
	if r != ')' {
		p.rewind()
		p.errorf(mk.Range.Start, p.pos, "edge groups must be terminated with )")
		return
	}
	p.commit()

	// Optional edge index: (a -> b)[0]
	r, newlines, eof = p.peekNotSpace()
	if eof || newlines > 0 {
		p.rewind()
		return
	}
	if r == '[' {
		p.commit()
		mk.EdgeIndex = p.parseEdgeIndex()
	} else {
		p.rewind()
	}

	// Optional key applied to the edge: (a -> b).key
	r, newlines, eof = p.peekNotSpace()
	if eof || newlines > 0 {
		p.rewind()
		return
	}
	if r == '.' {
		p.commit()
		mk.EdgeKey = p.parseKey()
	} else {
		p.rewind()
	}

	// The value is outside the parentheses, so the flag is cleared before parsing it.
	p.inEdgeGroup = false
	p.parseMapKeyValue(mk)
}
   826  
   827  func (p *parser) parseEdgeIndex() *d2ast.EdgeIndex {
   828  	ei := &d2ast.EdgeIndex{
   829  		Range: d2ast.Range{
   830  			Path:  p.path,
   831  			Start: p.pos.Subtract('[', p.utf16Pos),
   832  		},
   833  	}
   834  	defer ei.Range.End.From(&p.pos)
   835  
   836  	r, newlines, eof := p.peekNotSpace()
   837  	if eof || newlines > 0 {
   838  		p.rewind()
   839  		return nil
   840  	}
   841  
   842  	if unicode.IsDigit(r) {
   843  		p.commit()
   844  		var sb strings.Builder
   845  		sb.WriteRune(r)
   846  		for {
   847  			r, newlines, eof = p.peekNotSpace()
   848  			if eof || newlines > 0 {
   849  				p.rewind()
   850  				p.errorf(ei.Range.Start, p.pos, "unterminated edge index")
   851  				return nil
   852  			}
   853  			if r == ']' {
   854  				p.rewind()
   855  				break
   856  			}
   857  			p.commit()
   858  			if !unicode.IsDigit(r) {
   859  				p.errorf(p.pos.Subtract(r, p.utf16Pos), p.pos, "unexpected character in edge index")
   860  				continue
   861  			}
   862  			sb.WriteRune(r)
   863  		}
   864  		i, _ := strconv.Atoi(sb.String())
   865  		ei.Int = &i
   866  	} else if r == '*' {
   867  		p.commit()
   868  		ei.Glob = true
   869  	} else {
   870  		p.errorf(p.pos.Subtract(r, p.utf16Pos), p.pos, "unexpected character in edge index")
   871  		// TODO: skip to ], maybe add a p.skipTo to skip to certain characters
   872  	}
   873  
   874  	r, newlines, eof = p.peekNotSpace()
   875  	if eof || newlines > 0 || r != ']' {
   876  		p.rewind()
   877  		p.errorf(ei.Range.Start, p.pos, "unterminated edge index")
   878  		return ei
   879  	}
   880  	p.commit()
   881  	return ei
   882  }
   883  
// parseEdges parses a chain of connections starting at src (for example the
// "-> b -> c" part of "a -> b -> c") and appends one Edge per connection to mk.Edges.
// The destination of each edge becomes the source of the next one.
//
// Parsing stops at a newline, end of input, or the first rune that cannot start a
// connection. A nil src is reported as "connection missing source".
func (p *parser) parseEdges(mk *d2ast.Key, src *d2ast.KeyPath) {
	for {
		e := &d2ast.Edge{
			Range: d2ast.Range{
				Path: p.path,
			},
			Src: src,
		}
		if src != nil {
			e.Range.Start = src.Range.Start
		} else {
			e.Range.Start = p.pos
		}

		r, newlines, eof := p.peekNotSpace()
		if eof {
			return
		}
		if newlines > 0 {
			p.rewind()
			return
		}
		// A connection starts with a source arrowhead ('<' or '*') or with '-'.
		if r == '<' || r == '*' {
			e.SrcArrow = string(r)
		} else if r != '-' {
			p.rewind()
			return
		}
		if src == nil {
			// r is only peeked here, so its position is derived from lookaheadPos.
			p.errorf(p.lookaheadPos.Subtract(r, p.utf16Pos), p.lookaheadPos, "connection missing source")
			e.Range.Start = p.lookaheadPos.Subtract(r, p.utf16Pos)
		}
		p.commit()

		// An edge that fails to parse is dropped and ends the chain.
		if !p.parseEdge(e) {
			return
		}

		dst := p.parseKey()
		if dst == nil {
			p.errorf(e.Range.Start, p.pos, "connection missing destination")
		} else {
			e.Dst = dst
			e.Range.End = e.Dst.Range.End
		}
		mk.Edges = append(mk.Edges, e)
		src = dst
	}
}
   933  
// parseEdge consumes the body of a connection after its first rune: any number of
// '-', optionally interrupted by backslash-escaped newlines, ending either with a
// destination arrowhead ('>' or '*', stored in e.DstArrow) or before the first other
// rune.
//
// It returns false, after reporting an error, if the input ends inside the connection
// or a backslash is followed by something other than a newline.
func (p *parser) parseEdge(e *d2ast.Edge) (ok bool) {
	defer e.Range.End.From(&p.pos)

	for {
		r, eof := p.peek()
		if eof {
			p.errorf(e.Range.Start, p.readerPos, "unterminated connection")
			return false
		}
		switch r {
		case '>', '*':
			e.DstArrow = string(r)
			p.commit()
			return true
		case '\\':
			p.commit()
			r, newlines, eof := p.peekNotSpace()
			if eof {
				// The next peek at the top of the loop reports the eof.
				continue
			}
			if newlines == 0 {
				p.rewind()
				p.errorf(e.Range.Start, p.readerPos, "only newline escapes are allowed in connections")
				return false
			}
			if newlines > 1 {
				p.rewind()
				continue
			}
			// Exactly one escaped newline: drop it and the surrounding whitespace,
			// then hand the first rune of the next line back to the loop.
			p.commit()
			p.replay(r)
		case '-':
			p.commit()
		default:
			p.rewind()
			return true
		}
	}
}
   973  
// parseKey parses a key path, a sequence of strings separated by '.', on the current
// line. It returns nil if no path element could be parsed.
//
// The range starts at the first element and ends at the end of the last one.
func (p *parser) parseKey() (k *d2ast.KeyPath) {
	k = &d2ast.KeyPath{
		Range: d2ast.Range{
			Path:  p.path,
			Start: p.pos,
		},
	}

	// Replaces the named result with nil for an empty path, else fixes up the end.
	defer func() {
		if len(k.Path) == 0 {
			k = nil
		} else {
			k.Range.End = k.Path[len(k.Path)-1].Unbox().GetRange().End
		}
	}()

	for {
		r, newlines, eof := p.peekNotSpace()
		if eof {
			return k
		}
		if newlines > 0 || r == '(' {
			p.rewind()
			return k
		}
		// TODO: error if begin, but see below too
		// NOTE(review): the '.' is neither committed nor rewound here. It stays in
		// the lookahead buffer and is rewound together with the next rune below.
		if r == '.' {
			continue
		}
		p.rewind()

		sb := p.parseString(true)
		s := sb.Unbox()
		if s == nil {
			return k
		}
		if sb.UnquotedString != nil && strings.HasPrefix(s.ScalarString(), "@") {
			// errorf prepends the range as argument 1, so %s consumes argument 2 and
			// %[2]s refers to that same string again.
			p.errorf(s.GetRange().Start, s.GetRange().End, "%s is not a valid import, did you mean ...%[2]s?", s.ScalarString())
		}

		if len(k.Path) == 0 {
			k.Range.Start = s.GetRange().Start
		}
		k.Path = append(k.Path, &sb)

		// Another element follows only if the next rune on this line is '.'.
		r, newlines, eof = p.peekNotSpace()
		if eof {
			return k
		}
		if newlines > 0 || r != '.' {
			p.rewind()
			return k
		}
		// TODO: error if not string or ( after, see above too
		p.commit()
	}
}
  1031  
  1032  // TODO: inKey -> p.inKey (means I have to restore though)
  1033  func (p *parser) parseString(inKey bool) d2ast.StringBox {
  1034  	var box d2ast.StringBox
  1035  
  1036  	r, newlines, eof := p.peekNotSpace()
  1037  	if eof || newlines > 0 {
  1038  		p.rewind()
  1039  		return box
  1040  	}
  1041  	p.commit()
  1042  
  1043  	switch r {
  1044  	case '"':
  1045  		box.DoubleQuotedString = p.parseDoubleQuotedString(inKey)
  1046  		return box
  1047  	case '\'':
  1048  		box.SingleQuotedString = p.parseSingleQuotedString()
  1049  		return box
  1050  	case '|':
  1051  		box.BlockString = p.parseBlockString()
  1052  		return box
  1053  	default:
  1054  		p.replay(r)
  1055  		box.UnquotedString = p.parseUnquotedString(inKey)
  1056  		return box
  1057  	}
  1058  }
  1059  
// parseUnquotedString parses an unquoted string beginning at the current
// parser position.
//
// When inKey is true the string is being parsed as part of a key: the key
// delimiters ':', '.', '<', '>', '&' and the edge arrows "--", "->", "-*" end
// the string, and every '*' is recorded as a glob segment in s.Pattern. When
// inKey is false, '$' may introduce a substitution that is stored as its own
// interpolation box in s.Value.
//
// In both modes '\n', ';', '#', '{', '}', '[' and ']' end the string, and a
// backslash escapes the following rune (a backslash followed by a newline is
// a line continuation).
//
// The result is set to nil by the deferred finalizer when the parsed text is
// empty after trimming trailing whitespace and no interpolation boxes were
// collected.
func (p *parser) parseUnquotedString(inKey bool) (s *d2ast.UnquotedString) {
	s = &d2ast.UnquotedString{
		Range: d2ast.Range{
			Path:  p.path,
			Start: p.pos,
		},
	}
	// TODO: fix unquoted end whitespace handling to peekNotSpace
	// The range ends after the last non-space rune, so trailing whitespace is
	// excluded from the reported range.
	lastNonSpace := p.pos
	defer s.Range.End.From(&lastNonSpace)

	// sb accumulates the decoded text, rawb the text as written in the source
	// (escape sequences kept verbatim).
	var sb strings.Builder
	var rawb strings.Builder
	// lastPatternIndex is the offset into sb just past the most recent '*'.
	lastPatternIndex := 0
	// Finalizer: trims trailing whitespace, completes s.Pattern with the text
	// after the last '*', and appends the remaining text as the final box.
	defer func() {
		sv := strings.TrimRightFunc(sb.String(), unicode.IsSpace)
		rawv := strings.TrimRightFunc(rawb.String(), unicode.IsSpace)
		if s.Pattern != nil {
			if lastPatternIndex < len(sv) {
				s.Pattern = append(s.Pattern, sv[lastPatternIndex:])
			}
		}
		if sv == "" {
			if len(s.Value) > 0 {
				// Boxes were already collected (e.g. a substitution), so the
				// string is kept even though there is no trailing text.
				return
			}
			// Nothing was parsed at all: report that through a nil result.
			s = nil
			// TODO: this should be in the parent and instead they check the delimiters first
			//       or last really. only in parseMapNode && parseArrayNode
			// TODO: give specific descriptions for each kind of special character that could have caused this.
			return
		}
		s.Value = append(s.Value, d2ast.InterpolationBox{String: &sv, StringRaw: &rawv})
	}()

	// Look ahead four runes (without consuming them) to reject a leading
	// "...@", which is reserved for import spreads.
	_s, eof := p.peekn(4)
	p.rewind()
	if !eof {
		if _s == "...@" {
			p.errorf(p.pos, p.pos.AdvanceString("...@", p.utf16Pos), "unquoted strings cannot begin with ...@ as that's import spread syntax")
		}
	}

	for {
		r, eof := p.peek()
		if eof {
			return s
		}

		if p.inEdgeGroup && r == ')' {
			// Inside an edge group a ')' only belongs to the string when more
			// text follows on the same line; otherwise it is left unconsumed
			// for the caller.
			// TODO: need a peekNotSpace across escaped newlines
			r2, newlines, eof := p.peekNotSpace()
			if eof || newlines > 0 {
				p.rewind()
				return s
			}
			switch r2 {
			case '\n', '#', '{', '}', '[', ']', ':', '.':
				p.rewind()
				return s
			}
			// Drop the lookahead, then re-peek and consume just the ')'.
			p.rewind()
			p.peek()
			p.commit()
			lastNonSpace = p.pos
			sb.WriteRune(r)
			rawb.WriteRune(r)
			continue
		}

		// top:   '\n', '#', '{', '}', '[', ']'
		// keys:  ':', '.'
		// edges: '<', '>', '(', ')',
		// edges: --, ->, -*, *-
		switch r {
		case '\n', ';', '#', '{', '}', '[', ']':
			p.rewind()
			return s
		}
		if inKey {
			switch r {
			case ':', '.', '<', '>', '&':
				p.rewind()
				return s
			case '-':
				// A '-' ends the key only when it starts an edge arrow, so the
				// next rune decides.
				// TODO: need a peekNotSpace across escaped newlines
				r2, eof := p.peek()
				if eof {
					return s
				}
				switch r2 {
				case '\n', ';', '#', '{', '}', '[', ']':
					// The '-' is the last rune of the string: consume only it
					// and leave the terminator for the caller.
					p.rewind()
					p.peek()
					p.commit()
					sb.WriteRune(r)
					rawb.WriteRune(r)
					return s
				}
				if r2 == '-' || r2 == '>' || r2 == '*' {
					p.rewind()
					return s
				}
				// Plain '-' inside the key: record it and continue processing
				// with the rune that followed it.
				sb.WriteRune(r)
				rawb.WriteRune(r)
				r = r2
			case '*':
				// Glob: record the literal text since the previous '*' (if
				// any) followed by the '*' itself.
				if sb.Len() == 0 {
					s.Pattern = append(s.Pattern, "*")
				} else {
					s.Pattern = append(s.Pattern, sb.String()[lastPatternIndex:], "*")
				}
				// +1 accounts for the '*' that is written to sb further down.
				lastPatternIndex = len(sb.String()) + 1
			}
		}

		p.commit()

		if !unicode.IsSpace(r) {
			lastNonSpace = p.pos
		}

		if !inKey && r == '$' {
			subst := p.parseSubstitution(false)
			if subst != nil {
				// Flush the text collected so far into its own box so that the
				// substitution keeps its place in the sequence.
				if sb.Len() > 0 {
					sv := sb.String()
					rawv := rawb.String()
					s.Value = append(s.Value, d2ast.InterpolationBox{String: &sv, StringRaw: &rawv})
					sb.Reset()
					rawb.Reset()
				}
				s.Value = append(s.Value, d2ast.InterpolationBox{Substitution: subst})
				continue
			}
			// No substitution could be parsed; the '$' itself is not added to
			// the string.
			continue
		}

		if r != '\\' {
			sb.WriteRune(r)
			rawb.WriteRune(r)
			continue
		}

		// r is a backslash: read the escaped rune.
		r2, eof := p.read()
		if eof {
			p.errorf(p.pos.Subtract('\\', p.utf16Pos), p.readerPos, "unfinished escape sequence")
			return s
		}

		if r2 == '\n' {
			// Escaped newline: the string continues on the next line after
			// its leading whitespace, unless that line is blank or missing.
			r, newlines, eof := p.peekNotSpace()
			if eof || newlines > 0 {
				p.rewind()
				return s
			}
			p.commit()
			p.replay(r)
			continue
		}

		// The decoded rune goes to sb; rawb keeps the escape as written.
		sb.WriteRune(decodeEscape(r2))
		rawb.WriteByte('\\')
		rawb.WriteRune(r2)
	}
}
  1226  
  1227  // https://go.dev/ref/spec#Rune_literals
  1228  // TODO: implement all Go escapes like the unicode ones
  1229  func decodeEscape(r2 rune) rune {
  1230  	switch r2 {
  1231  	case 'a':
  1232  		return '\a'
  1233  	case 'b':
  1234  		return '\b'
  1235  	case 'f':
  1236  		return '\f'
  1237  	case 'n':
  1238  		return '\n'
  1239  	case 'r':
  1240  		return '\r'
  1241  	case 't':
  1242  		return '\t'
  1243  	case 'v':
  1244  		return '\v'
  1245  	case '\\':
  1246  		return '\\'
  1247  	case '"':
  1248  		return '"'
  1249  	default:
  1250  		return r2
  1251  	}
  1252  }
  1253  
  1254  func (p *parser) parseDoubleQuotedString(inKey bool) *d2ast.DoubleQuotedString {
  1255  	s := &d2ast.DoubleQuotedString{
  1256  		Range: d2ast.Range{
  1257  			Path:  p.path,
  1258  			Start: p.pos.Subtract('"', p.utf16Pos),
  1259  		},
  1260  	}
  1261  	defer s.Range.End.From(&p.pos)
  1262  
  1263  	var sb strings.Builder
  1264  	var rawb strings.Builder
  1265  	defer func() {
  1266  		if sb.Len() > 0 {
  1267  			sv := sb.String()
  1268  			rawv := rawb.String()
  1269  			s.Value = append(s.Value, d2ast.InterpolationBox{String: &sv, StringRaw: &rawv})
  1270  		}
  1271  	}()
  1272  
  1273  	for {
  1274  		r, eof := p.peek()
  1275  		if eof {
  1276  			p.errorf(s.Range.Start, p.readerPos, `double quoted strings must be terminated with "`)
  1277  			return s
  1278  		}
  1279  		if r == '\n' {
  1280  			p.rewind()
  1281  			p.errorf(s.Range.Start, p.pos, `double quoted strings must be terminated with "`)
  1282  			return s
  1283  		}
  1284  
  1285  		p.commit()
  1286  		if !inKey && r == '$' {
  1287  			subst := p.parseSubstitution(false)
  1288  			if subst != nil {
  1289  				if sb.Len() > 0 {
  1290  					s.Value = append(s.Value, d2ast.InterpolationBox{String: go2.Pointer(sb.String())})
  1291  					sb.Reset()
  1292  				}
  1293  				s.Value = append(s.Value, d2ast.InterpolationBox{Substitution: subst})
  1294  				continue
  1295  			}
  1296  		}
  1297  
  1298  		if r == '"' {
  1299  			return s
  1300  		}
  1301  
  1302  		if r != '\\' {
  1303  			sb.WriteRune(r)
  1304  			rawb.WriteRune(r)
  1305  			continue
  1306  		}
  1307  
  1308  		r2, eof := p.read()
  1309  		if eof {
  1310  			p.errorf(p.pos.Subtract('\\', p.utf16Pos), p.readerPos, "unfinished escape sequence")
  1311  			p.errorf(s.Range.Start, p.readerPos, `double quoted strings must be terminated with "`)
  1312  			return s
  1313  		}
  1314  
  1315  		if r2 == '\n' {
  1316  			// TODO: deindent
  1317  			continue
  1318  		}
  1319  		sb.WriteRune(decodeEscape(r2))
  1320  		rawb.WriteByte('\\')
  1321  		rawb.WriteRune(r2)
  1322  	}
  1323  }
  1324  
  1325  func (p *parser) parseSingleQuotedString() *d2ast.SingleQuotedString {
  1326  	s := &d2ast.SingleQuotedString{
  1327  		Range: d2ast.Range{
  1328  			Path:  p.path,
  1329  			Start: p.pos.Subtract('\'', p.utf16Pos),
  1330  		},
  1331  	}
  1332  	defer s.Range.End.From(&p.pos)
  1333  
  1334  	var sb strings.Builder
  1335  	defer func() {
  1336  		s.Value = sb.String()
  1337  	}()
  1338  
  1339  	for {
  1340  		r, eof := p.peek()
  1341  		if eof {
  1342  			p.errorf(s.Range.Start, p.readerPos, `single quoted strings must be terminated with '`)
  1343  			return s
  1344  		}
  1345  		if r == '\n' {
  1346  			p.rewind()
  1347  			p.errorf(s.Range.Start, p.pos, `single quoted strings must be terminated with '`)
  1348  			return s
  1349  		}
  1350  		p.commit()
  1351  
  1352  		if r == '\'' {
  1353  			r, eof = p.peek()
  1354  			if eof {
  1355  				return s
  1356  			}
  1357  			if r == '\'' {
  1358  				p.commit()
  1359  				sb.WriteByte('\'')
  1360  				continue
  1361  			}
  1362  			p.rewind()
  1363  			return s
  1364  		}
  1365  
  1366  		if r != '\\' {
  1367  			sb.WriteRune(r)
  1368  			continue
  1369  		}
  1370  
  1371  		r2, eof := p.peek()
  1372  		if eof {
  1373  			continue
  1374  		}
  1375  
  1376  		switch r2 {
  1377  		case '\n':
  1378  			p.commit()
  1379  			continue
  1380  		default:
  1381  			sb.WriteRune(r)
  1382  			p.rewind()
  1383  		}
  1384  	}
  1385  }
  1386  
  1387  func (p *parser) parseBlockString() *d2ast.BlockString {
  1388  	bs := &d2ast.BlockString{
  1389  		Range: d2ast.Range{
  1390  			Path:  p.path,
  1391  			Start: p.pos.Subtract('|', p.utf16Pos),
  1392  		},
  1393  	}
  1394  	defer bs.Range.End.From(&p.pos)
  1395  
  1396  	p.depth++
  1397  	defer dec(&p.depth)
  1398  
  1399  	var sb strings.Builder
  1400  	defer func() {
  1401  		bs.Value = trimSpaceAfterLastNewline(sb.String())
  1402  		bs.Value = trimCommonIndent(bs.Value)
  1403  	}()
  1404  
  1405  	// Do we have more symbol quotes?
  1406  	bs.Quote = ""
  1407  	for {
  1408  		r, eof := p.peek()
  1409  		if eof {
  1410  			p.errorf(bs.Range.Start, p.readerPos, `block string must be terminated with %v`, bs.Quote+"|")
  1411  			return bs
  1412  		}
  1413  
  1414  		if unicode.IsSpace(r) || unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' {
  1415  			p.rewind()
  1416  			break
  1417  		}
  1418  		p.commit()
  1419  		bs.Quote += string(r)
  1420  	}
  1421  
  1422  	// Do we have a tag?
  1423  	for {
  1424  		r, eof := p.peek()
  1425  		if eof {
  1426  			p.errorf(bs.Range.Start, p.readerPos, `block string must be terminated with %v`, bs.Quote+"|")
  1427  			return bs
  1428  		}
  1429  
  1430  		if unicode.IsSpace(r) {
  1431  			p.rewind()
  1432  			break
  1433  		}
  1434  		p.commit()
  1435  		bs.Tag += string(r)
  1436  	}
  1437  	if bs.Tag == "" {
  1438  		// TODO: no and fix compiler to not set text/markdown shape always.
  1439  		//       reason being not all multiline text is markdown by default.
  1440  		//       for example markdown edge labels or other random text.
  1441  		//       maybe we can be smart about this at some point and only set
  1442  		//       if the block string is being interpreted as markdown.
  1443  		bs.Tag = "md"
  1444  	}
  1445  
  1446  	// Skip non newline whitespace.
  1447  	for {
  1448  		r, eof := p.peek()
  1449  		if eof {
  1450  			p.errorf(bs.Range.Start, p.readerPos, `block string must be terminated with %v`, bs.Quote+"|")
  1451  			return bs
  1452  		}
  1453  		if !unicode.IsSpace(r) {
  1454  			// Non whitespace characters on the first line have an implicit indent.
  1455  			sb.WriteString(p.getIndent())
  1456  			p.rewind()
  1457  			break
  1458  		}
  1459  		p.commit()
  1460  		if r == '\n' {
  1461  			break
  1462  		}
  1463  	}
  1464  
  1465  	endHint := '|'
  1466  	endRest := ""
  1467  	if len(bs.Quote) > 0 {
  1468  		var size int
  1469  		endHint, size = utf8.DecodeLastRuneInString(bs.Quote)
  1470  		endRest = bs.Quote[size:] + "|"
  1471  	}
  1472  
  1473  	for {
  1474  		r, eof := p.read()
  1475  		if eof {
  1476  			p.errorf(bs.Range.Start, p.readerPos, `block string must be terminated with %v`, bs.Quote+"|")
  1477  			return bs
  1478  		}
  1479  
  1480  		if r != endHint {
  1481  			sb.WriteRune(r)
  1482  			continue
  1483  		}
  1484  
  1485  		s, eof := p.peekn(len(endRest))
  1486  		if eof {
  1487  			p.errorf(bs.Range.Start, p.readerPos, `block string must be terminated with %v`, bs.Quote+"|")
  1488  			return bs
  1489  		}
  1490  		if s != endRest {
  1491  			sb.WriteRune(endHint)
  1492  			p.rewind()
  1493  			continue
  1494  		}
  1495  		p.commit()
  1496  		return bs
  1497  	}
  1498  }
  1499  
  1500  func (p *parser) parseArray() *d2ast.Array {
  1501  	a := &d2ast.Array{
  1502  		Range: d2ast.Range{
  1503  			Path:  p.path,
  1504  			Start: p.pos.Subtract('[', p.utf16Pos),
  1505  		},
  1506  	}
  1507  	defer a.Range.End.From(&p.readerPos)
  1508  
  1509  	p.depth++
  1510  	defer dec(&p.depth)
  1511  
  1512  	for {
  1513  		r, eof := p.readNotSpace()
  1514  		if eof {
  1515  			p.errorf(a.Range.Start, p.readerPos, "arrays must be terminated with ]")
  1516  			return a
  1517  		}
  1518  
  1519  		switch r {
  1520  		case ';':
  1521  			continue
  1522  		case ']':
  1523  			return a
  1524  		}
  1525  
  1526  		n := p.parseArrayNode(r)
  1527  		if n.Unbox() != nil {
  1528  			a.Nodes = append(a.Nodes, n)
  1529  		}
  1530  
  1531  		if n.BlockComment != nil {
  1532  			// Anything after a block comment is ok.
  1533  			continue
  1534  		}
  1535  
  1536  		after := p.pos
  1537  		for {
  1538  			r, newlines, eof := p.peekNotSpace()
  1539  			if eof || newlines != 0 || r == ';' || r == ']' || r == '#' {
  1540  				p.rewind()
  1541  				break
  1542  			}
  1543  			p.commit()
  1544  		}
  1545  
  1546  		if after != p.pos {
  1547  			if n.Unbox() != nil {
  1548  				p.errorf(after, p.pos, "unexpected text after %v", n.Unbox().Type())
  1549  			} else {
  1550  				p.errorf(after, p.pos, "invalid text beginning unquoted string")
  1551  			}
  1552  		}
  1553  	}
  1554  }
  1555  
  1556  func (p *parser) parseArrayNode(r rune) d2ast.ArrayNodeBox {
  1557  	var box d2ast.ArrayNodeBox
  1558  
  1559  	switch r {
  1560  	case '#':
  1561  		box.Comment = p.parseComment()
  1562  		return box
  1563  	case '"':
  1564  		s, eof := p.peekn(2)
  1565  		if eof {
  1566  			break
  1567  		}
  1568  		if s != `""` {
  1569  			p.rewind()
  1570  			break
  1571  		}
  1572  		p.commit()
  1573  		box.BlockComment = p.parseBlockComment()
  1574  		return box
  1575  	case '.':
  1576  		s, eof := p.peekn(2)
  1577  		if eof {
  1578  			break
  1579  		}
  1580  		if s != ".." {
  1581  			p.rewind()
  1582  			break
  1583  		}
  1584  		r, eof := p.peek()
  1585  		if eof {
  1586  			break
  1587  		}
  1588  		if r == '$' {
  1589  			p.commit()
  1590  			box.Substitution = p.parseSubstitution(true)
  1591  			return box
  1592  		}
  1593  		if r == '@' {
  1594  			p.commit()
  1595  			box.Import = p.parseImport(true)
  1596  			return box
  1597  		}
  1598  		p.rewind()
  1599  		break
  1600  	}
  1601  
  1602  	p.replay(r)
  1603  	vbox := p.parseValue()
  1604  	if vbox.UnquotedString != nil && vbox.UnquotedString.ScalarString() == "" &&
  1605  		!(len(vbox.UnquotedString.Value) > 0 && vbox.UnquotedString.Value[0].Substitution != nil) {
  1606  		p.errorf(p.pos, p.pos.Advance(r, p.utf16Pos), "unquoted strings cannot start on %q", r)
  1607  	}
  1608  	box.Null = vbox.Null
  1609  	box.Boolean = vbox.Boolean
  1610  	box.Number = vbox.Number
  1611  	box.UnquotedString = vbox.UnquotedString
  1612  	box.DoubleQuotedString = vbox.DoubleQuotedString
  1613  	box.SingleQuotedString = vbox.SingleQuotedString
  1614  	box.BlockString = vbox.BlockString
  1615  	box.Array = vbox.Array
  1616  	box.Map = vbox.Map
  1617  	box.Import = vbox.Import
  1618  	return box
  1619  }
  1620  
  1621  func (p *parser) parseValue() d2ast.ValueBox {
  1622  	var box d2ast.ValueBox
  1623  
  1624  	r, newlines, eof := p.peekNotSpace()
  1625  	if eof || newlines > 0 {
  1626  		p.rewind()
  1627  		return box
  1628  	}
  1629  	p.commit()
  1630  
  1631  	switch r {
  1632  	case '[':
  1633  		box.Array = p.parseArray()
  1634  		return box
  1635  	case '{':
  1636  		box.Map = p.parseMap(false)
  1637  		return box
  1638  	case '@':
  1639  		box.Import = p.parseImport(false)
  1640  		return box
  1641  	}
  1642  
  1643  	p.replay(r)
  1644  	sb := p.parseString(false)
  1645  	if sb.DoubleQuotedString != nil {
  1646  		box.DoubleQuotedString = sb.DoubleQuotedString
  1647  		return box
  1648  	}
  1649  	if sb.SingleQuotedString != nil {
  1650  		box.SingleQuotedString = sb.SingleQuotedString
  1651  		return box
  1652  	}
  1653  	if sb.BlockString != nil {
  1654  		box.BlockString = sb.BlockString
  1655  		return box
  1656  	}
  1657  
  1658  	if sb.UnquotedString == nil {
  1659  		return box
  1660  	}
  1661  
  1662  	s := sb.UnquotedString
  1663  	if strings.EqualFold(s.ScalarString(), "null") {
  1664  		box.Null = &d2ast.Null{
  1665  			Range: s.Range,
  1666  		}
  1667  		return box
  1668  	}
  1669  
  1670  	if strings.EqualFold(s.ScalarString(), "true") {
  1671  		box.Boolean = &d2ast.Boolean{
  1672  			Range: s.Range,
  1673  			Value: true,
  1674  		}
  1675  		return box
  1676  	}
  1677  
  1678  	if strings.EqualFold(s.ScalarString(), "false") {
  1679  		box.Boolean = &d2ast.Boolean{
  1680  			Range: s.Range,
  1681  			Value: false,
  1682  		}
  1683  		return box
  1684  	}
  1685  
  1686  	// TODO: only if matches regex
  1687  	rat, ok := big.NewRat(0, 1).SetString(s.ScalarString())
  1688  	if ok {
  1689  		box.Number = &d2ast.Number{
  1690  			Range: s.Range,
  1691  			Raw:   s.ScalarString(),
  1692  			Value: rat,
  1693  		}
  1694  		return box
  1695  	}
  1696  
  1697  	box.UnquotedString = s
  1698  	return box
  1699  }
  1700  
  1701  func (p *parser) parseSubstitution(spread bool) *d2ast.Substitution {
  1702  	subst := &d2ast.Substitution{
  1703  		Range: d2ast.Range{
  1704  			Path:  p.path,
  1705  			Start: p.pos.SubtractString("$", p.utf16Pos),
  1706  		},
  1707  		Spread: spread,
  1708  	}
  1709  	defer subst.Range.End.From(&p.pos)
  1710  
  1711  	if subst.Spread {
  1712  		subst.Range.Start = subst.Range.Start.SubtractString("...", p.utf16Pos)
  1713  	}
  1714  
  1715  	r, newlines, eof := p.peekNotSpace()
  1716  	if eof {
  1717  		return nil
  1718  	}
  1719  	if newlines > 0 {
  1720  		p.rewind()
  1721  		return nil
  1722  	}
  1723  	if r != '{' {
  1724  		p.rewind()
  1725  		p.errorf(subst.Range.Start, p.readerPos, "substitutions must begin on {")
  1726  		return nil
  1727  	} else {
  1728  		p.commit()
  1729  	}
  1730  
  1731  	k := p.parseKey()
  1732  	if k != nil {
  1733  		subst.Path = k.Path
  1734  	}
  1735  
  1736  	r, newlines, eof = p.peekNotSpace()
  1737  	if eof {
  1738  		p.errorf(subst.Range.Start, p.readerPos, "substitutions must be terminated by }")
  1739  		return subst
  1740  	}
  1741  	if newlines > 0 || r != '}' {
  1742  		p.rewind()
  1743  		p.errorf(subst.Range.Start, p.pos, "substitutions must be terminated by }")
  1744  		return subst
  1745  	}
  1746  	p.commit()
  1747  
  1748  	return subst
  1749  }
  1750  
  1751  func (p *parser) parseImport(spread bool) *d2ast.Import {
  1752  	imp := &d2ast.Import{
  1753  		Range: d2ast.Range{
  1754  			Path:  p.path,
  1755  			Start: p.pos.SubtractString("$", p.utf16Pos),
  1756  		},
  1757  		Spread: spread,
  1758  	}
  1759  	defer imp.Range.End.From(&p.pos)
  1760  
  1761  	if imp.Spread {
  1762  		imp.Range.Start = imp.Range.Start.SubtractString("...", p.utf16Pos)
  1763  	}
  1764  
  1765  	var pre strings.Builder
  1766  	for {
  1767  		r, eof := p.peek()
  1768  		if eof {
  1769  			break
  1770  		}
  1771  		if r != '.' && r != '/' {
  1772  			p.rewind()
  1773  			break
  1774  		}
  1775  		pre.WriteRune(r)
  1776  		p.commit()
  1777  	}
  1778  	imp.Pre = pre.String()
  1779  
  1780  	k := p.parseKey()
  1781  	if k == nil {
  1782  		return imp
  1783  	}
  1784  	if k.Path[0].UnquotedString != nil && len(k.Path) > 1 && k.Path[1].UnquotedString != nil && k.Path[1].Unbox().ScalarString() == "d2" {
  1785  		k.Path = append(k.Path[:1], k.Path[2:]...)
  1786  	}
  1787  	imp.Path = k.Path
  1788  	return imp
  1789  }
  1790  
  1791  // func marshalKey(k *d2ast.Key) string {
  1792  // 	var sb strings.Builder
  1793  // 	for i, s := range k.Path {
  1794  // 		// TODO: Need to encode specials and quotes.
  1795  // 		sb.WriteString(s.Unbox().ScalarString())
  1796  // 		if i < len(k.Path)-1 {
  1797  // 			sb.WriteByte('.')
  1798  // 		}
  1799  // 	}
  1800  // 	return sb.String()
  1801  // }
  1802  
  1803  func dec(i *int) {
  1804  	*i -= 1
  1805  }
  1806  
  1807  func (p *parser) getIndent() string {
  1808  	return strings.Repeat(" ", p.depth*2)
  1809  }
  1810  
  1811  func trimIndent(s, indent string) string {
  1812  	lines := strings.Split(s, "\n")
  1813  	for i, l := range lines {
  1814  		if l == "" {
  1815  			continue
  1816  		}
  1817  		_, l = splitLeadingIndent(l, len(indent))
  1818  		lines[i] = l
  1819  	}
  1820  	return strings.Join(lines, "\n")
  1821  }
  1822  
  1823  func trimCommonIndent(s string) string {
  1824  	commonIndent := ""
  1825  	for _, l := range strings.Split(s, "\n") {
  1826  		if l == "" {
  1827  			continue
  1828  		}
  1829  		lineIndent, l := splitLeadingIndent(l, -1)
  1830  		if lineIndent == "" {
  1831  			// No common indent return as is.
  1832  			return s
  1833  		}
  1834  		if l == "" {
  1835  			// Whitespace only line.
  1836  			continue
  1837  		}
  1838  		if commonIndent == "" || len(lineIndent) < len(commonIndent) {
  1839  			commonIndent = lineIndent
  1840  		}
  1841  	}
  1842  	if commonIndent == "" {
  1843  		return s
  1844  	}
  1845  	return trimIndent(s, commonIndent)
  1846  }
  1847  
  1848  func splitLeadingIndent(s string, maxSpaces int) (indent, rets string) {
  1849  	var indentb strings.Builder
  1850  	i := 0
  1851  	for _, r := range s {
  1852  		if !unicode.IsSpace(r) {
  1853  			break
  1854  		}
  1855  		i++
  1856  		if r != '\t' {
  1857  			indentb.WriteRune(r)
  1858  		} else {
  1859  			indentb.WriteByte(' ')
  1860  			indentb.WriteByte(' ')
  1861  		}
  1862  		if maxSpaces > -1 && indentb.Len() == maxSpaces {
  1863  			break
  1864  		}
  1865  	}
  1866  	return indentb.String(), s[i:]
  1867  }
  1868
View as plain text