
Source file src/github.com/alecthomas/chroma/delegate.go

Documentation: github.com/alecthomas/chroma

package chroma

import (
	"bytes"
)

type delegatingLexer struct {
	root     Lexer
	language Lexer
}

// DelegatingLexer combines two lexers to handle the common case of a language embedded inside another, such as PHP
// inside HTML or PHP inside plain text.
//
// It takes two lexers as arguments: a root lexer and a language lexer. First, everything is scanned using the language
// lexer, which must return "Other" for unrecognised tokens. Then all "Other" tokens are lexed using the root lexer.
// Finally, these two sets of tokens are merged.
//
// The lexers from the template lexer package use this base lexer.
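//
// A hypothetical usage sketch, not part of this file: lexers.Get and the
// "html"/"php" lexer names are assumed from the chroma/lexers subpackage.
//
//	phpInHTML := chroma.DelegatingLexer(lexers.Get("html"), lexers.Get("php"))
//	iterator, err := phpInHTML.Tokenise(nil, source)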
func DelegatingLexer(root Lexer, language Lexer) Lexer {
	return &delegatingLexer{
		root:     root,
		language: language,
	}
}

func (d *delegatingLexer) Config() *Config {
	return d.language.Config()
}

// An insertion is the character range where language tokens should be inserted.
type insertion struct {
	start, end int
	tokens     []Token
}

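// Tokenise runs the language lexer over the text first, recording where its
// non-Other tokens fall, then lexes the remaining "Other" text with the root
// lexer and splices the two token streams back together.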
func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) { // nolint: gocognit
	tokens, err := Tokenise(Coalesce(d.language), options, text)
	if err != nil {
		return nil, err
	}
	// Compute insertions and gather "Other" tokens.
	others := &bytes.Buffer{}
	insertions := []*insertion{}
	var insert *insertion
	offset := 0
	var last Token
	for _, t := range tokens {
		if t.Type == Other {
			if last != EOF && insert != nil && last.Type != Other {
				insert.end = offset
			}
			others.WriteString(t.Value)
		} else {
			if last == EOF || last.Type == Other {
				insert = &insertion{start: offset}
				insertions = append(insertions, insert)
			}
			insert.tokens = append(insert.tokens, t)
		}
		last = t
		offset += len(t.Value)
	}

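	// No non-Other tokens were produced, so the root lexer can process the
	// text directly.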
	if len(insertions) == 0 {
		return d.root.Tokenise(options, text)
	}

	// Lex the other tokens.
	rootTokens, err := Tokenise(Coalesce(d.root), options, others.String())
	if err != nil {
		return nil, err
	}

	// Interleave the two sets of tokens.
	var out []Token
	offset = 0 // Offset into text.
	tokenIndex := 0
	nextToken := func() Token {
		if tokenIndex >= len(rootTokens) {
			return EOF
		}
		t := rootTokens[tokenIndex]
		tokenIndex++
		return t
	}
	insertionIndex := 0
	nextInsertion := func() *insertion {
		if insertionIndex >= len(insertions) {
			return nil
		}
		i := insertions[insertionIndex]
		insertionIndex++
		return i
	}
	t := nextToken()
	i := nextInsertion()
	for t != EOF || i != nil {
		// fmt.Printf("%d->%d:%q   %d->%d:%q\n", offset, offset+len(t.Value), t.Value, i.start, i.end, Stringify(i.tokens...))
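		// Either the root tokens are exhausted, or the next insertion starts
		// inside the current root token: emit the part of the root token that
		// precedes the insertion, then the buffered language tokens.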
		if t == EOF || (i != nil && i.start < offset+len(t.Value)) {
			var l Token
			l, t = splitToken(t, i.start-offset)
			if l != EOF {
				out = append(out, l)
				offset += len(l.Value)
			}
			out = append(out, i.tokens...)
			offset += i.end - i.start
			if t == EOF {
				t = nextToken()
			}
			i = nextInsertion()
		} else {
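			// No insertion falls inside this root token; emit it unchanged.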
			out = append(out, t)
			offset += len(t.Value)
			t = nextToken()
		}
	}
	return Literator(out...), nil
}

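// splitToken splits t at the given byte offset, returning the left and right
// halves. Either half is EOF when the offset falls on a token boundary, and
// splitting EOF yields EOF on both sides.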
func splitToken(t Token, offset int) (l Token, r Token) {
	if t == EOF {
		return EOF, EOF
	}
	if offset == 0 {
		return EOF, t
	}
	if offset == len(t.Value) {
		return t, EOF
	}
	l = t.Clone()
	r = t.Clone()
	l.Value = l.Value[:offset]
	r.Value = r.Value[offset:]
	return
}
