...

Source file src/github.com/alecthomas/chroma/v2/emitters.go

Documentation: github.com/alecthomas/chroma/v2

     1  package chroma
     2  
     3  import (
     4  	"fmt"
     5  )
     6  
     7  // An Emitter takes group matches and returns tokens.
     8  type Emitter interface {
     9  	// Emit tokens for the given regex groups.
    10  	Emit(groups []string, state *LexerState) Iterator
    11  }
    12  
    13  // SerialisableEmitter is an Emitter that can be serialised and deserialised to/from JSON.
    14  type SerialisableEmitter interface {
    15  	Emitter
    16  	EmitterKind() string
    17  }
    18  
    19  // EmitterFunc is a function that is an Emitter.
    20  type EmitterFunc func(groups []string, state *LexerState) Iterator
    21  
    22  // Emit tokens for groups.
    23  func (e EmitterFunc) Emit(groups []string, state *LexerState) Iterator {
    24  	return e(groups, state)
    25  }
    26  
    27  type Emitters []Emitter
    28  
    29  type byGroupsEmitter struct {
    30  	Emitters
    31  }
    32  
    33  // ByGroups emits a token for each matching group in the rule's regex.
    34  func ByGroups(emitters ...Emitter) Emitter {
    35  	return &byGroupsEmitter{Emitters: emitters}
    36  }
    37  
    38  func (b *byGroupsEmitter) EmitterKind() string { return "bygroups" }
    39  
    40  func (b *byGroupsEmitter) Emit(groups []string, state *LexerState) Iterator {
    41  	iterators := make([]Iterator, 0, len(groups)-1)
    42  	if len(b.Emitters) != len(groups)-1 {
    43  		iterators = append(iterators, Error.Emit(groups, state))
    44  		// panic(errors.Errorf("number of groups %q does not match number of emitters %v", groups, emitters))
    45  	} else {
    46  		for i, group := range groups[1:] {
    47  			if b.Emitters[i] != nil {
    48  				iterators = append(iterators, b.Emitters[i].Emit([]string{group}, state))
    49  			}
    50  		}
    51  	}
    52  	return Concaterator(iterators...)
    53  }
    54  
    55  // ByGroupNames emits a token for each named matching group in the rule's regex.
    56  func ByGroupNames(emitters map[string]Emitter) Emitter {
    57  	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
    58  		iterators := make([]Iterator, 0, len(state.NamedGroups)-1)
    59  		if len(state.NamedGroups)-1 == 0 {
    60  			if emitter, ok := emitters[`0`]; ok {
    61  				iterators = append(iterators, emitter.Emit(groups, state))
    62  			} else {
    63  				iterators = append(iterators, Error.Emit(groups, state))
    64  			}
    65  		} else {
    66  			ruleRegex := state.Rules[state.State][state.Rule].Regexp
    67  			for i := 1; i < len(state.NamedGroups); i++ {
    68  				groupName := ruleRegex.GroupNameFromNumber(i)
    69  				group := state.NamedGroups[groupName]
    70  				if emitter, ok := emitters[groupName]; ok {
    71  					if emitter != nil {
    72  						iterators = append(iterators, emitter.Emit([]string{group}, state))
    73  					}
    74  				} else {
    75  					iterators = append(iterators, Error.Emit([]string{group}, state))
    76  				}
    77  			}
    78  		}
    79  		return Concaterator(iterators...)
    80  	})
    81  }
    82  
    83  // UsingByGroup emits tokens for the matched groups in the regex using a
    84  // sublexer. Used when lexing code blocks where the name of a sublexer is
    85  // contained within the block, for example on a Markdown text block or SQL
    86  // language block.
    87  //
    88  // An attempt to load the sublexer will be made using the captured value from
    89  // the text of the matched sublexerNameGroup. If a sublexer matching the
    90  // sublexerNameGroup is available, then tokens for the matched codeGroup will
    91  // be emitted using the sublexer. Otherwise, if no sublexer is available, then
    92  // tokens will be emitted from the passed emitter.
    93  //
    94  // Example:
    95  //
    96  //	var Markdown = internal.Register(MustNewLexer(
    97  //		&Config{
    98  //			Name:      "markdown",
    99  //			Aliases:   []string{"md", "mkd"},
   100  //			Filenames: []string{"*.md", "*.mkd", "*.markdown"},
   101  //			MimeTypes: []string{"text/x-markdown"},
   102  //		},
   103  //		Rules{
   104  //			"root": {
   105  //				{"^(```)(\\w+)(\\n)([\\w\\W]*?)(^```$)",
   106  //					UsingByGroup(
   107  //						2, 4,
   108  //						String, String, String, Text, String,
   109  //					),
   110  //					nil,
   111  //				},
   112  //			},
   113  //		},
   114  //	))
   115  //
   116  // See the lexers/markdown.go for the complete example.
   117  //
   118  // Note: panic's if the number of emitters does not equal the number of matched
   119  // groups in the regex.
   120  func UsingByGroup(sublexerNameGroup, codeGroup int, emitters ...Emitter) Emitter {
   121  	return &usingByGroup{
   122  		SublexerNameGroup: sublexerNameGroup,
   123  		CodeGroup:         codeGroup,
   124  		Emitters:          emitters,
   125  	}
   126  }
   127  
   128  type usingByGroup struct {
   129  	SublexerNameGroup int      `xml:"sublexer_name_group"`
   130  	CodeGroup         int      `xml:"code_group"`
   131  	Emitters          Emitters `xml:"emitters"`
   132  }
   133  
   134  func (u *usingByGroup) EmitterKind() string { return "usingbygroup" }
   135  func (u *usingByGroup) Emit(groups []string, state *LexerState) Iterator {
   136  	// bounds check
   137  	if len(u.Emitters) != len(groups)-1 {
   138  		panic("UsingByGroup expects number of emitters to be the same as len(groups)-1")
   139  	}
   140  
   141  	// grab sublexer
   142  	sublexer := state.Registry.Get(groups[u.SublexerNameGroup])
   143  
   144  	// build iterators
   145  	iterators := make([]Iterator, len(groups)-1)
   146  	for i, group := range groups[1:] {
   147  		if i == u.CodeGroup-1 && sublexer != nil {
   148  			var err error
   149  			iterators[i], err = sublexer.Tokenise(nil, groups[u.CodeGroup])
   150  			if err != nil {
   151  				panic(err)
   152  			}
   153  		} else if u.Emitters[i] != nil {
   154  			iterators[i] = u.Emitters[i].Emit([]string{group}, state)
   155  		}
   156  	}
   157  	return Concaterator(iterators...)
   158  }
   159  
   160  // UsingLexer returns an Emitter that uses a given Lexer for parsing and emitting.
   161  //
   162  // This Emitter is not serialisable.
   163  func UsingLexer(lexer Lexer) Emitter {
   164  	return EmitterFunc(func(groups []string, _ *LexerState) Iterator {
   165  		it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
   166  		if err != nil {
   167  			panic(err)
   168  		}
   169  		return it
   170  	})
   171  }
   172  
   173  type usingEmitter struct {
   174  	Lexer string `xml:"lexer,attr"`
   175  }
   176  
   177  func (u *usingEmitter) EmitterKind() string { return "using" }
   178  
   179  func (u *usingEmitter) Emit(groups []string, state *LexerState) Iterator {
   180  	if state.Registry == nil {
   181  		panic(fmt.Sprintf("no LexerRegistry available for Using(%q)", u.Lexer))
   182  	}
   183  	lexer := state.Registry.Get(u.Lexer)
   184  	if lexer == nil {
   185  		panic(fmt.Sprintf("no such lexer %q", u.Lexer))
   186  	}
   187  	it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
   188  	if err != nil {
   189  		panic(err)
   190  	}
   191  	return it
   192  }
   193  
   194  // Using returns an Emitter that uses a given Lexer reference for parsing and emitting.
   195  //
   196  // The referenced lexer must be stored in the same LexerRegistry.
   197  func Using(lexer string) Emitter {
   198  	return &usingEmitter{Lexer: lexer}
   199  }
   200  
   201  type usingSelfEmitter struct {
   202  	State string `xml:"state,attr"`
   203  }
   204  
   205  func (u *usingSelfEmitter) EmitterKind() string { return "usingself" }
   206  
   207  func (u *usingSelfEmitter) Emit(groups []string, state *LexerState) Iterator {
   208  	it, err := state.Lexer.Tokenise(&TokeniseOptions{State: u.State, Nested: true}, groups[0])
   209  	if err != nil {
   210  		panic(err)
   211  	}
   212  	return it
   213  }
   214  
   215  // UsingSelf is like Using, but uses the current Lexer.
   216  func UsingSelf(stateName string) Emitter {
   217  	return &usingSelfEmitter{stateName}
   218  }
   219  

View as plain text