
Source file src/github.com/alecthomas/chroma/v2/lexer.go

package chroma

import (
	"fmt"
	"strings"
)

var (
	defaultOptions = &TokeniseOptions{
		State:    "root",
		EnsureLF: true,
	}
)

// Config for a lexer.
type Config struct {
	// Name of the lexer.
	Name string `xml:"name,omitempty"`

	// Shortcuts for the lexer.
	Aliases []string `xml:"alias,omitempty"`

	// File name globs.
	Filenames []string `xml:"filename,omitempty"`

	// Secondary file name globs.
	AliasFilenames []string `xml:"alias_filename,omitempty"`

	// MIME types.
	MimeTypes []string `xml:"mime_type,omitempty"`

	// Regex matching is case-insensitive.
	CaseInsensitive bool `xml:"case_insensitive,omitempty"`

	// Regex "." matches all characters, including newlines.
	DotAll bool `xml:"dot_all,omitempty"`

	// Regex does not match across lines ($ matches EOL).
	//
	// Defaults to multiline.
	NotMultiline bool `xml:"not_multiline,omitempty"`

	// Don't strip leading and trailing newlines from the input.
	// DontStripNL bool

	// Strip all leading and trailing whitespace from the input.
	// StripAll bool

	// Make sure that the input ends with a newline. This
	// is required for some lexers that consume input linewise.
	EnsureNL bool `xml:"ensure_nl,omitempty"`

	// If given and greater than 0, expand tabs in the input.
	// TabSize int

	// Priority of lexer.
	//
	// If this is 0 it will be treated as a default of 1.
	Priority float32 `xml:"priority,omitempty"`
}

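// Editor's example (not part of the original source): a minimal sketch of a
// hand-built Config for a hypothetical lexer. Every value below is an
// illustrative assumption, not taken from a real lexer definition.
func exampleConfig() *Config {
	return &Config{
		Name:      "MyLang",                  // hypothetical lexer name
		Aliases:   []string{"mylang", "ml"},  // hypothetical shortcuts
		Filenames: []string{"*.mylang"},      // hypothetical filename glob
		MimeTypes: []string{"text/x-mylang"}, // hypothetical MIME type
		Priority:  1,                         // 0 would also be treated as 1
	}
}
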
// Token output to formatter.
type Token struct {
	Type  TokenType `json:"type"`
	Value string    `json:"value"`
}

func (t *Token) String() string   { return t.Value }
func (t *Token) GoString() string { return fmt.Sprintf("&Token{%s, %q}", t.Type, t.Value) }

// Clone returns a clone of the Token.
func (t *Token) Clone() Token {
	return *t
}

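// Editor's example (not part of the original source): Token is a small value
// type, so Clone is a plain copy; mutating the clone leaves the original
// untouched.
func exampleTokenClone() {
	original := Token{Type: 0, Value: "let"} // zero TokenType, for brevity
	copied := original.Clone()
	copied.Value = "var"
	fmt.Println(original.GoString(), copied.GoString())
}
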
// EOF is returned by lexers at the end of input.
var EOF Token

// TokeniseOptions contains options for tokenisers.
type TokeniseOptions struct {
	// State to start tokenisation in. Defaults to "root".
	State string
	// Nested tokenisation.
	Nested bool

	// If true, all EOLs are converted into LF
	// by replacing CRLF and CR.
	EnsureLF bool
}

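// Editor's example (not part of the original source): options equivalent to
// the package's defaultOptions above. Passing nil options to a lexer's
// Tokenise is expected to fall back to exactly these defaults.
func exampleOptions() *TokeniseOptions {
	return &TokeniseOptions{
		State:    "root", // start in the root state, as the default does
		EnsureLF: true,   // normalise CRLF/CR line endings to LF
	}
}
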
// A Lexer for tokenising source code.
type Lexer interface {
	// Config describing the features of the Lexer.
	Config() *Config
	// Tokenise returns an Iterator over tokens in text.
	Tokenise(options *TokeniseOptions, text string) (Iterator, error)
	// SetRegistry sets the registry this Lexer is associated with.
	//
	// The registry should be used by the Lexer if it needs to look up other
	// lexers.
	SetRegistry(registry *LexerRegistry) Lexer
	// SetAnalyser sets a function the Lexer should use for scoring how
	// likely a fragment of text is to match this lexer, between 0.0 and 1.0.
	// A value of 1 indicates high confidence.
	//
	// Lexers may ignore this if they implement their own analysers.
	SetAnalyser(analyser func(text string) float32) Lexer
	// AnalyseText scores how likely a fragment of text is to match
	// this lexer, between 0.0 and 1.0. A value of 1 indicates high confidence.
	AnalyseText(text string) float32
}

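// Editor's example (not part of the original source): collecting every token
// from a Lexer. This sketch assumes Iterator is a func() Token that returns
// EOF once input is exhausted, as elsewhere in this package, and that nil
// options select the defaults.
func exampleTokenise(l Lexer, source string) ([]Token, error) {
	it, err := l.Tokenise(nil, source)
	if err != nil {
		return nil, err
	}
	var tokens []Token
	for t := it(); t != EOF; t = it() {
		tokens = append(tokens, t)
	}
	return tokens, nil
}
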
// Lexers is a slice of lexers sortable by name.
type Lexers []Lexer

func (l Lexers) Len() int      { return len(l) }
func (l Lexers) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l Lexers) Less(i, j int) bool {
	return strings.ToLower(l[i].Config().Name) < strings.ToLower(l[j].Config().Name)
}

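// Editor's note (not part of the original source): Lexers satisfies
// sort.Interface, so a lexer list can be ordered case-insensitively by name
// (assuming "sort" is added to the imports above):
//
//	sort.Sort(all) // all is a Lexers value; names compared via ToLower
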
// PrioritisedLexers is a slice of lexers sortable by priority, highest first.
type PrioritisedLexers []Lexer

func (l PrioritisedLexers) Len() int      { return len(l) }
func (l PrioritisedLexers) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l PrioritisedLexers) Less(i, j int) bool {
	ip := l[i].Config().Priority
	if ip == 0 {
		ip = 1
	}
	jp := l[j].Config().Priority
	if jp == 0 {
		jp = 1
	}
	return ip > jp
}

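// Editor's example (not part of the original source): the effective priority
// used by Less above; the zero value defaults to 1, so unset lexers compete
// at the default priority.
func effectivePriority(l Lexer) float32 {
	if p := l.Config().Priority; p != 0 {
		return p
	}
	return 1
}
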
// Analyser determines how appropriate this lexer is for the given text.
type Analyser interface {
	AnalyseText(text string) float32
}

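// Editor's example (not part of the original source): attaching a custom
// analyser to a lexer via SetAnalyser. The shebang check is a purely
// hypothetical heuristic.
func exampleAnalyser(l Lexer) Lexer {
	return l.SetAnalyser(func(text string) float32 {
		if strings.HasPrefix(text, "#!") { // hypothetical heuristic
			return 1.0
		}
		return 0
	})
}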
