
Source file src/github.com/alecthomas/chroma/lexer.go

Documentation: github.com/alecthomas/chroma

package chroma

import (
	"fmt"
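	"sort" // not in the original imports; used only by the example sketches below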
	"strings"
)

var (
	defaultOptions = &TokeniseOptions{
		State:    "root",
		EnsureLF: true,
	}
)

// Config for a lexer.
type Config struct {
	// Name of the lexer.
	Name string

	// Shortcuts for the lexer.
	Aliases []string

	// File name globs.
	Filenames []string

	// Secondary file name globs.
	AliasFilenames []string

	// MIME types.
	MimeTypes []string

	// Regex matching is case-insensitive.
	CaseInsensitive bool

	// Regex matches all characters.
	DotAll bool

	// Regex does not match across lines ($ matches EOL).
	//
	// Defaults to multiline.
	NotMultiline bool

	// Don't strip leading and trailing newlines from the input.
	// DontStripNL bool

	// Strip all leading and trailing whitespace from the input.
	// StripAll bool

	// Make sure that the input ends with a newline. This
	// is required for some lexers that consume input linewise.
	EnsureNL bool

	// If given and greater than 0, expand tabs in the input.
	// TabSize int

	// Priority of lexer.
	//
	// If this is 0, it will be treated as a default of 1.
	Priority float32
}
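
// exampleConfig is an illustrative sketch, not part of the original source.
// It shows a hand-built Config; every field value below is an assumption
// chosen for demonstration rather than a real registered lexer.
func exampleConfig() {
	cfg := &Config{
		Name:      "Example",
		Aliases:   []string{"ex", "example"},
		Filenames: []string{"*.ex"},
		MimeTypes: []string{"text/x-example"},
		Priority:  1, // 0 would be treated as 1 anyway
	}
	fmt.Printf("%s matches %v\n", cfg.Name, cfg.Filenames)
}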

// Token output to formatter.
type Token struct {
	Type  TokenType `json:"type"`
	Value string    `json:"value"`
}

func (t *Token) String() string   { return t.Value }
func (t *Token) GoString() string { return fmt.Sprintf("&Token{%s, %q}", t.Type, t.Value) }

// Clone returns a clone of the Token.
func (t *Token) Clone() Token {
	return *t
}
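
// exampleToken is an illustrative sketch, not part of the original source.
// Token is a plain value type, so Clone is simply a copy. The Keyword token
// type used here is assumed to be defined elsewhere in the package.
func exampleToken() {
	tok := Token{Type: Keyword, Value: "func"}
	dup := tok.Clone()
	fmt.Println(tok.GoString()) // &Token{Keyword, "func"}
	fmt.Println(dup.Value)      // func
}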

// EOF is returned by lexers at the end of input.
var EOF Token

// TokeniseOptions contains options for tokenisers.
type TokeniseOptions struct {
	// State to start tokenisation in. Defaults to "root".
	State string
	// Nested tokenisation.
	Nested bool

	// If true, all EOLs are converted into LF
	// by replacing CRLF and CR.
	EnsureLF bool
}
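
// exampleOptions is an illustrative sketch, not part of the original source.
// It mirrors defaultOptions above: tokenisation starts in the "root" state,
// and EnsureLF normalises CRLF/CR line endings to LF first.
func exampleOptions() *TokeniseOptions {
	return &TokeniseOptions{
		State:    "root",
		EnsureLF: true,
	}
}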

// A Lexer for tokenising source code.
type Lexer interface {
	// Config describing the features of the Lexer.
	Config() *Config
	// Tokenise returns an Iterator over tokens in text.
	Tokenise(options *TokeniseOptions, text string) (Iterator, error)
}
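
// printTokens is an illustrative sketch, not part of the original source.
// It drains the Iterator returned by Tokenise, stopping at the EOF token
// defined above. Passing nil options is assumed to fall back to
// defaultOptions, as the regexp-based lexer in this package does.
func printTokens(lexer Lexer, text string) error {
	it, err := lexer.Tokenise(nil, text)
	if err != nil {
		return err
	}
	for tok := it(); tok != EOF; tok = it() {
		fmt.Printf("%s %q\n", tok.Type, tok.Value)
	}
	return nil
}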

// Lexers is a slice of lexers sortable by name.
type Lexers []Lexer

func (l Lexers) Len() int      { return len(l) }
func (l Lexers) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l Lexers) Less(i, j int) bool {
	return strings.ToLower(l[i].Config().Name) < strings.ToLower(l[j].Config().Name)
}
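
// exampleSortByName is an illustrative sketch, not part of the original
// source. Because Lexers implements sort.Interface above, sort.Sort orders
// the slice case-insensitively by Config().Name.
func exampleSortByName(all Lexers) {
	sort.Sort(all)
	for _, l := range all {
		fmt.Println(l.Config().Name)
	}
}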

// PrioritisedLexers is a slice of lexers sortable by priority.
type PrioritisedLexers []Lexer

func (l PrioritisedLexers) Len() int      { return len(l) }
func (l PrioritisedLexers) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l PrioritisedLexers) Less(i, j int) bool {
	ip := l[i].Config().Priority
	if ip == 0 {
		ip = 1
	}
	jp := l[j].Config().Priority
	if jp == 0 {
		jp = 1
	}
	return ip > jp
}
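
// exampleSortByPriority is an illustrative sketch, not part of the original
// source. The highest priority sorts first, and a zero Priority is treated
// as 1, so most lexers tie unless one explicitly raises or lowers it.
func exampleSortByPriority(candidates PrioritisedLexers) Lexer {
	sort.Sort(candidates)
	return candidates[0] // best candidate; assumes a non-empty slice
}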

// Analyser determines how appropriate this lexer is for the given text.
type Analyser interface {
	AnalyseText(text string) float32
}
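
// exampleAnalyser is an illustrative sketch, not part of the original source:
// a hypothetical Analyser that scores text with a shebang check. AnalyseText
// returns a confidence for how well this lexer suits the given text.
type exampleAnalyser struct{}

func (exampleAnalyser) AnalyseText(text string) float32 {
	if strings.HasPrefix(text, "#!/bin/sh") {
		return 1.0
	}
	return 0.0
}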
