1 package chroma
2
import (
	"compress/gzip"
	"encoding/xml"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"path/filepath"
	"reflect"
	"regexp"
	"sort"
	"strings"
)
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54 var (
55
56 ErrNotSerialisable = fmt.Errorf("not serialisable")
57 emitterTemplates = func() map[string]SerialisableEmitter {
58 out := map[string]SerialisableEmitter{}
59 for _, emitter := range []SerialisableEmitter{
60 &byGroupsEmitter{},
61 &usingSelfEmitter{},
62 TokenType(0),
63 &usingEmitter{},
64 &usingByGroup{},
65 } {
66 out[emitter.EmitterKind()] = emitter
67 }
68 return out
69 }()
70 mutatorTemplates = func() map[string]SerialisableMutator {
71 out := map[string]SerialisableMutator{}
72 for _, mutator := range []SerialisableMutator{
73 &includeMutator{},
74 &combinedMutator{},
75 &multiMutator{},
76 &pushMutator{},
77 &popMutator{},
78 } {
79 out[mutator.MutatorKind()] = mutator
80 }
81 return out
82 }()
83 )
84
85
86 func fastUnmarshalConfig(from fs.FS, path string) (*Config, error) {
87 r, err := from.Open(path)
88 if err != nil {
89 return nil, err
90 }
91 defer r.Close()
92 dec := xml.NewDecoder(r)
93 for {
94 token, err := dec.Token()
95 if err != nil {
96 if errors.Is(err, io.EOF) {
97 return nil, fmt.Errorf("could not find <config> element")
98 }
99 return nil, err
100 }
101 switch se := token.(type) {
102 case xml.StartElement:
103 if se.Name.Local != "config" {
104 break
105 }
106
107 var config Config
108 err = dec.DecodeElement(&config, &se)
109 if err != nil {
110 panic(err)
111 }
112 return &config, nil
113 }
114 }
115 }
116
117
118 func MustNewXMLLexer(from fs.FS, path string) *RegexLexer {
119 lex, err := NewXMLLexer(from, path)
120 if err != nil {
121 panic(err)
122 }
123 return lex
124 }
125
126
127 func NewXMLLexer(from fs.FS, path string) (*RegexLexer, error) {
128 config, err := fastUnmarshalConfig(from, path)
129 if err != nil {
130 return nil, err
131 }
132 for _, glob := range append(config.Filenames, config.AliasFilenames...) {
133 _, err := filepath.Match(glob, "")
134 if err != nil {
135 return nil, fmt.Errorf("%s: %q is not a valid glob: %w", config.Name, glob, err)
136 }
137 }
138 return &RegexLexer{
139 config: config,
140 fetchRulesFunc: func() (Rules, error) {
141 var lexer struct {
142 Config
143 Rules Rules `xml:"rules"`
144 }
145
146 fr, err := from.Open(path)
147 if err != nil {
148 if errors.Is(err, fs.ErrNotExist) {
149 path += ".gz"
150 fr, err = from.Open(path)
151 if err != nil {
152 return nil, err
153 }
154 } else {
155 return nil, err
156 }
157 }
158 defer fr.Close()
159 var r io.Reader = fr
160 if strings.HasSuffix(path, ".gz") {
161 r, err = gzip.NewReader(r)
162 if err != nil {
163 return nil, fmt.Errorf("%s: %w", path, err)
164 }
165 }
166 err = xml.NewDecoder(r).Decode(&lexer)
167 if err != nil {
168 return nil, fmt.Errorf("%s: %w", path, err)
169 }
170 return lexer.Rules, nil
171 },
172 }, nil
173 }
174
175
176 func Marshal(l *RegexLexer) ([]byte, error) {
177 type lexer struct {
178 Config Config `xml:"config"`
179 Rules Rules `xml:"rules"`
180 }
181
182 rules, err := l.Rules()
183 if err != nil {
184 return nil, err
185 }
186 root := &lexer{
187 Config: *l.Config(),
188 Rules: rules,
189 }
190 data, err := xml.MarshalIndent(root, "", " ")
191 if err != nil {
192 return nil, err
193 }
194 re := regexp.MustCompile(`></[a-zA-Z]+>`)
195 data = re.ReplaceAll(data, []byte(`/>`))
196 return data, nil
197 }
198
199
200 func Unmarshal(data []byte) (*RegexLexer, error) {
201 type lexer struct {
202 Config Config `xml:"config"`
203 Rules Rules `xml:"rules"`
204 }
205 root := &lexer{}
206 err := xml.Unmarshal(data, root)
207 if err != nil {
208 return nil, fmt.Errorf("invalid Lexer XML: %w", err)
209 }
210 lex, err := NewLexer(&root.Config, func() Rules { return root.Rules })
211 if err != nil {
212 return nil, err
213 }
214 return lex, nil
215 }
216
217 func marshalMutator(e *xml.Encoder, mutator Mutator) error {
218 if mutator == nil {
219 return nil
220 }
221 smutator, ok := mutator.(SerialisableMutator)
222 if !ok {
223 return fmt.Errorf("unsupported mutator: %w", ErrNotSerialisable)
224 }
225 return e.EncodeElement(mutator, xml.StartElement{Name: xml.Name{Local: smutator.MutatorKind()}})
226 }
227
228 func unmarshalMutator(d *xml.Decoder, start xml.StartElement) (Mutator, error) {
229 kind := start.Name.Local
230 mutator, ok := mutatorTemplates[kind]
231 if !ok {
232 return nil, fmt.Errorf("unknown mutator %q: %w", kind, ErrNotSerialisable)
233 }
234 value, target := newFromTemplate(mutator)
235 if err := d.DecodeElement(target, &start); err != nil {
236 return nil, err
237 }
238 return value().(SerialisableMutator), nil
239 }
240
241 func marshalEmitter(e *xml.Encoder, emitter Emitter) error {
242 if emitter == nil {
243 return nil
244 }
245 semitter, ok := emitter.(SerialisableEmitter)
246 if !ok {
247 return fmt.Errorf("unsupported emitter %T: %w", emitter, ErrNotSerialisable)
248 }
249 return e.EncodeElement(emitter, xml.StartElement{
250 Name: xml.Name{Local: semitter.EmitterKind()},
251 })
252 }
253
254 func unmarshalEmitter(d *xml.Decoder, start xml.StartElement) (Emitter, error) {
255 kind := start.Name.Local
256 mutator, ok := emitterTemplates[kind]
257 if !ok {
258 return nil, fmt.Errorf("unknown emitter %q: %w", kind, ErrNotSerialisable)
259 }
260 value, target := newFromTemplate(mutator)
261 if err := d.DecodeElement(target, &start); err != nil {
262 return nil, err
263 }
264 return value().(SerialisableEmitter), nil
265 }
266
267 func (r Rule) MarshalXML(e *xml.Encoder, _ xml.StartElement) error {
268 start := xml.StartElement{
269 Name: xml.Name{Local: "rule"},
270 }
271 if r.Pattern != "" {
272 start.Attr = append(start.Attr, xml.Attr{
273 Name: xml.Name{Local: "pattern"},
274 Value: r.Pattern,
275 })
276 }
277 if err := e.EncodeToken(start); err != nil {
278 return err
279 }
280 if err := marshalEmitter(e, r.Type); err != nil {
281 return err
282 }
283 if err := marshalMutator(e, r.Mutator); err != nil {
284 return err
285 }
286 return e.EncodeToken(xml.EndElement{Name: start.Name})
287 }
288
289 func (r *Rule) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
290 for _, attr := range start.Attr {
291 if attr.Name.Local == "pattern" {
292 r.Pattern = attr.Value
293 break
294 }
295 }
296 for {
297 token, err := d.Token()
298 if err != nil {
299 return err
300 }
301 switch token := token.(type) {
302 case xml.StartElement:
303 mutator, err := unmarshalMutator(d, token)
304 if err != nil && !errors.Is(err, ErrNotSerialisable) {
305 return err
306 } else if err == nil {
307 if r.Mutator != nil {
308 return fmt.Errorf("duplicate mutator")
309 }
310 r.Mutator = mutator
311 continue
312 }
313 emitter, err := unmarshalEmitter(d, token)
314 if err != nil && !errors.Is(err, ErrNotSerialisable) {
315 return err
316 } else if err == nil {
317 if r.Type != nil {
318 return fmt.Errorf("duplicate emitter")
319 }
320 r.Type = emitter
321 continue
322 } else {
323 return err
324 }
325
326 case xml.EndElement:
327 return nil
328 }
329 }
330 }
331
332 type xmlRuleState struct {
333 Name string `xml:"name,attr"`
334 Rules []Rule `xml:"rule"`
335 }
336
337 type xmlRules struct {
338 States []xmlRuleState `xml:"state"`
339 }
340
341 func (r Rules) MarshalXML(e *xml.Encoder, _ xml.StartElement) error {
342 xr := xmlRules{}
343 for state, rules := range r {
344 xr.States = append(xr.States, xmlRuleState{
345 Name: state,
346 Rules: rules,
347 })
348 }
349 return e.EncodeElement(xr, xml.StartElement{Name: xml.Name{Local: "rules"}})
350 }
351
352 func (r *Rules) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
353 xr := xmlRules{}
354 if err := d.DecodeElement(&xr, &start); err != nil {
355 return err
356 }
357 if *r == nil {
358 *r = Rules{}
359 }
360 for _, state := range xr.States {
361 (*r)[state.Name] = state.Rules
362 }
363 return nil
364 }
365
// xmlTokenType is the XML shape of a token type: an element whose "type"
// attribute holds the token type's name.
type xmlTokenType struct {
	Type string `xml:"type,attr"`
}
369
370 func (t *TokenType) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
371 el := xmlTokenType{}
372 if err := d.DecodeElement(&el, &start); err != nil {
373 return err
374 }
375 tt, err := TokenTypeString(el.Type)
376 if err != nil {
377 return err
378 }
379 *t = tt
380 return nil
381 }
382
383 func (t TokenType) MarshalXML(e *xml.Encoder, start xml.StartElement) error {
384 start.Attr = append(start.Attr, xml.Attr{Name: xml.Name{Local: "type"}, Value: t.String()})
385 if err := e.EncodeToken(start); err != nil {
386 return err
387 }
388 return e.EncodeToken(xml.EndElement{Name: start.Name})
389 }
390
391
// newFromTemplate allocates a zero value of the same type as template.
//
// target is always a pointer suitable for decoding into. value returns the
// decoded result in the same shape as template: for a pointer template it is
// the same pointer as target, for a non-pointer template it is the current
// value that target points to.
func newFromTemplate(template interface{}) (value func() interface{}, target interface{}) {
	typ := reflect.TypeOf(template)
	if typ.Kind() != reflect.Ptr {
		// Value template: allocate a *T to decode into and dereference it
		// lazily so that value sees whatever was written through target.
		holder := reflect.New(typ)
		return func() interface{} { return holder.Elem().Interface() }, holder.Interface()
	}
	// Pointer template: allocate the pointee and hand out the pointer.
	ptr := reflect.New(typ.Elem())
	return func() interface{} { return ptr.Interface() }, ptr.Interface()
}
401
402 func (b *Emitters) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
403 for {
404 token, err := d.Token()
405 if err != nil {
406 return err
407 }
408 switch token := token.(type) {
409 case xml.StartElement:
410 emitter, err := unmarshalEmitter(d, token)
411 if err != nil {
412 return err
413 }
414 *b = append(*b, emitter)
415
416 case xml.EndElement:
417 return nil
418 }
419 }
420 }
421
422 func (b Emitters) MarshalXML(e *xml.Encoder, start xml.StartElement) error {
423 if err := e.EncodeToken(start); err != nil {
424 return err
425 }
426 for _, m := range b {
427 if err := marshalEmitter(e, m); err != nil {
428 return err
429 }
430 }
431 return e.EncodeToken(xml.EndElement{Name: start.Name})
432 }
433
View as plain text