import functools
import importlib
import json
import re
import sys
import types

import pystache
from pygments import lexer as pygments_lexer
from pygments.token import _TokenType


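# Mustache template for the generated Go lexer source. The {{=<% %>=}}
# delimiter swap around the Aliases/Filenames/MimeTypes lines keeps the
# literal braces of the Go composite literals from colliding with
# mustache's {{ }} tags.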
TEMPLATE = r'''
package {{package}}

import (
	. "github.com/alecthomas/chroma/v2" // nolint
	"github.com/alecthomas/chroma/v2/lexers/internal"
)

// {{upper_name}} lexer.
var {{upper_name}} = internal.Register(MustNewLazyLexer(
	&Config{
		Name:      "{{name}}",
		{{=<% %>=}}
		Aliases:   []string{<%#aliases%>"<%.%>", <%/aliases%>},
		Filenames: []string{<%#filenames%>"<%.%>", <%/filenames%>},
		MimeTypes: []string{<%#mimetypes%>"<%.%>", <%/mimetypes%>},
		<%={{ }}=%>
{{#re_not_multiline}}
		NotMultiline: true,
{{/re_not_multiline}}
{{#re_dotall}}
		DotAll: true,
{{/re_dotall}}
{{#re_ignorecase}}
		CaseInsensitive: true,
{{/re_ignorecase}}
	},
	func() Rules {
		return Rules{
{{#tokens}}
			"{{state}}": {
				{{#rules}}
				{{{.}}},
				{{/rules}}
			},
{{/tokens}}
		}
	},
))
'''


def go_regex(s):
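    """Quote a Pygments regex as a Go string literal; the pattern itself is passed through unchanged."""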
    return go_string(s)


def go_string(s):
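    """Quote s as a Go string, preferring a raw (backquoted) literal and
    falling back to a JSON-escaped one when the string contains a backquote."""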
    if '`' not in s:
        return '`' + s + '`'
    return json.dumps(s)


def to_camel_case(snake_str):
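    """Convert snake_case to CamelCase for the exported Go identifier."""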
    components = snake_str.split('_')
    return ''.join(x.title() for x in components)


def warning(message):
    print('warning: ' + message, file=sys.stderr)


def resolve_emitter(emitter):
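    """Translate a Pygments emitter (a token type, bygroups(...), using(...),
    or None) into the equivalent chroma emitter expression."""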
    if isinstance(emitter, types.FunctionType):
        if repr(emitter).startswith('<function bygroups.'):
            args = emitter.__closure__[0].cell_contents
            emitter = 'ByGroups(%s)' % ', '.join(resolve_emitter(e) for e in args)
        elif repr(emitter).startswith('<function using.'):
            args = emitter.__closure__[0].cell_contents
            if isinstance(args, dict):
                state = 'root'
                if 'stack' in args:
                    state = args['stack'][1]
                    args.pop('stack')
                assert args == {}, args
                emitter = 'UsingSelf("%s")' % state
            elif issubclass(args, pygments_lexer.Lexer):
                name = args.__name__
                if name.endswith('Lexer'):
                    name = name[:-5]
                emitter = 'Using(%s)' % name
            else:
                raise ValueError('only support "using" with lexer classes, not %r' % args)
        else:
            warning('unsupported emitter function %r' % emitter)
            emitter = '?? %r ??' % emitter
    elif isinstance(emitter, _TokenType):
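        # str() of a token type is e.g. 'Token.Literal.String'; dropping the
        # dots and the leading 'Token' yields the chroma name 'LiteralString'.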
        emitter = str(emitter).replace('.', '')[5:]
    elif emitter is None:
        # This generally only occurs when a lookahead/behind assertion is used, so we just allow it
        # through.
        return 'None'
    else:
        raise ValueError('unsupported emitter type %r' % emitter)
    assert isinstance(emitter, str)
    return emitter


def process_state_action(action):
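    """Map a Pygments state action ('#pop', '#push', a target state name, or a
    tuple of these) onto chroma mutators, returned as a tuple of Go expressions."""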
    if isinstance(action, tuple):
        return functools.reduce(lambda a, b: a + b, (process_state_action(a) for a in action))
    if action.startswith('#'):
        action = action[1:]
        if action == 'pop':
            action = 'Pop(1)'
        elif action.startswith('pop:'):
            action = 'Pop(%s)' % action[4:]
        elif action == 'push':
            action = 'Push()'
        elif action.startswith('push:'):
            action = 'Push("%s")' % action[5:]
        else:
            raise ValueError('unsupported action %r' % (action,))
    else:
        action = 'Push("%s")' % action
    return (action,)


def translate_rules(rules):
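    """Render each Pygments rule in a state as the equivalent chroma rule literal."""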
    out = []
    for rule in rules:
        if isinstance(rule, tuple):
            regex = rule[0]
            if isinstance(regex, str):
                regex = go_regex(regex)
            elif isinstance(regex, pygments_lexer.words):
                regex = 'Words(%s, %s, %s)' % (go_string(regex.prefix),
                                               go_string(regex.suffix),
                                               ', '.join(go_string(w) for w in regex.words))
            else:
                raise ValueError('expected regex string but got %r' % regex)
            emitter = resolve_emitter(rule[1])
            if len(rule) == 2:
                modifier = 'nil'
            elif type(rule[2]) is str:
                modifier = process_state_action(rule[2])[0]
            elif isinstance(rule[2], pygments_lexer.combined):
                modifier = 'Combined("%s")' % '", "'.join(rule[2])
            elif type(rule[2]) is tuple:
                modifier = 'Push("%s")' % '", "'.join(rule[2])
            else:
                raise ValueError('unsupported modifier %r' % (rule[2],))
            out.append('{{{}, {}, {}}}'.format(regex, emitter, modifier))
        elif isinstance(rule, pygments_lexer.include):
            out.append('Include("{}")'.format(rule))
        elif isinstance(rule, pygments_lexer.default):
            out.append('Default({})'.format(', '.join(process_state_action(rule.state))))
        else:
            raise ValueError('unsupported rule %r' % (rule,))
    return out


class TemplateView(object):
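    """View object handed to pystache; keyword arguments become template
    variables, and the re_* methods below expose the lexer's regex flags."""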
    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)

    def re_not_multiline(self):
        return not (self.regex_flags & re.MULTILINE)

    def re_dotall(self):
        return self.regex_flags & re.DOTALL

    def re_ignorecase(self):
        return self.regex_flags & re.IGNORECASE


def main():
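    """Translate the RegexLexer named by sys.argv[1] (e.g.
    pygments.lexers.jvm.KotlinLexer) into chroma Go source on stdout."""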
    package_name, symbol_name = sys.argv[1].rsplit(sep='.', maxsplit=1)

    package = importlib.import_module(package_name)

    lexer_cls = getattr(package, symbol_name)

    assert issubclass(lexer_cls, pygments_lexer.RegexLexer), 'can only translate from RegexLexer'

    print(pystache.render(TEMPLATE, TemplateView(
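        # The generated file lives in a single-letter package named after the
        # first letter of the lexer's name.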
        package=lexer_cls.name.lower()[0],
        name=lexer_cls.name,
        regex_flags=lexer_cls.flags,
        upper_name=to_camel_case(re.sub(r'\W', '_', lexer_cls.name)),
        aliases=lexer_cls.aliases,
        filenames=lexer_cls.filenames,
        mimetypes=lexer_cls.mimetypes,
        tokens=[{'state': state, 'rules': translate_rules(rules)}
                for (state, rules) in lexer_cls.get_tokendefs().items()],
    )))


if __name__ == '__main__':
    main()