import functools
import importlib
import json
import re
import sys
import types

import pystache
from pygments import lexer as pygments_lexer
from pygments.token import _TokenType


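# Mustache template for the generated Go lexer source. The {{=<% %>=}}
# delimiter swap around the Aliases/Filenames/MimeTypes lines keeps the
# literal braces of the Go composite literals from colliding with
# mustache's {{ }} tags.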
TEMPLATE = r'''
package {{package}}

import (
	. "github.com/alecthomas/chroma/v2" // nolint
	"github.com/alecthomas/chroma/v2/lexers/internal"
)

// {{upper_name}} lexer.
var {{upper_name}} = internal.Register(MustNewLazyLexer(
	&Config{
		Name:      "{{name}}",
		{{=<% %>=}}
		Aliases:   []string{<%#aliases%>"<%.%>", <%/aliases%>},
		Filenames: []string{<%#filenames%>"<%.%>", <%/filenames%>},
		MimeTypes: []string{<%#mimetypes%>"<%.%>", <%/mimetypes%>},
		<%={{ }}=%>
{{#re_not_multiline}}
		NotMultiline: true,
{{/re_not_multiline}}
{{#re_dotall}}
		DotAll: true,
{{/re_dotall}}
{{#re_ignorecase}}
		CaseInsensitive: true,
{{/re_ignorecase}}
	},
	func() Rules {
		return Rules{
{{#tokens}}
			"{{state}}": {
				{{#rules}}
				{{{.}}},
				{{/rules}}
			},
{{/tokens}}
		}
	},
))
'''


def go_regex(s):
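    """Quote a Pygments regex as a Go string literal; the pattern itself is passed through unchanged."""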
    return go_string(s)


def go_string(s):
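    """Quote s as a Go string, preferring a raw (backquoted) literal and
    falling back to a JSON-escaped one when the string contains a backquote."""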
    if '`' not in s:
        return '`' + s + '`'
    return json.dumps(s)


def to_camel_case(snake_str):
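    """Convert snake_case to CamelCase for the exported Go identifier."""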
    components = snake_str.split('_')
    return ''.join(x.title() for x in components)


def warning(message):
    print('warning: ' + message, file=sys.stderr)


def resolve_emitter(emitter):
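    """Translate a Pygments emitter (a token type, bygroups(...), using(...),
    or None) into the equivalent chroma emitter expression."""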
    if isinstance(emitter, types.FunctionType):
        if repr(emitter).startswith('<function bygroups.'):
            args = emitter.__closure__[0].cell_contents
            emitter = 'ByGroups(%s)' % ', '.join(resolve_emitter(e) for e in args)
        elif repr(emitter).startswith('<function using.'):
            args = emitter.__closure__[0].cell_contents
            if isinstance(args, dict):
                state = 'root'
                if 'stack' in args:
                    state = args['stack'][1]
                    args.pop('stack')
                assert args == {}, args
                emitter = 'UsingSelf("%s")' % state
            elif issubclass(args, pygments_lexer.Lexer):
                name = args.__name__
                if name.endswith('Lexer'):
                    name = name[:-5]
                emitter = 'Using(%s)' % name
            else:
                raise ValueError('only support "using" with lexer classes, not %r' % args)
        else:
            warning('unsupported emitter function %r' % emitter)
            emitter = '?? %r ??' % emitter
    elif isinstance(emitter, _TokenType):
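        # str() of a token type is e.g. 'Token.Literal.String'; dropping the
        # dots and the leading 'Token' yields the chroma name 'LiteralString'.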
        emitter = str(emitter).replace('.', '')[5:]
    elif emitter is None:
        # This generally only occurs when a lookahead/behind assertion is used, so we just allow it
        # through.
        return 'None'
    else:
        raise ValueError('unsupported emitter type %r' % emitter)
    assert isinstance(emitter, str)
    return emitter


def process_state_action(action):
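    """Map a Pygments state action ('#pop', '#push', a target state name, or a
    tuple of these) onto chroma mutators, returned as a tuple of Go expressions."""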
    if isinstance(action, tuple):
        return functools.reduce(lambda a, b: a + b, (process_state_action(a) for a in action))
    if action.startswith('#'):
        action = action[1:]
        if action == 'pop':
            action = 'Pop(1)'
        elif action.startswith('pop:'):
            action = 'Pop(%s)' % action[4:]
        elif action == 'push':
            action = 'Push()'
        elif action.startswith('push:'):
            action = 'Push("%s")' % action[5:]
        else:
            raise ValueError('unsupported action %r' % (action,))
    else:
        action = 'Push("%s")' % action
    return (action,)


def translate_rules(rules):
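    """Render each Pygments rule in a state as the equivalent chroma rule literal."""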
    out = []
    for rule in rules:
        if isinstance(rule, tuple):
            regex = rule[0]
            if isinstance(regex, str):
                regex = go_regex(regex)
            elif isinstance(regex, pygments_lexer.words):
                regex = 'Words(%s, %s, %s)' % (go_string(regex.prefix),
                                               go_string(regex.suffix),
                                               ', '.join(go_string(w) for w in regex.words))
            else:
                raise ValueError('expected regex string but got %r' % regex)
            emitter = resolve_emitter(rule[1])
            if len(rule) == 2:
                modifier = 'nil'
            elif type(rule[2]) is str:
                modifier = process_state_action(rule[2])[0]
            elif isinstance(rule[2], pygments_lexer.combined):
                modifier = 'Combined("%s")' % '", "'.join(rule[2])
            elif type(rule[2]) is tuple:
                modifier = 'Push("%s")' % '", "'.join(rule[2])
            else:
                raise ValueError('unsupported modifier %r' % (rule[2],))
            out.append('{{{}, {}, {}}}'.format(regex, emitter, modifier))
        elif isinstance(rule, pygments_lexer.include):
            out.append('Include("{}")'.format(rule))
        elif isinstance(rule, pygments_lexer.default):
            out.append('Default({})'.format(', '.join(process_state_action(rule.state))))
        else:
            raise ValueError('unsupported rule %r' % (rule,))
    return out


class TemplateView(object):
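    """View object handed to pystache; keyword arguments become template
    variables, and the re_* methods below expose the lexer's regex flags."""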
    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)

    def re_not_multiline(self):
        return not (self.regex_flags & re.MULTILINE)

    def re_dotall(self):
        return self.regex_flags & re.DOTALL

    def re_ignorecase(self):
        return self.regex_flags & re.IGNORECASE


def main():
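    """Translate the RegexLexer named by sys.argv[1] (e.g.
    pygments.lexers.jvm.KotlinLexer) into chroma Go source on stdout."""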
    package_name, symbol_name = sys.argv[1].rsplit(sep='.', maxsplit=1)

    package = importlib.import_module(package_name)

    lexer_cls = getattr(package, symbol_name)

    assert issubclass(lexer_cls, pygments_lexer.RegexLexer), 'can only translate from RegexLexer'

    print(pystache.render(TEMPLATE, TemplateView(
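        # The generated file lives in a single-letter package named after the
        # first letter of the lexer's name.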
        package=lexer_cls.name.lower()[0],
        name=lexer_cls.name,
        regex_flags=lexer_cls.flags,
        upper_name=to_camel_case(re.sub(r'\W', '_', lexer_cls.name)),
        aliases=lexer_cls.aliases,
        filenames=lexer_cls.filenames,
        mimetypes=lexer_cls.mimetypes,
        tokens=[{'state': state, 'rules': translate_rules(rules)}
                for (state, rules) in lexer_cls.get_tokendefs().items()],
    )))


if __name__ == '__main__':
    main()