1import functools
2import importlib
3import json
4import os
5import re
6import sys
7import types
8import html
9
10import pystache
11from pygments import lexer as pygments_lexer
12from pygments.token import _TokenType
13
# Mustache template for a Chroma XML lexer definition, rendered with a
# TemplateView instance.  {{#section}}...{{/section}} tags iterate lists or
# gate on truthy methods; {{{.}}} emits each pre-built rule string unescaped.
TEMPLATE = r'''
<lexer>
  <config>
    <name>{{name}}</name>
    {{#aliases}}
    <alias>{{alias}}</alias>
    {{/aliases}}
    {{#filenames}}
    <filename>{{filename}}</filename>
    {{/filenames}}
    {{#mimetypes}}
    <mime_type>{{mimetype}}</mime_type>
    {{/mimetypes}}
    {{#re_ignorecase}}
    <case_insensitive>true</case_insensitive>
    {{/re_ignorecase}}
    {{#re_dotall}}
    <dot_all>true</dot_all>
    {{/re_dotall}}
    {{#re_not_multiline}}
    <not_multiline>true</not_multiline>
    {{/re_not_multiline}}
  </config>
  <rules>
    {{#tokens}}
    <state name="{{state}}">
      {{#rules}}
      {{{.}}}
      {{/rules}}
    </state>
    {{/tokens}}
  </rules>
</lexer>
'''
48
49
def xml_regex(s):
    """Quote a rule regex for the XML output.

    Currently regexes get exactly the same escaping as plain strings.
    """
    return xml_string(s)
52
def xml_string(s):
    """XML-escape *s* and wrap it in double quotes for use as an attribute."""
    escaped = html.escape(s)
    return '"%s"' % escaped
56
57
def to_camel_case(snake_str):
    """Convert a snake_case identifier to CamelCase."""
    return ''.join(part.title() for part in snake_str.split('_'))
61
62
def warning(message):
    """Report a non-fatal problem on stderr (stdout carries the XML output)."""
    print('warning: %s' % message, file=sys.stderr)
65
66
def resolve_emitter(emitter):
    """Translate a pygments rule emitter into its Chroma XML fragment.

    ``emitter`` may be a pygments token type, ``None``, or one of the
    closure-based callbacks produced by ``bygroups``/``using``.  Returns the
    XML fragment as a string ('None' for a ``None`` emitter).

    Raises ValueError for emitter kinds this converter cannot handle.
    """
    if isinstance(emitter, types.FunctionType):
        # bygroups/using are closures; the only reliable way to identify
        # them is their repr, and their arguments live in the closure cell.
        if repr(emitter).startswith('<function bygroups.'):
            args = emitter.__closure__[0].cell_contents
            emitter = '<bygroups>%s</bygroups>' % ''.join(resolve_emitter(e) for e in args)
        elif repr(emitter).startswith('<function using.'):
            args = emitter.__closure__[0].cell_contents
            if isinstance(args, dict):
                # using(this, state=...): delegate to one of our own states.
                state = 'root'
                if 'stack' in args:
                    state = args['stack'][1]
                    args.pop('stack')
                assert args == {}, args
                emitter = '<usingself state="%s"/>' % state
            elif issubclass(args, pygments_lexer.Lexer):
                name = args.__name__
                if name.endswith('Lexer'):
                    name = name[:-5]
                # Emit the lexer's name; previously this formatted with
                # 'state', which is undefined in this branch (NameError).
                emitter = '<using lexer="%s"/>' % name
            else:
                raise ValueError('only support "using" with lexer classes, not %r' % args)
        else:
            warning('unsupported emitter function %r' % emitter)
            emitter = '?? %r ??' % emitter
    elif isinstance(emitter, _TokenType):
        # str(Token.Name.Builtin) == 'Token.Name.Builtin': drop the dots,
        # then strip the leading 'Token' (5 chars) -> 'NameBuiltin'.
        emitter = '<token type="%s"/>' % str(emitter).replace('.', '')[5:]
    elif emitter is None:
        # A rule's token can be None, meaning "emit nothing".
        return 'None'
    else:
        raise ValueError('unsupported emitter type %r' % emitter)
    assert isinstance(emitter, str)
    return emitter
99
100
def process_state_action(action):
    """Convert a pygments new-state action into a tuple of Chroma XML tags.

    ``action`` is either a string ('#pop', '#pop:N', '#push', '#push:STATE',
    or a plain state name) or a tuple of such strings.  Always returns a
    tuple so results can be concatenated.
    """
    if isinstance(action, tuple):
        # Flatten a tuple of actions into one combined tuple of tags.
        parts = [process_state_action(sub_action) for sub_action in action]
        return functools.reduce(lambda left, right: left + right, parts)
    if not action.startswith('#'):
        # A bare string names a state to push.
        return ('<push state="%s"/>' % action,)
    directive = action[1:]
    if directive == 'pop':
        tag = '<pop depth="1"/>'
    elif directive.startswith('pop:'):
        tag = '<pop depth="%s"/>' % directive[4:]
    elif directive == 'push':
        tag = '<push/>'
    elif directive.startswith('push:'):
        tag = '<push state="%s"/>' % directive[5:]
    else:
        raise ValueError('unsupported action %r' % (directive,))
    return (tag,)
119
120
def translate_rules(rules):
    """Translate one pygments state's rule list into Chroma XML <rule> strings.

    Each element of ``rules`` is a (regex, emitter[, modifier]) tuple, an
    ``include`` marker, or a ``default`` marker.  Returns a list of XML
    fragments, one per rule.

    Raises ValueError on rule shapes this converter does not understand.
    """
    out = []
    for rule in rules:
        if isinstance(rule, tuple):
            regex = rule[0]
            if isinstance(regex, str):
                regex = xml_regex(regex)
            elif isinstance(regex, pygments_lexer.words):
                # Expand a words() matcher into an explicit alternation.
                regex = xml_string('%s(%s)%s' % (regex.prefix,
                                                 '|'.join(w for w in regex.words),
                                                 regex.suffix))
            else:
                raise ValueError('expected regex string but got %r' % regex)
            emitter = resolve_emitter(rule[1])
            if len(rule) == 2:
                modifier = ''
            elif type(rule[2]) is str:
                modifier = process_state_action(rule[2])[0]
            elif isinstance(rule[2], pygments_lexer.combined):
                modifier = '<combined state="%s"/>' % '" state="'.join(rule[2])
            elif type(rule[2]) is tuple:
                # Exact type check on purpose: combined subclasses tuple and
                # must be caught by the branch above.
                modifier = '<push state="%s"/>' % '" state="'.join(rule[2])
            else:
                raise ValueError('unsupported modifier %r' % (rule[2],))
            out.append('<rule pattern={}>{}{}</rule>'.format(regex, emitter, modifier))
        elif isinstance(rule, pygments_lexer.include):
            out.append('<rule><include state="{}"/></rule>'.format(rule))
        elif isinstance(rule, pygments_lexer.default):
            # default(state) carries only a state action, no pattern.
            # (A redundant, discarded process_state_action call was removed.)
            out.append('<rule>{}</rule>'.format(''.join(process_state_action(rule.state))))
        else:
            raise ValueError('unsupported rule %r' % (rule,))
    return out
154
155
class TemplateView(object):
    """Context object handed to pystache when rendering TEMPLATE.

    Arbitrary keyword arguments become attributes; the re_* methods expose
    the lexer's regex flags as mustache-friendly truthy/falsy values.
    """

    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

    def re_not_multiline(self):
        # Chroma defaults to multiline matching, so flag its absence.
        return not (self.regex_flags & re.MULTILINE)

    def re_dotall(self):
        return self.regex_flags & re.DOTALL

    def re_ignorecase(self):
        return self.regex_flags & re.IGNORECASE
169
170
def main():
    """Translate the pygments lexer named by argv[1] (dotted path) to XML.

    The rendered Chroma lexer definition is written to stdout.
    """
    package_name, symbol_name = sys.argv[1].rsplit(sep='.', maxsplit=1)
    module = importlib.import_module(package_name)
    lexer_cls = getattr(module, symbol_name)

    # Only the regex-driven lexers map onto Chroma's rule format.
    assert issubclass(lexer_cls, pygments_lexer.RegexLexer), 'can only translate from RegexLexer'

    view = TemplateView(
        name=lexer_cls.name,
        regex_flags=lexer_cls.flags,
        aliases=[{'alias': alias} for alias in lexer_cls.aliases],
        filenames=[{'filename': filename} for filename in lexer_cls.filenames],
        mimetypes=[{'mimetype': mimetype} for mimetype in lexer_cls.mimetypes],
        tokens=[{'state': state, 'rules': translate_rules(rules)}
                for state, rules in lexer_cls.get_tokendefs().items()],
    )
    print(pystache.render(TEMPLATE, view))
188
189
# Script entry point: python pygments2chroma.py pygments.lexers.SomeLexer
if __name__ == '__main__':
    main()