"""Translate a Pygments RegexLexer into a Chroma XML lexer definition.

Usage: python pygments2chroma.py some.package.SomeLexer > lexer.xml

The named lexer class is imported, its token definitions are walked, and a
pystache template renders them as the XML format consumed by Chroma.

NOTE(review): this file arrived corrupted (newlines collapsed and every
``<...>`` span stripped, including XML fragments inside string literals and
one long run of code mistaken for a tag). The template text, the body of
``resolve_emitter``, ``process_state_action``, and the head of
``translate_rules`` below are reconstructions consistent with the surviving
code — confirm against the upstream tool before relying on exact output.
"""

import functools
import html
import importlib
import json
import os
import re
import sys
import types

import pystache
from pygments import lexer as pygments_lexer
from pygments.token import _TokenType


# Mustache template for the emitted lexer definition. The surviving source
# kept the section markers ({{#aliases}}, {{#re_ignorecase}} ... true ..., etc.)
# but lost the XML element names; they are restored here.
TEMPLATE = r'''
<lexer>
  <config>
    <name>{{name}}</name>
    {{#aliases}}
    <alias>{{alias}}</alias>
    {{/aliases}}
    {{#filenames}}
    <filename>{{filename}}</filename>
    {{/filenames}}
    {{#mimetypes}}
    <mime_type>{{mimetype}}</mime_type>
    {{/mimetypes}}
    {{#re_ignorecase}}
    <case_insensitive>true</case_insensitive>
    {{/re_ignorecase}}
    {{#re_dotall}}
    <dot_all>true</dot_all>
    {{/re_dotall}}
    {{#re_not_multiline}}
    <not_multiline>true</not_multiline>
    {{/re_not_multiline}}
  </config>
  <rules>
    {{#tokens}}
    <state name="{{state}}">
      {{#rules}}
      {{{.}}}
      {{/rules}}
    </state>
    {{/tokens}}
  </rules>
</lexer>
'''


def xml_regex(s):
    """Quote a regular expression for embedding as an XML attribute value."""
    return xml_string(s)


def xml_string(s):
    """Escape *s* for XML and wrap it in double quotes."""
    s = html.escape(s)
    return '"' + s + '"'


def to_camel_case(snake_str):
    """Convert ``snake_case`` to ``CamelCase`` (e.g. ``foo_bar`` -> ``FooBar``)."""
    components = snake_str.split('_')
    return ''.join(x.title() for x in components)


def warning(message):
    """Print a warning to stderr without aborting the translation."""
    print('warning: ' + message, file=sys.stderr)


def resolve_emitter(emitter):
    """Translate a Pygments rule emitter into its XML fragment.

    Handles plain token types, ``bygroups(...)`` and ``using(this, ...)``
    closures; anything else raises ``ValueError``.

    NOTE(review): reconstructed — the original body was destroyed by the
    tag-stripping corruption. Verify against the upstream tool.
    """
    if isinstance(emitter, types.FunctionType):
        # Pygments exposes bygroups/using only as closures; their repr is the
        # only reliable way to tell them apart.
        if repr(emitter).startswith('<function bygroups.'):
            args = emitter.__closure__[0].cell_contents
            emitter = '<bygroups>%s</bygroups>' % ''.join(
                resolve_emitter(e) for e in args)
        elif repr(emitter).startswith('<function using.'):
            args = emitter.__closure__[0].cell_contents
            if isinstance(args, dict):
                state = 'root'
                if 'state' in args:
                    state = args.pop('state')
                if args:
                    warning('ignoring using() arguments %r' % (args,))
                emitter = '<usingself state="%s"/>' % state
            else:
                raise ValueError('only support using() with a state argument')
        else:
            raise ValueError('unsupported emitter function %r' % (emitter,))
    elif isinstance(emitter, _TokenType):
        # str(token) looks like "Token.Name.Function"; drop the dots and the
        # leading "Token" to get Chroma's type name, e.g. "NameFunction".
        emitter = '<token type="%s"/>' % str(emitter).replace('.', '')[5:]
    else:
        raise ValueError('unsupported emitter %r' % (emitter,))
    return emitter


def process_state_action(action):
    """Translate a Pygments state action into a tuple of XML fragments.

    ``action`` may be a state name, a ``#pop``/``#pop:n``/``#push`` directive,
    or a tuple of those (flattened recursively).

    NOTE(review): reconstructed from the corrupted source; the surviving
    callers expect a tuple of strings joinable with ``''.join``.
    """
    if isinstance(action, tuple):
        # Flatten nested actions into one tuple, preserving order.
        return functools.reduce(
            lambda a, b: a + b,
            (process_state_action(a) for a in action))
    if action.startswith('#'):
        action = action[1:]
        if action == 'pop':
            action = '<pop depth="1"/>'
        elif action.startswith('pop:'):
            action = '<pop depth="%s"/>' % action[4:]
        elif action == 'push':
            action = '<push/>'
        else:
            raise ValueError('unsupported action %r' % (action,))
    else:
        action = '<push state="%s"/>' % action
    return (action,)


def translate_rules(rules):
    """Translate one state's list of Pygments rules into XML rule strings.

    Each rule tuple is ``(regex, emitter[, modifier])``; ``include`` and
    ``default`` pseudo-rules are handled separately. Unknown shapes raise
    ``ValueError``.

    NOTE(review): the head of this function (regex handling and the first
    modifier branches) is reconstructed; the tail survived in the corrupted
    source and is preserved as-is.
    """
    out = []
    for rule in rules:
        if isinstance(rule, tuple):
            regex = rule[0]
            if isinstance(regex, str):
                regex = xml_regex(regex)
            elif isinstance(regex, pygments_lexer.words):
                # words(...) expands to an alternation; use its compiled form.
                regex = xml_regex(regex.get())
            else:
                raise ValueError('expected regex string but got %r' % (regex,))
            emitter = resolve_emitter(rule[1])
            if len(rule) == 2:
                modifier = ''
            elif isinstance(rule[2], str):
                modifier = ''.join(process_state_action(rule[2]))
            elif isinstance(rule[2], tuple):
                modifier = '<push state="%s"/>' % '" state="'.join(rule[2])
            else:
                raise ValueError('unsupported modifier %r' % (rule[2],))
            out.append('<rule pattern={}>{}{}</rule>'.format(
                regex, emitter, modifier))
        elif isinstance(rule, pygments_lexer.include):
            out.append('<include state="{}"/>'.format(rule))
        elif isinstance(rule, pygments_lexer.default):
            # Validate the action eagerly so a bad default fails loudly.
            process_state_action(rule.state)
            out.append('<rule>{}</rule>'.format(
                ''.join(process_state_action(rule.state))))
        else:
            raise ValueError('unsupported rule %r' % (rule,))
    return out


class TemplateView(object):
    """Pystache view object; keyword arguments become template fields."""

    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)

    def re_not_multiline(self):
        # Chroma's matcher is multiline by default; flag the opposite case.
        return not (self.regex_flags & re.MULTILINE)

    def re_dotall(self):
        return self.regex_flags & re.DOTALL

    def re_ignorecase(self):
        return self.regex_flags & re.IGNORECASE


def main():
    """Entry point: ``argv[1]`` names the lexer as ``package.module.Class``."""
    package_name, symbol_name = sys.argv[1].rsplit(sep=".", maxsplit=1)
    package = importlib.import_module(package_name)
    lexer_cls = getattr(package, symbol_name)
    assert issubclass(lexer_cls, pygments_lexer.RegexLexer), \
        'can only translate from RegexLexer'
    print(pystache.render(TEMPLATE, TemplateView(
        name=lexer_cls.name,
        regex_flags=lexer_cls.flags,
        aliases=[{'alias': alias} for alias in lexer_cls.aliases],
        filenames=[{'filename': filename} for filename in lexer_cls.filenames],
        mimetypes=[{'mimetype': mimetype} for mimetype in lexer_cls.mimetypes],
        tokens=[{'state': state, 'rules': translate_rules(rules)}
                for (state, rules) in lexer_cls.get_tokendefs().items()],
    )))


if __name__ == '__main__':
    main()