1 package lexers
2
3 import (
4 "regexp"
5 "strings"
6 "unicode/utf8"
7
8 "github.com/dlclark/regexp2"
9
10 . "github.com/alecthomas/chroma/v2"
11 )
12
13
14 var Raku Lexer = Register(MustNewLexer(
15 &Config{
16 Name: "Raku",
17 Aliases: []string{"perl6", "pl6", "raku"},
18 Filenames: []string{
19 "*.pl", "*.pm", "*.nqp", "*.p6", "*.6pl", "*.p6l", "*.pl6", "*.6pm",
20 "*.p6m", "*.pm6", "*.t", "*.raku", "*.rakumod", "*.rakutest", "*.rakudoc",
21 },
22 MimeTypes: []string{
23 "text/x-perl6", "application/x-perl6",
24 "text/x-raku", "application/x-raku",
25 },
26 DotAll: true,
27 },
28 rakuRules,
29 ))
30
31 func rakuRules() Rules {
32 type RakuToken int
33
34 const (
35 rakuQuote RakuToken = iota
36 rakuNameAttribute
37 rakuPod
38 rakuPodFormatter
39 rakuPodDeclaration
40 rakuMultilineComment
41 rakuMatchRegex
42 rakuSubstitutionRegex
43 )
44
45 const (
46 colonPairOpeningBrackets = `(?:<<|<|«|\(|\[|\{)`
47 colonPairClosingBrackets = `(?:>>|>|»|\)|\]|\})`
48 colonPairPattern = `(?<!:)(?<colon>:)(?<key>\w[\w'-]*)(?<opening_delimiters>` + colonPairOpeningBrackets + `)`
49 colonPairLookahead = `(?=(:['\w-]+` +
50 colonPairOpeningBrackets + `.+?` + colonPairClosingBrackets + `)?`
51 namePattern = `(?:(?!` + colonPairPattern + `)(?:::|[\w':-]))+`
52 variablePattern = `[$@%&]+[.^:?=!~]?` + namePattern
53 globalVariablePattern = `[$@%&]+\*` + namePattern
54 )
55
56 keywords := []string{
57 `BEGIN`, `CATCH`, `CHECK`, `CLOSE`, `CONTROL`, `DOC`, `END`, `ENTER`, `FIRST`, `INIT`,
58 `KEEP`, `LAST`, `LEAVE`, `NEXT`, `POST`, `PRE`, `QUIT`, `UNDO`, `anon`, `augment`, `but`,
59 `class`, `constant`, `default`, `does`, `else`, `elsif`, `enum`, `for`, `gather`, `given`,
60 `grammar`, `has`, `if`, `import`, `is`, `of`, `let`, `loop`, `made`, `make`, `method`,
61 `module`, `multi`, `my`, `need`, `orwith`, `our`, `proceed`, `proto`, `repeat`, `require`,
62 `where`, `return`, `return-rw`, `returns`, `->`, `-->`, `role`, `state`, `sub`, `no`,
63 `submethod`, `subset`, `succeed`, `supersede`, `try`, `unit`, `unless`, `until`,
64 `use`, `when`, `while`, `with`, `without`, `export`, `native`, `repr`, `required`, `rw`,
65 `symbol`, `default`, `cached`, `DEPRECATED`, `dynamic`, `hidden-from-backtrace`, `nodal`,
66 `pure`, `raw`, `start`, `react`, `supply`, `whenever`, `also`, `rule`, `token`, `regex`,
67 `dynamic-scope`, `built`, `temp`,
68 }
69
70 keywordsPattern := Words(`(?<!['\w:-])`, `(?!['\w:-])`, keywords...)
71
72 wordOperators := []string{
73 `X`, `Z`, `R`, `after`, `and`, `andthen`, `before`, `cmp`, `div`, `eq`, `eqv`, `extra`, `ge`,
74 `gt`, `le`, `leg`, `lt`, `mod`, `ne`, `or`, `orelse`, `x`, `xor`, `xx`, `gcd`, `lcm`,
75 `but`, `min`, `max`, `^fff`, `fff^`, `fff`, `^ff`, `ff^`, `ff`, `so`, `not`, `unicmp`,
76 `TR`, `o`, `(&)`, `(.)`, `(|)`, `(+)`, `(-)`, `(^)`, `coll`, `(elem)`, `(==)`,
77 `(cont)`, `(<)`, `(<=)`, `(>)`, `(>=)`, `minmax`, `notandthen`, `S`,
78 }
79
80 wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, wordOperators...)
81
82 operators := []string{
83 `++`, `--`, `-`, `**`, `!`, `+`, `~`, `?`, `+^`, `~^`, `?^`, `^`, `*`, `/`, `%`, `%%`, `+&`,
84 `+<`, `+>`, `~&`, `~<`, `~>`, `?&`, `+|`, `+^`, `~|`, `~^`, `?`, `?|`, `?^`, `&`, `^`,
85 `<=>`, `^…^`, `^…`, `…^`, `…`, `...`, `...^`, `^...`, `^...^`, `..`, `..^`, `^..`, `^..^`,
86 `::=`, `:=`, `!=`, `==`, `<=`, `<`, `>=`, `>`, `~~`, `===`, `&&`, `||`, `|`, `^^`, `//`,
87 `??`, `!!`, `^fff^`, `^ff^`, `<==`, `==>`, `<<==`, `==>>`, `=>`, `=`, `<<`, `«`, `>>`, `»`,
88 `,`, `>>.`, `».`, `.&`, `.=`, `.^`, `.?`, `.+`, `.*`, `.`, `∘`, `∩`, `⊍`, `∪`, `⊎`, `∖`,
89 `⊖`, `≠`, `≤`, `≥`, `=:=`, `=~=`, `≅`, `∈`, `∉`, `≡`, `≢`, `∋`, `∌`, `⊂`, `⊄`, `⊆`, `⊈`,
90 `⊃`, `⊅`, `⊇`, `⊉`, `:`, `!!!`, `???`, `¯`, `×`, `÷`, `−`, `⁺`, `⁻`,
91 }
92
93 operatorsPattern := Words(``, ``, operators...)
94
95 builtinTypes := []string{
96 `False`, `True`, `Order`, `More`, `Less`, `Same`, `Any`, `Array`, `Associative`, `AST`,
97 `atomicint`, `Attribute`, `Backtrace`, `Backtrace::Frame`, `Bag`, `Baggy`, `BagHash`,
98 `Blob`, `Block`, `Bool`, `Buf`, `Callable`, `CallFrame`, `Cancellation`, `Capture`,
99 `CArray`, `Channel`, `Code`, `compiler`, `Complex`, `ComplexStr`, `CompUnit`,
100 `CompUnit::PrecompilationRepository`, `CompUnit::Repository`, `Empty`,
101 `CompUnit::Repository::FileSystem`, `CompUnit::Repository::Installation`, `Cool`,
102 `CurrentThreadScheduler`, `CX::Warn`, `CX::Take`, `CX::Succeed`, `CX::Return`, `CX::Redo`,
103 `CX::Proceed`, `CX::Next`, `CX::Last`, `CX::Emit`, `CX::Done`, `Cursor`, `Date`, `Dateish`,
104 `DateTime`, `Distribution`, `Distribution::Hash`, `Distribution::Locally`,
105 `Distribution::Path`, `Distribution::Resource`, `Distro`, `Duration`, `Encoding`,
106 `Encoding::GlobalLexerRegistry`, `Endian`, `Enumeration`, `Exception`, `Failure`, `FatRat`, `Grammar`,
107 `Hash`, `HyperWhatever`, `Instant`, `Int`, `int`, `int16`, `int32`, `int64`, `int8`, `str`,
108 `IntStr`, `IO`, `IO::ArgFiles`, `IO::CatHandle`, `IO::Handle`, `IO::Notification`,
109 `IO::Notification::Change`, `IO::Path`, `IO::Path::Cygwin`, `IO::Path::Parts`,
110 `IO::Path::QNX`, `IO::Path::Unix`, `IO::Path::Win32`, `IO::Pipe`, `IO::Socket`,
111 `IO::Socket::Async`, `IO::Socket::Async::ListenSocket`, `IO::Socket::INET`, `IO::Spec`,
112 `IO::Spec::Cygwin`, `IO::Spec::QNX`, `IO::Spec::Unix`, `IO::Spec::Win32`, `IO::Special`,
113 `Iterable`, `Iterator`, `Junction`, `Kernel`, `Label`, `List`, `Lock`, `Lock::Async`,
114 `Lock::ConditionVariable`, `long`, `longlong`, `Macro`, `Map`, `Match`,
115 `Metamodel::AttributeContainer`, `Metamodel::C3MRO`, `Metamodel::ClassHOW`,
116 `Metamodel::ConcreteRoleHOW`, `Metamodel::CurriedRoleHOW`, `Metamodel::DefiniteHOW`,
117 `Metamodel::Documenting`, `Metamodel::EnumHOW`, `Metamodel::Finalization`,
118 `Metamodel::MethodContainer`, `Metamodel::Mixins`, `Metamodel::MROBasedMethodDispatch`,
119 `Metamodel::MultipleInheritance`, `Metamodel::Naming`, `Metamodel::Primitives`,
120 `Metamodel::PrivateMethodContainer`, `Metamodel::RoleContainer`, `Metamodel::RolePunning`,
121 `Metamodel::Stashing`, `Metamodel::Trusting`, `Metamodel::Versioning`, `Method`, `Mix`,
122 `MixHash`, `Mixy`, `Mu`, `NFC`, `NFD`, `NFKC`, `NFKD`, `Nil`, `Num`, `num32`, `num64`,
123 `Numeric`, `NumStr`, `ObjAt`, `Order`, `Pair`, `Parameter`, `Perl`, `Pod::Block`,
124 `Pod::Block::Code`, `Pod::Block::Comment`, `Pod::Block::Declarator`, `Pod::Block::Named`,
125 `Pod::Block::Para`, `Pod::Block::Table`, `Pod::Heading`, `Pod::Item`, `Pointer`,
126 `Positional`, `PositionalBindFailover`, `Proc`, `Proc::Async`, `Promise`, `Proxy`,
127 `PseudoStash`, `QuantHash`, `RaceSeq`, `Raku`, `Range`, `Rat`, `Rational`, `RatStr`,
128 `Real`, `Regex`, `Routine`, `Routine::WrapHandle`, `Scalar`, `Scheduler`, `Semaphore`,
129 `Seq`, `Sequence`, `Set`, `SetHash`, `Setty`, `Signature`, `size_t`, `Slip`, `Stash`,
130 `Str`, `StrDistance`, `Stringy`, `Sub`, `Submethod`, `Supplier`, `Supplier::Preserving`,
131 `Supply`, `Systemic`, `Tap`, `Telemetry`, `Telemetry::Instrument::Thread`,
132 `Telemetry::Instrument::ThreadPool`, `Telemetry::Instrument::Usage`, `Telemetry::Period`,
133 `Telemetry::Sampler`, `Thread`, `Test`, `ThreadPoolScheduler`, `UInt`, `uint16`, `uint32`,
134 `uint64`, `uint8`, `Uni`, `utf8`, `ValueObjAt`, `Variable`, `Version`, `VM`, `Whatever`,
135 `WhateverCode`, `WrapHandle`, `NativeCall`,
136
137 `precompilation`, `experimental`, `worries`, `MONKEY-TYPING`, `MONKEY-SEE-NO-EVAL`,
138 `MONKEY-GUTS`, `fatal`, `lib`, `isms`, `newline`, `nqp`, `soft`,
139 `strict`, `trace`, `variables`,
140 }
141
142 builtinTypesPattern := Words(`(?<!['\w:-])`, `(?::[_UD])?(?!['\w:-])`, builtinTypes...)
143
144 builtinRoutines := []string{
145 `ACCEPTS`, `abs`, `abs2rel`, `absolute`, `accept`, `accepts_type`, `accessed`, `acos`,
146 `acosec`, `acosech`, `acosh`, `acotan`, `acotanh`, `acquire`, `act`, `action`, `actions`,
147 `add`, `add_attribute`, `add_enum_value`, `add_fallback`, `add_method`, `add_parent`,
148 `add_private_method`, `add_role`, `add_stash`, `add_trustee`, `addendum`, `adverb`, `after`,
149 `all`, `allocate`, `allof`, `allowed`, `alternative-names`, `annotations`, `antipair`,
150 `antipairs`, `any`, `anyof`, `api`, `app_lifetime`, `append`, `arch`, `archetypes`,
151 `archname`, `args`, `ARGS-TO-CAPTURE`, `arity`, `Array`, `asec`, `asech`, `asin`, `asinh`,
152 `ASSIGN-KEY`, `ASSIGN-POS`, `assuming`, `ast`, `at`, `atan`, `atan2`, `atanh`, `AT-KEY`,
153 `atomic-assign`, `atomic-dec-fetch`, `atomic-fetch`, `atomic-fetch-add`, `atomic-fetch-dec`,
154 `atomic-fetch-inc`, `atomic-fetch-sub`, `atomic-inc-fetch`, `AT-POS`, `attributes`, `auth`,
155 `await`, `backend`, `backtrace`, `Bag`, `bag`, `Baggy`, `BagHash`, `bail-out`, `base`,
156 `basename`, `base-repeating`, `base_type`, `batch`, `BIND-KEY`, `BIND-POS`, `bind-stderr`,
157 `bind-stdin`, `bind-stdout`, `bind-udp`, `bits`, `bless`, `block`, `Bool`, `bool-only`,
158 `bounds`, `break`, `Bridge`, `broken`, `BUILD`, `TWEAK`, `build-date`, `bytes`, `cache`,
159 `callframe`, `calling-package`, `CALL-ME`, `callsame`, `callwith`, `can`, `cancel`,
160 `candidates`, `cando`, `can-ok`, `canonpath`, `caps`, `caption`, `Capture`, `capture`,
161 `cas`, `catdir`, `categorize`, `categorize-list`, `catfile`, `catpath`, `cause`, `ceiling`,
162 `cglobal`, `changed`, `Channel`, `channel`, `chars`, `chdir`, `child`, `child-name`,
163 `child-typename`, `chmod`, `chomp`, `chop`, `chr`, `chrs`, `chunks`, `cis`, `classify`,
164 `classify-list`, `cleanup`, `clone`, `close`, `closed`, `close-stdin`, `cmp-ok`, `code`,
165 `codename`, `codes`, `coerce_type`, `coll`, `collate`, `column`, `comb`, `combinations`,
166 `command`, `comment`, `compiler`, `Complex`, `compose`, `composalizer`, `compose_type`,
167 `compose_values`, `composer`, `compute_mro`, `condition`, `config`, `configure_destroy`,
168 `configure_type_checking`, `conj`, `connect`, `constraints`, `construct`, `contains`,
169 `content`, `contents`, `copy`, `cos`, `cosec`, `cosech`, `cosh`, `cotan`, `cotanh`, `count`,
170 `count-only`, `cpu-cores`, `cpu-usage`, `CREATE`, `create_type`, `cross`, `cue`, `curdir`,
171 `curupdir`, `d`, `Date`, `DateTime`, `day`, `daycount`, `day-of-month`, `day-of-week`,
172 `day-of-year`, `days-in-month`, `dd-mm-yyyy`, `declaration`, `decode`, `decoder`, `deepmap`,
173 `default`, `defined`, `DEFINITE`, `definite`, `delayed`, `delete`, `delete-by-compiler`,
174 `DELETE-KEY`, `DELETE-POS`, `denominator`, `desc`, `DESTROY`, `destroyers`, `devnull`,
175 `diag`, `did-you-mean`, `die`, `dies-ok`, `dir`, `dirname`, `distribution`, `dir-sep`,
176 `DISTROnames`, `do`, `does`, `does-ok`, `done`, `done-testing`, `duckmap`, `dynamic`, `e`,
177 `eager`, `earlier`, `elems`, `emit`, `enclosing`, `encode`, `encoder`, `encoding`, `end`,
178 `endian`, `ends-with`, `enum_from_value`, `enum_value_list`, `enum_values`, `enums`, `EOF`,
179 `eof`, `EVAL`, `eval-dies-ok`, `EVALFILE`, `eval-lives-ok`, `event`, `exception`,
180 `excludes-max`, `excludes-min`, `EXISTS-KEY`, `EXISTS-POS`, `exit`, `exitcode`, `exp`,
181 `expected`, `explicitly-manage`, `expmod`, `export_callback`, `extension`, `f`, `fail`,
182 `FALLBACK`, `fails-like`, `fc`, `feature`, `file`, `filename`, `files`, `find`,
183 `find_method`, `find_method_qualified`, `finish`, `first`, `flat`, `first-date-in-month`,
184 `flatmap`, `flip`, `floor`, `flunk`, `flush`, `flush_cache`, `fmt`, `format`, `formatter`,
185 `free-memory`, `freeze`, `from`, `from-list`, `from-loop`, `from-posix`, `from-slurpy`,
186 `full`, `full-barrier`, `GENERATE-USAGE`, `generate_mixin`, `get`, `get_value`, `getc`,
187 `gist`, `got`, `grab`, `grabpairs`, `grep`, `handle`, `handled`, `handles`, `hardware`,
188 `has_accessor`, `Hash`, `hash`, `head`, `headers`, `hh-mm-ss`, `hidden`, `hides`, `hostname`,
189 `hour`, `how`, `hyper`, `id`, `illegal`, `im`, `in`, `in-timezone`, `indent`, `index`,
190 `indices`, `indir`, `infinite`, `infix`, `postcirumfix`, `cicumfix`, `install`,
191 `install_method_cache`, `Instant`, `instead`, `Int`, `int-bounds`, `interval`, `in-timezone`,
192 `invalid-str`, `invert`, `invocant`, `IO`, `IO::Notification.watch-path`, `is_trusted`,
193 `is_type`, `isa`, `is-absolute`, `isa-ok`, `is-approx`, `is-deeply`, `is-hidden`,
194 `is-initial-thread`, `is-int`, `is-lazy`, `is-leap-year`, `isNaN`, `isnt`, `is-prime`,
195 `is-relative`, `is-routine`, `is-setting`, `is-win`, `item`, `iterator`, `join`, `keep`,
196 `kept`, `KERNELnames`, `key`, `keyof`, `keys`, `kill`, `kv`, `kxxv`, `l`, `lang`, `last`,
197 `lastcall`, `later`, `lazy`, `lc`, `leading`, `level`, `like`, `line`, `lines`, `link`,
198 `List`, `list`, `listen`, `live`, `lives-ok`, `load`, `load-repo-id`, `load-unit`, `loaded`,
199 `loads`, `local`, `lock`, `log`, `log10`, `lookup`, `lsb`, `made`, `MAIN`, `make`, `Map`,
200 `map`, `match`, `max`, `maxpairs`, `merge`, `message`, `method`, `meta`, `method_table`,
201 `methods`, `migrate`, `min`, `minmax`, `minpairs`, `minute`, `misplaced`, `Mix`, `mix`,
202 `MixHash`, `mixin`, `mixin_attribute`, `Mixy`, `mkdir`, `mode`, `modified`, `month`, `move`,
203 `mro`, `msb`, `multi`, `multiness`, `name`, `named`, `named_names`, `narrow`,
204 `nativecast`, `native-descriptor`, `nativesizeof`, `need`, `new`, `new_type`,
205 `new-from-daycount`, `new-from-pairs`, `next`, `nextcallee`, `next-handle`, `nextsame`,
206 `nextwith`, `next-interesting-index`, `NFC`, `NFD`, `NFKC`, `NFKD`, `nice`, `nl-in`,
207 `nl-out`, `nodemap`, `nok`, `normalize`, `none`, `norm`, `not`, `note`, `now`, `nude`,
208 `Num`, `numerator`, `Numeric`, `of`, `offset`, `offset-in-hours`, `offset-in-minutes`,
209 `ok`, `old`, `on-close`, `one`, `on-switch`, `open`, `opened`, `operation`, `optional`,
210 `ord`, `ords`, `orig`, `os-error`, `osname`, `out-buffer`, `pack`, `package`, `package-kind`,
211 `package-name`, `packages`, `Pair`, `pair`, `pairs`, `pairup`, `parameter`, `params`,
212 `parent`, `parent-name`, `parents`, `parse`, `parse-base`, `parsefile`, `parse-names`,
213 `parts`, `pass`, `path`, `path-sep`, `payload`, `peer-host`, `peer-port`, `periods`, `perl`,
214 `permutations`, `phaser`, `pick`, `pickpairs`, `pid`, `placeholder`, `plan`, `plus`,
215 `polar`, `poll`, `polymod`, `pop`, `pos`, `positional`, `posix`, `postfix`, `postmatch`,
216 `precomp-ext`, `precomp-target`, `precompiled`, `pred`, `prefix`, `prematch`, `prepend`,
217 `primary`, `print`, `printf`, `print-nl`, `print-to`, `private`, `private_method_names`,
218 `private_method_table`, `proc`, `produce`, `Promise`, `promise`, `prompt`, `protect`,
219 `protect-or-queue-on-recursion`, `publish_method_cache`, `pull-one`, `push`, `push-all`,
220 `push-at-least`, `push-exactly`, `push-until-lazy`, `put`, `qualifier-type`, `quaternary`,
221 `quit`, `r`, `race`, `radix`, `raku`, `rand`, `Range`, `range`, `Rat`, `raw`, `re`, `read`,
222 `read-bits`, `read-int128`, `read-int16`, `read-int32`, `read-int64`, `read-int8`,
223 `read-num32`, `read-num64`, `read-ubits`, `read-uint128`, `read-uint16`, `read-uint32`,
224 `read-uint64`, `read-uint8`, `readchars`, `readonly`, `ready`, `Real`, `reallocate`,
225 `reals`, `reason`, `rebless`, `receive`, `recv`, `redispatcher`, `redo`, `reduce`,
226 `rel2abs`, `relative`, `release`, `remove`, `rename`, `repeated`, `replacement`,
227 `replace-with`, `repo`, `repo-id`, `report`, `required`, `reserved`, `resolve`, `restore`,
228 `result`, `resume`, `rethrow`, `return`, `return-rw`, `returns`, `reverse`, `right`,
229 `rindex`, `rmdir`, `role`, `roles_to_compose`, `rolish`, `roll`, `rootdir`, `roots`,
230 `rotate`, `rotor`, `round`, `roundrobin`, `routine-type`, `run`, `RUN-MAIN`, `rw`, `rwx`,
231 `samecase`, `samemark`, `samewith`, `say`, `schedule-on`, `scheduler`, `scope`, `sec`,
232 `sech`, `second`, `secondary`, `seek`, `self`, `send`, `Seq`, `Set`, `set`, `serial`,
233 `set_hidden`, `set_name`, `set_package`, `set_rw`, `set_value`, `set_api`, `set_auth`,
234 `set_composalizer`, `set_export_callback`, `set_is_mixin`, `set_mixin_attribute`,
235 `set_package`, `set_ver`, `set_why`, `SetHash`, `Setty`, `set-instruments`,
236 `setup_finalization`, `setup_mixin_cache`, `shape`, `share`, `shell`, `short-id`,
237 `short-name`, `shortname`, `shift`, `sibling`, `sigil`, `sign`, `signal`, `signals`,
238 `signature`, `sin`, `sinh`, `sink`, `sink-all`, `skip`, `skip-at-least`,
239 `skip-at-least-pull-one`, `skip-one`, `skip-rest`, `sleep`, `sleep-timer`, `sleep-until`,
240 `Slip`, `slip`, `slurp`, `slurp-rest`, `slurpy`, `snap`, `snapper`, `so`, `socket-host`,
241 `socket-port`, `sort`, `source`, `source-package`, `spawn`, `SPEC`, `splice`, `split`,
242 `splitdir`, `splitpath`, `sprintf`, `spurt`, `sqrt`, `squish`, `srand`, `stable`, `start`,
243 `started`, `starts-with`, `status`, `stderr`, `stdout`, `STORE`, `store-file`,
244 `store-repo-id`, `store-unit`, `Str`, `Stringy`, `sub_signature`, `subbuf`, `subbuf-rw`,
245 `subname`, `subparse`, `subst`, `subst-mutate`, `substr`, `substr-eq`, `substr-rw`,
246 `subtest`, `succ`, `sum`, `suffix`, `summary`, `Supply`, `symlink`, `T`, `t`, `tail`,
247 `take`, `take-rw`, `tan`, `tanh`, `tap`, `target`, `target-name`, `tc`, `tclc`, `tell`,
248 `term`, `tertiary`, `then`, `throttle`, `throw`, `throws-like`, `time`, `timezone`,
249 `tmpdir`, `to`, `today`, `todo`, `toggle`, `to-posix`, `total`, `total-memory`, `trailing`,
250 `trans`, `tree`, `trim`, `trim-leading`, `trim-trailing`, `truncate`, `truncated-to`,
251 `trusts`, `try_acquire`, `trying`, `twigil`, `type`, `type_captures`, `type_check`,
252 `typename`, `uc`, `udp`, `uncaught_handler`, `undefine`, `unimatch`, `unicmp`, `uniname`,
253 `uninames`, `uninstall`, `uniparse`, `uniprop`, `uniprops`, `unique`, `unival`, `univals`,
254 `unlike`, `unlink`, `unlock`, `unpack`, `unpolar`, `unset`, `unshift`, `unwrap`, `updir`,
255 `USAGE`, `usage-name`, `use-ok`, `utc`, `val`, `value`, `values`, `VAR`, `variable`, `ver`,
256 `verbose-config`, `Version`, `version`, `VMnames`, `volume`, `vow`, `w`, `wait`, `warn`,
257 `watch`, `watch-path`, `week`, `weekday-of-month`, `week-number`, `week-year`, `WHAT`,
258 `what`, `when`, `WHERE`, `WHEREFORE`, `WHICH`, `WHO`, `whole-second`, `WHY`, `why`,
259 `with-lock-hidden-from-recursion-check`, `wordcase`, `words`, `workaround`, `wrap`,
260 `write`, `write-bits`, `write-int128`, `write-int16`, `write-int32`, `write-int64`,
261 `write-int8`, `write-num32`, `write-num64`, `write-ubits`, `write-uint128`, `write-uint16`,
262 `write-uint32`, `write-uint64`, `write-uint8`, `write-to`, `x`, `yada`, `year`, `yield`,
263 `yyyy-mm-dd`, `z`, `zip`, `zip-latest`, `HOW`, `s`, `DEPRECATED`, `trait_mod`,
264 }
265
266 builtinRoutinesPattern := Words(`(?<!['\w:-])`, `(?!['\w-])`, builtinRoutines...)
267
268
269 brackets := map[rune]rune{
270 '\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d',
271 '\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b',
272 '\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019',
273 '\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d',
274 '\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a',
275 '\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e',
276 '\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d',
277 '\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd',
278 '\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265',
279 '\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b',
280 '\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273',
281 '\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279',
282 '\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f',
283 '\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285',
284 '\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b',
285 '\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8',
286 '\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4',
287 '\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1',
288 '\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7',
289 '\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1',
290 '\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db',
291 '\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1',
292 '\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7',
293 '\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed',
294 '\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb',
295 '\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe',
296 '\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a',
297 '\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b',
298 '\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771',
299 '\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4',
300 '\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de',
301 '\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7',
302 '\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984',
303 '\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a',
304 '\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990',
305 '\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996',
306 '\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5',
307 '\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5',
308 '\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9',
309 '\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e',
310 '\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65',
311 '\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80',
312 '\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c',
313 '\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96',
314 '\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c',
315 '\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9',
316 '\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0',
317 '\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe',
318 '\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4',
319 '\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0',
320 '\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6',
321 '\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa',
322 '\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a',
323 '\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21',
324 '\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d',
325 '\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015',
326 '\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b',
327 '\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18',
328 '\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a',
329 '\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40',
330 '\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48',
331 '\ufe59': '\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e',
332 '\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d',
333 '\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63',
334 }
335
336 bracketsPattern := `[` + regexp.QuoteMeta(joinRuneMap(brackets)) + `]`
337
338
339
340 findBrackets := func(tokenClass RakuToken) MutatorFunc {
341 return func(state *LexerState) error {
342 var openingChars []rune
343 var adverbs []rune
344 switch tokenClass {
345 case rakuPod:
346 openingChars = []rune(strings.Join(state.Groups[1:5], ``))
347 default:
348 adverbs = []rune(state.NamedGroups[`adverbs`])
349 openingChars = []rune(state.NamedGroups[`opening_delimiters`])
350 }
351
352 openingChar := openingChars[0]
353
354 nChars := len(openingChars)
355
356 var closingChar rune
357 var closingCharExists bool
358 var closingChars []rune
359
360 switch tokenClass {
361 case rakuPod:
362 closingCharExists = true
363 default:
364 closingChar, closingCharExists = brackets[openingChar]
365 }
366
367 switch tokenClass {
368 case rakuPodFormatter:
369 formatter := StringOther
370
371 switch state.NamedGroups[`keyword`] {
372 case "B":
373 formatter = GenericStrong
374 case "I":
375 formatter = GenericEmph
376 case "U":
377 formatter = GenericUnderline
378 }
379
380 formatterRule := ruleReplacingConfig{
381 pattern: `.+?`,
382 tokenType: formatter,
383 mutator: nil,
384 stateName: `pod-formatter`,
385 rulePosition: bottomRule,
386 }
387
388 err := replaceRule(formatterRule)(state)
389 if err != nil {
390 panic(err)
391 }
392
393 err = replaceRule(ruleReplacingConfig{
394 delimiter: []rune{closingChar},
395 tokenType: Punctuation,
396 stateName: `pod-formatter`,
397 pushState: true,
398 numberOfDelimiterChars: nChars,
399 appendMutator: popRule(formatterRule),
400 })(state)
401 if err != nil {
402 panic(err)
403 }
404
405 return nil
406 case rakuMatchRegex:
407 var delimiter []rune
408 if closingCharExists {
409 delimiter = []rune{closingChar}
410 } else {
411 delimiter = openingChars
412 }
413
414 err := replaceRule(ruleReplacingConfig{
415 delimiter: delimiter,
416 tokenType: Punctuation,
417 stateName: `regex`,
418 popState: true,
419 pushState: true,
420 })(state)
421 if err != nil {
422 panic(err)
423 }
424
425 return nil
426 case rakuSubstitutionRegex:
427 delimiter := regexp2.Escape(string(openingChars))
428
429 err := replaceRule(ruleReplacingConfig{
430 pattern: `(` + delimiter + `)` + `((?:\\\\|\\/|.)*?)` + `(` + delimiter + `)`,
431 tokenType: ByGroups(Punctuation, UsingSelf(`qq`), Punctuation),
432 rulePosition: topRule,
433 stateName: `regex`,
434 popState: true,
435 pushState: true,
436 })(state)
437 if err != nil {
438 panic(err)
439 }
440
441 return nil
442 }
443
444 text := state.Text
445
446 var endPos int
447
448 var nonMirroredOpeningCharPosition int
449
450 if !closingCharExists {
451
452
453 closingChars = openingChars
454 nonMirroredOpeningCharPosition = indexAt(text, closingChars, state.Pos)
455 endPos = nonMirroredOpeningCharPosition
456 } else {
457 var podRegex *regexp2.Regexp
458 if tokenClass == rakuPod {
459 podRegex = regexp2.MustCompile(
460 state.NamedGroups[`ws`]+`=end`+`\s+`+regexp2.Escape(state.NamedGroups[`name`]),
461 0,
462 )
463 } else {
464 closingChars = []rune(strings.Repeat(string(closingChar), nChars))
465 }
466
467
468
469 nestingLevel := 1
470
471 searchPos := state.Pos - nChars
472
473 var nextClosePos int
474
475 for nestingLevel > 0 {
476 if tokenClass == rakuPod {
477 match, err := podRegex.FindRunesMatchStartingAt(text, searchPos+nChars)
478 if err == nil {
479 closingChars = match.Runes()
480 nextClosePos = match.Index
481 } else {
482 nextClosePos = -1
483 }
484 } else {
485 nextClosePos = indexAt(text, closingChars, searchPos+nChars)
486 }
487
488 nextOpenPos := indexAt(text, openingChars, searchPos+nChars)
489
490 switch {
491 case nextClosePos == -1:
492 nextClosePos = len(text)
493 nestingLevel = 0
494 case nextOpenPos != -1 && nextOpenPos < nextClosePos:
495 nestingLevel++
496 nChars = len(openingChars)
497 searchPos = nextOpenPos
498 default:
499 nestingLevel--
500 nChars = len(closingChars)
501 searchPos = nextClosePos
502 }
503 }
504
505 endPos = nextClosePos
506 }
507
508 if endPos < 0 {
509
510
511 endPos = len(text)
512 }
513
514 adverbre := regexp.MustCompile(`:to\b|:heredoc\b`)
515 var heredocTerminator []rune
516 var endHeredocPos int
517 if adverbre.MatchString(string(adverbs)) {
518 if endPos != len(text) {
519 heredocTerminator = text[state.Pos:endPos]
520 nChars = len(heredocTerminator)
521 } else {
522 endPos = state.Pos + 1
523 heredocTerminator = []rune{}
524 nChars = 0
525 }
526
527 if nChars > 0 {
528 endHeredocPos = indexAt(text[endPos:], heredocTerminator, 0)
529 if endHeredocPos > -1 {
530 endPos += endHeredocPos
531 } else {
532 endPos = len(text)
533 }
534 }
535 }
536
537 textBetweenBrackets := string(text[state.Pos:endPos])
538 switch tokenClass {
539 case rakuPod, rakuPodDeclaration, rakuNameAttribute:
540 state.NamedGroups[`value`] = textBetweenBrackets
541 state.NamedGroups[`closing_delimiters`] = string(closingChars)
542 case rakuQuote:
543 if len(heredocTerminator) > 0 {
544
545 heredocFristPunctuationLen := nChars + len(openingChars) + 1
546
547 state.NamedGroups[`opening_delimiters`] = string(openingChars) +
548 string(text[state.Pos:state.Pos+heredocFristPunctuationLen])
549
550 state.NamedGroups[`value`] =
551 string(text[state.Pos+heredocFristPunctuationLen : endPos])
552
553 if endHeredocPos > -1 {
554 state.NamedGroups[`closing_delimiters`] = string(heredocTerminator)
555 }
556 } else {
557 state.NamedGroups[`value`] = textBetweenBrackets
558 if nChars > 0 {
559 state.NamedGroups[`closing_delimiters`] = string(closingChars)
560 }
561 }
562 default:
563 state.Groups = []string{state.Groups[0] + string(text[state.Pos:endPos+nChars])}
564 }
565
566 state.Pos = endPos + nChars
567
568 return nil
569 }
570 }
571
572
573
574
575 return Rules{
576 "root": {
577
578 {`\A\z`, nil, nil},
579 Include("common"),
580 {`{`, Punctuation, Push(`root`)},
581 {`\(`, Punctuation, Push(`root`)},
582 {`[)}]`, Punctuation, Pop(1)},
583 {`;`, Punctuation, nil},
584 {`\[|\]`, Operator, nil},
585 {`.+?`, Text, nil},
586 },
587 "common": {
588 {`^#![^\n]*$`, CommentHashbang, nil},
589 Include("pod"),
590
591 {
592 "#`(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)`,
593 CommentMultiline,
594 findBrackets(rakuMultilineComment),
595 },
596 {`#[^\n]*$`, CommentSingle, nil},
597
598 {
599 `(?<=(?:^|\(|=|:|~~|\[|{|,|=>)\s*)(/)(?!\]|\))((?:\\\\|\\/|.)*?)((?<!(?<!\\)\\)/(?!'|"))`,
600 ByGroups(Punctuation, UsingSelf("regex"), Punctuation),
601 nil,
602 },
603 Include("variable"),
604
605 {`::\?\w+(?::[_UD])?`, NameVariableGlobal, nil},
606
607 {
608 `\b(v)(\d+)((?:\.(?:\*|[\d\w]+))*)(\+)?`,
609 ByGroups(Keyword, NumberInteger, NameEntity, Operator),
610 nil,
611 },
612 Include("number"),
613
614 {`(>>)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
615 {`(»)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
616
617 {`(<<)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
618 {`(«)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
619
620 {`(>>)(\S+?)(>>)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
621 {`(»)(\S+?)(»)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
622
623 {`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<<)(?!(?:(?!>>)[^\n])+?[},;] *\n)(?!(?:(?!>>).)+?>>\S+?>>)`, Punctuation, Push("<<")},
624
625 {`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(«)(?![^»]+?[},;] *\n)(?![^»]+?»\S+?»)`, Punctuation, Push("«")},
626
627 {`(?<=\[\\?)<(?=\])`, Operator, nil},
628
629 {
630 `(?<=[$@%&]?\w[\w':-]* +)(<=?)( *[^ ]+? *)(>=?)(?= *[$@%&]?\w[\w':-]*)`,
631 ByGroups(Operator, UsingSelf("root"), Operator),
632 nil,
633 },
634
635 {
636 `(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<)((?:(?![,;)}] *(?:#[^\n]+)?\n)[^<>])+?)(>)(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?\w[\w':-]*[^(]|\s+\[))`,
637 ByGroups(Punctuation, String, Punctuation),
638 nil,
639 },
640 {`C?X::['\w:-]+`, NameException, nil},
641 Include("metaoperator"),
642
643 {
644 `(\w[\w'-]*)(\s*)(=>)`,
645 ByGroups(String, Text, Operator),
646 nil,
647 },
648 Include("colon-pair"),
649
650 {
651 `(?<=(?:^|\s)(?:regex|token|rule)(\s+))` + namePattern + colonPairLookahead + `\s*[({])`,
652 NameFunction,
653 Push("token", "name-adverb"),
654 },
655
656 {`(?<=^|\b|\s)(?<!\.)(ss|S|s|TR|tr)\b(\s*)`, ByGroups(Keyword, Text), Push("substitution")},
657 {keywordsPattern, Keyword, nil},
658 {builtinTypesPattern, KeywordType, nil},
659 {builtinRoutinesPattern, NameBuiltin, nil},
660
661 {
662 `(?<=(?:^|\s)(?:class|grammar|role|does|but|is|subset|of)\s+)` + namePattern,
663 NameClass,
664 Push("name-adverb"),
665 },
666
667 {
668 `(?<=(?:^|\s)(?:sub|method|multi sub|multi)\s+)!?` + namePattern + colonPairLookahead + `\s*[({])`,
669 NameFunction,
670 Push("name-adverb"),
671 },
672
673 {`(?<=\bconstant\s+)` + namePattern, NameConstant, Push("name-adverb")},
674
675 {`(?<=\b(?:use|module|package)\s+)` + namePattern, NameNamespace, Push("name-adverb")},
676 Include("operator"),
677 Include("single-quote"),
678 {`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")},
679
680 {`(?<=^|\b|\s)(ms|m|rx)\b(\s*)`, ByGroups(Keyword, Text), Push("rx")},
681
682 {
683 `(?<=^|\b|\s)(?<keyword>(?:qq|q|Q))(?<adverbs>(?::?(?:heredoc|to|qq|ww|q|w|s|a|h|f|c|b|to|v|x))*)(?<ws>\s*)(?<opening_delimiters>(?<delimiter>[^0-9a-zA-Z:\s])\k<delimiter>*)`,
684 EmitterFunc(quote),
685 findBrackets(rakuQuote),
686 },
687
688 {
689 `\b` + namePattern + colonPairLookahead + `\()`,
690 NameFunction,
691 Push("name-adverb"),
692 },
693
694 {
695 `(?<!\.\.[?^*+]?)(?<=(?:\.[?^*+&]?)|self!)` + namePattern + colonPairLookahead + `\b)`,
696 NameFunction,
697 Push("name-adverb"),
698 },
699
700 {namePattern + `(?=\s+\W?['\w:-]+:\W)`, NameFunction, Push("name-adverb")},
701 {`(?<=\W)(?:∅|i|e|𝑒|tau|τ|pi|π|Inf|∞)(?=\W)`, NameConstant, nil},
702 {`(「)([^」]*)(」)`, ByGroups(Punctuation, String, Punctuation), nil},
703 {`(?<=^ *)\b` + namePattern + `(?=:\s*(?:for|while|loop))`, NameLabel, nil},
704
705 {
706 `(?<=\b(?:my|our|constant|let|temp)\s+)\\` + namePattern,
707 NameVariable,
708 Push("name-adverb"),
709 },
710 {namePattern, Name, Push("name-adverb")},
711 },
712 "rx": {
713 Include("colon-pair-attribute"),
714 {
715 `(?<opening_delimiters>(?<delimiter>[^\w:\s])\k<delimiter>*)`,
716 ByGroupNames(
717 map[string]Emitter{
718 `opening_delimiters`: Punctuation,
719 `delimiter`: nil,
720 },
721 ),
722 findBrackets(rakuMatchRegex),
723 },
724 },
725 "substitution": {
726 Include("colon-pair-attribute"),
727
728 {
729 `(?<opening_delimiters>(?<delimiter>` + bracketsPattern + `)\k<delimiter>*)`,
730 ByGroupNames(map[string]Emitter{
731 `opening_delimiters`: Punctuation,
732 `delimiter`: nil,
733 }),
734 findBrackets(rakuMatchRegex),
735 },
736
737 {
738 `(?<opening_delimiters>[^\w:\s])`,
739 Punctuation,
740 findBrackets(rakuSubstitutionRegex),
741 },
742 },
743 "number": {
744 {`0_?[0-7]+(_[0-7]+)*`, LiteralNumberOct, nil},
745 {`0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*`, LiteralNumberHex, nil},
746 {`0b[01]+(_[01]+)*`, LiteralNumberBin, nil},
747 {
748 `(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?`,
749 LiteralNumberFloat,
750 nil,
751 },
752 {`(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*`, LiteralNumberFloat, nil},
753 {`(?<=\d+)i`, NameConstant, nil},
754 {`\d+(_\d+)*`, LiteralNumberInteger, nil},
755 },
756 "name-adverb": {
757 Include("colon-pair-attribute-keyvalue"),
758 Default(Pop(1)),
759 },
760 "colon-pair": {
761
762 {colonPairPattern, colonPair(String), findBrackets(rakuNameAttribute)},
763
764 {
765 `(:)(\d+)(\w[\w'-]*)`,
766 ByGroups(Punctuation, UsingSelf("number"), String),
767 nil,
768 },
769
770 {`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, String), nil},
771 {`\s+`, Text, nil},
772 },
773 "colon-pair-attribute": {
774
775 {colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)},
776
777 {
778 `(:)(\d+)(\w[\w'-]*)`,
779 ByGroups(Punctuation, UsingSelf("number"), NameAttribute),
780 nil,
781 },
782
783 {`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, NameAttribute), nil},
784 {`\s+`, Text, nil},
785 },
786 "colon-pair-attribute-keyvalue": {
787
788 {colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)},
789 },
790 "escape-qq": {
791 {
792 `(?<!(?<!\\)\\)(\\qq)(\[)(.+?)(\])`,
793 ByGroups(StringEscape, Punctuation, UsingSelf("qq"), Punctuation),
794 nil,
795 },
796 },
797 `escape-char`: {
798 {`(?<!(?<!\\)\\)(\\[abfrnrt])`, StringEscape, nil},
799 },
800 `escape-single-quote`: {
801 {`(?<!(?<!\\)\\)(\\)(['\\])`, ByGroups(StringEscape, StringSingle), nil},
802 },
803 "escape-c-name": {
804 {
805 `(?<!(?<!\\)\\)(\\[c|C])(\[)(.+?)(\])`,
806 ByGroups(StringEscape, Punctuation, String, Punctuation),
807 nil,
808 },
809 },
810 "escape-hexadecimal": {
811 {
812 `(?<!(?<!\\)\\)(\\[x|X])(\[)([0-9a-fA-F]+)(\])`,
813 ByGroups(StringEscape, Punctuation, NumberHex, Punctuation),
814 nil,
815 },
816 {`(\\[x|X])([0-9a-fA-F]+)`, ByGroups(StringEscape, NumberHex), nil},
817 },
818 "regex": {
819
820 {`\A\z`, nil, nil},
821 Include("regex-escape-class"),
822 Include(`regex-character-escape`),
823
824 {
825 `([$@])((?<!(?<!\\)\\)\()`,
826 ByGroups(Keyword, Punctuation),
827 replaceRule(ruleReplacingConfig{
828 delimiter: []rune(`)`),
829 tokenType: Punctuation,
830 stateName: `root`,
831 pushState: true,
832 }),
833 },
834
835 {`\$(?=/)`, NameEntity, nil},
836
837 {`\$(?=\z|\s|[^<(\w*!.])`, NameEntity, nil},
838 Include("variable"),
839 Include("escape-c-name"),
840 Include("escape-hexadecimal"),
841 Include("number"),
842 Include("single-quote"),
843
844 {
845 `(?<!(?<!\\)\\)(:)(my|our|state|constant|temp|let)`,
846 ByGroups(Operator, KeywordDeclaration),
847 replaceRule(ruleReplacingConfig{
848 delimiter: []rune(`;`),
849 tokenType: Punctuation,
850 stateName: `root`,
851 pushState: true,
852 }),
853 },
854
855 {
856 `(?<!(?<!\\)\\)(<)([?!.]*)((?<!(?<!\\)\\){)`,
857 ByGroups(Punctuation, Operator, Punctuation),
858 replaceRule(ruleReplacingConfig{
859 delimiter: []rune(`}>`),
860 tokenType: Punctuation,
861 stateName: `root`,
862 pushState: true,
863 }),
864 },
865
866 Include(`closure`),
867
868 {`(:)(\w+)`, ByGroups(Punctuation, NameAttribute), nil},
869
870 {`\|\||\||&&|&|\.\.|\*\*|%%|%|:|!|<<|«|>>|»|\+|\*\*|\*|\?|=|~|<~~>`, Operator, nil},
871
872 {`\^\^|\^|\$\$|\$`, NameEntity, nil},
873 {`\.`, NameEntity, nil},
874 {`#[^\n]*\n`, CommentSingle, nil},
875
876 {
877 `(?<!(?<!\\)\\)(<)(\s*)([?!.]+)(\s*)(after|before)`,
878 ByGroups(Punctuation, Text, Operator, Text, OperatorWord),
879 replaceRule(ruleReplacingConfig{
880 delimiter: []rune(`>`),
881 tokenType: Punctuation,
882 stateName: `regex`,
883 pushState: true,
884 }),
885 },
886 {
887 `(?<!(?<!\\)\\)(<)([|!?.]*)(wb|ww|ws|w)(>)`,
888 ByGroups(Punctuation, Operator, OperatorWord, Punctuation),
889 nil,
890 },
891
892 {
893 `(?<!(?<!\\)\\)(<)([?!.]*)([$@]\w[\w:-]*)(>)`,
894 ByGroups(Punctuation, Operator, NameVariable, Punctuation),
895 nil,
896 },
897
898 {`(?<!(?<!\\)\\)<\(|\)>`, Operator, nil},
899 {
900 `(?<!(?<!\\)\\)(<)(\w[\w:-]*)(=\.?)`,
901 ByGroups(Punctuation, NameVariable, Operator),
902 Push(`regex-variable`),
903 },
904 {
905 `(?<!(?<!\\)\\)(<)([|!?.&]*)(\w(?:(?!:\s)[\w':-])*)`,
906 ByGroups(Punctuation, Operator, NameFunction),
907 Push(`regex-function`),
908 },
909 {`(?<!(?<!\\)\\)<`, Punctuation, Push("regex-property")},
910 {`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")},
911 {`(?<!(?<!\\)\\)(?:\]|\))`, Punctuation, Pop(1)},
912 {`(?<!(?<!\\)\\)(?:\[|\()`, Punctuation, Push("regex")},
913 {`.+?`, StringRegex, nil},
914 },
915 "regex-class-builtin": {
916 {
917 `\b(?:alnum|alpha|blank|cntrl|digit|graph|lower|print|punct|space|upper|xdigit|same|ident)\b`,
918 NameBuiltin,
919 nil,
920 },
921 },
922 "regex-function": {
923
924 {`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)},
925
926 {
927 `\(`,
928 Punctuation,
929 replaceRule(ruleReplacingConfig{
930 delimiter: []rune(`)>`),
931 tokenType: Punctuation,
932 stateName: `root`,
933 popState: true,
934 pushState: true,
935 }),
936 },
937
938 {
939 `\s+`,
940 StringRegex,
941 replaceRule(ruleReplacingConfig{
942 delimiter: []rune(`>`),
943 tokenType: Punctuation,
944 stateName: `regex`,
945 popState: true,
946 pushState: true,
947 }),
948 },
949
950 {
951 `:`,
952 Punctuation,
953 replaceRule(ruleReplacingConfig{
954 delimiter: []rune(`>`),
955 tokenType: Punctuation,
956 stateName: `root`,
957 popState: true,
958 pushState: true,
959 }),
960 },
961 },
962 "regex-variable": {
963 Include(`regex-starting-operators`),
964
965 {
966 `(&)?(\w(?:(?!:\s)[\w':-])*)(?=\()`,
967 ByGroups(Operator, NameFunction),
968 Mutators(Pop(1), Push(`regex-function`)),
969 },
970
971 {`(&)?(\w[\w':-]*)(>)`, ByGroups(Operator, NameFunction, Punctuation), Pop(1)},
972
973 Default(Pop(1), Push(`regex-property`)),
974 },
975 "regex-property": {
976 {`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)},
977 Include("regex-class-builtin"),
978 Include("variable"),
979 Include(`regex-starting-operators`),
980 Include("colon-pair-attribute"),
981 {`(?<!(?<!\\)\\)\[`, Punctuation, Push("regex-character-class")},
982 {`\+|\-`, Operator, nil},
983 {`@[\w':-]+`, NameVariable, nil},
984 {`.+?`, StringRegex, nil},
985 },
986 `regex-starting-operators`: {
987 {`(?<=<)[|!?.]+`, Operator, nil},
988 },
989 "regex-escape-class": {
990 {`(?i)\\n|\\t|\\h|\\v|\\s|\\d|\\w`, StringEscape, nil},
991 },
992 `regex-character-escape`: {
993 {`(?<!(?<!\\)\\)(\\)(.)`, ByGroups(StringEscape, StringRegex), nil},
994 },
995 "regex-character-class": {
996 {`(?<!(?<!\\)\\)\]`, Punctuation, Pop(1)},
997 Include("regex-escape-class"),
998 Include("escape-c-name"),
999 Include("escape-hexadecimal"),
1000 Include(`regex-character-escape`),
1001 Include("number"),
1002 {`\.\.`, Operator, nil},
1003 {`.+?`, StringRegex, nil},
1004 },
1005 "metaoperator": {
1006
1007 {
1008 `\b([RZX]+)\b(\[)([^\s\]]+?)(\])`,
1009 ByGroups(OperatorWord, Punctuation, UsingSelf("root"), Punctuation),
1010 nil,
1011 },
1012
1013 {`\b([RZX]+)\b([^\s\]]+)`, ByGroups(OperatorWord, UsingSelf("operator")), nil},
1014 },
1015 "operator": {
1016
1017 {wordOperatorsPattern, OperatorWord, nil},
1018
1019 {operatorsPattern, Operator, nil},
1020 },
1021 "pod": {
1022
1023 {`(#[|=])\s`, Keyword, Push("pod-single")},
1024
1025 {
1026 "(?<keyword>#[|=])(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)(?<value>)(?<closing_delimiters>)`,
1027 ByGroupNames(
1028 map[string]Emitter{
1029 `keyword`: Keyword,
1030 `opening_delimiters`: Punctuation,
1031 `delimiter`: nil,
1032 `value`: UsingSelf("pod-declaration"),
1033 `closing_delimiters`: Punctuation,
1034 }),
1035 findBrackets(rakuPodDeclaration),
1036 },
1037 Include("pod-blocks"),
1038 },
1039 "pod-blocks": {
1040
1041 {
1042 `(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?<name>code)(?<config>[^\n]*)(?<value>.*?)(?<ws3>^\k<ws>)(?<end_keyword>=end)(?<ws4> +)\k<name>`,
1043 EmitterFunc(podCode),
1044 nil,
1045 },
1046
1047 {
1048 `(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?!code)(?<name>\w[\w'-]*)(?<config>[^\n]*)(?<value>)(?<closing_delimiters>)`,
1049 ByGroupNames(
1050 map[string]Emitter{
1051 `ws`: Comment,
1052 `keyword`: Keyword,
1053 `ws2`: StringDoc,
1054 `name`: Keyword,
1055 `config`: EmitterFunc(podConfig),
1056 `value`: UsingSelf("pod-begin"),
1057 `closing_delimiters`: Keyword,
1058 }),
1059 findBrackets(rakuPod),
1060 },
1061
1062 {
1063 `(?<=^ *)(?<ws> *)(?<keyword>=(?:for|defn))(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`,
1064 ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)),
1065 Push("pod-paragraph"),
1066 },
1067
1068 {
1069 `(?<=^ *)(?<ws> *)(?<keyword>=config)(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`,
1070 ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)),
1071 nil,
1072 },
1073
1074 {
1075 `(?<=^ *)(?<ws> *)(?<keyword>=alias)(?<ws2> +)(?<name>\w[\w'-]*)(?<value>[^\n]*\n)`,
1076 ByGroups(Comment, Keyword, StringDoc, Keyword, StringDoc),
1077 nil,
1078 },
1079
1080 {
1081 `(?<=^ *)(?<ws> *)(?<keyword>=encoding)(?<ws2> +)(?<name>[^\n]+)`,
1082 ByGroups(Comment, Keyword, StringDoc, Name),
1083 nil,
1084 },
1085
1086 {
1087 `(?<=^ *)(?<ws> *)(?<keyword>=(?:para|table|pod))(?<config>(?<!\n\s*)[^\n]*\n)`,
1088 ByGroups(Comment, Keyword, EmitterFunc(podConfig)),
1089 Push("pod-paragraph"),
1090 },
1091
1092 {
1093 `(?<=^ *)(?<ws> *)(?<keyword>=head\d+)(?<ws2> *)(?<config>#?)`,
1094 ByGroups(Comment, Keyword, GenericHeading, Keyword),
1095 Push("pod-heading"),
1096 },
1097
1098 {
1099 `(?<=^ *)(?<ws> *)(?<keyword>=(?:item\d*|comment|data|[A-Z]+))(?<ws2> *)(?<config>#?)`,
1100 ByGroups(Comment, Keyword, StringDoc, Keyword),
1101 Push("pod-paragraph"),
1102 },
1103 {
1104 `(?<=^ *)(?<ws> *)(?<keyword>=finish)(?<config>[^\n]*)`,
1105 ByGroups(Comment, Keyword, EmitterFunc(podConfig)),
1106 Push("pod-finish"),
1107 },
1108
1109 {
1110 `(?<=^ *)(?<ws> *)(?<name>=\w[\w'-]*)(?<ws2> *)(?<config>#?)`,
1111 ByGroups(Comment, Name, StringDoc, Keyword),
1112 Push("pod-paragraph"),
1113 },
1114
1115 {
1116 `(?<=^ *)(?<keyword> *=)(?<ws> *)(?<config>(?::\w[\w'-]*(?:` + colonPairOpeningBrackets + `.+?` +
1117 colonPairClosingBrackets + `) *)*\n)`,
1118 ByGroups(Keyword, StringDoc, EmitterFunc(podConfig)),
1119 nil,
1120 },
1121 },
1122 "pod-begin": {
1123 Include("pod-blocks"),
1124 Include("pre-pod-formatter"),
1125 {`.+?`, StringDoc, nil},
1126 },
1127 "pod-declaration": {
1128 Include("pre-pod-formatter"),
1129 {`.+?`, StringDoc, nil},
1130 },
1131 "pod-paragraph": {
1132 {`\n *\n|\n(?=^ *=)`, StringDoc, Pop(1)},
1133 Include("pre-pod-formatter"),
1134 {`.+?`, StringDoc, nil},
1135 },
1136 "pod-single": {
1137 {`\n`, StringDoc, Pop(1)},
1138 Include("pre-pod-formatter"),
1139 {`.+?`, StringDoc, nil},
1140 },
1141 "pod-heading": {
1142 {`\n *\n|\n(?=^ *=)`, GenericHeading, Pop(1)},
1143 Include("pre-pod-formatter"),
1144 {`.+?`, GenericHeading, nil},
1145 },
1146 "pod-finish": {
1147 {`\z`, nil, Pop(1)},
1148 Include("pre-pod-formatter"),
1149 {`.+?`, StringDoc, nil},
1150 },
1151 "pre-pod-formatter": {
1152
1153 {
1154 `(?<keyword>[CBIUDTKRPAELZVMSXN])(?<opening_delimiters><+|«)`,
1155 ByGroups(Keyword, Punctuation),
1156 findBrackets(rakuPodFormatter),
1157 },
1158 },
1159 "pod-formatter": {
1160
1161 {`>`, Punctuation, Pop(1)},
1162 Include("pre-pod-formatter"),
1163
1164 {`.+?`, StringOther, nil},
1165 },
1166 "variable": {
1167 {variablePattern, NameVariable, Push("name-adverb")},
1168 {globalVariablePattern, NameVariableGlobal, Push("name-adverb")},
1169 {`[$@]<[^>]+>`, NameVariable, nil},
1170 {`\$[/!¢]`, NameVariable, nil},
1171 {`[$@%]`, NameVariable, nil},
1172 },
1173 "single-quote": {
1174 {`(?<!(?<!\\)\\)'`, Punctuation, Push("single-quote-inner")},
1175 },
1176 "single-quote-inner": {
1177 {`(?<!(?<!(?<!\\)\\)\\)'`, Punctuation, Pop(1)},
1178 Include("escape-single-quote"),
1179 Include("escape-qq"),
1180 {`(?:\\\\|\\[^\\]|[^'\\])+?`, StringSingle, nil},
1181 },
1182 "double-quotes": {
1183 {`(?<!(?<!\\)\\)"`, Punctuation, Pop(1)},
1184 Include("qq"),
1185 },
1186 "<<": {
1187 {`>>(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)},
1188 Include("ww"),
1189 },
1190 "«": {
1191 {`»(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)},
1192 Include("ww"),
1193 },
1194 "ww": {
1195 Include("single-quote"),
1196 Include("qq"),
1197 },
1198 "qq": {
1199 Include("qq-variable"),
1200 Include("closure"),
1201 Include(`escape-char`),
1202 Include("escape-hexadecimal"),
1203 Include("escape-c-name"),
1204 Include("escape-qq"),
1205 {`.+?`, StringDouble, nil},
1206 },
1207 "qq-variable": {
1208 {
1209 `(?<!(?<!\\)\\)(?:` + variablePattern + `|` + globalVariablePattern + `)` + colonPairLookahead + `)`,
1210 NameVariable,
1211 Push("qq-variable-extras", "name-adverb"),
1212 },
1213 },
1214 "qq-variable-extras": {
1215
1216 {
1217 `(?<operator>\.)(?<method_name>` + namePattern + `)` + colonPairLookahead + `\()`,
1218 ByGroupNames(map[string]Emitter{
1219 `operator`: Operator,
1220 `method_name`: NameFunction,
1221 }),
1222 Push(`name-adverb`),
1223 },
1224
1225 {
1226 `\(`, Punctuation, replaceRule(
1227 ruleReplacingConfig{
1228 delimiter: []rune(`)`),
1229 tokenType: Punctuation,
1230 stateName: `root`,
1231 pushState: true,
1232 }),
1233 },
1234 Default(Pop(1)),
1235 },
1236 "Q": {
1237 Include("escape-qq"),
1238 {`.+?`, String, nil},
1239 },
1240 "Q-closure": {
1241 Include("escape-qq"),
1242 Include("closure"),
1243 {`.+?`, String, nil},
1244 },
1245 "Q-variable": {
1246 Include("escape-qq"),
1247 Include("qq-variable"),
1248 {`.+?`, String, nil},
1249 },
1250 "closure": {
1251 {`(?<!(?<!\\)\\){`, Punctuation, replaceRule(
1252 ruleReplacingConfig{
1253 delimiter: []rune(`}`),
1254 tokenType: Punctuation,
1255 stateName: `root`,
1256 pushState: true,
1257 }),
1258 },
1259 },
1260 "token": {
1261
1262 {`\(`, Punctuation, replaceRule(
1263 ruleReplacingConfig{
1264 delimiter: []rune(`)`),
1265 tokenType: Punctuation,
1266 stateName: `root`,
1267 pushState: true,
1268 }),
1269 },
1270 {`{`, Punctuation, replaceRule(
1271 ruleReplacingConfig{
1272 delimiter: []rune(`}`),
1273 tokenType: Punctuation,
1274 stateName: `regex`,
1275 popState: true,
1276 pushState: true,
1277 }),
1278 },
1279 {`\s*`, Text, nil},
1280 Default(Pop(1)),
1281 },
1282 }
1283 }
1284
1285
// joinRuneMap returns a string made of every key of m.
//
// The keys are collected while ranging over the map, so their order in the
// result follows Go's unspecified map iteration order; callers must not rely
// on the position of any particular rune.
func joinRuneMap(m map[rune]rune) string {
	keys := make([]rune, len(m))

	next := 0
	for key := range m {
		keys[next] = key
		next++
	}

	return string(keys)
}
1294
1295
// indexAt returns the rune index in str of the first occurrence of substr at
// or after rune position pos that is not escaped with a backslash. It returns
// -1 when there is no such occurrence or when pos lies outside str.
//
// An occurrence is treated as escaped when the rune directly before it is a
// backslash that is not itself preceded by another backslash. Only runes
// inside the current search window (which starts at pos and restarts right
// after every escaped occurrence) are inspected for backslashes.
func indexAt(str []rune, substr []rune, pos int) int {
	if pos < 0 || pos > len(str) {
		return -1
	}

	needle := string(substr)

	for {
		window := str[pos:]
		text := string(window)

		byteIdx := strings.Index(text, needle)
		if byteIdx < 0 {
			return -1
		}

		// strings.Index reports a byte offset; convert it to a rune offset so
		// it can index window and be added to pos.
		idx := utf8.RuneCountInString(text[:byteIdx])

		escaped := idx >= 1 && window[idx-1] == '\\' &&
			(idx == 1 || window[idx-2] != '\\')
		if !escaped {
			return pos + idx
		}

		// Skip the escaped occurrence and keep searching after it.
		pos += idx + 1
	}
}
1320
1321
// contains reports whether e is equal to at least one element of s.
func contains(s []string, e string) bool {
	found := false
	for i := 0; i < len(s) && !found; i++ {
		found = s[i] == e
	}

	return found
}
1330
// rulePosition selects which slot of a state's rule list a dynamically
// generated rule is written into.
type rulePosition int

const (
	// topRule selects the first rule of a state.
	topRule rulePosition = 0
	// bottomRule selects the last rule of a state. The type is spelled out
	// because a constant with its own expression does not inherit the type
	// of the previous spec and would otherwise be an untyped integer.
	bottomRule rulePosition = -1
)
1337
// ruleMakingConfig describes a single rule to be compiled by makeRule.
type ruleMakingConfig struct {
	// delimiter, when non-empty, is escaped and used as the text the rule
	// matches (only when not preceded by an unescaped backslash). It takes
	// precedence over pattern.
	delimiter []rune
	// pattern is the regular expression used as-is when delimiter is empty.
	pattern string
	// tokenType is the emitter stored in the compiled rule.
	tokenType Emitter
	// mutator is the mutator stored in the compiled rule; it may be nil.
	mutator Mutator
	// numberOfDelimiterChars, when greater than one, is how many times
	// delimiter is repeated to build the text to match.
	numberOfDelimiterChars int
}
1345
// ruleReplacingConfig describes a rule that replaceRule installs into one of
// the slots of a lexer state, together with the state-stack changes to apply
// while doing so.
type ruleReplacingConfig struct {
	// delimiter, pattern, tokenType and numberOfDelimiterChars are forwarded
	// to makeRule to build the replacement rule.
	delimiter []rune
	pattern string
	tokenType Emitter
	numberOfDelimiterChars int
	// mutator, when set, is used for the generated rule instead of the
	// default Pop(1) that replaceRule uses for topRule rules.
	mutator Mutator
	// appendMutator, when set, is run after the generated rule's other
	// mutators.
	appendMutator Mutator
	// rulePosition selects the slot (first or last rule of the state) that
	// is overwritten, and which per-state stack records the replacement.
	rulePosition rulePosition
	// stateName is the state whose rule is replaced; it is also the state
	// pushed when pushState is set.
	stateName string
	// pop is set by popRule when it re-installs a previously recorded rule;
	// replaceRule then does not record the config on the stack again.
	pop bool
	// popState makes replaceRule pop one lexer state after installing the rule.
	popState bool
	// pushState makes replaceRule push stateName after installing the rule.
	pushState bool
}
1359
1360
1361 func popRule(rule ruleReplacingConfig) MutatorFunc {
1362 return func(state *LexerState) error {
1363 stackName := genStackName(rule.stateName, rule.rulePosition)
1364
1365 stack, ok := state.Get(stackName).([]ruleReplacingConfig)
1366
1367 if ok && len(stack) > 0 {
1368
1369 stack = stack[:len(stack)-1]
1370 lastRule := stack[len(stack)-1]
1371 lastRule.pushState = false
1372 lastRule.popState = false
1373 lastRule.pop = true
1374 state.Set(stackName, stack)
1375
1376
1377 err := replaceRule(lastRule)(state)
1378 if err != nil {
1379 panic(err)
1380 }
1381 }
1382
1383 return nil
1384 }
1385 }
1386
1387
1388 func replaceRule(rule ruleReplacingConfig) MutatorFunc {
1389 return func(state *LexerState) error {
1390 stateName := rule.stateName
1391 stackName := genStackName(rule.stateName, rule.rulePosition)
1392
1393 stack, ok := state.Get(stackName).([]ruleReplacingConfig)
1394 if !ok {
1395 stack = []ruleReplacingConfig{}
1396 }
1397
1398
1399 if len(stack) == 0 {
1400 stack = []ruleReplacingConfig{
1401 {
1402
1403 pattern: `\A\z`,
1404 tokenType: nil,
1405 mutator: nil,
1406 stateName: stateName,
1407 rulePosition: rule.rulePosition,
1408 },
1409 }
1410 state.Set(stackName, stack)
1411 }
1412
1413 var mutator Mutator
1414 mutators := []Mutator{}
1415
1416 switch {
1417 case rule.rulePosition == topRule && rule.mutator == nil:
1418
1419 mutators = []Mutator{Pop(1), popRule(rule)}
1420 case rule.rulePosition == topRule && rule.mutator != nil:
1421
1422 mutators = []Mutator{rule.mutator, popRule(rule)}
1423 case rule.mutator != nil:
1424 mutators = []Mutator{rule.mutator}
1425 }
1426
1427 if rule.appendMutator != nil {
1428 mutators = append(mutators, rule.appendMutator)
1429 }
1430
1431 if len(mutators) > 0 {
1432 mutator = Mutators(mutators...)
1433 } else {
1434 mutator = nil
1435 }
1436
1437 ruleConfig := ruleMakingConfig{
1438 pattern: rule.pattern,
1439 delimiter: rule.delimiter,
1440 numberOfDelimiterChars: rule.numberOfDelimiterChars,
1441 tokenType: rule.tokenType,
1442 mutator: mutator,
1443 }
1444
1445 cRule := makeRule(ruleConfig)
1446
1447 switch rule.rulePosition {
1448 case topRule:
1449 state.Rules[stateName][0] = cRule
1450 case bottomRule:
1451 state.Rules[stateName][len(state.Rules[stateName])-1] = cRule
1452 }
1453
1454
1455 if rule.popState {
1456 err := Pop(1).Mutate(state)
1457 if err != nil {
1458 panic(err)
1459 }
1460 }
1461
1462
1463 if rule.pushState {
1464 err := Push(stateName).Mutate(state)
1465 if err != nil {
1466 panic(err)
1467 }
1468 }
1469
1470 if !rule.pop {
1471 state.Set(stackName, append(stack, rule))
1472 }
1473
1474 return nil
1475 }
1476 }
1477
1478
1479 func genStackName(stateName string, rulePosition rulePosition) (stackName string) {
1480 switch rulePosition {
1481 case topRule:
1482 stackName = stateName + `-top-stack`
1483 case bottomRule:
1484 stackName = stateName + `-bottom-stack`
1485 }
1486 return
1487 }
1488
1489
1490 func makeRule(config ruleMakingConfig) *CompiledRule {
1491 var rePattern string
1492
1493 if len(config.delimiter) > 0 {
1494 delimiter := string(config.delimiter)
1495
1496 if config.numberOfDelimiterChars > 1 {
1497 delimiter = strings.Repeat(delimiter, config.numberOfDelimiterChars)
1498 }
1499
1500 rePattern = `(?<!(?<!\\)\\)` + regexp2.Escape(delimiter)
1501 } else {
1502 rePattern = config.pattern
1503 }
1504
1505 regex := regexp2.MustCompile(rePattern, regexp2.None)
1506
1507 cRule := &CompiledRule{
1508 Rule: Rule{rePattern, config.tokenType, config.mutator},
1509 Regexp: regex,
1510 }
1511
1512 return cRule
1513 }
1514
1515
1516 func colonPair(tokenClass TokenType) Emitter {
1517 return EmitterFunc(func(groups []string, state *LexerState) Iterator {
1518 iterators := []Iterator{}
1519 tokens := []Token{
1520 {Punctuation, state.NamedGroups[`colon`]},
1521 {Punctuation, state.NamedGroups[`opening_delimiters`]},
1522 {Punctuation, state.NamedGroups[`closing_delimiters`]},
1523 }
1524
1525
1526 iterators = append(iterators, Literator(tokens[0]))
1527
1528 if tokenClass == NameAttribute {
1529 iterators = append(iterators, Literator(Token{NameAttribute, state.NamedGroups[`key`]}))
1530 } else {
1531 var keyTokenState string
1532 keyre := regexp.MustCompile(`^\d+$`)
1533 if keyre.MatchString(state.NamedGroups[`key`]) {
1534 keyTokenState = "common"
1535 } else {
1536 keyTokenState = "Q"
1537 }
1538
1539
1540 if keyTokenState != "" {
1541 iterator, err := state.Lexer.Tokenise(
1542 &TokeniseOptions{
1543 State: keyTokenState,
1544 Nested: true,
1545 }, state.NamedGroups[`key`])
1546
1547 if err != nil {
1548 panic(err)
1549 } else {
1550
1551 iterators = append(iterators, iterator)
1552 }
1553 }
1554 }
1555
1556
1557 iterators = append(iterators, Literator(tokens[1]))
1558
1559 var valueTokenState string
1560
1561 switch state.NamedGroups[`opening_delimiters`] {
1562 case "(", "{", "[":
1563 valueTokenState = "root"
1564 case "<<", "«":
1565 valueTokenState = "ww"
1566 case "<":
1567 valueTokenState = "Q"
1568 }
1569
1570
1571 if valueTokenState != "" {
1572 iterator, err := state.Lexer.Tokenise(
1573 &TokeniseOptions{
1574 State: valueTokenState,
1575 Nested: true,
1576 }, state.NamedGroups[`value`])
1577
1578 if err != nil {
1579 panic(err)
1580 } else {
1581
1582 iterators = append(iterators, iterator)
1583 }
1584 }
1585
1586 iterators = append(iterators, Literator(tokens[2]))
1587
1588 return Concaterator(iterators...)
1589 })
1590 }
1591
1592
1593 func quote(groups []string, state *LexerState) Iterator {
1594 keyword := state.NamedGroups[`keyword`]
1595 adverbsStr := state.NamedGroups[`adverbs`]
1596 iterators := []Iterator{}
1597 tokens := []Token{
1598 {Keyword, keyword},
1599 {StringAffix, adverbsStr},
1600 {Text, state.NamedGroups[`ws`]},
1601 {Punctuation, state.NamedGroups[`opening_delimiters`]},
1602 {Punctuation, state.NamedGroups[`closing_delimiters`]},
1603 }
1604
1605
1606 iterators = append(iterators, Literator(tokens[:4]...))
1607
1608 var tokenStates []string
1609
1610
1611 adverbs := strings.Split(adverbsStr, ":")
1612 for _, adverb := range adverbs {
1613 switch adverb {
1614 case "c", "closure":
1615 tokenStates = append(tokenStates, "Q-closure")
1616 case "qq":
1617 tokenStates = append(tokenStates, "qq")
1618 case "ww":
1619 tokenStates = append(tokenStates, "ww")
1620 case "s", "scalar", "a", "array", "h", "hash", "f", "function":
1621 tokenStates = append(tokenStates, "Q-variable")
1622 }
1623 }
1624
1625 var tokenState string
1626
1627 switch {
1628 case keyword == "qq" || contains(tokenStates, "qq"):
1629 tokenState = "qq"
1630 case adverbsStr == "ww" || contains(tokenStates, "ww"):
1631 tokenState = "ww"
1632 case contains(tokenStates, "Q-closure") && contains(tokenStates, "Q-variable"):
1633 tokenState = "qq"
1634 case contains(tokenStates, "Q-closure"):
1635 tokenState = "Q-closure"
1636 case contains(tokenStates, "Q-variable"):
1637 tokenState = "Q-variable"
1638 default:
1639 tokenState = "Q"
1640 }
1641
1642 iterator, err := state.Lexer.Tokenise(
1643 &TokeniseOptions{
1644 State: tokenState,
1645 Nested: true,
1646 }, state.NamedGroups[`value`])
1647
1648 if err != nil {
1649 panic(err)
1650 } else {
1651 iterators = append(iterators, iterator)
1652 }
1653
1654
1655 iterators = append(iterators, Literator(tokens[4]))
1656
1657 return Concaterator(iterators...)
1658 }
1659
1660
1661 func podConfig(groups []string, state *LexerState) Iterator {
1662
1663 iterator, err := state.Lexer.Tokenise(
1664 &TokeniseOptions{
1665 State: "colon-pair-attribute",
1666 Nested: true,
1667 }, groups[0])
1668
1669 if err != nil {
1670 panic(err)
1671 } else {
1672 return iterator
1673 }
1674 }
1675
1676
1677 func podCode(groups []string, state *LexerState) Iterator {
1678 iterators := []Iterator{}
1679 tokens := []Token{
1680 {Comment, state.NamedGroups[`ws`]},
1681 {Keyword, state.NamedGroups[`keyword`]},
1682 {Keyword, state.NamedGroups[`ws2`]},
1683 {Keyword, state.NamedGroups[`name`]},
1684 {StringDoc, state.NamedGroups[`value`]},
1685 {Comment, state.NamedGroups[`ws3`]},
1686 {Keyword, state.NamedGroups[`end_keyword`]},
1687 {Keyword, state.NamedGroups[`ws4`]},
1688 {Keyword, state.NamedGroups[`name`]},
1689 }
1690
1691
1692 iterators = append(iterators, Literator(tokens[:4]...))
1693
1694
1695 iterators = append(iterators, podConfig([]string{state.NamedGroups[`config`]}, state))
1696
1697 langMatch := regexp.MustCompile(`:lang\W+(\w+)`).FindStringSubmatch(state.NamedGroups[`config`])
1698 var lang string
1699 if len(langMatch) > 1 {
1700 lang = langMatch[1]
1701 }
1702
1703
1704 sublexer := Get(lang)
1705 if sublexer != nil {
1706 iterator, err := sublexer.Tokenise(nil, state.NamedGroups[`value`])
1707
1708 if err != nil {
1709 panic(err)
1710 } else {
1711 iterators = append(iterators, iterator)
1712 }
1713 } else {
1714 iterators = append(iterators, Literator(tokens[4]))
1715 }
1716
1717
1718 iterators = append(iterators, Literator(tokens[5:]...))
1719
1720 return Concaterator(iterators...)
1721 }
1722
View as plain text