...

Source file src/github.com/alecthomas/chroma/lexers/r/raku.go

Documentation: github.com/alecthomas/chroma/lexers/r

     1  package r
     2  
     3  import (
     4  	"regexp"
     5  	"strings"
     6  	"unicode/utf8"
     7  
     8  	. "github.com/alecthomas/chroma" // nolint
     9  	"github.com/alecthomas/chroma/lexers/internal"
    10  	"github.com/dlclark/regexp2"
    11  )
    12  
    13  // Raku is the lexer for Raku source; it is also selectable through the "perl6"
        // and "pl6" aliases. It is created by MustNewLazyLexer from the Config below
        // together with rakuRules, and the result is passed to internal.Register.
    14  var Raku Lexer = internal.Register(MustNewLazyLexer(
    15  	&Config{
    16  		Name:    "Raku",
    17  		Aliases: []string{"perl6", "pl6", "raku"},
        		// NOTE(review): "*.pl", "*.pm" and "*.t" are presumably also claimed by
        		// the Perl lexer — confirm how that ambiguity is resolved elsewhere.
    18  		Filenames: []string{
    19  			"*.pl", "*.pm", "*.nqp", "*.p6", "*.6pl", "*.p6l", "*.pl6", "*.6pm",
    20  			"*.p6m", "*.pm6", "*.t", "*.raku", "*.rakumod", "*.rakutest", "*.rakudoc",
    21  		},
    22  		MimeTypes: []string{
    23  			"text/x-perl6", "application/x-perl6",
    24  			"text/x-raku", "application/x-raku",
    25  		},
        		// Presumably lets `.` in the rule patterns match newlines as well — the
        		// rules rely on patterns such as `.+?`; confirm against chroma's Config docs.
    26  		DotAll: true,
    27  	},
    28  	rakuRules,
    29  ))
    30  
    31  func rakuRules() Rules {
        	// RakuToken identifies which kind of delimited construct a findBrackets
        	// mutator is handling; findBrackets switches on it to decide how the closing
        	// delimiter is located and where the result is stored.
    32  	type RakuToken int
    33  
    34  	const (
        		// Quoting construct; the only class for which findBrackets evaluates the
        		// heredoc-specific group layout.
    35  		rakuQuote RakuToken = iota
        		// Result is stored in the `value` / `closing_delimiters` named groups.
    36  		rakuNameAttribute
        		// Pod block; the closer is an `=end <name>` line found with a regexp.
    37  		rakuPod
        		// Pod formatting code; only rewrites rules of the `pod-formatter` state.
    38  		rakuPodFormatter
        		// Result is stored in the `value` / `closing_delimiters` named groups.
    39  		rakuPodDeclaration
        		// Handled by findBrackets' default branch: the consumed text is appended
        		// to state.Groups[0].
    40  		rakuMultilineComment
        		// Only rewrites the closing-delimiter rule of the `regex` state.
    41  		rakuMatchRegex
        		// Only rewrites the top rule of the `regex` state.
    42  		rakuSubstitutionRegex
    43  	)
    44  
        	// Regular-expression fragments shared by the rules below.
    45  	const (
        		// Alternations of the delimiters that may open / close a colon pair value.
    46  		colonPairOpeningBrackets = `(?:<<|<|«|\(|\[|\{)`
    47  		colonPairClosingBrackets = `(?:>>|>|»|\)|\]|\})`
        		// A single `:` (not preceded by another `:`), a key, and an opening
        		// bracket, captured as the named groups colon, key and opening_delimiters.
    48  		colonPairPattern         = `(?<!:)(?<colon>:)(?<key>\w[\w'-]*)(?<opening_delimiters>` + colonPairOpeningBrackets + `)`
        		// Lookahead for an optional, complete colon pair. Note that the `(?=`
        		// group opened here is NOT closed by this fragment; presumably the
        		// pattern embedding it appends the closing `)` — verify at the use sites.
    49  		colonPairLookahead       = `(?=(:['\w-]+` +
    50  			colonPairOpeningBrackets + `.+?` + colonPairClosingBrackets + `)?`
        		// One or more `::` separators or name characters, stopping at the point
        		// where a colon pair (colonPairPattern) would begin.
    51  		namePattern           = `(?:(?!` + colonPairPattern + `)(?:::|[\w':-]))+`
        		// One or more sigils from `$@%&`, an optional single character from
        		// `.^:?=!~`, then a name.
    52  		variablePattern       = `[$@%&]+[.^:?=!~]?` + namePattern
        		// Same as variablePattern but with a mandatory `*` after the sigils.
    53  		globalVariablePattern = `[$@%&]+\*` + namePattern
    54  	)
    55  
        	// keywords is the word list that keywordsPattern is built from. Besides
        	// alphabetic words it contains the `->` and `-->` arrows.
        	// NOTE(review): `default` is listed twice; presumably harmless in the
        	// generated alternation.
    56  	keywords := []string{
    57  		`BEGIN`, `CATCH`, `CHECK`, `CLOSE`, `CONTROL`, `DOC`, `END`, `ENTER`, `FIRST`, `INIT`,
    58  		`KEEP`, `LAST`, `LEAVE`, `NEXT`, `POST`, `PRE`, `QUIT`, `UNDO`, `anon`, `augment`, `but`,
    59  		`class`, `constant`, `default`, `does`, `else`, `elsif`, `enum`, `for`, `gather`, `given`,
    60  		`grammar`, `has`, `if`, `import`, `is`, `of`, `let`, `loop`, `made`, `make`, `method`,
    61  		`module`, `multi`, `my`, `need`, `orwith`, `our`, `proceed`, `proto`, `repeat`, `require`,
    62  		`where`, `return`, `return-rw`, `returns`, `->`, `-->`, `role`, `state`, `sub`, `no`,
    63  		`submethod`, `subset`, `succeed`, `supersede`, `try`, `unit`, `unless`, `until`,
    64  		`use`, `when`, `while`, `with`, `without`, `export`, `native`, `repr`, `required`, `rw`,
    65  		`symbol`, `default`, `cached`, `DEPRECATED`, `dynamic`, `hidden-from-backtrace`, `nodal`,
    66  		`pure`, `raw`, `start`, `react`, `supply`, `whenever`, `also`, `rule`, `token`, `regex`,
    67  		`dynamic-scope`, `built`, `temp`,
    68  	}
    69  
        	// The look-behind / look-ahead reject a keyword that is directly adjacent to
        	// a quote, word character, colon or hyphen, i.e. part of a longer identifier.
        	// Words presumably joins the list into one alternation between the given
        	// prefix and suffix — confirm against chroma's Words helper.
    70  	keywordsPattern := Words(`(?<!['\w:-])`, `(?!['\w:-])`, keywords...)
    71  
        	// wordOperators lists operators that are spelled as words or parenthesised
        	// ASCII forms such as `(elem)`; it feeds wordOperatorsPattern.
    72  	wordOperators := []string{
    73  		`X`, `Z`, `R`, `after`, `and`, `andthen`, `before`, `cmp`, `div`, `eq`, `eqv`, `extra`, `ge`,
    74  		`gt`, `le`, `leg`, `lt`, `mod`, `ne`, `or`, `orelse`, `x`, `xor`, `xx`, `gcd`, `lcm`,
    75  		`but`, `min`, `max`, `^fff`, `fff^`, `fff`, `^ff`, `ff^`, `ff`, `so`, `not`, `unicmp`,
    76  		`TR`, `o`, `(&)`, `(.)`, `(|)`, `(+)`, `(-)`, `(^)`, `coll`, `(elem)`, `(==)`,
    77  		`(cont)`, `(<)`, `(<=)`, `(>)`, `(>=)`, `minmax`, `notandthen`, `S`,
    78  	}
    79  
        	// A word operator must be delimited on both sides by start/end of input, a
        	// word boundary, or whitespace.
    80  	wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, wordOperators...)
    81  
        	// operators lists the symbolic (ASCII and Unicode) operators; it feeds
        	// operatorsPattern. Several entries (e.g. `+^`, `~^`, `?^`, `?`, `^`) occur
        	// more than once.
    82  	operators := []string{
    83  		`++`, `--`, `-`, `**`, `!`, `+`, `~`, `?`, `+^`, `~^`, `?^`, `^`, `*`, `/`, `%`, `%%`, `+&`,
    84  		`+<`, `+>`, `~&`, `~<`, `~>`, `?&`, `+|`, `+^`, `~|`, `~^`, `?`, `?|`, `?^`, `&`, `^`,
    85  		`<=>`, `^…^`, `^…`, `…^`, `…`, `...`, `...^`, `^...`, `^...^`, `..`, `..^`, `^..`, `^..^`,
    86  		`::=`, `:=`, `!=`, `==`, `<=`, `<`, `>=`, `>`, `~~`, `===`, `&&`, `||`, `|`, `^^`, `//`,
    87  		`??`, `!!`, `^fff^`, `^ff^`, `<==`, `==>`, `<<==`, `==>>`, `=>`, `=`, `<<`, `«`, `>>`, `»`,
    88  		`,`, `>>.`, `».`, `.&`, `.=`, `.^`, `.?`, `.+`, `.*`, `.`, `∘`, `∩`, `⊍`, `∪`, `⊎`, `∖`,
    89  		`⊖`, `≠`, `≤`, `≥`, `=:=`, `=~=`, `≅`, `∈`, `∉`, `≡`, `≢`, `∋`, `∌`, `⊂`, `⊄`, `⊆`, `⊈`,
    90  		`⊃`, `⊅`, `⊇`, `⊉`, `:`, `!!!`, `???`, `¯`, `×`, `÷`, `−`, `⁺`, `⁻`,
    91  	}
    92  
        	// No prefix or suffix guard: symbolic operators may appear anywhere.
    93  	operatorsPattern := Words(``, ``, operators...)
    94  
        	// builtinTypes lists built-in type names (including `::`-qualified ones) and,
        	// after the "Pragmas" marker, pragma names; it feeds builtinTypesPattern.
        	// NOTE(review): `Order` is listed twice.
    95  	builtinTypes := []string{
    96  		`False`, `True`, `Order`, `More`, `Less`, `Same`, `Any`, `Array`, `Associative`, `AST`,
    97  		`atomicint`, `Attribute`, `Backtrace`, `Backtrace::Frame`, `Bag`, `Baggy`, `BagHash`,
    98  		`Blob`, `Block`, `Bool`, `Buf`, `Callable`, `CallFrame`, `Cancellation`, `Capture`,
    99  		`CArray`, `Channel`, `Code`, `compiler`, `Complex`, `ComplexStr`, `CompUnit`,
   100  		`CompUnit::PrecompilationRepository`, `CompUnit::Repository`, `Empty`,
   101  		`CompUnit::Repository::FileSystem`, `CompUnit::Repository::Installation`, `Cool`,
   102  		`CurrentThreadScheduler`, `CX::Warn`, `CX::Take`, `CX::Succeed`, `CX::Return`, `CX::Redo`,
   103  		`CX::Proceed`, `CX::Next`, `CX::Last`, `CX::Emit`, `CX::Done`, `Cursor`, `Date`, `Dateish`,
   104  		`DateTime`, `Distribution`, `Distribution::Hash`, `Distribution::Locally`,
   105  		`Distribution::Path`, `Distribution::Resource`, `Distro`, `Duration`, `Encoding`,
   106  		`Encoding::Registry`, `Endian`, `Enumeration`, `Exception`, `Failure`, `FatRat`, `Grammar`,
   107  		`Hash`, `HyperWhatever`, `Instant`, `Int`, `int`, `int16`, `int32`, `int64`, `int8`, `str`,
   108  		`IntStr`, `IO`, `IO::ArgFiles`, `IO::CatHandle`, `IO::Handle`, `IO::Notification`,
   109  		`IO::Notification::Change`, `IO::Path`, `IO::Path::Cygwin`, `IO::Path::Parts`,
   110  		`IO::Path::QNX`, `IO::Path::Unix`, `IO::Path::Win32`, `IO::Pipe`, `IO::Socket`,
   111  		`IO::Socket::Async`, `IO::Socket::Async::ListenSocket`, `IO::Socket::INET`, `IO::Spec`,
   112  		`IO::Spec::Cygwin`, `IO::Spec::QNX`, `IO::Spec::Unix`, `IO::Spec::Win32`, `IO::Special`,
   113  		`Iterable`, `Iterator`, `Junction`, `Kernel`, `Label`, `List`, `Lock`, `Lock::Async`,
   114  		`Lock::ConditionVariable`, `long`, `longlong`, `Macro`, `Map`, `Match`,
   115  		`Metamodel::AttributeContainer`, `Metamodel::C3MRO`, `Metamodel::ClassHOW`,
   116  		`Metamodel::ConcreteRoleHOW`, `Metamodel::CurriedRoleHOW`, `Metamodel::DefiniteHOW`,
   117  		`Metamodel::Documenting`, `Metamodel::EnumHOW`, `Metamodel::Finalization`,
   118  		`Metamodel::MethodContainer`, `Metamodel::Mixins`, `Metamodel::MROBasedMethodDispatch`,
   119  		`Metamodel::MultipleInheritance`, `Metamodel::Naming`, `Metamodel::Primitives`,
   120  		`Metamodel::PrivateMethodContainer`, `Metamodel::RoleContainer`, `Metamodel::RolePunning`,
   121  		`Metamodel::Stashing`, `Metamodel::Trusting`, `Metamodel::Versioning`, `Method`, `Mix`,
   122  		`MixHash`, `Mixy`, `Mu`, `NFC`, `NFD`, `NFKC`, `NFKD`, `Nil`, `Num`, `num32`, `num64`,
   123  		`Numeric`, `NumStr`, `ObjAt`, `Order`, `Pair`, `Parameter`, `Perl`, `Pod::Block`,
   124  		`Pod::Block::Code`, `Pod::Block::Comment`, `Pod::Block::Declarator`, `Pod::Block::Named`,
   125  		`Pod::Block::Para`, `Pod::Block::Table`, `Pod::Heading`, `Pod::Item`, `Pointer`,
   126  		`Positional`, `PositionalBindFailover`, `Proc`, `Proc::Async`, `Promise`, `Proxy`,
   127  		`PseudoStash`, `QuantHash`, `RaceSeq`, `Raku`, `Range`, `Rat`, `Rational`, `RatStr`,
   128  		`Real`, `Regex`, `Routine`, `Routine::WrapHandle`, `Scalar`, `Scheduler`, `Semaphore`,
   129  		`Seq`, `Sequence`, `Set`, `SetHash`, `Setty`, `Signature`, `size_t`, `Slip`, `Stash`,
   130  		`Str`, `StrDistance`, `Stringy`, `Sub`, `Submethod`, `Supplier`, `Supplier::Preserving`,
   131  		`Supply`, `Systemic`, `Tap`, `Telemetry`, `Telemetry::Instrument::Thread`,
   132  		`Telemetry::Instrument::ThreadPool`, `Telemetry::Instrument::Usage`, `Telemetry::Period`,
   133  		`Telemetry::Sampler`, `Thread`, `Test`, `ThreadPoolScheduler`, `UInt`, `uint16`, `uint32`,
   134  		`uint64`, `uint8`, `Uni`, `utf8`, `ValueObjAt`, `Variable`, `Version`, `VM`, `Whatever`,
   135  		`WhateverCode`, `WrapHandle`, `NativeCall`,
   136  		// Pragmas
   137  		`precompilation`, `experimental`, `worries`, `MONKEY-TYPING`, `MONKEY-SEE-NO-EVAL`,
   138  		`MONKEY-GUTS`, `fatal`, `lib`, `isms`, `newline`, `nqp`, `soft`,
   139  		`strict`, `trace`, `variables`,
   140  	}
   141  
        	// Same identifier guards as keywordsPattern, plus an optional `:_`, `:U` or
        	// `:D` suffix that is matched as part of the type name.
   142  	builtinTypesPattern := Words(`(?<!['\w:-])`, `(?::[_UD])?(?!['\w:-])`, builtinTypes...)
   143  
   144  	builtinRoutines := []string{
   145  		`ACCEPTS`, `abs`, `abs2rel`, `absolute`, `accept`, `accepts_type`, `accessed`, `acos`,
   146  		`acosec`, `acosech`, `acosh`, `acotan`, `acotanh`, `acquire`, `act`, `action`, `actions`,
   147  		`add`, `add_attribute`, `add_enum_value`, `add_fallback`, `add_method`, `add_parent`,
   148  		`add_private_method`, `add_role`, `add_stash`, `add_trustee`, `addendum`, `adverb`, `after`,
   149  		`all`, `allocate`, `allof`, `allowed`, `alternative-names`, `annotations`, `antipair`,
   150  		`antipairs`, `any`, `anyof`, `api`, `app_lifetime`, `append`, `arch`, `archetypes`,
   151  		`archname`, `args`, `ARGS-TO-CAPTURE`, `arity`, `Array`, `asec`, `asech`, `asin`, `asinh`,
   152  		`ASSIGN-KEY`, `ASSIGN-POS`, `assuming`, `ast`, `at`, `atan`, `atan2`, `atanh`, `AT-KEY`,
   153  		`atomic-assign`, `atomic-dec-fetch`, `atomic-fetch`, `atomic-fetch-add`, `atomic-fetch-dec`,
   154  		`atomic-fetch-inc`, `atomic-fetch-sub`, `atomic-inc-fetch`, `AT-POS`, `attributes`, `auth`,
   155  		`await`, `backend`, `backtrace`, `Bag`, `bag`, `Baggy`, `BagHash`, `bail-out`, `base`,
   156  		`basename`, `base-repeating`, `base_type`, `batch`, `BIND-KEY`, `BIND-POS`, `bind-stderr`,
   157  		`bind-stdin`, `bind-stdout`, `bind-udp`, `bits`, `bless`, `block`, `Bool`, `bool-only`,
   158  		`bounds`, `break`, `Bridge`, `broken`, `BUILD`, `TWEAK`, `build-date`, `bytes`, `cache`,
   159  		`callframe`, `calling-package`, `CALL-ME`, `callsame`, `callwith`, `can`, `cancel`,
   160  		`candidates`, `cando`, `can-ok`, `canonpath`, `caps`, `caption`, `Capture`, `capture`,
   161  		`cas`, `catdir`, `categorize`, `categorize-list`, `catfile`, `catpath`, `cause`, `ceiling`,
   162  		`cglobal`, `changed`, `Channel`, `channel`, `chars`, `chdir`, `child`, `child-name`,
   163  		`child-typename`, `chmod`, `chomp`, `chop`, `chr`, `chrs`, `chunks`, `cis`, `classify`,
   164  		`classify-list`, `cleanup`, `clone`, `close`, `closed`, `close-stdin`, `cmp-ok`, `code`,
   165  		`codename`, `codes`, `coerce_type`, `coll`, `collate`, `column`, `comb`, `combinations`,
   166  		`command`, `comment`, `compiler`, `Complex`, `compose`, `composalizer`, `compose_type`,
   167  		`compose_values`, `composer`, `compute_mro`, `condition`, `config`, `configure_destroy`,
   168  		`configure_type_checking`, `conj`, `connect`, `constraints`, `construct`, `contains`,
   169  		`content`, `contents`, `copy`, `cos`, `cosec`, `cosech`, `cosh`, `cotan`, `cotanh`, `count`,
   170  		`count-only`, `cpu-cores`, `cpu-usage`, `CREATE`, `create_type`, `cross`, `cue`, `curdir`,
   171  		`curupdir`, `d`, `Date`, `DateTime`, `day`, `daycount`, `day-of-month`, `day-of-week`,
   172  		`day-of-year`, `days-in-month`, `dd-mm-yyyy`, `declaration`, `decode`, `decoder`, `deepmap`,
   173  		`default`, `defined`, `DEFINITE`, `definite`, `delayed`, `delete`, `delete-by-compiler`,
   174  		`DELETE-KEY`, `DELETE-POS`, `denominator`, `desc`, `DESTROY`, `destroyers`, `devnull`,
   175  		`diag`, `did-you-mean`, `die`, `dies-ok`, `dir`, `dirname`, `distribution`, `dir-sep`,
   176  		`DISTROnames`, `do`, `does`, `does-ok`, `done`, `done-testing`, `duckmap`, `dynamic`, `e`,
   177  		`eager`, `earlier`, `elems`, `emit`, `enclosing`, `encode`, `encoder`, `encoding`, `end`,
   178  		`endian`, `ends-with`, `enum_from_value`, `enum_value_list`, `enum_values`, `enums`, `EOF`,
   179  		`eof`, `EVAL`, `eval-dies-ok`, `EVALFILE`, `eval-lives-ok`, `event`, `exception`,
   180  		`excludes-max`, `excludes-min`, `EXISTS-KEY`, `EXISTS-POS`, `exit`, `exitcode`, `exp`,
   181  		`expected`, `explicitly-manage`, `expmod`, `export_callback`, `extension`, `f`, `fail`,
   182  		`FALLBACK`, `fails-like`, `fc`, `feature`, `file`, `filename`, `files`, `find`,
   183  		`find_method`, `find_method_qualified`, `finish`, `first`, `flat`, `first-date-in-month`,
   184  		`flatmap`, `flip`, `floor`, `flunk`, `flush`, `flush_cache`, `fmt`, `format`, `formatter`,
   185  		`free-memory`, `freeze`, `from`, `from-list`, `from-loop`, `from-posix`, `from-slurpy`,
   186  		`full`, `full-barrier`, `GENERATE-USAGE`, `generate_mixin`, `get`, `get_value`, `getc`,
   187  		`gist`, `got`, `grab`, `grabpairs`, `grep`, `handle`, `handled`, `handles`, `hardware`,
   188  		`has_accessor`, `Hash`, `hash`, `head`, `headers`, `hh-mm-ss`, `hidden`, `hides`, `hostname`,
   189  		`hour`, `how`, `hyper`, `id`, `illegal`, `im`, `in`, `in-timezone`, `indent`, `index`,
   190  		`indices`, `indir`, `infinite`, `infix`, `postcirumfix`, `cicumfix`, `install`,
   191  		`install_method_cache`, `Instant`, `instead`, `Int`, `int-bounds`, `interval`, `in-timezone`,
   192  		`invalid-str`, `invert`, `invocant`, `IO`, `IO::Notification.watch-path`, `is_trusted`,
   193  		`is_type`, `isa`, `is-absolute`, `isa-ok`, `is-approx`, `is-deeply`, `is-hidden`,
   194  		`is-initial-thread`, `is-int`, `is-lazy`, `is-leap-year`, `isNaN`, `isnt`, `is-prime`,
   195  		`is-relative`, `is-routine`, `is-setting`, `is-win`, `item`, `iterator`, `join`, `keep`,
   196  		`kept`, `KERNELnames`, `key`, `keyof`, `keys`, `kill`, `kv`, `kxxv`, `l`, `lang`, `last`,
   197  		`lastcall`, `later`, `lazy`, `lc`, `leading`, `level`, `like`, `line`, `lines`, `link`,
   198  		`List`, `list`, `listen`, `live`, `lives-ok`, `load`, `load-repo-id`, `load-unit`, `loaded`,
   199  		`loads`, `local`, `lock`, `log`, `log10`, `lookup`, `lsb`, `made`, `MAIN`, `make`, `Map`,
   200  		`map`, `match`, `max`, `maxpairs`, `merge`, `message`, `method`, `meta`, `method_table`,
   201  		`methods`, `migrate`, `min`, `minmax`, `minpairs`, `minute`, `misplaced`, `Mix`, `mix`,
   202  		`MixHash`, `mixin`, `mixin_attribute`, `Mixy`, `mkdir`, `mode`, `modified`, `month`, `move`,
   203  		`mro`, `msb`, `multi`, `multiness`, `name`, `named`, `named_names`, `narrow`,
   204  		`nativecast`, `native-descriptor`, `nativesizeof`, `need`, `new`, `new_type`,
   205  		`new-from-daycount`, `new-from-pairs`, `next`, `nextcallee`, `next-handle`, `nextsame`,
   206  		`nextwith`, `next-interesting-index`, `NFC`, `NFD`, `NFKC`, `NFKD`, `nice`, `nl-in`,
   207  		`nl-out`, `nodemap`, `nok`, `normalize`, `none`, `norm`, `not`, `note`, `now`, `nude`,
   208  		`Num`, `numerator`, `Numeric`, `of`, `offset`, `offset-in-hours`, `offset-in-minutes`,
   209  		`ok`, `old`, `on-close`, `one`, `on-switch`, `open`, `opened`, `operation`, `optional`,
   210  		`ord`, `ords`, `orig`, `os-error`, `osname`, `out-buffer`, `pack`, `package`, `package-kind`,
   211  		`package-name`, `packages`, `Pair`, `pair`, `pairs`, `pairup`, `parameter`, `params`,
   212  		`parent`, `parent-name`, `parents`, `parse`, `parse-base`, `parsefile`, `parse-names`,
   213  		`parts`, `pass`, `path`, `path-sep`, `payload`, `peer-host`, `peer-port`, `periods`, `perl`,
   214  		`permutations`, `phaser`, `pick`, `pickpairs`, `pid`, `placeholder`, `plan`, `plus`,
   215  		`polar`, `poll`, `polymod`, `pop`, `pos`, `positional`, `posix`, `postfix`, `postmatch`,
   216  		`precomp-ext`, `precomp-target`, `precompiled`, `pred`, `prefix`, `prematch`, `prepend`,
   217  		`primary`, `print`, `printf`, `print-nl`, `print-to`, `private`, `private_method_names`,
   218  		`private_method_table`, `proc`, `produce`, `Promise`, `promise`, `prompt`, `protect`,
   219  		`protect-or-queue-on-recursion`, `publish_method_cache`, `pull-one`, `push`, `push-all`,
   220  		`push-at-least`, `push-exactly`, `push-until-lazy`, `put`, `qualifier-type`, `quaternary`,
   221  		`quit`, `r`, `race`, `radix`, `raku`, `rand`, `Range`, `range`, `Rat`, `raw`, `re`, `read`,
   222  		`read-bits`, `read-int128`, `read-int16`, `read-int32`, `read-int64`, `read-int8`,
   223  		`read-num32`, `read-num64`, `read-ubits`, `read-uint128`, `read-uint16`, `read-uint32`,
   224  		`read-uint64`, `read-uint8`, `readchars`, `readonly`, `ready`, `Real`, `reallocate`,
   225  		`reals`, `reason`, `rebless`, `receive`, `recv`, `redispatcher`, `redo`, `reduce`,
   226  		`rel2abs`, `relative`, `release`, `remove`, `rename`, `repeated`, `replacement`,
   227  		`replace-with`, `repo`, `repo-id`, `report`, `required`, `reserved`, `resolve`, `restore`,
   228  		`result`, `resume`, `rethrow`, `return`, `return-rw`, `returns`, `reverse`, `right`,
   229  		`rindex`, `rmdir`, `role`, `roles_to_compose`, `rolish`, `roll`, `rootdir`, `roots`,
   230  		`rotate`, `rotor`, `round`, `roundrobin`, `routine-type`, `run`, `RUN-MAIN`, `rw`, `rwx`,
   231  		`samecase`, `samemark`, `samewith`, `say`, `schedule-on`, `scheduler`, `scope`, `sec`,
   232  		`sech`, `second`, `secondary`, `seek`, `self`, `send`, `Seq`, `Set`, `set`, `serial`,
   233  		`set_hidden`, `set_name`, `set_package`, `set_rw`, `set_value`, `set_api`, `set_auth`,
   234  		`set_composalizer`, `set_export_callback`, `set_is_mixin`, `set_mixin_attribute`,
   235  		`set_package`, `set_ver`, `set_why`, `SetHash`, `Setty`, `set-instruments`,
   236  		`setup_finalization`, `setup_mixin_cache`, `shape`, `share`, `shell`, `short-id`,
   237  		`short-name`, `shortname`, `shift`, `sibling`, `sigil`, `sign`, `signal`, `signals`,
   238  		`signature`, `sin`, `sinh`, `sink`, `sink-all`, `skip`, `skip-at-least`,
   239  		`skip-at-least-pull-one`, `skip-one`, `skip-rest`, `sleep`, `sleep-timer`, `sleep-until`,
   240  		`Slip`, `slip`, `slurp`, `slurp-rest`, `slurpy`, `snap`, `snapper`, `so`, `socket-host`,
   241  		`socket-port`, `sort`, `source`, `source-package`, `spawn`, `SPEC`, `splice`, `split`,
   242  		`splitdir`, `splitpath`, `sprintf`, `spurt`, `sqrt`, `squish`, `srand`, `stable`, `start`,
   243  		`started`, `starts-with`, `status`, `stderr`, `stdout`, `STORE`, `store-file`,
   244  		`store-repo-id`, `store-unit`, `Str`, `Stringy`, `sub_signature`, `subbuf`, `subbuf-rw`,
   245  		`subname`, `subparse`, `subst`, `subst-mutate`, `substr`, `substr-eq`, `substr-rw`,
   246  		`subtest`, `succ`, `sum`, `suffix`, `summary`, `Supply`, `symlink`, `T`, `t`, `tail`,
   247  		`take`, `take-rw`, `tan`, `tanh`, `tap`, `target`, `target-name`, `tc`, `tclc`, `tell`,
   248  		`term`, `tertiary`, `then`, `throttle`, `throw`, `throws-like`, `time`, `timezone`,
   249  		`tmpdir`, `to`, `today`, `todo`, `toggle`, `to-posix`, `total`, `total-memory`, `trailing`,
   250  		`trans`, `tree`, `trim`, `trim-leading`, `trim-trailing`, `truncate`, `truncated-to`,
   251  		`trusts`, `try_acquire`, `trying`, `twigil`, `type`, `type_captures`, `type_check`,
   252  		`typename`, `uc`, `udp`, `uncaught_handler`, `undefine`, `unimatch`, `unicmp`, `uniname`,
   253  		`uninames`, `uninstall`, `uniparse`, `uniprop`, `uniprops`, `unique`, `unival`, `univals`,
   254  		`unlike`, `unlink`, `unlock`, `unpack`, `unpolar`, `unset`, `unshift`, `unwrap`, `updir`,
   255  		`USAGE`, `usage-name`, `use-ok`, `utc`, `val`, `value`, `values`, `VAR`, `variable`, `ver`,
   256  		`verbose-config`, `Version`, `version`, `VMnames`, `volume`, `vow`, `w`, `wait`, `warn`,
   257  		`watch`, `watch-path`, `week`, `weekday-of-month`, `week-number`, `week-year`, `WHAT`,
   258  		`what`, `when`, `WHERE`, `WHEREFORE`, `WHICH`, `WHO`, `whole-second`, `WHY`, `why`,
   259  		`with-lock-hidden-from-recursion-check`, `wordcase`, `words`, `workaround`, `wrap`,
   260  		`write`, `write-bits`, `write-int128`, `write-int16`, `write-int32`, `write-int64`,
   261  		`write-int8`, `write-num32`, `write-num64`, `write-ubits`, `write-uint128`, `write-uint16`,
   262  		`write-uint32`, `write-uint64`, `write-uint8`, `write-to`, `x`, `yada`, `year`, `yield`,
   263  		`yyyy-mm-dd`, `z`, `zip`, `zip-latest`, `HOW`, `s`, `DEPRECATED`, `trait_mod`,
   264  	}
   265  
   266  	builtinRoutinesPattern := Words(`(?<!['\w:-])`, `(?!['\w-])`, builtinRoutines...)
   267  
   268  	// brackets maps each opening bracket rune to its closing counterpart.
        	// findBrackets looks the opening delimiter up here: a hit means the delimiter
        	// is "mirrored" and nesting must be tracked; a miss means the same rune(s)
        	// also close the construct. Several openers may share one closer
        	// (e.g. U+2018, U+201A and U+201B all map to U+2019).
   269  	brackets := map[rune]rune{
   270  		'\u0028': '\u0029', '\u003c': '\u003e', '\u005b': '\u005d',
   271  		'\u007b': '\u007d', '\u00ab': '\u00bb', '\u0f3a': '\u0f3b',
   272  		'\u0f3c': '\u0f3d', '\u169b': '\u169c', '\u2018': '\u2019',
   273  		'\u201a': '\u2019', '\u201b': '\u2019', '\u201c': '\u201d',
   274  		'\u201e': '\u201d', '\u201f': '\u201d', '\u2039': '\u203a',
   275  		'\u2045': '\u2046', '\u207d': '\u207e', '\u208d': '\u208e',
   276  		'\u2208': '\u220b', '\u2209': '\u220c', '\u220a': '\u220d',
   277  		'\u2215': '\u29f5', '\u223c': '\u223d', '\u2243': '\u22cd',
   278  		'\u2252': '\u2253', '\u2254': '\u2255', '\u2264': '\u2265',
   279  		'\u2266': '\u2267', '\u2268': '\u2269', '\u226a': '\u226b',
   280  		'\u226e': '\u226f', '\u2270': '\u2271', '\u2272': '\u2273',
   281  		'\u2274': '\u2275', '\u2276': '\u2277', '\u2278': '\u2279',
   282  		'\u227a': '\u227b', '\u227c': '\u227d', '\u227e': '\u227f',
   283  		'\u2280': '\u2281', '\u2282': '\u2283', '\u2284': '\u2285',
   284  		'\u2286': '\u2287', '\u2288': '\u2289', '\u228a': '\u228b',
   285  		'\u228f': '\u2290', '\u2291': '\u2292', '\u2298': '\u29b8',
   286  		'\u22a2': '\u22a3', '\u22a6': '\u2ade', '\u22a8': '\u2ae4',
   287  		'\u22a9': '\u2ae3', '\u22ab': '\u2ae5', '\u22b0': '\u22b1',
   288  		'\u22b2': '\u22b3', '\u22b4': '\u22b5', '\u22b6': '\u22b7',
   289  		'\u22c9': '\u22ca', '\u22cb': '\u22cc', '\u22d0': '\u22d1',
   290  		'\u22d6': '\u22d7', '\u22d8': '\u22d9', '\u22da': '\u22db',
   291  		'\u22dc': '\u22dd', '\u22de': '\u22df', '\u22e0': '\u22e1',
   292  		'\u22e2': '\u22e3', '\u22e4': '\u22e5', '\u22e6': '\u22e7',
   293  		'\u22e8': '\u22e9', '\u22ea': '\u22eb', '\u22ec': '\u22ed',
   294  		'\u22f0': '\u22f1', '\u22f2': '\u22fa', '\u22f3': '\u22fb',
   295  		'\u22f4': '\u22fc', '\u22f6': '\u22fd', '\u22f7': '\u22fe',
   296  		'\u2308': '\u2309', '\u230a': '\u230b', '\u2329': '\u232a',
   297  		'\u23b4': '\u23b5', '\u2768': '\u2769', '\u276a': '\u276b',
   298  		'\u276c': '\u276d', '\u276e': '\u276f', '\u2770': '\u2771',
   299  		'\u2772': '\u2773', '\u2774': '\u2775', '\u27c3': '\u27c4',
   300  		'\u27c5': '\u27c6', '\u27d5': '\u27d6', '\u27dd': '\u27de',
   301  		'\u27e2': '\u27e3', '\u27e4': '\u27e5', '\u27e6': '\u27e7',
   302  		'\u27e8': '\u27e9', '\u27ea': '\u27eb', '\u2983': '\u2984',
   303  		'\u2985': '\u2986', '\u2987': '\u2988', '\u2989': '\u298a',
   304  		'\u298b': '\u298c', '\u298d': '\u298e', '\u298f': '\u2990',
   305  		'\u2991': '\u2992', '\u2993': '\u2994', '\u2995': '\u2996',
   306  		'\u2997': '\u2998', '\u29c0': '\u29c1', '\u29c4': '\u29c5',
   307  		'\u29cf': '\u29d0', '\u29d1': '\u29d2', '\u29d4': '\u29d5',
   308  		'\u29d8': '\u29d9', '\u29da': '\u29db', '\u29f8': '\u29f9',
   309  		'\u29fc': '\u29fd', '\u2a2b': '\u2a2c', '\u2a2d': '\u2a2e',
   310  		'\u2a34': '\u2a35', '\u2a3c': '\u2a3d', '\u2a64': '\u2a65',
   311  		'\u2a79': '\u2a7a', '\u2a7d': '\u2a7e', '\u2a7f': '\u2a80',
   312  		'\u2a81': '\u2a82', '\u2a83': '\u2a84', '\u2a8b': '\u2a8c',
   313  		'\u2a91': '\u2a92', '\u2a93': '\u2a94', '\u2a95': '\u2a96',
   314  		'\u2a97': '\u2a98', '\u2a99': '\u2a9a', '\u2a9b': '\u2a9c',
   315  		'\u2aa1': '\u2aa2', '\u2aa6': '\u2aa7', '\u2aa8': '\u2aa9',
   316  		'\u2aaa': '\u2aab', '\u2aac': '\u2aad', '\u2aaf': '\u2ab0',
   317  		'\u2ab3': '\u2ab4', '\u2abb': '\u2abc', '\u2abd': '\u2abe',
   318  		'\u2abf': '\u2ac0', '\u2ac1': '\u2ac2', '\u2ac3': '\u2ac4',
   319  		'\u2ac5': '\u2ac6', '\u2acd': '\u2ace', '\u2acf': '\u2ad0',
   320  		'\u2ad1': '\u2ad2', '\u2ad3': '\u2ad4', '\u2ad5': '\u2ad6',
   321  		'\u2aec': '\u2aed', '\u2af7': '\u2af8', '\u2af9': '\u2afa',
   322  		'\u2e02': '\u2e03', '\u2e04': '\u2e05', '\u2e09': '\u2e0a',
   323  		'\u2e0c': '\u2e0d', '\u2e1c': '\u2e1d', '\u2e20': '\u2e21',
   324  		'\u3008': '\u3009', '\u300a': '\u300b', '\u300c': '\u300d',
   325  		'\u300e': '\u300f', '\u3010': '\u3011', '\u3014': '\u3015',
   326  		'\u3016': '\u3017', '\u3018': '\u3019', '\u301a': '\u301b',
   327  		'\u301d': '\u301e', '\ufd3e': '\ufd3f', '\ufe17': '\ufe18',
   328  		'\ufe35': '\ufe36', '\ufe37': '\ufe38', '\ufe39': '\ufe3a',
   329  		'\ufe3b': '\ufe3c', '\ufe3d': '\ufe3e', '\ufe3f': '\ufe40',
   330  		'\ufe41': '\ufe42', '\ufe43': '\ufe44', '\ufe47': '\ufe48',
   331  		'\ufe59': '\ufe5a', '\ufe5b': '\ufe5c', '\ufe5d': '\ufe5e',
   332  		'\uff08': '\uff09', '\uff1c': '\uff1e', '\uff3b': '\uff3d',
   333  		'\uff5b': '\uff5d', '\uff5f': '\uff60', '\uff62': '\uff63',
   334  	}
   335  
        	// A regexp character class over the bracket runes. joinRuneMap is defined
        	// elsewhere; presumably it concatenates the map's runes into one string —
        	// verify whether it includes keys, values, or both. QuoteMeta escapes any
        	// runes that are regexp metacharacters.
   336  	bracketsPattern := `[` + regexp.QuoteMeta(joinRuneMap(brackets)) + `]`
   337  
   338  	// findBrackets returns the mutator that runs after a rule has matched the
   339  	// opening delimiter of a construct of kind tokenClass (pod and heredoc included).
        	//
        	// For rakuPodFormatter, rakuMatchRegex and rakuSubstitutionRegex the mutator
        	// only rewrites rules of the `pod-formatter` / `regex` states through
        	// replaceRule and returns. For every other class it scans state.Text from
        	// state.Pos for the closing delimiter (tracking nesting for mirrored
        	// brackets), publishes the result in state.NamedGroups (`value`,
        	// `closing_delimiters`, and for heredocs `opening_delimiters`) or appends the
        	// consumed text to state.Groups[0], and finally moves state.Pos past the
        	// closer. When no closer exists, the rest of the text is consumed.
        	//
        	// The mutator assumes the triggering rule captured at least one opening
        	// delimiter rune. It panics if replaceRule reports an error.
   340  	findBrackets := func(tokenClass RakuToken) MutatorFunc {
        		// Compiled once per findBrackets call instead of on every mutator run.
        		heredocAdverbRe := regexp.MustCompile(`:to\b|:heredoc\b`)
   341  		return func(state *LexerState) error {
   342  			var openingChars []rune
   343  			var adverbs []rune
   344  			switch tokenClass {
   345  			case rakuPod:
        				// Pod openers are spread over capture groups 1-4 of the rule.
   346  				openingChars = []rune(strings.Join(state.Groups[1:5], ``))
   347  			default:
   348  				adverbs = []rune(state.NamedGroups[`adverbs`])
   349  				openingChars = []rune(state.NamedGroups[`opening_delimiters`])
   350  			}
   351  
   352  			openingChar := openingChars[0]
   353  
        			// nChars is reused below as "length of the delimiter last seen".
   354  			nChars := len(openingChars)
   355  
   356  			var closingChar rune
   357  			var closingCharExists bool
   358  			var closingChars []rune
   359  
   360  			switch tokenClass {
   361  			case rakuPod:
        				// Pod always takes the nesting-aware path; its closer is found by regexp.
   362  				closingCharExists = true
   363  			default:
   364  				closingChar, closingCharExists = brackets[openingChar]
   365  			}
   366  
   367  			switch tokenClass {
   368  			case rakuPodFormatter:
   369  				formatter := StringOther
   370  
   371  				switch state.NamedGroups[`keyword`] {
   372  				case "B":
   373  					formatter = GenericStrong
   374  				case "I":
   375  					formatter = GenericEmph
   376  				case "U":
   377  					formatter = GenericUnderline
   378  				}
   379  
   380  				formatterRule := ruleReplacingConfig{
   381  					pattern:      `.+?`,
   382  					tokenType:    formatter,
   383  					mutator:      nil,
   384  					stateName:    `pod-formatter`,
   385  					rulePosition: bottomRule,
   386  				}
   387  
   388  				err := replaceRule(formatterRule)(state)
   389  				if err != nil {
   390  					panic(err)
   391  				}
   392  
   393  				err = replaceRule(ruleReplacingConfig{
   394  					delimiter:              []rune{closingChar},
   395  					tokenType:              Punctuation,
   396  					stateName:              `pod-formatter`,
   397  					pushState:              true,
   398  					numberOfDelimiterChars: nChars,
   399  					appendMutator:          popRule(formatterRule),
   400  				})(state)
   401  				if err != nil {
   402  					panic(err)
   403  				}
   404  
   405  				return nil
   406  			case rakuMatchRegex:
        				// Mirrored opener: close on its counterpart; otherwise the opener
        				// itself also closes the regex.
   407  				var delimiter []rune
   408  				if closingCharExists {
   409  					delimiter = []rune{closingChar}
   410  				} else {
   411  					delimiter = openingChars
   412  				}
   413  
   414  				err := replaceRule(ruleReplacingConfig{
   415  					delimiter: delimiter,
   416  					tokenType: Punctuation,
   417  					stateName: `regex`,
   418  					popState:  true,
   419  					pushState: true,
   420  				})(state)
   421  				if err != nil {
   422  					panic(err)
   423  				}
   424  
   425  				return nil
   426  			case rakuSubstitutionRegex:
   427  				delimiter := regexp2.Escape(string(openingChars))
   428  
   429  				err := replaceRule(ruleReplacingConfig{
   430  					pattern:      `(` + delimiter + `)` + `((?:\\\\|\\/|.)*?)` + `(` + delimiter + `)`,
   431  					tokenType:    ByGroups(Punctuation, UsingSelf(`qq`), Punctuation),
   432  					rulePosition: topRule,
   433  					stateName:    `regex`,
   434  					popState:     true,
   435  					pushState:    true,
   436  				})(state)
   437  				if err != nil {
   438  					panic(err)
   439  				}
   440  
   441  				return nil
   442  			}
   443  
   444  			text := state.Text
   445  
   446  			var endPos int
   447  
   448  			var nonMirroredOpeningCharPosition int
   449  
   450  			if !closingCharExists {
   451  				// it's not a mirrored character, which means we
   452  				// just need to look for the next occurrence
   453  				closingChars = openingChars
   454  				nonMirroredOpeningCharPosition = indexAt(text, closingChars, state.Pos)
   455  				endPos = nonMirroredOpeningCharPosition
   456  			} else {
   457  				var podRegex *regexp2.Regexp
   458  				if tokenClass == rakuPod {
        					// Closer is `=end <name>` at the same indentation as the opener.
   459  					podRegex = regexp2.MustCompile(
   460  						state.NamedGroups[`ws`]+`=end`+`\s+`+regexp2.Escape(state.NamedGroups[`name`]),
   461  						0,
   462  					)
   463  				} else {
   464  					closingChars = []rune(strings.Repeat(string(closingChar), nChars))
   465  				}
   466  
   467  				// we need to look for the corresponding closing character,
   468  				// keep nesting in mind
   469  				nestingLevel := 1
   470  
        				// Chosen so that searchPos+nChars == state.Pos on the first pass.
   471  				searchPos := state.Pos - nChars
   472  
   473  				var nextClosePos int
   474  
   475  				for nestingLevel > 0 {
   476  					if tokenClass == rakuPod {
   477  						match, err := podRegex.FindRunesMatchStartingAt(text, searchPos+nChars)
        						// regexp2 reports "no match" as a nil match with a nil
        						// error, so both must be checked before using match.
   478  						if err == nil && match != nil {
   479  							closingChars = match.Runes()
   480  							nextClosePos = match.Index
   481  						} else {
   482  							nextClosePos = -1
   483  						}
   484  					} else {
   485  						nextClosePos = indexAt(text, closingChars, searchPos+nChars)
   486  					}
   487  
   488  					nextOpenPos := indexAt(text, openingChars, searchPos+nChars)
   489  
   490  					switch {
   491  					case nextClosePos == -1:
        						// Unterminated: consume everything that is left.
   492  						nextClosePos = len(text)
   493  						nestingLevel = 0
   494  					case nextOpenPos != -1 && nextOpenPos < nextClosePos:
   495  						nestingLevel++
   496  						nChars = len(openingChars)
   497  						searchPos = nextOpenPos
   498  					default: // next_close_pos < next_open_pos
   499  						nestingLevel--
   500  						nChars = len(closingChars)
   501  						searchPos = nextClosePos
   502  					}
   503  				}
   504  
   505  				endPos = nextClosePos
   506  			}
   507  
   508  			if endPos < 0 {
   509  				// if we didn't find a closer, just highlight the
   510  				// rest of the text in this class
   511  				endPos = len(text)
   512  			}
   513  
   514  			// Heredoc (`:to` / `:heredoc` adverb): the delimited text is only the terminator word; the body runs until that word occurs again.
   515  			var heredocTerminator []rune
   516  			var endHeredocPos int
   517  			if heredocAdverbRe.MatchString(string(adverbs)) {
   518  				if endPos != len(text) {
   519  					heredocTerminator = text[state.Pos:endPos]
   520  					nChars = len(heredocTerminator)
   521  				} else {
        					// No closing delimiter at all: consume a single rune.
   522  					endPos = state.Pos + 1
   523  					heredocTerminator = []rune{}
   524  					nChars = 0
   525  				}
   526  
   527  				if nChars > 0 {
   528  					endHeredocPos = indexAt(text[endPos:], heredocTerminator, 0)
   529  					if endHeredocPos > -1 {
   530  						endPos += endHeredocPos
   531  					} else {
   532  						endPos = len(text)
   533  					}
   534  				}
   535  			}
   536  
        			// Clamp to the input so an unterminated construct at the very end of
        			// the text can neither slice past it nor move state.Pos beyond it.
        			if endPos > len(text) {
        				endPos = len(text)
        			}
        			nextPos := endPos + nChars
        			if nextPos > len(text) {
        				nextPos = len(text)
        			}
        
   537  			textBetweenBrackets := string(text[state.Pos:endPos])
   538  			switch tokenClass {
   539  			case rakuPod, rakuPodDeclaration, rakuNameAttribute:
   540  				state.NamedGroups[`value`] = textBetweenBrackets
   541  				state.NamedGroups[`closing_delimiters`] = string(closingChars)
   542  			case rakuQuote:
   543  				if len(heredocTerminator) > 0 {
   544  					// Length of heredoc terminator + closing chars + `;`
   545  					heredocFirstPunctuationLen := nChars + len(openingChars) + 1
        					// The header may be cut short by the end of the body/input.
        					heredocFirstPunctuationEnd := state.Pos + heredocFirstPunctuationLen
        					if heredocFirstPunctuationEnd > endPos {
        						heredocFirstPunctuationEnd = endPos
        					}
   546  
   547  					state.NamedGroups[`opening_delimiters`] = string(openingChars) +
   548  						string(text[state.Pos:heredocFirstPunctuationEnd])
   549  
   550  					state.NamedGroups[`value`] =
   551  						string(text[heredocFirstPunctuationEnd:endPos])
   552  
   553  					if endHeredocPos > -1 {
   554  						state.NamedGroups[`closing_delimiters`] = string(heredocTerminator)
   555  					}
   556  				} else {
   557  					state.NamedGroups[`value`] = textBetweenBrackets
   558  					if nChars > 0 {
   559  						state.NamedGroups[`closing_delimiters`] = string(closingChars)
   560  					}
   561  				}
   562  			default:
        				// Fold body and closer into the whole-match group.
   563  				state.Groups = []string{state.Groups[0] + string(text[state.Pos:nextPos])}
   564  			}
   565  
   566  			state.Pos = nextPos
   567  
   568  			return nil
   569  		}
   570  	}
   571  
   572  	// Raku rules
   573  	// Empty capture groups are placeholders and will be replaced by mutators
   574  	// DO NOT REMOVE THEM!
   575  	return Rules{
   576  		"root": {
   577  			// Placeholder, will be overwritten by mutators, DO NOT REMOVE!
   578  			{`\A\z`, nil, nil},
   579  			Include("common"),
   580  			{`{`, Punctuation, Push(`root`)},
   581  			{`\(`, Punctuation, Push(`root`)},
   582  			{`[)}]`, Punctuation, Pop(1)},
   583  			{`;`, Punctuation, nil},
   584  			{`\[|\]`, Operator, nil},
   585  			{`.+?`, Text, nil},
   586  		},
   587  		"common": {
   588  			{`^#![^\n]*$`, CommentHashbang, nil},
   589  			Include("pod"),
   590  			// Multi-line, Embedded comment
   591  			{
   592  				"#`(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)`,
   593  				CommentMultiline,
   594  				findBrackets(rakuMultilineComment),
   595  			},
   596  			{`#[^\n]*$`, CommentSingle, nil},
   597  			// /regex/
   598  			{
   599  				`(?<=(?:^|\(|=|:|~~|\[|{|,|=>)\s*)(/)(?!\]|\))((?:\\\\|\\/|.)*?)((?<!(?<!\\)\\)/(?!'|"))`,
   600  				ByGroups(Punctuation, UsingSelf("regex"), Punctuation),
   601  				nil,
   602  			},
   603  			Include("variable"),
   604  			// ::?VARIABLE
   605  			{`::\?\w+(?::[_UD])?`, NameVariableGlobal, nil},
   606  			// Version
   607  			{
   608  				`\b(v)(\d+)((?:\.(?:\*|[\d\w]+))*)(\+)?`,
   609  				ByGroups(Keyword, NumberInteger, NameEntity, Operator),
   610  				nil,
   611  			},
   612  			Include("number"),
   613  			// Hyperoperator | »*«
   614  			{`(>>)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
   615  			{`(»)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
   616  			// Hyperoperator | «*«
   617  			{`(<<)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
   618  			{`(«)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
   619  			// Hyperoperator | »*»
   620  			{`(>>)(\S+?)(>>)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
   621  			{`(»)(\S+?)(»)`, ByGroups(Operator, UsingSelf("root"), Operator), nil},
   622  			// <<quoted words>>
   623  			{`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<<)(?!(?:(?!>>)[^\n])+?[},;] *\n)(?!(?:(?!>>).)+?>>\S+?>>)`, Punctuation, Push("<<")},
   624  			// «quoted words»
   625  			{`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(«)(?![^»]+?[},;] *\n)(?![^»]+?»\S+?»)`, Punctuation, Push("«")},
   626  			// [<]
   627  			{`(?<=\[\\?)<(?=\])`, Operator, nil},
   628  			// < and > operators | something < onething > something
   629  			{
   630  				`(?<=[$@%&]?\w[\w':-]* +)(<=?)( *[^ ]+? *)(>=?)(?= *[$@%&]?\w[\w':-]*)`,
   631  				ByGroups(Operator, UsingSelf("root"), Operator),
   632  				nil,
   633  			},
   634  			// <quoted words>
   635  			{
   636  				`(?<!(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+\s+|[\])}]\s+)\s*)(<)((?:(?![,;)}] *(?:#[^\n]+)?\n)[^<>])+?)(>)(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?\w[\w':-]*[^(]|\s+\[))`,
   637  				ByGroups(Punctuation, String, Punctuation),
   638  				nil,
   639  			},
   640  			{`C?X::['\w:-]+`, NameException, nil},
   641  			Include("metaoperator"),
   642  			// Pair | key => value
   643  			{
   644  				`(\w[\w'-]*)(\s*)(=>)`,
   645  				ByGroups(String, Text, Operator),
   646  				nil,
   647  			},
   648  			Include("colon-pair"),
   649  			// Token
   650  			{
   651  				`(?<=(?:^|\s)(?:regex|token|rule)(\s+))` + namePattern + colonPairLookahead + `\s*[({])`,
   652  				NameFunction,
   653  				Push("token", "name-adverb"),
   654  			},
   655  			// Substitution
   656  			{`(?<=^|\b|\s)(?<!\.)(ss|S|s|TR|tr)\b(\s*)`, ByGroups(Keyword, Text), Push("substitution")},
   657  			{keywordsPattern, Keyword, nil},
   658  			{builtinTypesPattern, NameBuiltin, nil},
   659  			{builtinRoutinesPattern, NameBuiltin, nil},
   660  			// Class name
   661  			{
   662  				`(?<=(?:^|\s)(?:class|grammar|role|does|but|is|subset|of)\s+)` + namePattern,
   663  				NameClass,
   664  				Push("name-adverb"),
   665  			},
   666  			//  Routine
   667  			{
   668  				`(?<=(?:^|\s)(?:sub|method|multi sub|multi)\s+)!?` + namePattern + colonPairLookahead + `\s*[({])`,
   669  				NameFunction,
   670  				Push("name-adverb"),
   671  			},
   672  			// Constant
   673  			{`(?<=\bconstant\s+)` + namePattern, NameConstant, Push("name-adverb")},
   674  			// Namespace
   675  			{`(?<=\b(?:use|module|package)\s+)` + namePattern, NameNamespace, Push("name-adverb")},
   676  			Include("operator"),
   677  			Include("single-quote"),
   678  			{`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")},
   679  			// m,rx regex
   680  			{`(?<=^|\b|\s)(ms|m|rx)\b(\s*)`, ByGroups(Keyword, Text), Push("rx")},
   681  			// Quote constructs
   682  			{
   683  				`(?<=^|\b|\s)(?<keyword>(?:qq|q|Q))(?<adverbs>(?::?(?:heredoc|to|qq|ww|q|w|s|a|h|f|c|b|to|v|x))*)(?<ws>\s*)(?<opening_delimiters>(?<delimiter>[^0-9a-zA-Z:\s])\k<delimiter>*)`,
   684  				EmitterFunc(quote),
   685  				findBrackets(rakuQuote),
   686  			},
   687  			// Function
   688  			{
   689  				`\b` + namePattern + colonPairLookahead + `\()`,
   690  				NameFunction,
   691  				Push("name-adverb"),
   692  			},
   693  			// Method
   694  			{
   695  				`(?<!\.\.[?^*+]?)(?<=(?:\.[?^*+&]?)|self!)` + namePattern + colonPairLookahead + `\b)`,
   696  				NameFunction,
   697  				Push("name-adverb"),
   698  			},
   699  			// Indirect invocant
   700  			{namePattern + `(?=\s+\W?['\w:-]+:\W)`, NameFunction, Push("name-adverb")},
   701  			{`(?<=\W)(?:∅|i|e|𝑒|tau|τ|pi|π|Inf|∞)(?=\W)`, NameConstant, nil},
   702  			{`(「)([^」]*)(」)`, ByGroups(Punctuation, String, Punctuation), nil},
   703  			{`(?<=^ *)\b` + namePattern + `(?=:\s*(?:for|while|loop))`, NameLabel, nil},
   704  			// Sigilless variable
   705  			{
   706  				`(?<=\b(?:my|our|constant|let|temp)\s+)\\` + namePattern,
   707  				NameVariable,
   708  				Push("name-adverb"),
   709  			},
   710  			{namePattern, Name, Push("name-adverb")},
   711  		},
   712  		"rx": {
   713  			Include("colon-pair-attribute"),
   714  			{
   715  				`(?<opening_delimiters>(?<delimiter>[^\w:\s])\k<delimiter>*)`,
   716  				ByGroupNames(
   717  					map[string]Emitter{
   718  						`opening_delimiters`: Punctuation,
   719  						`delimiter`:          nil,
   720  					},
   721  				),
   722  				findBrackets(rakuMatchRegex),
   723  			},
   724  		},
   725  		"substitution": {
   726  			Include("colon-pair-attribute"),
   727  			// Substitution | s{regex} = value
   728  			{
   729  				`(?<opening_delimiters>(?<delimiter>` + bracketsPattern + `)\k<delimiter>*)`,
   730  				ByGroupNames(map[string]Emitter{
   731  					`opening_delimiters`: Punctuation,
   732  					`delimiter`:          nil,
   733  				}),
   734  				findBrackets(rakuMatchRegex),
   735  			},
   736  			// Substitution | s/regex/string/
   737  			{
   738  				`(?<opening_delimiters>[^\w:\s])`,
   739  				Punctuation,
   740  				findBrackets(rakuSubstitutionRegex),
   741  			},
   742  		},
   743  		"number": {
   744  			{`0_?[0-7]+(_[0-7]+)*`, LiteralNumberOct, nil},
   745  			{`0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*`, LiteralNumberHex, nil},
   746  			{`0b[01]+(_[01]+)*`, LiteralNumberBin, nil},
   747  			{
   748  				`(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?`,
   749  				LiteralNumberFloat,
   750  				nil,
   751  			},
   752  			{`(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*`, LiteralNumberFloat, nil},
   753  			{`(?<=\d+)i`, NameConstant, nil},
   754  			{`\d+(_\d+)*`, LiteralNumberInteger, nil},
   755  		},
   756  		"name-adverb": {
   757  			Include("colon-pair-attribute-keyvalue"),
   758  			Default(Pop(1)),
   759  		},
   760  		"colon-pair": {
   761  			// :key(value)
   762  			{colonPairPattern, colonPair(String), findBrackets(rakuNameAttribute)},
   763  			// :123abc
   764  			{
   765  				`(:)(\d+)(\w[\w'-]*)`,
   766  				ByGroups(Punctuation, UsingSelf("number"), String),
   767  				nil,
   768  			},
   769  			// :key
   770  			{`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, String), nil},
   771  			{`\s+`, Text, nil},
   772  		},
   773  		"colon-pair-attribute": {
   774  			// :key(value)
   775  			{colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)},
   776  			// :123abc
   777  			{
   778  				`(:)(\d+)(\w[\w'-]*)`,
   779  				ByGroups(Punctuation, UsingSelf("number"), NameAttribute),
   780  				nil,
   781  			},
   782  			// :key
   783  			{`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, NameAttribute), nil},
   784  			{`\s+`, Text, nil},
   785  		},
   786  		"colon-pair-attribute-keyvalue": {
   787  			// :key(value)
   788  			{colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)},
   789  		},
   790  		"escape-qq": {
   791  			{
   792  				`(?<!(?<!\\)\\)(\\qq)(\[)(.+?)(\])`,
   793  				ByGroups(StringEscape, Punctuation, UsingSelf("qq"), Punctuation),
   794  				nil,
   795  			},
   796  		},
   797  		`escape-char`: {
   798  			{`(?<!(?<!\\)\\)(\\[abfrnrt])`, StringEscape, nil},
   799  		},
   800  		`escape-single-quote`: {
   801  			{`(?<!(?<!\\)\\)(\\)(['\\])`, ByGroups(StringEscape, StringSingle), nil},
   802  		},
   803  		"escape-c-name": {
   804  			{
   805  				`(?<!(?<!\\)\\)(\\[c|C])(\[)(.+?)(\])`,
   806  				ByGroups(StringEscape, Punctuation, String, Punctuation),
   807  				nil,
   808  			},
   809  		},
   810  		"escape-hexadecimal": {
   811  			{
   812  				`(?<!(?<!\\)\\)(\\[x|X])(\[)([0-9a-fA-F]+)(\])`,
   813  				ByGroups(StringEscape, Punctuation, NumberHex, Punctuation),
   814  				nil,
   815  			},
   816  			{`(\\[x|X])([0-9a-fA-F]+)`, ByGroups(StringEscape, NumberHex), nil},
   817  		},
   818  		"regex": {
   819  			// Placeholder, will be overwritten by mutators, DO NOT REMOVE!
   820  			{`\A\z`, nil, nil},
   821  			Include("regex-escape-class"),
   822  			Include(`regex-character-escape`),
   823  			// $(code)
   824  			{
   825  				`([$@])((?<!(?<!\\)\\)\()`,
   826  				ByGroups(Keyword, Punctuation),
   827  				replaceRule(ruleReplacingConfig{
   828  					delimiter: []rune(`)`),
   829  					tokenType: Punctuation,
   830  					stateName: `root`,
   831  					pushState: true,
   832  				}),
   833  			},
   834  			// Exclude $/ from variables, because we can't get out of the end of the slash regex: $/;
   835  			{`\$(?=/)`, NameEntity, nil},
   836  			// Exclude $ from variables
   837  			{`\$(?=\z|\s|[^<(\w*!.])`, NameEntity, nil},
   838  			Include("variable"),
   839  			Include("escape-c-name"),
   840  			Include("escape-hexadecimal"),
   841  			Include("number"),
   842  			Include("single-quote"),
   843  			// :my variable code ...
   844  			{
   845  				`(?<!(?<!\\)\\)(:)(my|our|state|constant|temp|let)`,
   846  				ByGroups(Operator, KeywordDeclaration),
   847  				replaceRule(ruleReplacingConfig{
   848  					delimiter: []rune(`;`),
   849  					tokenType: Punctuation,
   850  					stateName: `root`,
   851  					pushState: true,
   852  				}),
   853  			},
   854  			// <{code}>
   855  			{
   856  				`(?<!(?<!\\)\\)(<)([?!.]*)((?<!(?<!\\)\\){)`,
   857  				ByGroups(Punctuation, Operator, Punctuation),
   858  				replaceRule(ruleReplacingConfig{
   859  					delimiter: []rune(`}>`),
   860  					tokenType: Punctuation,
   861  					stateName: `root`,
   862  					pushState: true,
   863  				}),
   864  			},
   865  			// {code}
   866  			Include(`closure`),
   867  			// Properties
   868  			{`(:)(\w+)`, ByGroups(Punctuation, NameAttribute), nil},
   869  			// Operator
   870  			{`\|\||\||&&|&|\.\.|\*\*|%%|%|:|!|<<|«|>>|»|\+|\*\*|\*|\?|=|~|<~~>`, Operator, nil},
   871  			// Anchors
   872  			{`\^\^|\^|\$\$|\$`, NameEntity, nil},
   873  			{`\.`, NameEntity, nil},
   874  			{`#[^\n]*\n`, CommentSingle, nil},
   875  			// Lookaround
   876  			{
   877  				`(?<!(?<!\\)\\)(<)(\s*)([?!.]+)(\s*)(after|before)`,
   878  				ByGroups(Punctuation, Text, Operator, Text, OperatorWord),
   879  				replaceRule(ruleReplacingConfig{
   880  					delimiter: []rune(`>`),
   881  					tokenType: Punctuation,
   882  					stateName: `regex`,
   883  					pushState: true,
   884  				}),
   885  			},
   886  			{
   887  				`(?<!(?<!\\)\\)(<)([|!?.]*)(wb|ww|ws|w)(>)`,
   888  				ByGroups(Punctuation, Operator, OperatorWord, Punctuation),
   889  				nil,
   890  			},
   891  			// <$variable>
   892  			{
   893  				`(?<!(?<!\\)\\)(<)([?!.]*)([$@]\w[\w:-]*)(>)`,
   894  				ByGroups(Punctuation, Operator, NameVariable, Punctuation),
   895  				nil,
   896  			},
   897  			// Capture markers
   898  			{`(?<!(?<!\\)\\)<\(|\)>`, Operator, nil},
   899  			{
   900  				`(?<!(?<!\\)\\)(<)(\w[\w:-]*)(=\.?)`,
   901  				ByGroups(Punctuation, NameVariable, Operator),
   902  				Push(`regex-variable`),
   903  			},
   904  			{
   905  				`(?<!(?<!\\)\\)(<)([|!?.&]*)(\w(?:(?!:\s)[\w':-])*)`,
   906  				ByGroups(Punctuation, Operator, NameFunction),
   907  				Push(`regex-function`),
   908  			},
   909  			{`(?<!(?<!\\)\\)<`, Punctuation, Push("regex-property")},
   910  			{`(?<!(?<!\\)\\)"`, Punctuation, Push("double-quotes")},
   911  			{`(?<!(?<!\\)\\)(?:\]|\))`, Punctuation, Pop(1)},
   912  			{`(?<!(?<!\\)\\)(?:\[|\()`, Punctuation, Push("regex")},
   913  			{`.+?`, StringRegex, nil},
   914  		},
   915  		"regex-class-builtin": {
   916  			{
   917  				`\b(?:alnum|alpha|blank|cntrl|digit|graph|lower|print|punct|space|upper|xdigit|same|ident)\b`,
   918  				NameBuiltin,
   919  				nil,
   920  			},
   921  		},
   922  		"regex-function": {
   923  			// <function>
   924  			{`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)},
   925  			// <function(parameter)>
   926  			{
   927  				`\(`,
   928  				Punctuation,
   929  				replaceRule(ruleReplacingConfig{
   930  					delimiter: []rune(`)>`),
   931  					tokenType: Punctuation,
   932  					stateName: `root`,
   933  					popState:  true,
   934  					pushState: true,
   935  				}),
   936  			},
   937  			// <function value>
   938  			{
   939  				`\s+`,
   940  				StringRegex,
   941  				replaceRule(ruleReplacingConfig{
   942  					delimiter: []rune(`>`),
   943  					tokenType: Punctuation,
   944  					stateName: `regex`,
   945  					popState:  true,
   946  					pushState: true,
   947  				}),
   948  			},
   949  			// <function: value>
   950  			{
   951  				`:`,
   952  				Punctuation,
   953  				replaceRule(ruleReplacingConfig{
   954  					delimiter: []rune(`>`),
   955  					tokenType: Punctuation,
   956  					stateName: `root`,
   957  					popState:  true,
   958  					pushState: true,
   959  				}),
   960  			},
   961  		},
   962  		"regex-variable": {
   963  			Include(`regex-starting-operators`),
   964  			// <var=function(
   965  			{
   966  				`(&)?(\w(?:(?!:\s)[\w':-])*)(?=\()`,
   967  				ByGroups(Operator, NameFunction),
   968  				Mutators(Pop(1), Push(`regex-function`)),
   969  			},
   970  			// <var=function>
   971  			{`(&)?(\w[\w':-]*)(>)`, ByGroups(Operator, NameFunction, Punctuation), Pop(1)},
   972  			// <var=
   973  			Default(Pop(1), Push(`regex-property`)),
   974  		},
   975  		"regex-property": {
   976  			{`(?<!(?<!\\)\\)>`, Punctuation, Pop(1)},
   977  			Include("regex-class-builtin"),
   978  			Include("variable"),
   979  			Include(`regex-starting-operators`),
   980  			Include("colon-pair-attribute"),
   981  			{`(?<!(?<!\\)\\)\[`, Punctuation, Push("regex-character-class")},
   982  			{`\+|\-`, Operator, nil},
   983  			{`@[\w':-]+`, NameVariable, nil},
   984  			{`.+?`, StringRegex, nil},
   985  		},
   986  		`regex-starting-operators`: {
   987  			{`(?<=<)[|!?.]+`, Operator, nil},
   988  		},
   989  		"regex-escape-class": {
   990  			{`(?i)\\n|\\t|\\h|\\v|\\s|\\d|\\w`, StringEscape, nil},
   991  		},
   992  		`regex-character-escape`: {
   993  			{`(?<!(?<!\\)\\)(\\)(.)`, ByGroups(StringEscape, StringRegex), nil},
   994  		},
   995  		"regex-character-class": {
   996  			{`(?<!(?<!\\)\\)\]`, Punctuation, Pop(1)},
   997  			Include("regex-escape-class"),
   998  			Include("escape-c-name"),
   999  			Include("escape-hexadecimal"),
  1000  			Include(`regex-character-escape`),
  1001  			Include("number"),
  1002  			{`\.\.`, Operator, nil},
  1003  			{`.+?`, StringRegex, nil},
  1004  		},
  1005  		"metaoperator": {
  1006  			// Z[=>]
  1007  			{
  1008  				`\b([RZX]+)\b(\[)([^\s\]]+?)(\])`,
  1009  				ByGroups(OperatorWord, Punctuation, UsingSelf("root"), Punctuation),
  1010  				nil,
  1011  			},
  1012  			// Z=>
  1013  			{`\b([RZX]+)\b([^\s\]]+)`, ByGroups(OperatorWord, UsingSelf("operator")), nil},
  1014  		},
  1015  		"operator": {
  1016  			// Word Operator
  1017  			{wordOperatorsPattern, OperatorWord, nil},
  1018  			// Operator
  1019  			{operatorsPattern, Operator, nil},
  1020  		},
  1021  		"pod": {
  1022  			// Single-line pod declaration
  1023  			{`(#[|=])\s`, Keyword, Push("pod-single")},
  1024  			// Multi-line pod declaration
  1025  			{
  1026  				"(?<keyword>#[|=])(?<opening_delimiters>(?<delimiter>" + bracketsPattern + `)\k<delimiter>*)(?<value>)(?<closing_delimiters>)`,
  1027  				ByGroupNames(
  1028  					map[string]Emitter{
  1029  						`keyword`:            Keyword,
  1030  						`opening_delimiters`: Punctuation,
  1031  						`delimiter`:          nil,
  1032  						`value`:              UsingSelf("pod-declaration"),
  1033  						`closing_delimiters`: Punctuation,
  1034  					}),
  1035  				findBrackets(rakuPodDeclaration),
  1036  			},
  1037  			Include("pod-blocks"),
  1038  		},
  1039  		"pod-blocks": {
  1040  			// =begin code
  1041  			{
  1042  				`(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?<name>code)(?<config>[^\n]*)(?<value>.*?)(?<ws3>^\k<ws>)(?<end_keyword>=end)(?<ws4> +)\k<name>`,
  1043  				EmitterFunc(podCode),
  1044  				nil,
  1045  			},
  1046  			// =begin
  1047  			{
  1048  				`(?<=^ *)(?<ws> *)(?<keyword>=begin)(?<ws2> +)(?!code)(?<name>\w[\w'-]*)(?<config>[^\n]*)(?<value>)(?<closing_delimiters>)`,
  1049  				ByGroupNames(
  1050  					map[string]Emitter{
  1051  						`ws`:                 Comment,
  1052  						`keyword`:            Keyword,
  1053  						`ws2`:                StringDoc,
  1054  						`name`:               Keyword,
  1055  						`config`:             EmitterFunc(podConfig),
  1056  						`value`:              UsingSelf("pod-begin"),
  1057  						`closing_delimiters`: Keyword,
  1058  					}),
  1059  				findBrackets(rakuPod),
  1060  			},
  1061  			// =for ...
  1062  			{
  1063  				`(?<=^ *)(?<ws> *)(?<keyword>=(?:for|defn))(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`,
  1064  				ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)),
  1065  				Push("pod-paragraph"),
  1066  			},
  1067  			// =config
  1068  			{
  1069  				`(?<=^ *)(?<ws> *)(?<keyword>=config)(?<ws2> +)(?<name>\w[\w'-]*)(?<config>[^\n]*\n)`,
  1070  				ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)),
  1071  				nil,
  1072  			},
  1073  			// =alias
  1074  			{
  1075  				`(?<=^ *)(?<ws> *)(?<keyword>=alias)(?<ws2> +)(?<name>\w[\w'-]*)(?<value>[^\n]*\n)`,
  1076  				ByGroups(Comment, Keyword, StringDoc, Keyword, StringDoc),
  1077  				nil,
  1078  			},
  1079  			// =encoding
  1080  			{
  1081  				`(?<=^ *)(?<ws> *)(?<keyword>=encoding)(?<ws2> +)(?<name>[^\n]+)`,
  1082  				ByGroups(Comment, Keyword, StringDoc, Name),
  1083  				nil,
  1084  			},
  1085  			// =para ...
  1086  			{
  1087  				`(?<=^ *)(?<ws> *)(?<keyword>=(?:para|table|pod))(?<config>(?<!\n\s*)[^\n]*\n)`,
  1088  				ByGroups(Comment, Keyword, EmitterFunc(podConfig)),
  1089  				Push("pod-paragraph"),
  1090  			},
  1091  			// =head1 ...
  1092  			{
  1093  				`(?<=^ *)(?<ws> *)(?<keyword>=head\d+)(?<ws2> *)(?<config>#?)`,
  1094  				ByGroups(Comment, Keyword, GenericHeading, Keyword),
  1095  				Push("pod-heading"),
  1096  			},
  1097  			// =item ...
  1098  			{
  1099  				`(?<=^ *)(?<ws> *)(?<keyword>=(?:item\d*|comment|data|[A-Z]+))(?<ws2> *)(?<config>#?)`,
  1100  				ByGroups(Comment, Keyword, StringDoc, Keyword),
  1101  				Push("pod-paragraph"),
  1102  			},
  1103  			{
  1104  				`(?<=^ *)(?<ws> *)(?<keyword>=finish)(?<config>[^\n]*)`,
  1105  				ByGroups(Comment, Keyword, EmitterFunc(podConfig)),
  1106  				Push("pod-finish"),
  1107  			},
  1108  			// ={custom} ...
  1109  			{
  1110  				`(?<=^ *)(?<ws> *)(?<name>=\w[\w'-]*)(?<ws2> *)(?<config>#?)`,
  1111  				ByGroups(Comment, Name, StringDoc, Keyword),
  1112  				Push("pod-paragraph"),
  1113  			},
  1114  			// = podconfig
  1115  			{
  1116  				`(?<=^ *)(?<keyword> *=)(?<ws> *)(?<config>(?::\w[\w'-]*(?:` + colonPairOpeningBrackets + `.+?` +
  1117  					colonPairClosingBrackets + `) *)*\n)`,
  1118  				ByGroups(Keyword, StringDoc, EmitterFunc(podConfig)),
  1119  				nil,
  1120  			},
  1121  		},
  1122  		"pod-begin": {
  1123  			Include("pod-blocks"),
  1124  			Include("pre-pod-formatter"),
  1125  			{`.+?`, StringDoc, nil},
  1126  		},
  1127  		"pod-declaration": {
  1128  			Include("pre-pod-formatter"),
  1129  			{`.+?`, StringDoc, nil},
  1130  		},
  1131  		"pod-paragraph": {
  1132  			{`\n *\n|\n(?=^ *=)`, StringDoc, Pop(1)},
  1133  			Include("pre-pod-formatter"),
  1134  			{`.+?`, StringDoc, nil},
  1135  		},
  1136  		"pod-single": {
  1137  			{`\n`, StringDoc, Pop(1)},
  1138  			Include("pre-pod-formatter"),
  1139  			{`.+?`, StringDoc, nil},
  1140  		},
  1141  		"pod-heading": {
  1142  			{`\n *\n|\n(?=^ *=)`, GenericHeading, Pop(1)},
  1143  			Include("pre-pod-formatter"),
  1144  			{`.+?`, GenericHeading, nil},
  1145  		},
  1146  		"pod-finish": {
  1147  			{`\z`, nil, Pop(1)},
  1148  			Include("pre-pod-formatter"),
  1149  			{`.+?`, StringDoc, nil},
  1150  		},
  1151  		"pre-pod-formatter": {
  1152  			// C<code>, B<bold>, ...
  1153  			{
  1154  				`(?<keyword>[CBIUDTKRPAELZVMSXN])(?<opening_delimiters><+|«)`,
  1155  				ByGroups(Keyword, Punctuation),
  1156  				findBrackets(rakuPodFormatter),
  1157  			},
  1158  		},
  1159  		"pod-formatter": {
  1160  			// Placeholder rule, will be replaced by mutators. DO NOT REMOVE!
  1161  			{`>`, Punctuation, Pop(1)},
  1162  			Include("pre-pod-formatter"),
  1163  			// Placeholder rule, will be replaced by mutators. DO NOT REMOVE!
  1164  			{`.+?`, StringOther, nil},
  1165  		},
  1166  		"variable": {
  1167  			{variablePattern, NameVariable, Push("name-adverb")},
  1168  			{globalVariablePattern, NameVariableGlobal, Push("name-adverb")},
  1169  			{`[$@]<[^>]+>`, NameVariable, nil},
  1170  			{`\$[/!¢]`, NameVariable, nil},
  1171  			{`[$@%]`, NameVariable, nil},
  1172  		},
  1173  		"single-quote": {
  1174  			{`(?<!(?<!\\)\\)'`, Punctuation, Push("single-quote-inner")},
  1175  		},
  1176  		"single-quote-inner": {
  1177  			{`(?<!(?<!(?<!\\)\\)\\)'`, Punctuation, Pop(1)},
  1178  			Include("escape-single-quote"),
  1179  			Include("escape-qq"),
  1180  			{`(?:\\\\|\\[^\\]|[^'\\])+?`, StringSingle, nil},
  1181  		},
  1182  		"double-quotes": {
  1183  			{`(?<!(?<!\\)\\)"`, Punctuation, Pop(1)},
  1184  			Include("qq"),
  1185  		},
  1186  		"<<": {
  1187  			{`>>(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)},
  1188  			Include("ww"),
  1189  		},
  1190  		"«": {
  1191  			{`»(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)},
  1192  			Include("ww"),
  1193  		},
  1194  		"ww": {
  1195  			Include("single-quote"),
  1196  			Include("qq"),
  1197  		},
  1198  		"qq": {
  1199  			Include("qq-variable"),
  1200  			Include("closure"),
  1201  			Include(`escape-char`),
  1202  			Include("escape-hexadecimal"),
  1203  			Include("escape-c-name"),
  1204  			Include("escape-qq"),
  1205  			{`.+?`, StringDouble, nil},
  1206  		},
  1207  		"qq-variable": {
  1208  			{
  1209  				`(?<!(?<!\\)\\)(?:` + variablePattern + `|` + globalVariablePattern + `)` + colonPairLookahead + `)`,
  1210  				NameVariable,
  1211  				Push("qq-variable-extras", "name-adverb"),
  1212  			},
  1213  		},
  1214  		"qq-variable-extras": {
  1215  			// Method
  1216  			{
  1217  				`(?<operator>\.)(?<method_name>` + namePattern + `)` + colonPairLookahead + `\()`,
  1218  				ByGroupNames(map[string]Emitter{
  1219  					`operator`:    Operator,
  1220  					`method_name`: NameFunction,
  1221  				}),
  1222  				Push(`name-adverb`),
  1223  			},
  1224  			// Function/Signature
  1225  			{
  1226  				`\(`, Punctuation, replaceRule(
  1227  					ruleReplacingConfig{
  1228  						delimiter: []rune(`)`),
  1229  						tokenType: Punctuation,
  1230  						stateName: `root`,
  1231  						pushState: true,
  1232  					}),
  1233  			},
  1234  			Default(Pop(1)),
  1235  		},
  1236  		"Q": {
  1237  			Include("escape-qq"),
  1238  			{`.+?`, String, nil},
  1239  		},
  1240  		"Q-closure": {
  1241  			Include("escape-qq"),
  1242  			Include("closure"),
  1243  			{`.+?`, String, nil},
  1244  		},
  1245  		"Q-variable": {
  1246  			Include("escape-qq"),
  1247  			Include("qq-variable"),
  1248  			{`.+?`, String, nil},
  1249  		},
  1250  		"closure": {
  1251  			{`(?<!(?<!\\)\\){`, Punctuation, replaceRule(
  1252  				ruleReplacingConfig{
  1253  					delimiter: []rune(`}`),
  1254  					tokenType: Punctuation,
  1255  					stateName: `root`,
  1256  					pushState: true,
  1257  				}),
  1258  			},
  1259  		},
  1260  		"token": {
  1261  			// Token signature
  1262  			{`\(`, Punctuation, replaceRule(
  1263  				ruleReplacingConfig{
  1264  					delimiter: []rune(`)`),
  1265  					tokenType: Punctuation,
  1266  					stateName: `root`,
  1267  					pushState: true,
  1268  				}),
  1269  			},
  1270  			{`{`, Punctuation, replaceRule(
  1271  				ruleReplacingConfig{
  1272  					delimiter: []rune(`}`),
  1273  					tokenType: Punctuation,
  1274  					stateName: `regex`,
  1275  					popState:  true,
  1276  					pushState: true,
  1277  				}),
  1278  			},
  1279  			{`\s*`, Text, nil},
  1280  			Default(Pop(1)),
  1281  		},
  1282  	}
  1283  }
  1284  
  1285  // Joins keys of rune map
  1286  func joinRuneMap(m map[rune]rune) string {
  1287  	runes := make([]rune, 0, len(m))
  1288  	for k := range m {
  1289  		runes = append(runes, k)
  1290  	}
  1291  
  1292  	return string(runes)
  1293  }
  1294  
  1295  // Finds the index of substring in the string starting at position n
  1296  func indexAt(str []rune, substr []rune, pos int) int {
  1297  	strFromPos := str[pos:]
  1298  	text := string(strFromPos)
  1299  
  1300  	idx := strings.Index(text, string(substr))
  1301  	if idx > -1 {
  1302  		idx = utf8.RuneCountInString(text[:idx])
  1303  
  1304  		// Search again if the substr is escaped with backslash
  1305  		if (idx > 1 && strFromPos[idx-1] == '\\' && strFromPos[idx-2] != '\\') ||
  1306  			(idx == 1 && strFromPos[idx-1] == '\\') {
  1307  			idx = indexAt(str[pos:], substr, idx+1)
  1308  
  1309  			idx = utf8.RuneCountInString(text[:idx])
  1310  
  1311  			if idx < 0 {
  1312  				return idx
  1313  			}
  1314  		}
  1315  		idx += pos
  1316  	}
  1317  
  1318  	return idx
  1319  }
  1320  
  1321  // Tells if an array of string contains a string
  1322  func contains(s []string, e string) bool {
  1323  	for _, value := range s {
  1324  		if value == e {
  1325  			return true
  1326  		}
  1327  	}
  1328  	return false
  1329  }
  1330  
  1331  type rulePosition int
  1332  
  1333  const (
  1334  	topRule    rulePosition = 0
  1335  	bottomRule              = -1
  1336  )
  1337  
// ruleMakingConfig carries the settings handed to makeRule when a rule is
// built. replaceRule fills pattern, delimiter, numberOfDelimiterChars and
// tokenType from the ruleReplacingConfig fields of the same name, and sets
// mutator to the mutator chain it assembled for the new rule.
//
// NOTE(review): how makeRule interprets each field (for example how pattern
// and delimiter interact) is not visible from this declaration; confirm
// against makeRule before relying on it.
type ruleMakingConfig struct {
	delimiter              []rune
	pattern                string
	tokenType              Emitter
	mutator                Mutator
	numberOfDelimiterChars int
}
  1345  
// ruleReplacingConfig describes one rule replacement performed by
// replaceRule. It is also the element type of the replacement stack that
// replaceRule and popRule keep in the lexer state.
//
// delimiter, pattern, tokenType and numberOfDelimiterChars are passed on
// unchanged to makeRule. mutator, when set, is used instead of the default
// Pop(1) for a top rule, and is the only base mutator for other rules;
// appendMutator, when set, is added after the other mutators.
//
// stateName names the state whose rule is replaced, and rulePosition picks
// the first (topRule) or last (bottomRule) rule of that state. popState asks
// replaceRule to pop the current lexer state and pushState asks it to push
// stateName once the rule is installed. pop marks a replacement that is not
// recorded on the stack; popRule sets it when it re-installs an earlier rule.
type ruleReplacingConfig struct {
	delimiter              []rune
	pattern                string
	tokenType              Emitter
	numberOfDelimiterChars int
	mutator                Mutator
	appendMutator          Mutator
	rulePosition           rulePosition
	stateName              string
	pop                    bool
	popState               bool
	pushState              bool
}
  1359  
  1360  // Pops rule from state-stack and replaces the rule with the previous rule
  1361  func popRule(rule ruleReplacingConfig) MutatorFunc {
  1362  	return func(state *LexerState) error {
  1363  		stackName := genStackName(rule.stateName, rule.rulePosition)
  1364  
  1365  		stack, ok := state.Get(stackName).([]ruleReplacingConfig)
  1366  
  1367  		if ok && len(stack) > 0 {
  1368  			// Pop from stack
  1369  			stack = stack[:len(stack)-1]
  1370  			lastRule := stack[len(stack)-1]
  1371  			lastRule.pushState = false
  1372  			lastRule.popState = false
  1373  			lastRule.pop = true
  1374  			state.Set(stackName, stack)
  1375  
  1376  			// Call replaceRule to use the last rule
  1377  			err := replaceRule(lastRule)(state)
  1378  			if err != nil {
  1379  				panic(err)
  1380  			}
  1381  		}
  1382  
  1383  		return nil
  1384  	}
  1385  }
  1386  
  1387  // Replaces a state's rule based on the rule config and position
  1388  func replaceRule(rule ruleReplacingConfig) MutatorFunc {
  1389  	return func(state *LexerState) error {
  1390  		stateName := rule.stateName
  1391  		stackName := genStackName(rule.stateName, rule.rulePosition)
  1392  
  1393  		stack, ok := state.Get(stackName).([]ruleReplacingConfig)
  1394  		if !ok {
  1395  			stack = []ruleReplacingConfig{}
  1396  		}
  1397  
  1398  		// If state-stack is empty fill it with the placeholder rule
  1399  		if len(stack) == 0 {
  1400  			stack = []ruleReplacingConfig{
  1401  				{
  1402  					// Placeholder, will be overwritten by mutators, DO NOT REMOVE!
  1403  					pattern:      `\A\z`,
  1404  					tokenType:    nil,
  1405  					mutator:      nil,
  1406  					stateName:    stateName,
  1407  					rulePosition: rule.rulePosition,
  1408  				},
  1409  			}
  1410  			state.Set(stackName, stack)
  1411  		}
  1412  
  1413  		var mutator Mutator
  1414  		mutators := []Mutator{}
  1415  
  1416  		switch {
  1417  		case rule.rulePosition == topRule && rule.mutator == nil:
  1418  			// Default mutator for top rule
  1419  			mutators = []Mutator{Pop(1), popRule(rule)}
  1420  		case rule.rulePosition == topRule && rule.mutator != nil:
  1421  			// Default mutator for top rule, when rule.mutator is set
  1422  			mutators = []Mutator{rule.mutator, popRule(rule)}
  1423  		case rule.mutator != nil:
  1424  			mutators = []Mutator{rule.mutator}
  1425  		}
  1426  
  1427  		if rule.appendMutator != nil {
  1428  			mutators = append(mutators, rule.appendMutator)
  1429  		}
  1430  
  1431  		if len(mutators) > 0 {
  1432  			mutator = Mutators(mutators...)
  1433  		} else {
  1434  			mutator = nil
  1435  		}
  1436  
  1437  		ruleConfig := ruleMakingConfig{
  1438  			pattern:                rule.pattern,
  1439  			delimiter:              rule.delimiter,
  1440  			numberOfDelimiterChars: rule.numberOfDelimiterChars,
  1441  			tokenType:              rule.tokenType,
  1442  			mutator:                mutator,
  1443  		}
  1444  
  1445  		cRule := makeRule(ruleConfig)
  1446  
  1447  		switch rule.rulePosition {
  1448  		case topRule:
  1449  			state.Rules[stateName][0] = cRule
  1450  		case bottomRule:
  1451  			state.Rules[stateName][len(state.Rules[stateName])-1] = cRule
  1452  		}
  1453  
  1454  		// Pop state name from stack if asked. State should be popped first before Pushing
  1455  		if rule.popState {
  1456  			err := Pop(1)(state)
  1457  			if err != nil {
  1458  				panic(err)
  1459  			}
  1460  		}
  1461  
  1462  		// Push state name to stack if asked
  1463  		if rule.pushState {
  1464  			err := Push(stateName)(state)
  1465  			if err != nil {
  1466  				panic(err)
  1467  			}
  1468  		}
  1469  
  1470  		if !rule.pop {
  1471  			state.Set(stackName, append(stack, rule))
  1472  		}
  1473  
  1474  		return nil
  1475  	}
  1476  }
  1477  
  1478  // Generates rule replacing stack using state name and rule position
  1479  func genStackName(stateName string, rulePosition rulePosition) (stackName string) {
  1480  	switch rulePosition {
  1481  	case topRule:
  1482  		stackName = stateName + `-top-stack`
  1483  	case bottomRule:
  1484  		stackName = stateName + `-bottom-stack`
  1485  	}
  1486  	return
  1487  }
  1488  
  1489  // Makes a compiled rule and returns it
  1490  func makeRule(config ruleMakingConfig) *CompiledRule {
  1491  	var rePattern string
  1492  
  1493  	if len(config.delimiter) > 0 {
  1494  		delimiter := string(config.delimiter)
  1495  
  1496  		if config.numberOfDelimiterChars > 1 {
  1497  			delimiter = strings.Repeat(delimiter, config.numberOfDelimiterChars)
  1498  		}
  1499  
  1500  		rePattern = `(?<!(?<!\\)\\)` + regexp2.Escape(delimiter)
  1501  	} else {
  1502  		rePattern = config.pattern
  1503  	}
  1504  
  1505  	regex := regexp2.MustCompile(rePattern, regexp2.None)
  1506  
  1507  	cRule := &CompiledRule{
  1508  		Rule:   Rule{rePattern, config.tokenType, config.mutator},
  1509  		Regexp: regex,
  1510  	}
  1511  
  1512  	return cRule
  1513  }
  1514  
  1515  // Emitter for colon pairs, changes token state based on key and brackets
  1516  func colonPair(tokenClass TokenType) Emitter {
  1517  	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
  1518  		iterators := []Iterator{}
  1519  		tokens := []Token{
  1520  			{Punctuation, state.NamedGroups[`colon`]},
  1521  			{Punctuation, state.NamedGroups[`opening_delimiters`]},
  1522  			{Punctuation, state.NamedGroups[`closing_delimiters`]},
  1523  		}
  1524  
  1525  		// Append colon
  1526  		iterators = append(iterators, Literator(tokens[0]))
  1527  
  1528  		if tokenClass == NameAttribute {
  1529  			iterators = append(iterators, Literator(Token{NameAttribute, state.NamedGroups[`key`]}))
  1530  		} else {
  1531  			var keyTokenState string
  1532  			keyre := regexp.MustCompile(`^\d+$`)
  1533  			if keyre.MatchString(state.NamedGroups[`key`]) {
  1534  				keyTokenState = "common"
  1535  			} else {
  1536  				keyTokenState = "Q"
  1537  			}
  1538  
  1539  			// Use token state to Tokenise key
  1540  			if keyTokenState != "" {
  1541  				iterator, err := state.Lexer.Tokenise(
  1542  					&TokeniseOptions{
  1543  						State:  keyTokenState,
  1544  						Nested: true,
  1545  					}, state.NamedGroups[`key`])
  1546  
  1547  				if err != nil {
  1548  					panic(err)
  1549  				} else {
  1550  					// Append key
  1551  					iterators = append(iterators, iterator)
  1552  				}
  1553  			}
  1554  		}
  1555  
  1556  		// Append punctuation
  1557  		iterators = append(iterators, Literator(tokens[1]))
  1558  
  1559  		var valueTokenState string
  1560  
  1561  		switch state.NamedGroups[`opening_delimiters`] {
  1562  		case "(", "{", "[":
  1563  			valueTokenState = "root"
  1564  		case "<<", "«":
  1565  			valueTokenState = "ww"
  1566  		case "<":
  1567  			valueTokenState = "Q"
  1568  		}
  1569  
  1570  		// Use token state to Tokenise value
  1571  		if valueTokenState != "" {
  1572  			iterator, err := state.Lexer.Tokenise(
  1573  				&TokeniseOptions{
  1574  					State:  valueTokenState,
  1575  					Nested: true,
  1576  				}, state.NamedGroups[`value`])
  1577  
  1578  			if err != nil {
  1579  				panic(err)
  1580  			} else {
  1581  				// Append value
  1582  				iterators = append(iterators, iterator)
  1583  			}
  1584  		}
  1585  		// Append last punctuation
  1586  		iterators = append(iterators, Literator(tokens[2]))
  1587  
  1588  		return Concaterator(iterators...)
  1589  	})
  1590  }
  1591  
  1592  // Emitter for quoting constructs, changes token state based on quote name and adverbs
  1593  func quote(groups []string, state *LexerState) Iterator {
  1594  	keyword := state.NamedGroups[`keyword`]
  1595  	adverbsStr := state.NamedGroups[`adverbs`]
  1596  	iterators := []Iterator{}
  1597  	tokens := []Token{
  1598  		{Keyword, keyword},
  1599  		{StringAffix, adverbsStr},
  1600  		{Text, state.NamedGroups[`ws`]},
  1601  		{Punctuation, state.NamedGroups[`opening_delimiters`]},
  1602  		{Punctuation, state.NamedGroups[`closing_delimiters`]},
  1603  	}
  1604  
  1605  	// Append all tokens before dealing with the main string
  1606  	iterators = append(iterators, Literator(tokens[:4]...))
  1607  
  1608  	var tokenStates []string
  1609  
  1610  	// Set tokenStates based on adverbs
  1611  	adverbs := strings.Split(adverbsStr, ":")
  1612  	for _, adverb := range adverbs {
  1613  		switch adverb {
  1614  		case "c", "closure":
  1615  			tokenStates = append(tokenStates, "Q-closure")
  1616  		case "qq":
  1617  			tokenStates = append(tokenStates, "qq")
  1618  		case "ww":
  1619  			tokenStates = append(tokenStates, "ww")
  1620  		case "s", "scalar", "a", "array", "h", "hash", "f", "function":
  1621  			tokenStates = append(tokenStates, "Q-variable")
  1622  		}
  1623  	}
  1624  
  1625  	var tokenState string
  1626  
  1627  	switch {
  1628  	case keyword == "qq" || contains(tokenStates, "qq"):
  1629  		tokenState = "qq"
  1630  	case adverbsStr == "ww" || contains(tokenStates, "ww"):
  1631  		tokenState = "ww"
  1632  	case contains(tokenStates, "Q-closure") && contains(tokenStates, "Q-variable"):
  1633  		tokenState = "qq"
  1634  	case contains(tokenStates, "Q-closure"):
  1635  		tokenState = "Q-closure"
  1636  	case contains(tokenStates, "Q-variable"):
  1637  		tokenState = "Q-variable"
  1638  	default:
  1639  		tokenState = "Q"
  1640  	}
  1641  
  1642  	iterator, err := state.Lexer.Tokenise(
  1643  		&TokeniseOptions{
  1644  			State:  tokenState,
  1645  			Nested: true,
  1646  		}, state.NamedGroups[`value`])
  1647  
  1648  	if err != nil {
  1649  		panic(err)
  1650  	} else {
  1651  		iterators = append(iterators, iterator)
  1652  	}
  1653  
  1654  	// Append the last punctuation
  1655  	iterators = append(iterators, Literator(tokens[4]))
  1656  
  1657  	return Concaterator(iterators...)
  1658  }
  1659  
  1660  // Emitter for pod config, tokenises the properties with "colon-pair-attribute" state
  1661  func podConfig(groups []string, state *LexerState) Iterator {
  1662  	// Tokenise pod config
  1663  	iterator, err := state.Lexer.Tokenise(
  1664  		&TokeniseOptions{
  1665  			State:  "colon-pair-attribute",
  1666  			Nested: true,
  1667  		}, groups[0])
  1668  
  1669  	if err != nil {
  1670  		panic(err)
  1671  	} else {
  1672  		return iterator
  1673  	}
  1674  }
  1675  
  1676  // Emitter for pod code, tokenises the code based on the lang specified
  1677  func podCode(groups []string, state *LexerState) Iterator {
  1678  	iterators := []Iterator{}
  1679  	tokens := []Token{
  1680  		{Comment, state.NamedGroups[`ws`]},
  1681  		{Keyword, state.NamedGroups[`keyword`]},
  1682  		{Keyword, state.NamedGroups[`ws2`]},
  1683  		{Keyword, state.NamedGroups[`name`]},
  1684  		{StringDoc, state.NamedGroups[`value`]},
  1685  		{Comment, state.NamedGroups[`ws3`]},
  1686  		{Keyword, state.NamedGroups[`end_keyword`]},
  1687  		{Keyword, state.NamedGroups[`ws4`]},
  1688  		{Keyword, state.NamedGroups[`name`]},
  1689  	}
  1690  
  1691  	// Append all tokens before dealing with the pod config
  1692  	iterators = append(iterators, Literator(tokens[:4]...))
  1693  
  1694  	// Tokenise pod config
  1695  	iterators = append(iterators, podConfig([]string{state.NamedGroups[`config`]}, state))
  1696  
  1697  	langMatch := regexp.MustCompile(`:lang\W+(\w+)`).FindStringSubmatch(state.NamedGroups[`config`])
  1698  	var lang string
  1699  	if len(langMatch) > 1 {
  1700  		lang = langMatch[1]
  1701  	}
  1702  
  1703  	// Tokenise code based on lang property
  1704  	sublexer := internal.Get(lang)
  1705  	if sublexer != nil {
  1706  		iterator, err := sublexer.Tokenise(nil, state.NamedGroups[`value`])
  1707  
  1708  		if err != nil {
  1709  			panic(err)
  1710  		} else {
  1711  			iterators = append(iterators, iterator)
  1712  		}
  1713  	} else {
  1714  		iterators = append(iterators, Literator(tokens[4]))
  1715  	}
  1716  
  1717  	// Append the rest of the tokens
  1718  	iterators = append(iterators, Literator(tokens[5:]...))
  1719  
  1720  	return Concaterator(iterators...)
  1721  }
  1722  

View as plain text