const ( LowercaseSet = 0 // Set to arg. LowercaseAdd = 1 // Add arg. LowercaseBor = 2 // Bitwise or with 1. LowercaseBad = 3 // Bitwise and with 1 and add original. )
const ( Onerep InstOp = 0 // lef,back char,min,max a {n} Notonerep = 1 // lef,back char,min,max .{n} Setrep = 2 // lef,back set,min,max [\d]{n} Oneloop = 3 // lef,back char,min,max a {,n} Notoneloop = 4 // lef,back char,min,max .{,n} Setloop = 5 // lef,back set,min,max [\d]{,n} Onelazy = 6 // lef,back char,min,max a {,n}? Notonelazy = 7 // lef,back char,min,max .{,n}? Setlazy = 8 // lef,back set,min,max [\d]{,n}? One = 9 // lef char a Notone = 10 // lef char [^a] Set = 11 // lef set [a-z\s] \w \s \d Multi = 12 // lef string abcd Ref = 13 // lef group \# Bol = 14 // ^ Eol = 15 // $ Boundary = 16 // \b Nonboundary = 17 // \B Beginning = 18 // \A Start = 19 // \G EndZ = 20 // \Z End = 21 // \z Nothing = 22 // Reject! Lazybranch = 23 // back jump straight first Branchmark = 24 // back jump branch first for loop Lazybranchmark = 25 // back jump straight first for loop Nullcount = 26 // back val set counter, null mark Setcount = 27 // back val set counter, make mark Branchcount = 28 // back jump,limit branch++ if zero<=c<limit Lazybranchcount = 29 // back jump,limit same, but straight first Nullmark = 30 // back save position Setmark = 31 // back save position Capturemark = 32 // back group define group Getmark = 33 // back recall position Setjump = 34 // back save backtrack state Backjump = 35 // zap back to saved state Forejump = 36 // zap backtracking state Testref = 37 // backtrack if ref undefined Goto = 38 // jump just go Prune = 39 // prune it baby Stop = 40 // done! ECMABoundary = 41 // \b NonECMABoundary = 42 // \B Mask = 63 // Mask to get unmodified ordinary operator Rtl = 64 // bit to indicate that we're reverse scanning. Back = 128 // bit to indicate that we're backtracking. Back2 = 256 // bit to indicate that we're backtracking on a second branch. Ci = 512 // bit to indicate that we're case-insensitive. )
const ( IgnoreCase RegexOptions = 0x0001 // "i" Multiline = 0x0002 // "m" ExplicitCapture = 0x0004 // "n" Compiled = 0x0008 // "c" Singleline = 0x0010 // "s" IgnorePatternWhitespace = 0x0020 // "x" RightToLeft = 0x0040 // "r" Debug = 0x0080 // "d" ECMAScript = 0x0100 // "e" RE2 = 0x0200 // RE2 compat mode Unicode = 0x0400 // "u" )
const ( // internal issue ErrInternalError ErrorCode = "regexp/syntax: internal error" // Parser errors ErrUnterminatedComment = "unterminated comment" ErrInvalidCharRange = "invalid character class range" ErrInvalidRepeatSize = "invalid repeat count" ErrInvalidUTF8 = "invalid UTF-8" ErrCaptureGroupOutOfRange = "capture group number out of range" ErrUnexpectedParen = "unexpected )" ErrMissingParen = "missing closing )" ErrMissingBrace = "missing closing }" ErrInvalidRepeatOp = "invalid nested repetition operator" ErrMissingRepeatArgument = "missing argument to repetition operator" ErrConditionalExpression = "illegal conditional (?(...)) expression" ErrTooManyAlternates = "too many | in (?()|)" ErrUnrecognizedGrouping = "unrecognized grouping construct: (%v" ErrInvalidGroupName = "invalid group name: group names must begin with a word character and have a matching terminator" ErrCapNumNotZero = "capture number cannot be zero" ErrUndefinedBackRef = "reference to undefined group number %v" ErrUndefinedNameRef = "reference to undefined group name %v" ErrAlternationCantCapture = "alternation conditions do not capture and cannot be named" ErrAlternationCantHaveComment = "alternation conditions cannot be comments" ErrMalformedReference = "(?(%v) ) malformed" ErrUndefinedReference = "(?(%v) ) reference to undefined group" ErrIllegalEndEscape = "illegal \\ at end of pattern" ErrMalformedSlashP = "malformed \\p{X} character escape" ErrIncompleteSlashP = "incomplete \\p{X} character escape" ErrUnknownSlashP = "unknown unicode category, script, or property '%v'" ErrUnrecognizedEscape = "unrecognized escape sequence \\%v" ErrMissingControl = "missing control character" ErrUnrecognizedControl = "unrecognized control character" ErrTooFewHex = "insufficient hexadecimal digits" ErrInvalidHex = "hex values may not be larger than 0x10FFFF" ErrMalformedNameRef = "malformed \\k<...> named back reference" ErrBadClassInCharRange = "cannot include class \\%v in character range" 
ErrUnterminatedBracket = "unterminated [] set" ErrSubtractionMustBeLast = "a subtraction must be the last element in a character class" ErrReversedCharRange = "[%c-%c] range in reverse order" )
const ( Q byte = 5 // quantifier S = 4 // ordinary stopper Z = 3 // ScanBlank stopper X = 2 // whitespace E = 1 // should be escaped )
AnchorLoc values describe where the regex can be pegged (anchored).
const ( AnchorBeginning AnchorLoc = 0x0001 AnchorBol = 0x0002 AnchorStart = 0x0004 AnchorEol = 0x0008 AnchorEndZ = 0x0010 AnchorEnd = 0x0020 AnchorBoundary = 0x0040 AnchorECMABoundary = 0x0080 )
const ( // MaxPrefixSize is the largest number of runes we'll use for a Boyer-Moore prefix MaxPrefixSize = 50 )
var ( AnyClass = getCharSetFromOldString([]rune{0}, false) ECMAAnyClass = getCharSetFromOldString([]rune{0, 0x000a, 0x000b, 0x000d, 0x000e}, false) NoneClass = getCharSetFromOldString(nil, false) ECMAWordClass = getCharSetFromOldString(ecmaWord, false) NotECMAWordClass = getCharSetFromOldString(ecmaWord, true) ECMASpaceClass = getCharSetFromOldString(ecmaSpace, false) NotECMASpaceClass = getCharSetFromOldString(ecmaSpace, true) ECMADigitClass = getCharSetFromOldString(ecmaDigit, false) NotECMADigitClass = getCharSetFromOldString(ecmaDigit, true) WordClass = getCharSetFromCategoryString(false, false, wordCategoryText) NotWordClass = getCharSetFromCategoryString(true, false, wordCategoryText) SpaceClass = getCharSetFromCategoryString(false, false, spaceCategoryText) NotSpaceClass = getCharSetFromCategoryString(true, false, spaceCategoryText) DigitClass = getCharSetFromCategoryString(false, false, "Nd") NotDigitClass = getCharSetFromCategoryString(false, true, "Nd") RE2SpaceClass = getCharSetFromOldString(re2Space, false) NotRE2SpaceClass = getCharSetFromOldString(re2Space, true) )
ErrReplacementError is a general error encountered while parsing the replacement text.
var ErrReplacementError = errors.New("Replacement pattern error.")
func CharDescription(ch rune) string
CharDescription produces a human-readable description of a single character.
func Escape(input string) string
func IsECMAWordChar(r rune) bool
func IsWordChar(r rune) bool
According to UTS#18 Unicode Regular Expressions (http://www.unicode.org/reports/tr18/), RL 1.4 Simple Word Boundaries: the class of <word_character> includes all Alphabetic values from the Unicode character database, from UnicodeData.txt [UData], plus the U+200C ZERO WIDTH NON-JOINER and U+200D ZERO WIDTH JOINER.
func Unescape(input string) (string, error)
type AnchorLoc int16
func (anchors AnchorLoc) String() string
String returns a human-readable description of the anchors.
BmPrefix precomputes the Boyer-Moore tables for fast string scanning. These tables allow you to scan for the first occurrence of a string within a large body of text without examining every character. The performance of the heuristic depends on the actual string and the text being searched, but usually, the longer the string that is being searched for, the fewer characters need to be examined.
type BmPrefix struct {
// contains filtered or unexported fields
}
func (b *BmPrefix) Dump(indent string) string
Dump returns the contents of the filter as a human-readable string.
func (b *BmPrefix) IsMatch(text []rune, index, beglimit, endlimit int) bool
IsMatch is a quick match test: when a regex is anchored, we can use it instead of a Scan.
func (b *BmPrefix) Scan(text []rune, index, beglimit, endlimit int) int
Scan uses the Boyer-Moore algorithm to find the first occurrence of the specified string within text, beginning at index, and constrained within beglimit and endlimit.
The direction and case-sensitivity of the match are determined by the arguments used to construct the BmPrefix.
func (b *BmPrefix) String() string
CharSet combines start-end rune ranges and unicode categories representing a set of characters
type CharSet struct {
// contains filtered or unexported fields
}
func (c CharSet) CharIn(ch rune) bool
CharIn returns true if the rune is in our character set (either ranges or categories). It handles negations and subtracted sub-charsets.
func (c CharSet) Copy() CharSet
Copy makes a deep copy to prevent accidental mutation of a set
func (c CharSet) HasSubtraction() bool
func (c CharSet) IsEmpty() bool
func (c CharSet) IsMergeable() bool
func (c CharSet) IsNegated() bool
func (c CharSet) IsSingleton() bool
func (c CharSet) IsSingletonInverse() bool
func (c CharSet) SingletonChar() rune
SingletonChar will return the char from the first range without validation. It assumes you have checked for IsSingleton or IsSingletonInverse and will panic given bad input
func (c CharSet) String() string
String returns a human-readable description of the character set.
type Code struct { Codes []int // the code Strings [][]rune // string table Sets []*CharSet //character set table TrackCount int // how many instructions use backtracking Caps map[int]int // mapping of user group numbers -> impl group slots Capsize int // number of impl group slots FcPrefix *Prefix // the set of candidate first characters (may be null) BmPrefix *BmPrefix // the fixed prefix string as a Boyer-Moore machine (may be null) Anchors AnchorLoc // the set of zero-length start anchors (RegexFCD.Bol, etc) RightToLeft bool // true if right to left }
func Write(tree *RegexTree) (*Code, error)
func (c *Code) Dump() string
func (c *Code) OpcodeDescription(offset int) string
OpcodeDescription returns a human-readable string describing the opcode at the given offset.
An Error describes a failure to parse a regular expression and gives the offending expression.
type Error struct { Code ErrorCode Expr string Args []interface{} }
func (e *Error) Error() string
An ErrorCode describes a failure to parse a regular expression.
type ErrorCode string
func (e ErrorCode) String() string
type InstOp int
type Prefix struct { PrefixStr []rune PrefixSet CharSet CaseInsensitive bool }
type RegexOptions int32
type RegexTree struct { Capnames map[string]int Caplist []string // contains filtered or unexported fields }
func Parse(re string, op RegexOptions) (*RegexTree, error)
Parse converts a regex string into a parse tree
func (t *RegexTree) Dump() string
type ReplacerData struct { Rep string Strings []string Rules []int }
func NewReplacerData(rep string, caps map[int]int, capsize int, capnames map[string]int, op RegexOptions) (*ReplacerData, error)
NewReplacerData will populate a reusable replacer data struct based on the given replacement string and the capture group data from a regexp