1 package syntax
2
3 import (
4 "bytes"
5 "fmt"
6 "math"
7 )
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 type InstOp int
24
25 const (
26
27
28 Onerep InstOp = 0
29 Notonerep = 1
30 Setrep = 2
31
32 Oneloop = 3
33 Notoneloop = 4
34 Setloop = 5
35
36 Onelazy = 6
37 Notonelazy = 7
38 Setlazy = 8
39
40 One = 9
41 Notone = 10
42 Set = 11
43
44 Multi = 12
45 Ref = 13
46
47 Bol = 14
48 Eol = 15
49 Boundary = 16
50 Nonboundary = 17
51 Beginning = 18
52 Start = 19
53 EndZ = 20
54 End = 21
55
56 Nothing = 22
57
58
59
60 Lazybranch = 23
61 Branchmark = 24
62 Lazybranchmark = 25
63 Nullcount = 26
64 Setcount = 27
65 Branchcount = 28
66 Lazybranchcount = 29
67 Nullmark = 30
68 Setmark = 31
69 Capturemark = 32
70 Getmark = 33
71 Setjump = 34
72 Backjump = 35
73 Forejump = 36
74 Testref = 37
75 Goto = 38
76
77 Prune = 39
78 Stop = 40
79
80 ECMABoundary = 41
81 NonECMABoundary = 42
82
83
84
85 Mask = 63
86 Rtl = 64
87 Back = 128
88 Back2 = 256
89 Ci = 512
90 )
91
92 type Code struct {
93 Codes []int
94 Strings [][]rune
95 Sets []*CharSet
96 TrackCount int
97 Caps map[int]int
98 Capsize int
99 FcPrefix *Prefix
100 BmPrefix *BmPrefix
101 Anchors AnchorLoc
102 RightToLeft bool
103 }
104
105 func opcodeBacktracks(op InstOp) bool {
106 op &= Mask
107
108 switch op {
109 case Oneloop, Notoneloop, Setloop, Onelazy, Notonelazy, Setlazy, Lazybranch, Branchmark, Lazybranchmark,
110 Nullcount, Setcount, Branchcount, Lazybranchcount, Setmark, Capturemark, Getmark, Setjump, Backjump,
111 Forejump, Goto:
112 return true
113
114 default:
115 return false
116 }
117 }
118
119 func opcodeSize(op InstOp) int {
120 op &= Mask
121
122 switch op {
123 case Nothing, Bol, Eol, Boundary, Nonboundary, ECMABoundary, NonECMABoundary, Beginning, Start, EndZ,
124 End, Nullmark, Setmark, Getmark, Setjump, Backjump, Forejump, Stop:
125 return 1
126
127 case One, Notone, Multi, Ref, Testref, Goto, Nullcount, Setcount, Lazybranch, Branchmark, Lazybranchmark,
128 Prune, Set:
129 return 2
130
131 case Capturemark, Branchcount, Lazybranchcount, Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy,
132 Setlazy, Setrep, Setloop:
133 return 3
134
135 default:
136 panic(fmt.Errorf("Unexpected op code: %v", op))
137 }
138 }
139
140 var codeStr = []string{
141 "Onerep", "Notonerep", "Setrep",
142 "Oneloop", "Notoneloop", "Setloop",
143 "Onelazy", "Notonelazy", "Setlazy",
144 "One", "Notone", "Set",
145 "Multi", "Ref",
146 "Bol", "Eol", "Boundary", "Nonboundary", "Beginning", "Start", "EndZ", "End",
147 "Nothing",
148 "Lazybranch", "Branchmark", "Lazybranchmark",
149 "Nullcount", "Setcount", "Branchcount", "Lazybranchcount",
150 "Nullmark", "Setmark", "Capturemark", "Getmark",
151 "Setjump", "Backjump", "Forejump", "Testref", "Goto",
152 "Prune", "Stop",
153 "ECMABoundary", "NonECMABoundary",
154 }
155
156 func operatorDescription(op InstOp) string {
157 desc := codeStr[op&Mask]
158 if (op & Ci) != 0 {
159 desc += "-Ci"
160 }
161 if (op & Rtl) != 0 {
162 desc += "-Rtl"
163 }
164 if (op & Back) != 0 {
165 desc += "-Back"
166 }
167 if (op & Back2) != 0 {
168 desc += "-Back2"
169 }
170
171 return desc
172 }
173
174
175 func (c *Code) OpcodeDescription(offset int) string {
176 buf := &bytes.Buffer{}
177
178 op := InstOp(c.Codes[offset])
179 fmt.Fprintf(buf, "%06d ", offset)
180
181 if opcodeBacktracks(op & Mask) {
182 buf.WriteString("*")
183 } else {
184 buf.WriteString(" ")
185 }
186 buf.WriteString(operatorDescription(op))
187 buf.WriteString("(")
188 op &= Mask
189
190 switch op {
191 case One, Notone, Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy:
192 buf.WriteString("Ch = ")
193 buf.WriteString(CharDescription(rune(c.Codes[offset+1])))
194
195 case Set, Setrep, Setloop, Setlazy:
196 buf.WriteString("Set = ")
197 buf.WriteString(c.Sets[c.Codes[offset+1]].String())
198
199 case Multi:
200 fmt.Fprintf(buf, "String = %s", string(c.Strings[c.Codes[offset+1]]))
201
202 case Ref, Testref:
203 fmt.Fprintf(buf, "Index = %d", c.Codes[offset+1])
204
205 case Capturemark:
206 fmt.Fprintf(buf, "Index = %d", c.Codes[offset+1])
207 if c.Codes[offset+2] != -1 {
208 fmt.Fprintf(buf, ", Unindex = %d", c.Codes[offset+2])
209 }
210
211 case Nullcount, Setcount:
212 fmt.Fprintf(buf, "Value = %d", c.Codes[offset+1])
213
214 case Goto, Lazybranch, Branchmark, Lazybranchmark, Branchcount, Lazybranchcount:
215 fmt.Fprintf(buf, "Addr = %d", c.Codes[offset+1])
216 }
217
218 switch op {
219 case Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy, Setrep, Setloop, Setlazy:
220 buf.WriteString(", Rep = ")
221 if c.Codes[offset+2] == math.MaxInt32 {
222 buf.WriteString("inf")
223 } else {
224 fmt.Fprintf(buf, "%d", c.Codes[offset+2])
225 }
226
227 case Branchcount, Lazybranchcount:
228 buf.WriteString(", Limit = ")
229 if c.Codes[offset+2] == math.MaxInt32 {
230 buf.WriteString("inf")
231 } else {
232 fmt.Fprintf(buf, "%d", c.Codes[offset+2])
233 }
234
235 }
236
237 buf.WriteString(")")
238
239 return buf.String()
240 }
241
242 func (c *Code) Dump() string {
243 buf := &bytes.Buffer{}
244
245 if c.RightToLeft {
246 fmt.Fprintln(buf, "Direction: right-to-left")
247 } else {
248 fmt.Fprintln(buf, "Direction: left-to-right")
249 }
250 if c.FcPrefix == nil {
251 fmt.Fprintln(buf, "Firstchars: n/a")
252 } else {
253 fmt.Fprintf(buf, "Firstchars: %v\n", c.FcPrefix.PrefixSet.String())
254 }
255
256 if c.BmPrefix == nil {
257 fmt.Fprintln(buf, "Prefix: n/a")
258 } else {
259 fmt.Fprintf(buf, "Prefix: %v\n", Escape(c.BmPrefix.String()))
260 }
261
262 fmt.Fprintf(buf, "Anchors: %v\n", c.Anchors)
263 fmt.Fprintln(buf)
264
265 if c.BmPrefix != nil {
266 fmt.Fprintln(buf, "BoyerMoore:")
267 fmt.Fprintln(buf, c.BmPrefix.Dump(" "))
268 }
269 for i := 0; i < len(c.Codes); i += opcodeSize(InstOp(c.Codes[i])) {
270 fmt.Fprintln(buf, c.OpcodeDescription(i))
271 }
272
273 return buf.String()
274 }
275
View as plain text