1 package text
2
3 import (
4 "bytes"
5 "io"
6 "regexp"
7 "unicode/utf8"
8
9 "github.com/yuin/goldmark/util"
10 )
11
// invalidValue marks a segment boundary as "not set". blockReader.AdvanceLine
// passes it to SetPosition to request the stored segment of the target line.
const invalidValue = -1
13
14
// EOF is the byte that Peek returns when the current position is outside the
// readable range.
const EOF = byte(0xff)
16
17
// A Reader reads a byte source line by line while tracking the current line
// number and the current position as a Segment.
type Reader interface {
	io.RuneReader

	// Source returns the source bytes the reader operates on.
	Source() []byte

	// ResetPosition resets the reader's position state.
	ResetPosition()

	// Peek returns the byte at the current position without advancing.
	// It returns a space while padding is pending and EOF when the position
	// is outside the readable range.
	Peek() byte

	// PeekLine returns the rest of the current line together with the current
	// segment, without advancing. The returned bytes are nil when the position
	// is outside the readable range.
	PeekLine() ([]byte, Segment)

	// PrecendingCharacter returns the character just before the current
	// position.
	PrecendingCharacter() rune

	// Value returns the bytes selected by the given segment.
	Value(Segment) []byte

	// LineOffset returns the distance from the head of the current line to the
	// current position, with tabs expanded through util.TabWidth and the
	// current padding subtracted.
	LineOffset() int

	// Position returns the current line number and the current segment.
	Position() (int, Segment)

	// SetPosition sets the current line number and the current segment.
	SetPosition(int, Segment)

	// SetPadding sets the padding of the current segment.
	SetPadding(int)

	// Advance advances the current position by the given number of bytes;
	// pending padding is consumed first.
	Advance(int)

	// AdvanceAndSetPadding advances the current position by the first argument
	// and then applies the second argument as padding if it is greater than
	// the padding that remains after advancing.
	AdvanceAndSetPadding(int, int)

	// AdvanceLine moves the current position to the next line.
	AdvanceLine()

	// SkipSpaces advances over space characters. It returns a segment, the
	// number of characters skipped and whether a non-space character was
	// found before the input ran out.
	SkipSpaces() (Segment, int, bool)

	// SkipBlankLines advances over blank lines. It returns the segment of the
	// line it stopped at, the number of lines skipped and whether a non-blank
	// line was found before the input ran out.
	SkipBlankLines() (Segment, int, bool)

	// Match runs reg against the input starting at the current position.
	// On a match the reader is advanced by the length of the match; otherwise
	// the position is left unchanged.
	Match(reg *regexp.Regexp) bool

	// FindSubMatch runs reg against the input starting at the current position
	// and returns the matched text followed by the submatches, or nil when
	// there is no match. On a match the reader is advanced by the length of
	// the match.
	FindSubMatch(reg *regexp.Regexp) [][]byte

	// FindClosure searches from the current position for the closer byte that
	// balances an opener which is considered already open. It returns the
	// segments leading up to the closer and true when the closer is found.
	FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool)
}
78
79
// FindClosureOptions controls how Reader.FindClosure searches for a closer.
type FindClosureOptions struct {
	// CodeSpan, when true, makes the search treat runs of backticks as code
	// span delimiters and ignore opener and closer bytes inside a code span.
	CodeSpan bool

	// Nesting, when true, lets further openers increase the nesting depth.
	// When false, meeting another opener ends the search without a result.
	Nesting bool

	// Newline, when true, lets the search continue on following lines.
	// When false, the search ends at the end of the current line.
	Newline bool

	// Advance, when true, leaves the reader where the search stopped.
	// When false, the reader is put back at the position it had before the
	// search.
	Advance bool
}
97
// reader is the Reader implementation over a whole source buffer.
type reader struct {
	source       []byte  // the complete input
	sourceLength int     // len(source), cached by NewReader
	line         int     // index of the current line; -1 before the first line
	peekedLine   []byte  // cache filled by PeekLine; nil when not cached
	pos          Segment // current position: rest of the current line
	head         int     // offset in source where the current line starts
	lineOffset   int     // cache for LineOffset; negative when not computed
}
107
108
109 func NewReader(source []byte) Reader {
110 r := &reader{
111 source: source,
112 sourceLength: len(source),
113 }
114 r.ResetPosition()
115 return r
116 }
117
118 func (r *reader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
119 return findClosureReader(r, opener, closer, options)
120 }
121
122 func (r *reader) ResetPosition() {
123 r.line = -1
124 r.head = 0
125 r.lineOffset = -1
126 r.AdvanceLine()
127 }
128
129 func (r *reader) Source() []byte {
130 return r.source
131 }
132
133 func (r *reader) Value(seg Segment) []byte {
134 return seg.Value(r.source)
135 }
136
137 func (r *reader) Peek() byte {
138 if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
139 if r.pos.Padding != 0 {
140 return space[0]
141 }
142 return r.source[r.pos.Start]
143 }
144 return EOF
145 }
146
147 func (r *reader) PeekLine() ([]byte, Segment) {
148 if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
149 if r.peekedLine == nil {
150 r.peekedLine = r.pos.Value(r.Source())
151 }
152 return r.peekedLine, r.pos
153 }
154 return nil, r.pos
155 }
156
157
158 func (r *reader) ReadRune() (rune, int, error) {
159 return readRuneReader(r)
160 }
161
162 func (r *reader) LineOffset() int {
163 if r.lineOffset < 0 {
164 v := 0
165 for i := r.head; i < r.pos.Start; i++ {
166 if r.source[i] == '\t' {
167 v += util.TabWidth(v)
168 } else {
169 v++
170 }
171 }
172 r.lineOffset = v - r.pos.Padding
173 }
174 return r.lineOffset
175 }
176
177 func (r *reader) PrecendingCharacter() rune {
178 if r.pos.Start <= 0 {
179 if r.pos.Padding != 0 {
180 return rune(' ')
181 }
182 return rune('\n')
183 }
184 i := r.pos.Start - 1
185 for ; i >= 0; i-- {
186 if utf8.RuneStart(r.source[i]) {
187 break
188 }
189 }
190 rn, _ := utf8.DecodeRune(r.source[i:])
191 return rn
192 }
193
194 func (r *reader) Advance(n int) {
195 r.lineOffset = -1
196 if n < len(r.peekedLine) && r.pos.Padding == 0 {
197 r.pos.Start += n
198 r.peekedLine = nil
199 return
200 }
201 r.peekedLine = nil
202 l := r.sourceLength
203 for ; n > 0 && r.pos.Start < l; n-- {
204 if r.pos.Padding != 0 {
205 r.pos.Padding--
206 continue
207 }
208 if r.source[r.pos.Start] == '\n' {
209 r.AdvanceLine()
210 continue
211 }
212 r.pos.Start++
213 }
214 }
215
216 func (r *reader) AdvanceAndSetPadding(n, padding int) {
217 r.Advance(n)
218 if padding > r.pos.Padding {
219 r.SetPadding(padding)
220 }
221 }
222
223 func (r *reader) AdvanceLine() {
224 r.lineOffset = -1
225 r.peekedLine = nil
226 r.pos.Start = r.pos.Stop
227 r.head = r.pos.Start
228 if r.pos.Start < 0 {
229 return
230 }
231 r.pos.Stop = r.sourceLength
232 for i := r.pos.Start; i < r.sourceLength; i++ {
233 c := r.source[i]
234 if c == '\n' {
235 r.pos.Stop = i + 1
236 break
237 }
238 }
239 r.line++
240 r.pos.Padding = 0
241 }
242
243 func (r *reader) Position() (int, Segment) {
244 return r.line, r.pos
245 }
246
247 func (r *reader) SetPosition(line int, pos Segment) {
248 r.lineOffset = -1
249 r.line = line
250 r.pos = pos
251 }
252
253 func (r *reader) SetPadding(v int) {
254 r.pos.Padding = v
255 }
256
257 func (r *reader) SkipSpaces() (Segment, int, bool) {
258 return skipSpacesReader(r)
259 }
260
261 func (r *reader) SkipBlankLines() (Segment, int, bool) {
262 return skipBlankLinesReader(r)
263 }
264
265 func (r *reader) Match(reg *regexp.Regexp) bool {
266 return matchReader(r, reg)
267 }
268
269 func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte {
270 return findSubMatchReader(r, reg)
271 }
272
273
// A BlockReader is a Reader that reads the lines described by a set of
// segments of the source.
type BlockReader interface {
	Reader

	// Reset replaces the segments the reader iterates over and resets its
	// position.
	Reset(segment *Segments)
}
279
// blockReader is the BlockReader implementation; each stored segment is one
// line of the block.
type blockReader struct {
	source         []byte    // the complete input the segments refer to
	segments       *Segments // line segments of the block being read
	segmentsLength int       // segments.Len(), cached by Reset
	line           int       // index into segments of the current line
	pos            Segment   // current position within the current line
	head           int       // Start of the stored segment for the current line
	last           int       // Stop of the final segment; 0 when there are none
	lineOffset     int       // cache for LineOffset; negative when not computed
}
290
291
292 func NewBlockReader(source []byte, segments *Segments) BlockReader {
293 r := &blockReader{
294 source: source,
295 }
296 if segments != nil {
297 r.Reset(segments)
298 }
299 return r
300 }
301
302 func (r *blockReader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
303 return findClosureReader(r, opener, closer, options)
304 }
305
306 func (r *blockReader) ResetPosition() {
307 r.line = -1
308 r.head = 0
309 r.last = 0
310 r.lineOffset = -1
311 r.pos.Start = -1
312 r.pos.Stop = -1
313 r.pos.Padding = 0
314 if r.segmentsLength > 0 {
315 last := r.segments.At(r.segmentsLength - 1)
316 r.last = last.Stop
317 }
318 r.AdvanceLine()
319 }
320
321 func (r *blockReader) Reset(segments *Segments) {
322 r.segments = segments
323 r.segmentsLength = segments.Len()
324 r.ResetPosition()
325 }
326
327 func (r *blockReader) Source() []byte {
328 return r.source
329 }
330
// Value returns the bytes covered by seg, assembled across the block's line
// segments. For every stored segment it visits, ConcatPadding is applied to
// the result before that line's bytes are appended.
func (r *blockReader) Value(seg Segment) []byte {
	line := r.segmentsLength - 1
	ret := make([]byte, 0, seg.Stop-seg.Start+1)
	// Search backwards for the last stored segment that starts at or before
	// seg.Start; copying begins on that line.
	for ; line >= 0; line-- {
		if seg.Start >= r.segments.At(line).Start {
			break
		}
	}
	i := seg.Start
	for ; line < r.segmentsLength; line++ {
		s := r.segments.At(line)
		// i < 0 means "start of this line": set after the first line is done.
		if i < 0 {
			i = s.Start
		}
		ret = s.ConcatPadding(ret)
		// Copy until either the requested range or this line ends.
		for ; i < seg.Stop && i < s.Stop; i++ {
			ret = append(ret, r.source[i])
		}
		i = -1
		// This line extends past the requested range, so nothing is left.
		if s.Stop > seg.Stop {
			break
		}
	}
	return ret
}
356
357
358 func (r *blockReader) ReadRune() (rune, int, error) {
359 return readRuneReader(r)
360 }
361
362 func (r *blockReader) PrecendingCharacter() rune {
363 if r.pos.Padding != 0 {
364 return rune(' ')
365 }
366 if r.segments.Len() < 1 {
367 return rune('\n')
368 }
369 firstSegment := r.segments.At(0)
370 if r.line == 0 && r.pos.Start <= firstSegment.Start {
371 return rune('\n')
372 }
373 l := len(r.source)
374 i := r.pos.Start - 1
375 for ; i < l && i >= 0; i-- {
376 if utf8.RuneStart(r.source[i]) {
377 break
378 }
379 }
380 if i < 0 || i >= l {
381 return rune('\n')
382 }
383 rn, _ := utf8.DecodeRune(r.source[i:])
384 return rn
385 }
386
387 func (r *blockReader) LineOffset() int {
388 if r.lineOffset < 0 {
389 v := 0
390 for i := r.head; i < r.pos.Start; i++ {
391 if r.source[i] == '\t' {
392 v += util.TabWidth(v)
393 } else {
394 v++
395 }
396 }
397 r.lineOffset = v - r.pos.Padding
398 }
399 return r.lineOffset
400 }
401
402 func (r *blockReader) Peek() byte {
403 if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
404 if r.pos.Padding != 0 {
405 return space[0]
406 }
407 return r.source[r.pos.Start]
408 }
409 return EOF
410 }
411
412 func (r *blockReader) PeekLine() ([]byte, Segment) {
413 if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
414 return r.pos.Value(r.source), r.pos
415 }
416 return nil, r.pos
417 }
418
419 func (r *blockReader) Advance(n int) {
420 r.lineOffset = -1
421
422 if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 {
423 r.pos.Start += n
424 return
425 }
426
427 for ; n > 0; n-- {
428 if r.pos.Padding != 0 {
429 r.pos.Padding--
430 continue
431 }
432 if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last {
433 r.AdvanceLine()
434 continue
435 }
436 r.pos.Start++
437 }
438 }
439
440 func (r *blockReader) AdvanceAndSetPadding(n, padding int) {
441 r.Advance(n)
442 if padding > r.pos.Padding {
443 r.SetPadding(padding)
444 }
445 }
446
447 func (r *blockReader) AdvanceLine() {
448 r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue))
449 r.head = r.pos.Start
450 }
451
452 func (r *blockReader) Position() (int, Segment) {
453 return r.line, r.pos
454 }
455
456 func (r *blockReader) SetPosition(line int, pos Segment) {
457 r.lineOffset = -1
458 r.line = line
459 if pos.Start == invalidValue {
460 if r.line < r.segmentsLength {
461 s := r.segments.At(line)
462 r.head = s.Start
463 r.pos = s
464 }
465 } else {
466 r.pos = pos
467 if r.line < r.segmentsLength {
468 s := r.segments.At(line)
469 r.head = s.Start
470 }
471 }
472 }
473
474 func (r *blockReader) SetPadding(v int) {
475 r.lineOffset = -1
476 r.pos.Padding = v
477 }
478
479 func (r *blockReader) SkipSpaces() (Segment, int, bool) {
480 return skipSpacesReader(r)
481 }
482
483 func (r *blockReader) SkipBlankLines() (Segment, int, bool) {
484 return skipBlankLinesReader(r)
485 }
486
487 func (r *blockReader) Match(reg *regexp.Regexp) bool {
488 return matchReader(r, reg)
489 }
490
491 func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte {
492 return findSubMatchReader(r, reg)
493 }
494
495 func skipBlankLinesReader(r Reader) (Segment, int, bool) {
496 lines := 0
497 for {
498 line, seg := r.PeekLine()
499 if line == nil {
500 return seg, lines, false
501 }
502 if util.IsBlank(line) {
503 lines++
504 r.AdvanceLine()
505 } else {
506 return seg, lines, true
507 }
508 }
509 }
510
511 func skipSpacesReader(r Reader) (Segment, int, bool) {
512 chars := 0
513 for {
514 line, segment := r.PeekLine()
515 if line == nil {
516 return segment, chars, false
517 }
518 for i, c := range line {
519 if util.IsSpace(c) {
520 chars++
521 r.Advance(1)
522 continue
523 }
524 return segment.WithStart(segment.Start + i + 1), chars, true
525 }
526 }
527 }
528
529 func matchReader(r Reader, reg *regexp.Regexp) bool {
530 oldline, oldseg := r.Position()
531 match := reg.FindReaderSubmatchIndex(r)
532 r.SetPosition(oldline, oldseg)
533 if match == nil {
534 return false
535 }
536 r.Advance(match[1] - match[0])
537 return true
538 }
539
540 func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte {
541 oldLine, oldSeg := r.Position()
542 match := reg.FindReaderSubmatchIndex(r)
543 r.SetPosition(oldLine, oldSeg)
544 if match == nil {
545 return nil
546 }
547 var bb bytes.Buffer
548 bb.Grow(match[1] - match[0])
549 for i := 0; i < match[1]; {
550 r, size, _ := readRuneReader(r)
551 i += size
552 bb.WriteRune(r)
553 }
554 bs := bb.Bytes()
555 var result [][]byte
556 for i := 0; i < len(match); i += 2 {
557 if match[i] < 0 {
558 result = append(result, []byte{})
559 continue
560 }
561 result = append(result, bs[match[i]:match[i+1]])
562 }
563
564 r.SetPosition(oldLine, oldSeg)
565 r.Advance(match[1] - match[0])
566 return result
567 }
568
569 func readRuneReader(r Reader) (rune, int, error) {
570 line, _ := r.PeekLine()
571 if line == nil {
572 return 0, 0, io.EOF
573 }
574 rn, size := utf8.DecodeRune(line)
575 if rn == utf8.RuneError {
576 return 0, 0, io.EOF
577 }
578 r.Advance(size)
579 return rn, size, nil
580 }
581
// findClosureReader scans forward from r's current position for the closer
// byte that balances an opener which is treated as already open (the depth
// counter starts at 1).
//
// Outside code spans, a backslash followed by a punctuation byte is skipped
// as an escape. When opts.CodeSpan is set, runs of backticks delimit code
// spans and opener/closer bytes inside them are ignored. When opts.Nesting is
// false, meeting another opener ends the search. When opts.Newline is false,
// the search does not continue past the current line. Unless opts.Advance is
// set, r is put back at its original position before returning; with
// opts.Advance set, r is left wherever the scan stopped.
//
// It returns the collected segments and true when the closer was found, and
// nil and false otherwise.
func findClosureReader(r Reader, opener, closer byte, opts FindClosureOptions) (*Segments, bool) {
	opened := 1         // current nesting depth
	codeSpanOpener := 0 // length of the backtick run that opened a code span; 0 when outside
	closed := false
	orgline, orgpos := r.Position()
	var ret *Segments

	for {
		bs, seg := r.PeekLine()
		if bs == nil {
			goto end
		}
		i := 0
		for i < len(bs) {
			c := bs[i]
			if opts.CodeSpan && codeSpanOpener != 0 && c == '`' {
				// Inside a code span: count this backtick run; the span closes
				// only when the run has the same length as the opening run.
				codeSpanCloser := 0
				for ; i < len(bs); i++ {
					if bs[i] == '`' {
						codeSpanCloser++
					} else {
						// Step back so the i++ below lands on this byte.
						i--
						break
					}
				}
				if codeSpanCloser == codeSpanOpener {
					codeSpanOpener = 0
				}
			} else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && util.IsPunct(bs[i+1]) {
				// Escaped punctuation: skip the backslash and the escaped byte.
				i += 2
				continue
			} else if opts.CodeSpan && codeSpanOpener == 0 && c == '`' {
				// Start of a code span: remember the length of the backtick run.
				for ; i < len(bs); i++ {
					if bs[i] == '`' {
						codeSpanOpener++
					} else {
						// Step back so the i++ below lands on this byte.
						i--
						break
					}
				}
			} else if (opts.CodeSpan && codeSpanOpener == 0) || !opts.CodeSpan {
				if c == closer {
					opened--
					if opened == 0 {
						if ret == nil {
							ret = NewSegments()
						}
						// Record this line up to the closer and consume the
						// line through the closer itself.
						ret.Append(seg.WithStop(seg.Start + i))
						r.Advance(i + 1)
						closed = true
						goto end
					}
				} else if c == opener {
					if !opts.Nesting {
						goto end
					}
					opened++
				}
			}
			i++
		}
		if !opts.Newline {
			goto end
		}
		// No closer on this line: keep the whole line and continue on the next.
		r.AdvanceLine()
		if ret == nil {
			ret = NewSegments()
		}
		ret.Append(seg)
	}
end:
	if !opts.Advance {
		r.SetPosition(orgline, orgpos)
	}
	if closed {
		return ret, true
	}
	return nil, false
}
661
View as plain text