...

Source file src/github.com/yuin/goldmark/text/reader.go

Documentation: github.com/yuin/goldmark/text

     1  package text
     2  
     3  import (
     4  	"bytes"
     5  	"io"
     6  	"regexp"
     7  	"unicode/utf8"
     8  
     9  	"github.com/yuin/goldmark/util"
    10  )
    11  
// invalidValue marks a segment position that has not been set. A Segment
// built with it is passed to blockReader.SetPosition to request the segment
// of the given line instead of an explicit position.
const invalidValue = -1

// EOF indicates the end of file. Peek returns it when the current position
// is outside the readable range.
const EOF = byte(0xff)
    16  
// A Reader interface provides abstracted methods for reading text.
type Reader interface {
	io.RuneReader

	// Source returns a source of the reader.
	Source() []byte

	// ResetPosition resets positions.
	ResetPosition()

	// Peek returns a byte at current position without advancing the internal pointer.
	// It returns EOF when there is nothing left to read.
	Peek() byte

	// PeekLine returns the current line without advancing the internal pointer.
	// The returned line is nil when there is nothing left to read.
	PeekLine() ([]byte, Segment)

	// PrecendingCharacter returns a character just before current internal pointer.
	PrecendingCharacter() rune

	// Value returns a value of the given segment.
	Value(Segment) []byte

	// LineOffset returns a distance from the line head to current position.
	LineOffset() int

	// Position returns current line number and position.
	Position() (int, Segment)

	// SetPosition sets current line number and position.
	SetPosition(int, Segment)

	// SetPadding sets padding to the reader.
	SetPadding(int)

	// Advance advances the internal pointer.
	Advance(int)

	// AdvanceAndSetPadding advances the internal pointer and adds padding to the
	// reader.
	AdvanceAndSetPadding(int, int)

	// AdvanceLine advances the internal pointer to the next line head.
	AdvanceLine()

	// SkipSpaces skips space characters and returns a non-blank line together
	// with the number of skipped characters.
	// If it reaches EOF, returns false.
	SkipSpaces() (Segment, int, bool)

	// SkipBlankLines skips blank lines and returns a non-blank line together
	// with the number of skipped lines.
	// If it reaches EOF, returns false.
	SkipBlankLines() (Segment, int, bool)

	// Match performs regular expression matching to current line.
	Match(reg *regexp.Regexp) bool

	// FindSubMatch performs regular expression searching to current line.
	FindSubMatch(reg *regexp.Regexp) [][]byte

	// FindClosure finds corresponding closure.
	FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool)
}
    78  
// FindClosureOptions holds the options for Reader.FindClosure.
type FindClosureOptions struct {
	// CodeSpan is a flag for the FindClosure. If this is set to true,
	// FindClosure ignores closers in codespans.
	CodeSpan bool

	// Nesting is a flag for the FindClosure. If this is set to true,
	// FindClosure allows nesting.
	Nesting bool

	// Newline is a flag for the FindClosure. If this is set to true,
	// FindClosure searches for a closer over multiple lines.
	Newline bool

	// Advance is a flag for the FindClosure. If this is set to true,
	// FindClosure advances pointers when closer is found.
	Advance bool
}
    97  
// reader is the Reader implementation returned by NewReader. It walks a
// single byte slice line by line.
type reader struct {
	source       []byte  // the bytes being read
	sourceLength int     // len(source), captured by NewReader
	line         int     // current line number; -1 before the first AdvanceLine
	peekedLine   []byte  // cached PeekLine result; nil when it must be recomputed
	pos          Segment // Start is the read pointer, Stop the end of the current line
	head         int     // offset where the current line begins
	lineOffset   int     // cached LineOffset result; negative when not computed
}
   107  
   108  // NewReader return a new Reader that can read UTF-8 bytes .
   109  func NewReader(source []byte) Reader {
   110  	r := &reader{
   111  		source:       source,
   112  		sourceLength: len(source),
   113  	}
   114  	r.ResetPosition()
   115  	return r
   116  }
   117  
   118  func (r *reader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
   119  	return findClosureReader(r, opener, closer, options)
   120  }
   121  
   122  func (r *reader) ResetPosition() {
   123  	r.line = -1
   124  	r.head = 0
   125  	r.lineOffset = -1
   126  	r.AdvanceLine()
   127  }
   128  
   129  func (r *reader) Source() []byte {
   130  	return r.source
   131  }
   132  
   133  func (r *reader) Value(seg Segment) []byte {
   134  	return seg.Value(r.source)
   135  }
   136  
   137  func (r *reader) Peek() byte {
   138  	if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
   139  		if r.pos.Padding != 0 {
   140  			return space[0]
   141  		}
   142  		return r.source[r.pos.Start]
   143  	}
   144  	return EOF
   145  }
   146  
   147  func (r *reader) PeekLine() ([]byte, Segment) {
   148  	if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
   149  		if r.peekedLine == nil {
   150  			r.peekedLine = r.pos.Value(r.Source())
   151  		}
   152  		return r.peekedLine, r.pos
   153  	}
   154  	return nil, r.pos
   155  }
   156  
   157  // io.RuneReader interface.
   158  func (r *reader) ReadRune() (rune, int, error) {
   159  	return readRuneReader(r)
   160  }
   161  
   162  func (r *reader) LineOffset() int {
   163  	if r.lineOffset < 0 {
   164  		v := 0
   165  		for i := r.head; i < r.pos.Start; i++ {
   166  			if r.source[i] == '\t' {
   167  				v += util.TabWidth(v)
   168  			} else {
   169  				v++
   170  			}
   171  		}
   172  		r.lineOffset = v - r.pos.Padding
   173  	}
   174  	return r.lineOffset
   175  }
   176  
   177  func (r *reader) PrecendingCharacter() rune {
   178  	if r.pos.Start <= 0 {
   179  		if r.pos.Padding != 0 {
   180  			return rune(' ')
   181  		}
   182  		return rune('\n')
   183  	}
   184  	i := r.pos.Start - 1
   185  	for ; i >= 0; i-- {
   186  		if utf8.RuneStart(r.source[i]) {
   187  			break
   188  		}
   189  	}
   190  	rn, _ := utf8.DecodeRune(r.source[i:])
   191  	return rn
   192  }
   193  
   194  func (r *reader) Advance(n int) {
   195  	r.lineOffset = -1
   196  	if n < len(r.peekedLine) && r.pos.Padding == 0 {
   197  		r.pos.Start += n
   198  		r.peekedLine = nil
   199  		return
   200  	}
   201  	r.peekedLine = nil
   202  	l := r.sourceLength
   203  	for ; n > 0 && r.pos.Start < l; n-- {
   204  		if r.pos.Padding != 0 {
   205  			r.pos.Padding--
   206  			continue
   207  		}
   208  		if r.source[r.pos.Start] == '\n' {
   209  			r.AdvanceLine()
   210  			continue
   211  		}
   212  		r.pos.Start++
   213  	}
   214  }
   215  
   216  func (r *reader) AdvanceAndSetPadding(n, padding int) {
   217  	r.Advance(n)
   218  	if padding > r.pos.Padding {
   219  		r.SetPadding(padding)
   220  	}
   221  }
   222  
   223  func (r *reader) AdvanceLine() {
   224  	r.lineOffset = -1
   225  	r.peekedLine = nil
   226  	r.pos.Start = r.pos.Stop
   227  	r.head = r.pos.Start
   228  	if r.pos.Start < 0 {
   229  		return
   230  	}
   231  	r.pos.Stop = r.sourceLength
   232  	for i := r.pos.Start; i < r.sourceLength; i++ {
   233  		c := r.source[i]
   234  		if c == '\n' {
   235  			r.pos.Stop = i + 1
   236  			break
   237  		}
   238  	}
   239  	r.line++
   240  	r.pos.Padding = 0
   241  }
   242  
   243  func (r *reader) Position() (int, Segment) {
   244  	return r.line, r.pos
   245  }
   246  
   247  func (r *reader) SetPosition(line int, pos Segment) {
   248  	r.lineOffset = -1
   249  	r.line = line
   250  	r.pos = pos
   251  }
   252  
   253  func (r *reader) SetPadding(v int) {
   254  	r.pos.Padding = v
   255  }
   256  
   257  func (r *reader) SkipSpaces() (Segment, int, bool) {
   258  	return skipSpacesReader(r)
   259  }
   260  
   261  func (r *reader) SkipBlankLines() (Segment, int, bool) {
   262  	return skipBlankLinesReader(r)
   263  }
   264  
   265  func (r *reader) Match(reg *regexp.Regexp) bool {
   266  	return matchReader(r, reg)
   267  }
   268  
   269  func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte {
   270  	return findSubMatchReader(r, reg)
   271  }
   272  
// A BlockReader interface is a reader that is optimized for Blocks.
type BlockReader interface {
	Reader
	// Reset resets current state and sets new segments to the reader.
	Reset(segment *Segments)
}
   279  
// blockReader is the BlockReader implementation returned by NewBlockReader.
// It reads the source through a list of line segments.
type blockReader struct {
	source         []byte    // the bytes the segments refer to
	segments       *Segments // the lines to read; installed by Reset
	segmentsLength int       // segments.Len() as captured by Reset
	line           int       // index into segments of the current line
	pos            Segment   // current position within the current line
	head           int       // Start of the current line's segment
	last           int       // Stop of the last segment; reads end here
	lineOffset     int       // cached LineOffset result; negative when not computed
}
   290  
   291  // NewBlockReader returns a new BlockReader.
   292  func NewBlockReader(source []byte, segments *Segments) BlockReader {
   293  	r := &blockReader{
   294  		source: source,
   295  	}
   296  	if segments != nil {
   297  		r.Reset(segments)
   298  	}
   299  	return r
   300  }
   301  
   302  func (r *blockReader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
   303  	return findClosureReader(r, opener, closer, options)
   304  }
   305  
   306  func (r *blockReader) ResetPosition() {
   307  	r.line = -1
   308  	r.head = 0
   309  	r.last = 0
   310  	r.lineOffset = -1
   311  	r.pos.Start = -1
   312  	r.pos.Stop = -1
   313  	r.pos.Padding = 0
   314  	if r.segmentsLength > 0 {
   315  		last := r.segments.At(r.segmentsLength - 1)
   316  		r.last = last.Stop
   317  	}
   318  	r.AdvanceLine()
   319  }
   320  
   321  func (r *blockReader) Reset(segments *Segments) {
   322  	r.segments = segments
   323  	r.segmentsLength = segments.Len()
   324  	r.ResetPosition()
   325  }
   326  
   327  func (r *blockReader) Source() []byte {
   328  	return r.source
   329  }
   330  
   331  func (r *blockReader) Value(seg Segment) []byte {
   332  	line := r.segmentsLength - 1
   333  	ret := make([]byte, 0, seg.Stop-seg.Start+1)
   334  	for ; line >= 0; line-- {
   335  		if seg.Start >= r.segments.At(line).Start {
   336  			break
   337  		}
   338  	}
   339  	i := seg.Start
   340  	for ; line < r.segmentsLength; line++ {
   341  		s := r.segments.At(line)
   342  		if i < 0 {
   343  			i = s.Start
   344  		}
   345  		ret = s.ConcatPadding(ret)
   346  		for ; i < seg.Stop && i < s.Stop; i++ {
   347  			ret = append(ret, r.source[i])
   348  		}
   349  		i = -1
   350  		if s.Stop > seg.Stop {
   351  			break
   352  		}
   353  	}
   354  	return ret
   355  }
   356  
   357  // io.RuneReader interface.
   358  func (r *blockReader) ReadRune() (rune, int, error) {
   359  	return readRuneReader(r)
   360  }
   361  
   362  func (r *blockReader) PrecendingCharacter() rune {
   363  	if r.pos.Padding != 0 {
   364  		return rune(' ')
   365  	}
   366  	if r.segments.Len() < 1 {
   367  		return rune('\n')
   368  	}
   369  	firstSegment := r.segments.At(0)
   370  	if r.line == 0 && r.pos.Start <= firstSegment.Start {
   371  		return rune('\n')
   372  	}
   373  	l := len(r.source)
   374  	i := r.pos.Start - 1
   375  	for ; i < l && i >= 0; i-- {
   376  		if utf8.RuneStart(r.source[i]) {
   377  			break
   378  		}
   379  	}
   380  	if i < 0 || i >= l {
   381  		return rune('\n')
   382  	}
   383  	rn, _ := utf8.DecodeRune(r.source[i:])
   384  	return rn
   385  }
   386  
   387  func (r *blockReader) LineOffset() int {
   388  	if r.lineOffset < 0 {
   389  		v := 0
   390  		for i := r.head; i < r.pos.Start; i++ {
   391  			if r.source[i] == '\t' {
   392  				v += util.TabWidth(v)
   393  			} else {
   394  				v++
   395  			}
   396  		}
   397  		r.lineOffset = v - r.pos.Padding
   398  	}
   399  	return r.lineOffset
   400  }
   401  
   402  func (r *blockReader) Peek() byte {
   403  	if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
   404  		if r.pos.Padding != 0 {
   405  			return space[0]
   406  		}
   407  		return r.source[r.pos.Start]
   408  	}
   409  	return EOF
   410  }
   411  
   412  func (r *blockReader) PeekLine() ([]byte, Segment) {
   413  	if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
   414  		return r.pos.Value(r.source), r.pos
   415  	}
   416  	return nil, r.pos
   417  }
   418  
   419  func (r *blockReader) Advance(n int) {
   420  	r.lineOffset = -1
   421  
   422  	if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 {
   423  		r.pos.Start += n
   424  		return
   425  	}
   426  
   427  	for ; n > 0; n-- {
   428  		if r.pos.Padding != 0 {
   429  			r.pos.Padding--
   430  			continue
   431  		}
   432  		if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last {
   433  			r.AdvanceLine()
   434  			continue
   435  		}
   436  		r.pos.Start++
   437  	}
   438  }
   439  
   440  func (r *blockReader) AdvanceAndSetPadding(n, padding int) {
   441  	r.Advance(n)
   442  	if padding > r.pos.Padding {
   443  		r.SetPadding(padding)
   444  	}
   445  }
   446  
   447  func (r *blockReader) AdvanceLine() {
   448  	r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue))
   449  	r.head = r.pos.Start
   450  }
   451  
   452  func (r *blockReader) Position() (int, Segment) {
   453  	return r.line, r.pos
   454  }
   455  
   456  func (r *blockReader) SetPosition(line int, pos Segment) {
   457  	r.lineOffset = -1
   458  	r.line = line
   459  	if pos.Start == invalidValue {
   460  		if r.line < r.segmentsLength {
   461  			s := r.segments.At(line)
   462  			r.head = s.Start
   463  			r.pos = s
   464  		}
   465  	} else {
   466  		r.pos = pos
   467  		if r.line < r.segmentsLength {
   468  			s := r.segments.At(line)
   469  			r.head = s.Start
   470  		}
   471  	}
   472  }
   473  
   474  func (r *blockReader) SetPadding(v int) {
   475  	r.lineOffset = -1
   476  	r.pos.Padding = v
   477  }
   478  
   479  func (r *blockReader) SkipSpaces() (Segment, int, bool) {
   480  	return skipSpacesReader(r)
   481  }
   482  
   483  func (r *blockReader) SkipBlankLines() (Segment, int, bool) {
   484  	return skipBlankLinesReader(r)
   485  }
   486  
   487  func (r *blockReader) Match(reg *regexp.Regexp) bool {
   488  	return matchReader(r, reg)
   489  }
   490  
   491  func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte {
   492  	return findSubMatchReader(r, reg)
   493  }
   494  
   495  func skipBlankLinesReader(r Reader) (Segment, int, bool) {
   496  	lines := 0
   497  	for {
   498  		line, seg := r.PeekLine()
   499  		if line == nil {
   500  			return seg, lines, false
   501  		}
   502  		if util.IsBlank(line) {
   503  			lines++
   504  			r.AdvanceLine()
   505  		} else {
   506  			return seg, lines, true
   507  		}
   508  	}
   509  }
   510  
   511  func skipSpacesReader(r Reader) (Segment, int, bool) {
   512  	chars := 0
   513  	for {
   514  		line, segment := r.PeekLine()
   515  		if line == nil {
   516  			return segment, chars, false
   517  		}
   518  		for i, c := range line {
   519  			if util.IsSpace(c) {
   520  				chars++
   521  				r.Advance(1)
   522  				continue
   523  			}
   524  			return segment.WithStart(segment.Start + i + 1), chars, true
   525  		}
   526  	}
   527  }
   528  
   529  func matchReader(r Reader, reg *regexp.Regexp) bool {
   530  	oldline, oldseg := r.Position()
   531  	match := reg.FindReaderSubmatchIndex(r)
   532  	r.SetPosition(oldline, oldseg)
   533  	if match == nil {
   534  		return false
   535  	}
   536  	r.Advance(match[1] - match[0])
   537  	return true
   538  }
   539  
   540  func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte {
   541  	oldLine, oldSeg := r.Position()
   542  	match := reg.FindReaderSubmatchIndex(r)
   543  	r.SetPosition(oldLine, oldSeg)
   544  	if match == nil {
   545  		return nil
   546  	}
   547  	var bb bytes.Buffer
   548  	bb.Grow(match[1] - match[0])
   549  	for i := 0; i < match[1]; {
   550  		r, size, _ := readRuneReader(r)
   551  		i += size
   552  		bb.WriteRune(r)
   553  	}
   554  	bs := bb.Bytes()
   555  	var result [][]byte
   556  	for i := 0; i < len(match); i += 2 {
   557  		if match[i] < 0 {
   558  			result = append(result, []byte{})
   559  			continue
   560  		}
   561  		result = append(result, bs[match[i]:match[i+1]])
   562  	}
   563  
   564  	r.SetPosition(oldLine, oldSeg)
   565  	r.Advance(match[1] - match[0])
   566  	return result
   567  }
   568  
   569  func readRuneReader(r Reader) (rune, int, error) {
   570  	line, _ := r.PeekLine()
   571  	if line == nil {
   572  		return 0, 0, io.EOF
   573  	}
   574  	rn, size := utf8.DecodeRune(line)
   575  	if rn == utf8.RuneError {
   576  		return 0, 0, io.EOF
   577  	}
   578  	r.Advance(size)
   579  	return rn, size, nil
   580  }
   581  
// findClosureReader searches from r's current position for the closer byte
// that balances an opener the caller has already consumed (the nesting depth
// starts at 1).
//
// On success it returns the segments between the starting position and the
// closer (the closer itself excluded) together with true; otherwise it
// returns nil and false. Unless opts.Advance is set, the reader's position is
// restored before returning, whether or not a closer was found.
func findClosureReader(r Reader, opener, closer byte, opts FindClosureOptions) (*Segments, bool) {
	opened := 1         // current nesting depth
	codeSpanOpener := 0 // length of the backtick run that opened a code span; 0 when outside one
	closed := false
	orgline, orgpos := r.Position()
	var ret *Segments

	for {
		bs, seg := r.PeekLine()
		if bs == nil {
			goto end
		}
		i := 0
		for i < len(bs) {
			c := bs[i]
			if opts.CodeSpan && codeSpanOpener != 0 && c == '`' {
				// Inside a code span: count this backtick run. The span ends
				// only when the run is as long as the one that opened it.
				codeSpanCloser := 0
				for ; i < len(bs); i++ {
					if bs[i] == '`' {
						codeSpanCloser++
					} else {
						// Step back so the i++ at the bottom of the outer
						// loop lands on this non-backtick byte.
						i--
						break
					}
				}
				if codeSpanCloser == codeSpanOpener {
					codeSpanOpener = 0
				}
			} else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && util.IsPunct(bs[i+1]) {
				// A backslash followed by punctuation: skip both bytes so the
				// escaped byte is never treated as an opener or closer.
				i += 2
				continue
			} else if opts.CodeSpan && codeSpanOpener == 0 && c == '`' {
				// Start of a code span: remember the length of the backtick run.
				for ; i < len(bs); i++ {
					if bs[i] == '`' {
						codeSpanOpener++
					} else {
						i--
						break
					}
				}
			} else if (opts.CodeSpan && codeSpanOpener == 0) || !opts.CodeSpan {
				// Openers and closers only count outside code spans (or always,
				// when code spans are not tracked).
				if c == closer {
					opened--
					if opened == 0 {
						if ret == nil {
							ret = NewSegments()
						}
						// Record the current line up to the closer and move the
						// reader past the closer.
						ret.Append(seg.WithStop(seg.Start + i))
						r.Advance(i + 1)
						closed = true
						goto end
					}
				} else if c == opener {
					if !opts.Nesting {
						// A nested opener is a failure when nesting is not allowed.
						goto end
					}
					opened++
				}
			}
			i++
		}
		if !opts.Newline {
			goto end
		}
		// No closer on this line: keep the rest of the line as part of the
		// result and continue on the next one.
		r.AdvanceLine()
		if ret == nil {
			ret = NewSegments()
		}
		ret.Append(seg)
	}
end:
	if !opts.Advance {
		r.SetPosition(orgline, orgpos)
	}
	if closed {
		return ret, true
	}
	return nil, false
}
   661  

View as plain text