...

Source file src/github.com/yuin/goldmark/parser/parser.go

Documentation: github.com/yuin/goldmark/parser

     1  // Package parser contains stuff that are related to parsing a Markdown text.
     2  package parser
     3  
     4  import (
     5  	"fmt"
     6  	"strings"
     7  	"sync"
     8  
     9  	"github.com/yuin/goldmark/ast"
    10  	"github.com/yuin/goldmark/text"
    11  	"github.com/yuin/goldmark/util"
    12  )
    13  
    14  // A Reference interface represents a link reference in Markdown text.
    15  type Reference interface {
    16  	// String implements Stringer.
    17  	String() string
    18  
    19  	// Label returns a label of the reference.
    20  	Label() []byte
    21  
    22  	// Destination returns a destination(URL) of the reference.
    23  	Destination() []byte
    24  
    25  	// Title returns a title of the reference.
    26  	Title() []byte
    27  }
    28  
    29  type reference struct {
    30  	label       []byte
    31  	destination []byte
    32  	title       []byte
    33  }
    34  
    35  // NewReference returns a new Reference.
    36  func NewReference(label, destination, title []byte) Reference {
    37  	return &reference{label, destination, title}
    38  }
    39  
    40  func (r *reference) Label() []byte {
    41  	return r.label
    42  }
    43  
    44  func (r *reference) Destination() []byte {
    45  	return r.destination
    46  }
    47  
    48  func (r *reference) Title() []byte {
    49  	return r.title
    50  }
    51  
    52  func (r *reference) String() string {
    53  	return fmt.Sprintf("Reference{Label:%s, Destination:%s, Title:%s}", r.label, r.destination, r.title)
    54  }
    55  
// An IDs interface is a collection of the element ids.
type IDs interface {
	// Generate generates a new element id from the given value for a node
	// of the given kind.
	Generate(value []byte, kind ast.NodeKind) []byte

	// Put puts a given element id to the used ids table.
	Put(value []byte)
}
    64  
    65  type ids struct {
    66  	values map[string]bool
    67  }
    68  
    69  func newIDs() IDs {
    70  	return &ids{
    71  		values: map[string]bool{},
    72  	}
    73  }
    74  
    75  func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte {
    76  	value = util.TrimLeftSpace(value)
    77  	value = util.TrimRightSpace(value)
    78  	result := []byte{}
    79  	for i := 0; i < len(value); {
    80  		v := value[i]
    81  		l := util.UTF8Len(v)
    82  		i += int(l)
    83  		if l != 1 {
    84  			continue
    85  		}
    86  		if util.IsAlphaNumeric(v) {
    87  			if 'A' <= v && v <= 'Z' {
    88  				v += 'a' - 'A'
    89  			}
    90  			result = append(result, v)
    91  		} else if util.IsSpace(v) || v == '-' || v == '_' {
    92  			result = append(result, '-')
    93  		}
    94  	}
    95  	if len(result) == 0 {
    96  		if kind == ast.KindHeading {
    97  			result = []byte("heading")
    98  		} else {
    99  			result = []byte("id")
   100  		}
   101  	}
   102  	if _, ok := s.values[util.BytesToReadOnlyString(result)]; !ok {
   103  		s.values[util.BytesToReadOnlyString(result)] = true
   104  		return result
   105  	}
   106  	for i := 1; ; i++ {
   107  		newResult := fmt.Sprintf("%s-%d", result, i)
   108  		if _, ok := s.values[newResult]; !ok {
   109  			s.values[newResult] = true
   110  			return []byte(newResult)
   111  		}
   112  
   113  	}
   114  }
   115  
   116  func (s *ids) Put(value []byte) {
   117  	s.values[util.BytesToReadOnlyString(value)] = true
   118  }
   119  
   120  // ContextKey is a key that is used to set arbitrary values to the context.
   121  type ContextKey int
   122  
   123  // ContextKeyMax is a maximum value of the ContextKey.
   124  var ContextKeyMax ContextKey
   125  
   126  // NewContextKey return a new ContextKey value.
   127  func NewContextKey() ContextKey {
   128  	ContextKeyMax++
   129  	return ContextKeyMax
   130  }
   131  
// A Context interface holds information that is necessary to parse
// Markdown text.
type Context interface {
	// String implements Stringer.
	String() string

	// Get returns a value associated with the given key.
	Get(ContextKey) interface{}

	// ComputeIfAbsent computes a value if a value associated with the given key is absent and returns the value.
	ComputeIfAbsent(ContextKey, func() interface{}) interface{}

	// Set sets the given value to the context.
	Set(ContextKey, interface{})

	// AddReference adds the given reference to this context.
	AddReference(Reference)

	// Reference returns (a reference, true) if a reference associated with
	// the given label exists, otherwise (nil, false).
	Reference(label string) (Reference, bool)

	// References returns a list of references.
	References() []Reference

	// IDs returns a collection of the element ids.
	IDs() IDs

	// BlockOffset returns a first non-space character position on current line.
	// This value is valid only for BlockParser.Open.
	// BlockOffset returns -1 if current line is blank.
	BlockOffset() int

	// SetBlockOffset sets a first non-space character position on current line.
	// This value is valid only for BlockParser.Open.
	SetBlockOffset(int)

	// BlockIndent returns an indent width on current line.
	// This value is valid only for BlockParser.Open.
	// BlockIndent returns -1 if current line is blank.
	BlockIndent() int

	// SetBlockIndent sets an indent width on current line.
	// This value is valid only for BlockParser.Open.
	SetBlockIndent(int)

	// FirstDelimiter returns a first delimiter of the current delimiter list.
	FirstDelimiter() *Delimiter

	// LastDelimiter returns a last delimiter of the current delimiter list.
	LastDelimiter() *Delimiter

	// PushDelimiter appends the given delimiter to the tail of the current
	// delimiter list.
	PushDelimiter(delimiter *Delimiter)

	// RemoveDelimiter removes the given delimiter from the current delimiter list.
	RemoveDelimiter(d *Delimiter)

	// ClearDelimiters clears the current delimiter list, stopping at the
	// given bottom node.
	ClearDelimiters(bottom ast.Node)

	// OpenedBlocks returns a list of nodes that are currently in parsing.
	OpenedBlocks() []Block

	// SetOpenedBlocks sets a list of nodes that are currently in parsing.
	SetOpenedBlocks([]Block)

	// LastOpenedBlock returns a last node that is currently in parsing.
	LastOpenedBlock() Block

	// IsInLinkLabel returns true if current position seems to be in link label.
	IsInLinkLabel() bool
}
   206  
// A ContextConfig struct is a data structure that holds configuration of the Context.
type ContextConfig struct {
	// IDs is the element id collection the Context will use.
	IDs IDs
}

// A ContextOption is a functional option type for the Context.
type ContextOption func(*ContextConfig)
   214  
   215  // WithIDs is a functional option for the Context.
   216  func WithIDs(ids IDs) ContextOption {
   217  	return func(c *ContextConfig) {
   218  		c.IDs = ids
   219  	}
   220  }
   221  
// parseContext is the Context implementation returned by NewContext.
type parseContext struct {
	// store holds the values set through Set, indexed by ContextKey.
	store []interface{}
	// ids is the element id collection returned by IDs.
	ids IDs
	// refs maps a reference key to its Reference.
	refs map[string]Reference
	// blockOffset and blockIndent start at -1 and are changed through
	// SetBlockOffset and SetBlockIndent.
	blockOffset int
	blockIndent int
	// delimiters is the head and lastDelimiter the tail of the current
	// delimiter list.
	delimiters    *Delimiter
	lastDelimiter *Delimiter
	// openedBlocks is the list of blocks that are currently in parsing.
	openedBlocks []Block
}
   232  
   233  // NewContext returns a new Context.
   234  func NewContext(options ...ContextOption) Context {
   235  	cfg := &ContextConfig{
   236  		IDs: newIDs(),
   237  	}
   238  	for _, option := range options {
   239  		option(cfg)
   240  	}
   241  
   242  	return &parseContext{
   243  		store:         make([]interface{}, ContextKeyMax+1),
   244  		refs:          map[string]Reference{},
   245  		ids:           cfg.IDs,
   246  		blockOffset:   -1,
   247  		blockIndent:   -1,
   248  		delimiters:    nil,
   249  		lastDelimiter: nil,
   250  		openedBlocks:  []Block{},
   251  	}
   252  }
   253  
// Get returns the value stored for key, or nil if nothing has been set.
// The store is a slice indexed by key, so a key outside its bounds causes
// an index-out-of-range panic.
func (p *parseContext) Get(key ContextKey) interface{} {
	return p.store[key]
}
   257  
   258  func (p *parseContext) ComputeIfAbsent(key ContextKey, f func() interface{}) interface{} {
   259  	v := p.store[key]
   260  	if v == nil {
   261  		v = f()
   262  		p.store[key] = v
   263  	}
   264  	return v
   265  }
   266  
// Set stores value under key, replacing any previous value.
// The store is a slice indexed by key, so a key outside its bounds causes
// an index-out-of-range panic.
func (p *parseContext) Set(key ContextKey, value interface{}) {
	p.store[key] = value
}
   270  
// IDs returns the element id collection of this context.
func (p *parseContext) IDs() IDs {
	return p.ids
}

// BlockOffset returns the value last given to SetBlockOffset
// (-1 for a new context).
func (p *parseContext) BlockOffset() int {
	return p.blockOffset
}

// SetBlockOffset stores v as the block offset.
func (p *parseContext) SetBlockOffset(v int) {
	p.blockOffset = v
}

// BlockIndent returns the value last given to SetBlockIndent
// (-1 for a new context).
func (p *parseContext) BlockIndent() int {
	return p.blockIndent
}

// SetBlockIndent stores v as the block indent.
func (p *parseContext) SetBlockIndent(v int) {
	p.blockIndent = v
}

// LastDelimiter returns the tail of the delimiter list, or nil if the list
// is empty.
func (p *parseContext) LastDelimiter() *Delimiter {
	return p.lastDelimiter
}

// FirstDelimiter returns the head of the delimiter list, or nil if the
// list is empty.
func (p *parseContext) FirstDelimiter() *Delimiter {
	return p.delimiters
}
   298  
   299  func (p *parseContext) PushDelimiter(d *Delimiter) {
   300  	if p.delimiters == nil {
   301  		p.delimiters = d
   302  		p.lastDelimiter = d
   303  	} else {
   304  		l := p.lastDelimiter
   305  		p.lastDelimiter = d
   306  		l.NextDelimiter = d
   307  		d.PreviousDelimiter = l
   308  	}
   309  }
   310  
// RemoveDelimiter unlinks d from the delimiter list and then updates the
// AST: a delimiter with a non-zero Length is passed to
// ast.MergeOrReplaceTextSegment together with its parent and segment,
// while a delimiter with zero Length is removed from its parent.
func (p *parseContext) RemoveDelimiter(d *Delimiter) {
	if d.PreviousDelimiter == nil {
		// d is the head: its successor becomes the new head. The
		// successor's back link is cleared further below.
		p.delimiters = d.NextDelimiter
	} else {
		// d is in the middle or at the tail: link its neighbours together.
		d.PreviousDelimiter.NextDelimiter = d.NextDelimiter
		if d.NextDelimiter != nil {
			d.NextDelimiter.PreviousDelimiter = d.PreviousDelimiter
		}
	}
	if d.NextDelimiter == nil {
		// d is the tail: its predecessor becomes the new tail.
		p.lastDelimiter = d.PreviousDelimiter
	}
	// Make sure the ends of the list do not point outside of it.
	if p.delimiters != nil {
		p.delimiters.PreviousDelimiter = nil
	}
	if p.lastDelimiter != nil {
		p.lastDelimiter.NextDelimiter = nil
	}
	// Detach d itself from the list.
	d.NextDelimiter = nil
	d.PreviousDelimiter = nil
	if d.Length != 0 {
		// NOTE(review): presumably this turns the remaining delimiter
		// characters back into plain text - confirm against the ast package.
		ast.MergeOrReplaceTextSegment(d.Parent(), d, d.Segment)
	} else {
		d.Parent().RemoveChild(d.Parent(), d)
	}
}
   337  
   338  func (p *parseContext) ClearDelimiters(bottom ast.Node) {
   339  	if p.lastDelimiter == nil {
   340  		return
   341  	}
   342  	var c ast.Node
   343  	for c = p.lastDelimiter; c != nil && c != bottom; {
   344  		prev := c.PreviousSibling()
   345  		if d, ok := c.(*Delimiter); ok {
   346  			p.RemoveDelimiter(d)
   347  		}
   348  		c = prev
   349  	}
   350  }
   351  
   352  func (p *parseContext) AddReference(ref Reference) {
   353  	key := util.ToLinkReference(ref.Label())
   354  	if _, ok := p.refs[key]; !ok {
   355  		p.refs[key] = ref
   356  	}
   357  }
   358  
   359  func (p *parseContext) Reference(label string) (Reference, bool) {
   360  	v, ok := p.refs[label]
   361  	return v, ok
   362  }
   363  
   364  func (p *parseContext) References() []Reference {
   365  	ret := make([]Reference, 0, len(p.refs))
   366  	for _, v := range p.refs {
   367  		ret = append(ret, v)
   368  	}
   369  	return ret
   370  }
   371  
   372  func (p *parseContext) String() string {
   373  	refs := []string{}
   374  	for _, r := range p.refs {
   375  		refs = append(refs, r.String())
   376  	}
   377  
   378  	return fmt.Sprintf("Context{Store:%#v, Refs:%s}", p.store, strings.Join(refs, ","))
   379  }
   380  
// OpenedBlocks returns the list of blocks that are currently in parsing.
// The internal slice itself is returned, not a copy.
func (p *parseContext) OpenedBlocks() []Block {
	return p.openedBlocks
}

// SetOpenedBlocks replaces the list of blocks that are currently in parsing.
func (p *parseContext) SetOpenedBlocks(v []Block) {
	p.openedBlocks = v
}
   388  
   389  func (p *parseContext) LastOpenedBlock() Block {
   390  	if l := len(p.openedBlocks); l != 0 {
   391  		return p.openedBlocks[l-1]
   392  	}
   393  	return Block{}
   394  }
   395  
   396  func (p *parseContext) IsInLinkLabel() bool {
   397  	tlist := p.Get(linkLabelStateKey)
   398  	return tlist != nil
   399  }
   400  
// State represents parser's state.
// State is designed to be used as a bit flag.
type State int

const (
	// None is a default value of the [State].
	// The constants start at 1 << iota, so None is 1, not 0.
	None State = 1 << iota

	// Continue indicates parser can continue parsing.
	Continue

	// Close indicates parser cannot parse anymore.
	Close

	// HasChildren indicates parser may have child blocks.
	HasChildren

	// NoChildren indicates parser does not have child blocks.
	NoChildren

	// RequireParagraph indicates parser requires that the last node
	// must be a paragraph and is not converted to other nodes by
	// ParagraphTransformers.
	RequireParagraph
)
   426  
// A Config struct is a data structure that holds configuration of the Parser.
type Config struct {
	// Options holds arbitrary named options; they are passed to every
	// registered component that implements SetOptioner.
	Options map[OptionName]interface{}
	// BlockParsers, InlineParsers, ParagraphTransformers and
	// ASTTransformers hold the prioritized components of the parser.
	BlockParsers          util.PrioritizedSlice /*<BlockParser>*/
	InlineParsers         util.PrioritizedSlice /*<InlineParser>*/
	ParagraphTransformers util.PrioritizedSlice /*<ParagraphTransformer>*/
	ASTTransformers       util.PrioritizedSlice /*<ASTTransformer>*/
	// EscapedSpace is set by WithEscapedSpace.
	EscapedSpace bool
}
   436  
   437  // NewConfig returns a new Config.
   438  func NewConfig() *Config {
   439  	return &Config{
   440  		Options:               map[OptionName]interface{}{},
   441  		BlockParsers:          util.PrioritizedSlice{},
   442  		InlineParsers:         util.PrioritizedSlice{},
   443  		ParagraphTransformers: util.PrioritizedSlice{},
   444  		ASTTransformers:       util.PrioritizedSlice{},
   445  	}
   446  }
   447  
// An Option interface is a functional option type for the Parser.
type Option interface {
	// SetParserOption applies this option to the given Config.
	SetParserOption(*Config)
}

// OptionName is a name of parser options.
type OptionName string
   455  
// optAttribute is an option name that specifies attributes of elements.
const optAttribute OptionName = "Attribute"
   458  
   459  type withAttribute struct {
   460  }
   461  
   462  func (o *withAttribute) SetParserOption(c *Config) {
   463  	c.Options[optAttribute] = true
   464  }
   465  
   466  // WithAttribute is a functional option that enables custom attributes.
   467  func WithAttribute() Option {
   468  	return &withAttribute{}
   469  }
   470  
// A Parser interface parses Markdown text into AST nodes.
type Parser interface {
	// Parse parses the given Markdown text into AST nodes.
	Parse(reader text.Reader, opts ...ParseOption) ast.Node

	// AddOptions adds the given options to this parser.
	AddOptions(...Option)
}
   479  
// A SetOptioner interface sets the given option to the object.
type SetOptioner interface {
	// SetOption sets the given option to the object.
	// Options the object does not accept may be passed,
	// so implementations must ignore options they do not accept.
	SetOption(name OptionName, value interface{})
}
   487  
// A BlockParser interface parses a block level element like Paragraph, List,
// Blockquote etc.
type BlockParser interface {
	// Trigger returns a list of characters that triggers the Open method of
	// this parser.
	// If Trigger returns nil, Open will be called with any line.
	Trigger() []byte

	// Open parses the current line and returns a result of parsing.
	//
	// Open must not parse beyond the current line.
	// If Open has been able to parse the current line, Open must advance a reader
	// position by consumed byte length.
	//
	// If Open has not been able to parse the current line, Open should return
	// (nil, NoChildren). If Open has been able to parse the current line, Open
	// should return a new Block node together with HasChildren or NoChildren.
	Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State)

	// Continue parses the current line and returns a result of parsing.
	//
	// Continue must not parse beyond the current line.
	// If Continue has been able to parse the current line, Continue must advance
	// a reader position by consumed byte length.
	//
	// If Continue has not been able to parse the current line, Continue should
	// return Close. If Continue has been able to parse the current line,
	// Continue should return (Continue | NoChildren) or
	// (Continue | HasChildren).
	Continue(node ast.Node, reader text.Reader, pc Context) State

	// Close will be called when the parser returns Close.
	Close(node ast.Node, reader text.Reader, pc Context)

	// CanInterruptParagraph returns true if the parser can interrupt paragraphs,
	// otherwise false.
	CanInterruptParagraph() bool

	// CanAcceptIndentedLine returns true if the parser can open a new node when
	// the given line is indented by more than 3 spaces.
	CanAcceptIndentedLine() bool
}
   530  
// An InlineParser interface parses an inline level element like CodeSpan, Link etc.
type InlineParser interface {
	// Trigger returns a list of characters that triggers Parse method of
	// this parser.
	// Trigger characters must be a punctuation or a halfspace.
	// A halfspace triggers this parser when the character is any space
	// character or the head of a line.
	Trigger() []byte

	// Parse parses the given block into an inline node.
	//
	// Parse can parse beyond the current line.
	// If Parse has been able to parse the current line, it must advance a reader
	// position by consumed byte length.
	Parse(parent ast.Node, block text.Reader, pc Context) ast.Node
}
   547  
// A CloseBlocker interface is a callback that will be
// called when a block is closed in the inline parsing.
type CloseBlocker interface {
	// CloseBlock will be called when a block is closed.
	CloseBlock(parent ast.Node, block text.Reader, pc Context)
}
   554  
// A ParagraphTransformer transforms parsed Paragraph nodes.
// For example, link references are searched in parsed Paragraphs.
type ParagraphTransformer interface {
	// Transform transforms the given paragraph.
	Transform(node *ast.Paragraph, reader text.Reader, pc Context)
}
   561  
// ASTTransformer transforms the entire Markdown document AST.
type ASTTransformer interface {
	// Transform transforms the given AST tree.
	Transform(node *ast.Document, reader text.Reader, pc Context)
}
   567  
   568  // DefaultBlockParsers returns a new list of default BlockParsers.
   569  // Priorities of default BlockParsers are:
   570  //
   571  //	SetextHeadingParser, 100
   572  //	ThematicBreakParser, 200
   573  //	ListParser, 300
   574  //	ListItemParser, 400
   575  //	CodeBlockParser, 500
   576  //	ATXHeadingParser, 600
   577  //	FencedCodeBlockParser, 700
   578  //	BlockquoteParser, 800
   579  //	HTMLBlockParser, 900
   580  //	ParagraphParser, 1000
   581  func DefaultBlockParsers() []util.PrioritizedValue {
   582  	return []util.PrioritizedValue{
   583  		util.Prioritized(NewSetextHeadingParser(), 100),
   584  		util.Prioritized(NewThematicBreakParser(), 200),
   585  		util.Prioritized(NewListParser(), 300),
   586  		util.Prioritized(NewListItemParser(), 400),
   587  		util.Prioritized(NewCodeBlockParser(), 500),
   588  		util.Prioritized(NewATXHeadingParser(), 600),
   589  		util.Prioritized(NewFencedCodeBlockParser(), 700),
   590  		util.Prioritized(NewBlockquoteParser(), 800),
   591  		util.Prioritized(NewHTMLBlockParser(), 900),
   592  		util.Prioritized(NewParagraphParser(), 1000),
   593  	}
   594  }
   595  
   596  // DefaultInlineParsers returns a new list of default InlineParsers.
   597  // Priorities of default InlineParsers are:
   598  //
   599  //	CodeSpanParser, 100
   600  //	LinkParser, 200
   601  //	AutoLinkParser, 300
   602  //	RawHTMLParser, 400
   603  //	EmphasisParser, 500
   604  func DefaultInlineParsers() []util.PrioritizedValue {
   605  	return []util.PrioritizedValue{
   606  		util.Prioritized(NewCodeSpanParser(), 100),
   607  		util.Prioritized(NewLinkParser(), 200),
   608  		util.Prioritized(NewAutoLinkParser(), 300),
   609  		util.Prioritized(NewRawHTMLParser(), 400),
   610  		util.Prioritized(NewEmphasisParser(), 500),
   611  	}
   612  }
   613  
   614  // DefaultParagraphTransformers returns a new list of default ParagraphTransformers.
   615  // Priorities of default ParagraphTransformers are:
   616  //
   617  //	LinkReferenceParagraphTransformer, 100
   618  func DefaultParagraphTransformers() []util.PrioritizedValue {
   619  	return []util.PrioritizedValue{
   620  		util.Prioritized(LinkReferenceParagraphTransformer, 100),
   621  	}
   622  }
   623  
// A Block struct holds a node and its corresponding parser as a pair.
type Block struct {
	// Node is a BlockNode.
	Node ast.Node
	// Parser is the BlockParser paired with Node.
	Parser BlockParser
}
   631  
// parser is the Parser implementation returned by NewParser.
type parser struct {
	// options is initialized to an empty map by NewParser.
	options map[OptionName]interface{}
	// blockParsers is indexed by trigger byte and lists the block parsers
	// registered for that byte.
	blockParsers [256][]BlockParser
	// freeBlockParsers lists the block parsers whose Trigger returns nil.
	freeBlockParsers []BlockParser
	// inlineParsers is indexed by trigger byte and lists the inline
	// parsers registered for that byte.
	inlineParsers [256][]InlineParser
	// closeBlockers lists the inline parsers that implement CloseBlocker.
	closeBlockers         []CloseBlocker
	paragraphTransformers []ParagraphTransformer
	astTransformers       []ASTTransformer
	// escapedSpace is copied from Config.EscapedSpace on the first Parse.
	escapedSpace bool
	// config collects options until the first Parse, which sets it to nil.
	config *Config
	// initSync guards the one-time initialization performed by Parse.
	initSync sync.Once
}
   644  
   645  type withBlockParsers struct {
   646  	value []util.PrioritizedValue
   647  }
   648  
   649  func (o *withBlockParsers) SetParserOption(c *Config) {
   650  	c.BlockParsers = append(c.BlockParsers, o.value...)
   651  }
   652  
   653  // WithBlockParsers is a functional option that allow you to add
   654  // BlockParsers to the parser.
   655  func WithBlockParsers(bs ...util.PrioritizedValue) Option {
   656  	return &withBlockParsers{bs}
   657  }
   658  
   659  type withInlineParsers struct {
   660  	value []util.PrioritizedValue
   661  }
   662  
   663  func (o *withInlineParsers) SetParserOption(c *Config) {
   664  	c.InlineParsers = append(c.InlineParsers, o.value...)
   665  }
   666  
   667  // WithInlineParsers is a functional option that allow you to add
   668  // InlineParsers to the parser.
   669  func WithInlineParsers(bs ...util.PrioritizedValue) Option {
   670  	return &withInlineParsers{bs}
   671  }
   672  
   673  type withParagraphTransformers struct {
   674  	value []util.PrioritizedValue
   675  }
   676  
   677  func (o *withParagraphTransformers) SetParserOption(c *Config) {
   678  	c.ParagraphTransformers = append(c.ParagraphTransformers, o.value...)
   679  }
   680  
   681  // WithParagraphTransformers is a functional option that allow you to add
   682  // ParagraphTransformers to the parser.
   683  func WithParagraphTransformers(ps ...util.PrioritizedValue) Option {
   684  	return &withParagraphTransformers{ps}
   685  }
   686  
   687  type withASTTransformers struct {
   688  	value []util.PrioritizedValue
   689  }
   690  
   691  func (o *withASTTransformers) SetParserOption(c *Config) {
   692  	c.ASTTransformers = append(c.ASTTransformers, o.value...)
   693  }
   694  
   695  // WithASTTransformers is a functional option that allow you to add
   696  // ASTTransformers to the parser.
   697  func WithASTTransformers(ps ...util.PrioritizedValue) Option {
   698  	return &withASTTransformers{ps}
   699  }
   700  
   701  type withEscapedSpace struct {
   702  }
   703  
   704  func (o *withEscapedSpace) SetParserOption(c *Config) {
   705  	c.EscapedSpace = true
   706  }
   707  
   708  // WithEscapedSpace is a functional option indicates that a '\' escaped half-space(0x20) should not trigger parsers.
   709  func WithEscapedSpace() Option {
   710  	return &withEscapedSpace{}
   711  }
   712  
   713  type withOption struct {
   714  	name  OptionName
   715  	value interface{}
   716  }
   717  
   718  func (o *withOption) SetParserOption(c *Config) {
   719  	c.Options[o.name] = o.value
   720  }
   721  
   722  // WithOption is a functional option that allow you to set
   723  // an arbitrary option to the parser.
   724  func WithOption(name OptionName, value interface{}) Option {
   725  	return &withOption{name, value}
   726  }
   727  
   728  // NewParser returns a new Parser with given options.
   729  func NewParser(options ...Option) Parser {
   730  	config := NewConfig()
   731  	for _, opt := range options {
   732  		opt.SetParserOption(config)
   733  	}
   734  
   735  	p := &parser{
   736  		options: map[OptionName]interface{}{},
   737  		config:  config,
   738  	}
   739  
   740  	return p
   741  }
   742  
   743  func (p *parser) AddOptions(opts ...Option) {
   744  	for _, opt := range opts {
   745  		opt.SetParserOption(p.config)
   746  	}
   747  }
   748  
   749  func (p *parser) addBlockParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
   750  	bp, ok := v.Value.(BlockParser)
   751  	if !ok {
   752  		panic(fmt.Sprintf("%v is not a BlockParser", v.Value))
   753  	}
   754  	tcs := bp.Trigger()
   755  	so, ok := v.Value.(SetOptioner)
   756  	if ok {
   757  		for oname, ovalue := range options {
   758  			so.SetOption(oname, ovalue)
   759  		}
   760  	}
   761  	if tcs == nil {
   762  		p.freeBlockParsers = append(p.freeBlockParsers, bp)
   763  	} else {
   764  		for _, tc := range tcs {
   765  			if p.blockParsers[tc] == nil {
   766  				p.blockParsers[tc] = []BlockParser{}
   767  			}
   768  			p.blockParsers[tc] = append(p.blockParsers[tc], bp)
   769  		}
   770  	}
   771  }
   772  
   773  func (p *parser) addInlineParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
   774  	ip, ok := v.Value.(InlineParser)
   775  	if !ok {
   776  		panic(fmt.Sprintf("%v is not a InlineParser", v.Value))
   777  	}
   778  	tcs := ip.Trigger()
   779  	so, ok := v.Value.(SetOptioner)
   780  	if ok {
   781  		for oname, ovalue := range options {
   782  			so.SetOption(oname, ovalue)
   783  		}
   784  	}
   785  	if cb, ok := ip.(CloseBlocker); ok {
   786  		p.closeBlockers = append(p.closeBlockers, cb)
   787  	}
   788  	for _, tc := range tcs {
   789  		if p.inlineParsers[tc] == nil {
   790  			p.inlineParsers[tc] = []InlineParser{}
   791  		}
   792  		p.inlineParsers[tc] = append(p.inlineParsers[tc], ip)
   793  	}
   794  }
   795  
   796  func (p *parser) addParagraphTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
   797  	pt, ok := v.Value.(ParagraphTransformer)
   798  	if !ok {
   799  		panic(fmt.Sprintf("%v is not a ParagraphTransformer", v.Value))
   800  	}
   801  	so, ok := v.Value.(SetOptioner)
   802  	if ok {
   803  		for oname, ovalue := range options {
   804  			so.SetOption(oname, ovalue)
   805  		}
   806  	}
   807  	p.paragraphTransformers = append(p.paragraphTransformers, pt)
   808  }
   809  
   810  func (p *parser) addASTTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
   811  	at, ok := v.Value.(ASTTransformer)
   812  	if !ok {
   813  		panic(fmt.Sprintf("%v is not a ASTTransformer", v.Value))
   814  	}
   815  	so, ok := v.Value.(SetOptioner)
   816  	if ok {
   817  		for oname, ovalue := range options {
   818  			so.SetOption(oname, ovalue)
   819  		}
   820  	}
   821  	p.astTransformers = append(p.astTransformers, at)
   822  }
   823  
// A ParseConfig struct is a data structure that holds configuration of the Parser.Parse.
type ParseConfig struct {
	// Context is the Context to parse with; Parse creates a new one when
	// it is nil.
	Context Context
}

// A ParseOption is a functional option type for the Parser.Parse.
type ParseOption func(c *ParseConfig)
   831  
   832  // WithContext is a functional option that allow you to override
   833  // a default context.
   834  func WithContext(context Context) ParseOption {
   835  	return func(c *ParseConfig) {
   836  		c.Context = context
   837  	}
   838  }
   839  
   840  func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
   841  	p.initSync.Do(func() {
   842  		p.config.BlockParsers.Sort()
   843  		for _, v := range p.config.BlockParsers {
   844  			p.addBlockParser(v, p.config.Options)
   845  		}
   846  		for i := range p.blockParsers {
   847  			if p.blockParsers[i] != nil {
   848  				p.blockParsers[i] = append(p.blockParsers[i], p.freeBlockParsers...)
   849  			}
   850  		}
   851  
   852  		p.config.InlineParsers.Sort()
   853  		for _, v := range p.config.InlineParsers {
   854  			p.addInlineParser(v, p.config.Options)
   855  		}
   856  		p.config.ParagraphTransformers.Sort()
   857  		for _, v := range p.config.ParagraphTransformers {
   858  			p.addParagraphTransformer(v, p.config.Options)
   859  		}
   860  		p.config.ASTTransformers.Sort()
   861  		for _, v := range p.config.ASTTransformers {
   862  			p.addASTTransformer(v, p.config.Options)
   863  		}
   864  		p.escapedSpace = p.config.EscapedSpace
   865  		p.config = nil
   866  	})
   867  	c := &ParseConfig{}
   868  	for _, opt := range opts {
   869  		opt(c)
   870  	}
   871  	if c.Context == nil {
   872  		c.Context = NewContext()
   873  	}
   874  	pc := c.Context
   875  	root := ast.NewDocument()
   876  	p.parseBlocks(root, reader, pc)
   877  
   878  	blockReader := text.NewBlockReader(reader.Source(), nil)
   879  	p.walkBlock(root, func(node ast.Node) {
   880  		p.parseBlock(blockReader, node, pc)
   881  	})
   882  	for _, at := range p.astTransformers {
   883  		at.Transform(root, reader, pc)
   884  	}
   885  	// root.Dump(reader.Source(), 0)
   886  	return root
   887  }
   888  
   889  func (p *parser) transformParagraph(node *ast.Paragraph, reader text.Reader, pc Context) bool {
   890  	for _, pt := range p.paragraphTransformers {
   891  		pt.Transform(node, reader, pc)
   892  		if node.Parent() == nil {
   893  			return true
   894  		}
   895  	}
   896  	return false
   897  }
   898  
   899  func (p *parser) closeBlocks(from, to int, reader text.Reader, pc Context) {
   900  	blocks := pc.OpenedBlocks()
   901  	for i := from; i >= to; i-- {
   902  		node := blocks[i].Node
   903  		paragraph, ok := node.(*ast.Paragraph)
   904  		if ok && node.Parent() != nil {
   905  			p.transformParagraph(paragraph, reader, pc)
   906  		}
   907  		if node.Parent() != nil { // closes only if node has not been transformed
   908  			blocks[i].Parser.Close(blocks[i].Node, reader, pc)
   909  		}
   910  	}
   911  	if from == len(blocks)-1 {
   912  		blocks = blocks[0:to]
   913  	} else {
   914  		blocks = append(blocks[0:to], blocks[from+1:]...)
   915  	}
   916  	pc.SetOpenedBlocks(blocks)
   917  }
   918  
// blockOpenResult describes the outcome of an attempt to open blocks on the
// current line (see openBlocks).
type blockOpenResult int

const (
	// paragraphContinuation means that no new block was opened and the last
	// opened paragraph continues on the current line.
	paragraphContinuation blockOpenResult = iota + 1
	// newBlocksOpened means that at least one new block was opened.
	newBlocksOpened
	// noBlocksOpened means that no block was opened and no paragraph was
	// continued.
	noBlocksOpened
)
   926  
// openBlocks tries to open new blocks under parent, starting at the reader's
// current line. blankLine is recorded on every newly opened node via
// SetBlankPreviousLines.
//
// It returns newBlocksOpened if at least one block was opened. Otherwise, if
// the last opened block is a paragraph and its parser reports Continue for the
// current line, it returns paragraphContinuation. In every other case it
// returns noBlocksOpened.
func (p *parser) openBlocks(parent ast.Node, blankLine bool, reader text.Reader, pc Context) blockOpenResult {
	result := blockOpenResult(noBlocksOpened)
	// continuable is true while the last opened block is a paragraph that may
	// still be continued by the current line.
	continuable := false
	lastBlock := pc.LastOpenedBlock()
	if lastBlock.Node != nil {
		continuable = ast.IsParagraph(lastBlock.Node)
	}
retry:
	var bps []BlockParser
	line, _ := reader.PeekLine()
	w, pos := util.IndentWidth(line, reader.LineOffset())
	if w >= len(line) {
		// The indent width covers the whole line; publish -1 for both values.
		pc.SetBlockOffset(-1)
		pc.SetBlockIndent(-1)
	} else {
		pc.SetBlockOffset(pos)
		pc.SetBlockIndent(w)
	}
	if line == nil || line[0] == '\n' {
		// No more input, or an empty line: no block can be opened here.
		goto continuable
	}
	// Prefer the parsers registered for the first byte after the indent and
	// fall back to freeBlockParsers when there are none for that byte.
	bps = p.freeBlockParsers
	if pos < len(line) {
		bps = p.blockParsers[line[pos]]
		if bps == nil {
			bps = p.freeBlockParsers
		}
	}
	if bps == nil {
		goto continuable
	}

	for _, bp := range bps {
		// While a paragraph is open and nothing has been opened on this line yet,
		// only parsers that are allowed to interrupt a paragraph are tried.
		if continuable && result == noBlocksOpened && !bp.CanInterruptParagraph() {
			continue
		}
		// Lines indented by more than 3 columns are only offered to parsers that
		// accept indented lines.
		if w > 3 && !bp.CanAcceptIndentedLine() {
			continue
		}
		lastBlock = pc.LastOpenedBlock()
		last := lastBlock.Node
		node, state := bp.Open(parent, reader, pc)
		if node != nil {
			// Parser requires last node to be a paragraph.
			// With table extension:
			//
			//     0
			//     -:
			//     -
			//
			// '-' on 3rd line seems a Setext heading because 1st and 2nd lines
			// are being paragraph when the Setext heading parser tries to parse the 3rd
			// line.
			// But 1st line and 2nd line are a table. Thus this paragraph will be transformed
			// by a paragraph transformer. So this text should be converted to a table and
			// an empty list.
			if state&RequireParagraph != 0 {
				if last == parent.LastChild() {
					// Opened paragraph may be transformed by ParagraphTransformers in
					// closeBlocks().
					lastBlock.Parser.Close(last, reader, pc)
					blocks := pc.OpenedBlocks()
					pc.SetOpenedBlocks(blocks[0 : len(blocks)-1])
					if p.transformParagraph(last.(*ast.Paragraph), reader, pc) {
						// Paragraph has been transformed.
						// So this parser is considered as failing.
						continuable = false
						goto retry
					}
				}
			}
			node.SetBlankPreviousLines(blankLine)
			// If the previously opened node has been detached from the tree, close
			// the last opened block before attaching the new node.
			if last != nil && last.Parent() == nil {
				lastPos := len(pc.OpenedBlocks()) - 1
				p.closeBlocks(lastPos, lastPos, reader, pc)
			}
			parent.AppendChild(parent, node)
			result = newBlocksOpened
			be := Block{node, bp}
			pc.SetOpenedBlocks(append(pc.OpenedBlocks(), be))
			if state&HasChildren != 0 {
				// The new node is a container: descend into it and try to open
				// further blocks on the same line.
				parent = node
				goto retry // try child block
			}
			break // no children, can not open more blocks on this line
		}
	}

continuable:
	// Nothing was opened: give the open paragraph (if any) the chance to
	// continue on this line.
	if result == noBlocksOpened && continuable {
		state := lastBlock.Parser.Continue(lastBlock.Node, reader, pc)
		if state&Continue != 0 {
			result = paragraphContinuation
		}
	}
	return result
}
  1024  
  1025  type lineStat struct {
  1026  	lineNum int
  1027  	level   int
  1028  	isBlank bool
  1029  }
  1030  
  1031  func isBlankLine(lineNum, level int, stats []lineStat) bool {
  1032  	ret := true
  1033  	for i := len(stats) - 1 - level; i >= 0; i-- {
  1034  		ret = false
  1035  		s := stats[i]
  1036  		if s.lineNum == lineNum {
  1037  			if s.level < level && s.isBlank {
  1038  				return true
  1039  			} else if s.level == level {
  1040  				return s.isBlank
  1041  			}
  1042  		}
  1043  		if s.lineNum < lineNum {
  1044  			return ret
  1045  		}
  1046  	}
  1047  	return ret
  1048  }
  1049  
// parseBlocks reads the input from reader line by line and builds the
// block-level structure under parent.
//
// The outer loop skips blank lines and opens the first block(s) of a new run
// of lines; the inner loop then feeds the following lines to the currently
// opened blocks, opening and closing blocks as needed. The list of opened
// blocks is kept in pc. The function returns when the reader is exhausted or
// when no new block can be opened after a run of blank lines.
func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) {
	pc.SetOpenedBlocks([]Block{})
	// blankLines records, per line and per opened-block level, whether the line
	// was blank; it is consulted through isBlankLine.
	blankLines := make([]lineStat, 0, 128)
	var isBlank bool
	for { // process blocks separated by blank lines
		_, lines, ok := reader.SkipBlankLines()
		if !ok {
			return
		}
		lineNum, _ := reader.Position()
		if lines != 0 {
			// Blank lines were skipped: restart the records and mark the
			// previous line as blank for every currently opened block.
			blankLines = blankLines[0:0]
			l := len(pc.OpenedBlocks())
			for i := 0; i < l; i++ {
				blankLines = append(blankLines, lineStat{lineNum - 1, i, lines != 0})
			}
		}
		isBlank = isBlankLine(lineNum-1, 0, blankLines)
		// first, we try to open blocks
		if p.openBlocks(parent, isBlank, reader, pc) != newBlocksOpened {
			return
		}
		reader.AdvanceLine()
		for { // process opened blocks line by line
			openedBlocks := pc.OpenedBlocks()
			l := len(openedBlocks)
			if l == 0 {
				break
			}
			lastIndex := l - 1
			for i := 0; i < l; i++ {
				be := openedBlocks[i]
				line, _ := reader.PeekLine()
				if line == nil {
					// End of input: close every opened block and stop.
					p.closeBlocks(lastIndex, 0, reader, pc)
					reader.AdvanceLine()
					return
				}
				lineNum, _ := reader.Position()
				blankLines = append(blankLines, lineStat{lineNum, i, util.IsBlank(line)})
				// If node is a paragraph, p.openBlocks determines whether it is continuable.
				// So we do not process paragraphs here.
				if !ast.IsParagraph(be.Node) {
					state := be.Parser.Continue(be.Node, reader, pc)
					if state&Continue != 0 {
						// When current node is a container block and has no children,
						// we try to open new child nodes
						if state&HasChildren != 0 && i == lastIndex {
							isBlank = isBlankLine(lineNum-1, i, blankLines)
							p.openBlocks(be.Node, isBlank, reader, pc)
							break
						}
						continue
					}
				}
				// current node may be closed or lazy continuation
				isBlank = isBlankLine(lineNum-1, i, blankLines)
				// New blocks are opened under the block enclosing block i, or under
				// the root parent when block i is the outermost opened block.
				thisParent := parent
				if i != 0 {
					thisParent = openedBlocks[i-1].Node
				}
				lastNode := openedBlocks[lastIndex].Node
				result := p.openBlocks(thisParent, isBlank, reader, pc)
				if result != paragraphContinuation {
					// lastNode is a paragraph and was transformed by the paragraph
					// transformers.
					if openedBlocks[lastIndex].Node != lastNode {
						lastIndex--
					}
					p.closeBlocks(lastIndex, i, reader, pc)
				}
				break
			}

			reader.AdvanceLine()
		}
	}
}
  1128  
  1129  func (p *parser) walkBlock(block ast.Node, cb func(node ast.Node)) {
  1130  	for c := block.FirstChild(); c != nil; c = c.NextSibling() {
  1131  		p.walkBlock(c, cb)
  1132  	}
  1133  	cb(block)
  1134  }
  1135  
// Bit flags describing how a line of inline content ends (see parseBlock).
const (
	// lineBreakHard is set when the line ends with a hard line break
	// (a trailing backslash or two trailing spaces before the newline).
	lineBreakHard uint8 = 1 << iota
	// lineBreakSoft is set when the line ends with a newline that is not a
	// hard line break.
	lineBreakSoft
	// lineBreakVisible is set together with lineBreakHard for the backslash
	// form; when both are set, the trailing text segment is not right-trimmed.
	lineBreakVisible
)
  1141  
// parseBlock parses the inline content of the block node parent. It resets
// block to parent's lines, scans them line by line, lets the registered inline
// parsers produce inline nodes, and appends the text in between as text
// segments. Raw nodes (parent.IsRaw()) are left untouched.
//
// After all lines are consumed it calls ProcessDelimiters and then the
// CloseBlock hook of every registered closeBlocker.
func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context) {
	if parent.IsRaw() {
		return
	}
	// escaped is true when the previous byte was an unescaped backslash.
	escaped := false
	source := block.Source()
	block.Reset(parent.Lines())
	for {
	retry:
		line, _ := block.PeekLine()
		if line == nil {
			break
		}
		// Determine how the line ends. For hard line breaks, lineLength is
		// reduced so that the break marker is excluded from the scan below.
		lineLength := len(line)
		var lineBreakFlags uint8
		hasNewLine := line[lineLength-1] == '\n'
		if ((lineLength >= 3 && line[lineLength-2] == '\\' &&
			line[lineLength-3] != '\\') || (lineLength == 2 && line[lineLength-2] == '\\')) && hasNewLine { // ends with \\n
			lineLength -= 2
			lineBreakFlags |= lineBreakHard | lineBreakVisible
		} else if ((lineLength >= 4 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r' &&
			line[lineLength-4] != '\\') || (lineLength == 3 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r')) &&
			hasNewLine { // ends with \\r\n
			lineLength -= 3
			lineBreakFlags |= lineBreakHard | lineBreakVisible
		} else if lineLength >= 3 && line[lineLength-3] == ' ' && line[lineLength-2] == ' ' &&
			hasNewLine { // ends with [space][space]\n
			lineLength -= 3
			lineBreakFlags |= lineBreakHard
		} else if lineLength >= 4 && line[lineLength-4] == ' ' && line[lineLength-3] == ' ' &&
			line[lineLength-2] == '\r' && hasNewLine { // ends with [space][space]\r\n
			lineLength -= 4
			lineBreakFlags |= lineBreakHard
		} else if hasNewLine {
			// If the line ends with a newline character, but it is not a hardlineBreak, then it is a softLinebreak
			// If the line ends with a hardlineBreak, then it cannot end with a softLinebreak
			// See https://spec.commonmark.org/0.30/#soft-line-breaks
			lineBreakFlags |= lineBreakSoft
		}

		l, startPosition := block.Position()
		// n counts the bytes scanned since the reader was last advanced.
		n := 0
		for i := 0; i < lineLength; i++ {
			c := line[i]
			if c == '\n' {
				break
			}
			isSpace := util.IsSpace(c) && c != '\r' && c != '\n'
			isPunct := util.IsPunct(c)
			// Inline parsers are tried at unescaped punctuation, at spaces (unless
			// the space is escaped and escapedSpace is enabled) and at the first
			// byte of the line.
			if (isPunct && !escaped) || isSpace && !(escaped && p.escapedSpace) || i == 0 {
				// Spaces and a non-punctuation first byte are looked up under ' '.
				parserChar := c
				if isSpace || (i == 0 && !isPunct) {
					parserChar = ' '
				}
				ips := p.inlineParsers[parserChar]
				if ips != nil {
					block.Advance(n)
					n = 0
					savedLine, savedPosition := block.Position()
					if i != 0 {
						// Emit the plain text scanned so far before trying the parsers.
						_, currentPosition := block.Position()
						ast.MergeOrAppendTextSegment(parent, startPosition.Between(currentPosition))
						_, startPosition = block.Position()
					}
					var inlineNode ast.Node
					for _, ip := range ips {
						inlineNode = ip.Parse(parent, block, pc)
						if inlineNode != nil {
							break
						}
						// The parser did not match: restore the reader position
						// before trying the next one.
						block.SetPosition(savedLine, savedPosition)
					}
					if inlineNode != nil {
						// Rescan from the reader's new position.
						parent.AppendChild(parent, inlineNode)
						goto retry
					}
				}
			}
			if escaped {
				escaped = false
				n++
				continue
			}

			if c == '\\' {
				escaped = true
				n++
				continue
			}

			escaped = false
			n++
		}
		if n != 0 {
			block.Advance(n)
		}
		currentL, currentPosition := block.Position()
		// NOTE(review): presumably Advance moved the reader onto another line here,
		// in which case no trailing text segment is emitted for this line - confirm.
		if l != currentL {
			continue
		}
		diff := startPosition.Between(currentPosition)
		var text *ast.Text
		// Trailing spaces are kept only for the "visible" hard line break form.
		if lineBreakFlags&(lineBreakHard|lineBreakVisible) == lineBreakHard|lineBreakVisible {
			text = ast.NewTextSegment(diff)
		} else {
			text = ast.NewTextSegment(diff.TrimRightSpace(source))
		}
		text.SetSoftLineBreak(lineBreakFlags&lineBreakSoft != 0)
		text.SetHardLineBreak(lineBreakFlags&lineBreakHard != 0)
		parent.AppendChild(parent, text)
		block.AdvanceLine()
	}

	ProcessDelimiters(nil, pc)
	for _, ip := range p.closeBlockers {
		ip.CloseBlock(parent, block, pc)
	}
}
  1260  

View as plain text