markdown.go

Documentation: github.com/russross/blackfriday/v2

     1  // Blackfriday Markdown Processor
     2  // Available at http://github.com/russross/blackfriday
     3  //
     4  // Copyright © 2011 Russ Ross <russ@russross.com>.
     5  // Distributed under the Simplified BSD License.
     6  // See README.md for details.
     7  
     8  package blackfriday
     9  
    10  import (
    11  	"bytes"
    12  	"fmt"
    13  	"io"
    14  	"strings"
    15  	"unicode/utf8"
    16  )
    17  
    18  //
    19  // Markdown parsing and processing
    20  //
    21  
// Version is the version string of the package. Appears in the rendered
// document when CompletePage flag is on.
const Version = "2.0"
    25  
// Extensions is a bitwise or'ed collection of enabled Blackfriday's
// extensions.
type Extensions int

// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
//
// Note that iota is already 1 on the NoIntraEmphasis line (NoExtensions
// occupies position 0 of the block), so the first flag is 1<<1 and bit 0 is
// left unused.
const (
	NoExtensions           Extensions = 0
	NoIntraEmphasis        Extensions = 1 << iota // Ignore emphasis markers inside words
	Tables                                        // Render tables
	FencedCode                                    // Render fenced code blocks
	Autolink                                      // Detect embedded URLs that are not explicitly marked
	Strikethrough                                 // Strikethrough text using ~~test~~
	LaxHTMLBlocks                                 // Loosen up HTML block parsing rules
	SpaceHeadings                                 // Be strict about prefix heading rules
	HardLineBreak                                 // Translate newlines into line breaks
	TabSizeEight                                  // Expand tabs to eight spaces instead of four
	Footnotes                                     // Pandoc-style footnotes
	NoEmptyLineBeforeBlock                        // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	HeadingIDs                                    // Specify heading IDs with {#id}
	Titleblock                                    // Titleblock ala pandoc
	AutoHeadingIDs                                // Create the heading ID from the text
	BackslashLineBreak                            // Translate trailing backslashes into line breaks
	DefinitionLists                               // Render definition lists

	// CommonHTMLFlags is the set of HTML renderer flags that Run passes to
	// the default HTML renderer.
	CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants |
		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes

	// CommonExtensions is the set of parser extensions that Run enables
	// unless overridden by an option.
	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
		Autolink | Strikethrough | SpaceHeadings | HeadingIDs |
		BackslashLineBreak | DefinitionLists
)
    58  
// ListType contains bitwise or'ed flags for list and list item objects.
type ListType int

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
//
// The first group describes the kind of list; the second group describes
// properties of an individual item.
const (
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	ListItemContainsBlock
	ListItemBeginningOfList // TODO: figure out if this is of any use now
	ListItemEndOfList
)
    74  
// CellAlignFlags holds a type of alignment in a table cell.
type CellAlignFlags int

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
//
// TableAlignmentCenter is defined as the union of the left and right bits,
// so compare alignment values with == rather than testing individual bits.
const (
	TableAlignmentLeft CellAlignFlags = 1 << iota
	TableAlignmentRight
	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)
    86  
// The size of a tab stop, in columns.
// NOTE(review): TabSizeDouble is presumably selected by the TabSizeEight
// extension; the code that chooses between the two is not in this file.
const (
	TabSizeDefault = 4
	TabSizeDouble  = 8
)
    92  
// blockTags is a set of tags that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
// The map is used purely as a set: only key presence matters, the values
// are empty structs.
var blockTags = map[string]struct{}{
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"pre":        {},
	"p":          {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// HTML5
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"video":      {},
}
   137  
// Renderer is the rendering interface. This is mostly of interest if you are
// implementing a new rendering format.
//
// Only an HTML implementation is provided in this repository, see the README
// for external implementations.
//
// Run drives a Renderer in this order: RenderHeader once, RenderNode for
// every step of the tree walk, then RenderFooter once.
type Renderer interface {
	// RenderNode is the main rendering method. It will be called once for
	// every leaf node and twice for every non-leaf node (first with
	// entering=true, then with entering=false). The method should write its
	// rendition of the node to the supplied writer w.
	//
	// The returned WalkStatus is handed back to the tree walk that invoked
	// the method.
	RenderNode(w io.Writer, node *Node, entering bool) WalkStatus

	// RenderHeader is a method that allows the renderer to produce some
	// content preceding the main body of the output document. The header is
	// understood in the broad sense here. For example, the default HTML
	// renderer will write not only the HTML document preamble, but also the
	// table of contents if it was requested.
	//
	// The method will be passed an entire document tree, in case a particular
	// implementation needs to inspect it to produce output.
	//
	// The output should be written to the supplied writer w. If your
	// implementation has no header to write, supply an empty implementation.
	RenderHeader(w io.Writer, ast *Node)

	// RenderFooter is a symmetric counterpart of RenderHeader. It is called
	// after the whole tree has been rendered.
	RenderFooter(w io.Writer, ast *Node)
}
   166  
// inlineParser is the signature of the callback functions used for inline
// parsing. One such function is registered (see New) for each byte that
// triggers a response when parsing inline data.
// NOTE(review): the int result is presumably the number of bytes consumed
// and the *Node the node produced, if any — confirm against the inline parser.
type inlineParser func(p *Markdown, data []byte, offset int) (int, *Node)
   170  
// Markdown is a type that holds extensions and the runtime state used by
// Parse, and the renderer. You can not use it directly, construct it with New.
type Markdown struct {
	// renderer is set through WithRenderer or WithNoExtensions and is the
	// renderer Run writes the output with.
	renderer Renderer
	// referenceOverride, when non-nil, is consulted by getRef before refs.
	referenceOverride ReferenceOverrideFunc
	// refs maps a lower-cased reference id to its definition.
	refs map[string]*reference
	// inlineCallback is indexed by the triggering byte; entries are
	// registered in New.
	inlineCallback [256]inlineParser
	// extensions is the set of enabled parser extensions.
	extensions Extensions
	// nesting and maxNesting are not used in this file beyond New setting
	// maxNesting to 16.
	// NOTE(review): presumably they bound inline nesting depth — confirm.
	nesting    int
	maxNesting int
	// insideLink is reset to false by New.
	// NOTE(review): presumably set while parsing link text — confirm.
	insideLink bool

	// Footnotes need to be ordered as well as available to quickly check for
	// presence. If a ref is also a footnote, it's stored both in refs and here
	// in notes. Slice is nil if footnotes not enabled.
	notes []*reference

	// Block-level tree building state. New points doc, tip, oldTip and
	// lastMatchedContainer at the same fresh Document node.
	doc                  *Node
	tip                  *Node // = doc
	oldTip               *Node
	lastMatchedContainer *Node // = doc
	allClosed            bool
}
   194  
   195  func (p *Markdown) getRef(refid string) (ref *reference, found bool) {
   196  	if p.referenceOverride != nil {
   197  		r, overridden := p.referenceOverride(refid)
   198  		if overridden {
   199  			if r == nil {
   200  				return nil, false
   201  			}
   202  			return &reference{
   203  				link:     []byte(r.Link),
   204  				title:    []byte(r.Title),
   205  				noteID:   0,
   206  				hasBlock: false,
   207  				text:     []byte(r.Text)}, true
   208  		}
   209  	}
   210  	// refs are case insensitive
   211  	ref, found = p.refs[strings.ToLower(refid)]
   212  	return ref, found
   213  }
   214  
   215  func (p *Markdown) finalize(block *Node) {
   216  	above := block.Parent
   217  	block.open = false
   218  	p.tip = above
   219  }
   220  
   221  func (p *Markdown) addChild(node NodeType, offset uint32) *Node {
   222  	return p.addExistingChild(NewNode(node), offset)
   223  }
   224  
   225  func (p *Markdown) addExistingChild(node *Node, offset uint32) *Node {
   226  	for !p.tip.canContain(node.Type) {
   227  		p.finalize(p.tip)
   228  	}
   229  	p.tip.AppendChild(node)
   230  	p.tip = node
   231  	return node
   232  }
   233  
   234  func (p *Markdown) closeUnmatchedBlocks() {
   235  	if !p.allClosed {
   236  		for p.oldTip != p.lastMatchedContainer {
   237  			parent := p.oldTip.Parent
   238  			p.finalize(p.oldTip)
   239  			p.oldTip = parent
   240  		}
   241  		p.allClosed = true
   242  	}
   243  }
   244  
   245  //
   246  //
   247  // Public interface
   248  //
   249  //
   250  
// Reference represents the details of a link.
// See the documentation of WithRefOverride for more details on use-case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string
	// Title is the alternate text describing the link in more detail.
	Title string
	// Text is the optional text to override the ref with if the syntax used was
	// [refid][]
	Text string
}
   262  
// ReferenceOverrideFunc is expected to be called with a reference string and
// return either a valid Reference type that the reference string maps to or
// nil. If overridden is false, the default reference logic will be executed.
// If overridden is true and ref is nil, the reference is treated as not
// found and the default logic is not consulted.
// See the documentation of WithRefOverride for more details on use-case.
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
   268  
   269  // New constructs a Markdown processor. You can use the same With* functions as
   270  // for Run() to customize parser's behavior and the renderer.
   271  func New(opts ...Option) *Markdown {
   272  	var p Markdown
   273  	for _, opt := range opts {
   274  		opt(&p)
   275  	}
   276  	p.refs = make(map[string]*reference)
   277  	p.maxNesting = 16
   278  	p.insideLink = false
   279  	docNode := NewNode(Document)
   280  	p.doc = docNode
   281  	p.tip = docNode
   282  	p.oldTip = docNode
   283  	p.lastMatchedContainer = docNode
   284  	p.allClosed = true
   285  	// register inline parsers
   286  	p.inlineCallback[' '] = maybeLineBreak
   287  	p.inlineCallback['*'] = emphasis
   288  	p.inlineCallback['_'] = emphasis
   289  	if p.extensions&Strikethrough != 0 {
   290  		p.inlineCallback['~'] = emphasis
   291  	}
   292  	p.inlineCallback['`'] = codeSpan
   293  	p.inlineCallback['\n'] = lineBreak
   294  	p.inlineCallback['['] = link
   295  	p.inlineCallback['<'] = leftAngle
   296  	p.inlineCallback['\\'] = escape
   297  	p.inlineCallback['&'] = entity
   298  	p.inlineCallback['!'] = maybeImage
   299  	p.inlineCallback['^'] = maybeInlineFootnote
   300  	if p.extensions&Autolink != 0 {
   301  		p.inlineCallback['h'] = maybeAutoLink
   302  		p.inlineCallback['m'] = maybeAutoLink
   303  		p.inlineCallback['f'] = maybeAutoLink
   304  		p.inlineCallback['H'] = maybeAutoLink
   305  		p.inlineCallback['M'] = maybeAutoLink
   306  		p.inlineCallback['F'] = maybeAutoLink
   307  	}
   308  	if p.extensions&Footnotes != 0 {
   309  		p.notes = make([]*reference, 0)
   310  	}
   311  	return &p
   312  }
   313  
// Option customizes the Markdown processor's default behavior. Options are
// applied by New in the order they are passed, so a later option overrides
// an earlier one that sets the same field.
type Option func(*Markdown)
   316  
   317  // WithRenderer allows you to override the default renderer.
   318  func WithRenderer(r Renderer) Option {
   319  	return func(p *Markdown) {
   320  		p.renderer = r
   321  	}
   322  }
   323  
   324  // WithExtensions allows you to pick some of the many extensions provided by
   325  // Blackfriday. You can bitwise OR them.
   326  func WithExtensions(e Extensions) Option {
   327  	return func(p *Markdown) {
   328  		p.extensions = e
   329  	}
   330  }
   331  
   332  // WithNoExtensions turns off all extensions and custom behavior.
   333  func WithNoExtensions() Option {
   334  	return func(p *Markdown) {
   335  		p.extensions = NoExtensions
   336  		p.renderer = NewHTMLRenderer(HTMLRendererParameters{
   337  			Flags: HTMLFlagsNone,
   338  		})
   339  	}
   340  }
   341  
   342  // WithRefOverride sets an optional function callback that is called every
   343  // time a reference is resolved.
   344  //
   345  // In Markdown, the link reference syntax can be made to resolve a link to
   346  // a reference instead of an inline URL, in one of the following ways:
   347  //
   348  //  * [link text][refid]
   349  //  * [refid][]
   350  //
   351  // Usually, the refid is defined at the bottom of the Markdown document. If
   352  // this override function is provided, the refid is passed to the override
   353  // function first, before consulting the defined refids at the bottom. If
   354  // the override function indicates an override did not occur, the refids at
   355  // the bottom will be used to fill in the link details.
   356  func WithRefOverride(o ReferenceOverrideFunc) Option {
   357  	return func(p *Markdown) {
   358  		p.referenceOverride = o
   359  	}
   360  }
   361  
   362  // Run is the main entry point to Blackfriday. It parses and renders a
   363  // block of markdown-encoded text.
   364  //
   365  // The simplest invocation of Run takes one argument, input:
   366  //     output := Run(input)
   367  // This will parse the input with CommonExtensions enabled and render it with
   368  // the default HTMLRenderer (with CommonHTMLFlags).
   369  //
   370  // Variadic arguments opts can customize the default behavior. Since Markdown
   371  // type does not contain exported fields, you can not use it directly. Instead,
   372  // use the With* functions. For example, this will call the most basic
   373  // functionality, with no extensions:
   374  //     output := Run(input, WithNoExtensions())
   375  //
   376  // You can use any number of With* arguments, even contradicting ones. They
   377  // will be applied in order of appearance and the latter will override the
   378  // former:
   379  //     output := Run(input, WithNoExtensions(), WithExtensions(exts),
   380  //         WithRenderer(yourRenderer))
   381  func Run(input []byte, opts ...Option) []byte {
   382  	r := NewHTMLRenderer(HTMLRendererParameters{
   383  		Flags: CommonHTMLFlags,
   384  	})
   385  	optList := []Option{WithRenderer(r), WithExtensions(CommonExtensions)}
   386  	optList = append(optList, opts...)
   387  	parser := New(optList...)
   388  	ast := parser.Parse(input)
   389  	var buf bytes.Buffer
   390  	parser.renderer.RenderHeader(&buf, ast)
   391  	ast.Walk(func(node *Node, entering bool) WalkStatus {
   392  		return parser.renderer.RenderNode(&buf, node, entering)
   393  	})
   394  	parser.renderer.RenderFooter(&buf, ast)
   395  	return buf.Bytes()
   396  }
   397  
   398  // Parse is an entry point to the parsing part of Blackfriday. It takes an
   399  // input markdown document and produces a syntax tree for its contents. This
   400  // tree can then be rendered with a default or custom renderer, or
   401  // analyzed/transformed by the caller to whatever non-standard needs they have.
   402  // The return value is the root node of the syntax tree.
   403  func (p *Markdown) Parse(input []byte) *Node {
   404  	p.block(input)
   405  	// Walk the tree and finish up some of unfinished blocks
   406  	for p.tip != nil {
   407  		p.finalize(p.tip)
   408  	}
   409  	// Walk the tree again and process inline markdown in each block
   410  	p.doc.Walk(func(node *Node, entering bool) WalkStatus {
   411  		if node.Type == Paragraph || node.Type == Heading || node.Type == TableCell {
   412  			p.inline(node, node.content)
   413  			node.content = nil
   414  		}
   415  		return GoToNext
   416  	})
   417  	p.parseRefsToAST()
   418  	return p.doc
   419  }
   420  
// parseRefsToAST appends the list of footnotes to the end of the document
// tree. It does nothing unless the Footnotes extension is enabled and at
// least one footnote has been recorded in p.notes.
//
// The footnotes are emitted as items of a single ordered List node flagged
// with IsFootnotesList. Each footnote body is parsed either as block content
// (when the reference has a block) or as inline content.
func (p *Markdown) parseRefsToAST() {
	if p.extensions&Footnotes == 0 || len(p.notes) == 0 {
		return
	}
	// attach the footnote list directly below the document root
	p.tip = p.doc
	block := p.addBlock(List, nil)
	block.IsFootnotesList = true
	block.ListFlags = ListTypeOrdered
	// only the first item carries ListItemBeginningOfList; it is cleared at
	// the end of the first iteration
	flags := ListItemBeginningOfList
	// Note: this loop is intentionally explicit, not range-form. This is
	// because the body of the loop will append nested footnotes to p.notes and
	// we need to process those late additions. Range form would only walk over
	// the fixed initial set.
	for i := 0; i < len(p.notes); i++ {
		ref := p.notes[i]
		p.addExistingChild(ref.footnote, 0)
		// this block shadows the outer list node and refers to the item
		block := ref.footnote
		block.ListFlags = flags | ListTypeOrdered
		block.RefLink = ref.link
		if ref.hasBlock {
			// NOTE(review): this bit is set after ListFlags was assigned and
			// is cleared again below, so it never reaches the item's flags.
			flags |= ListItemContainsBlock
			p.block(ref.title)
		} else {
			p.inline(block, ref.title)
		}
		flags &^= ListItemBeginningOfList | ListItemContainsBlock
	}
	// block is the list node again here; close it and restore the tip
	above := block.Parent
	finalizeList(block)
	p.tip = above
	// run inline parsing over the paragraphs and headings the block parser
	// created inside the footnotes
	block.Walk(func(node *Node, entering bool) WalkStatus {
		if node.Type == Paragraph || node.Type == Heading {
			p.inline(node, node.content)
			node.content = nil
		}
		return GoToNext
	})
}
   459  
   460  //
   461  // Link references
   462  //
   463  // This section implements support for references that (usually) appear
   464  // as footnotes in a document, and can be referenced anywhere in the document.
   465  // The basic format is:
   466  //
   467  //    [1]: http://www.google.com/ "Google"
   468  //    [2]: http://www.github.com/ "Github"
   469  //
   470  // Anywhere in the document, the reference can be linked by referring to its
   471  // label, i.e., 1 and 2 in this example, as in:
   472  //
   473  //    This library is hosted on [Github][2], a git hosting site.
   474  //
   475  // Actual footnotes as specified in Pandoc and supported by some other Markdown
   476  // libraries such as php-markdown are also taken care of. They look like this:
   477  //
   478  //    This sentence needs a bit of further explanation.[^note]
   479  //
   480  //    [^note]: This is the explanation.
   481  //
// Footnotes are rendered at the end of the document in an ordered list.
   483  // Finally, there are inline footnotes such as:
   484  //
   485  //    Inline footnotes^[Also supported.] provide a quick inline explanation,
   486  //    but are rendered at the bottom of the document.
   487  //
   488  
// reference holds all information necessary for reference-style links or
// footnotes.
//
// Consider this markdown with reference-style links:
//
//     [link][ref]
//
//     [ref]: /url/ "tooltip title"
//
// It will be ultimately converted to this HTML:
//
//     <p><a href="/url/" title="tooltip title">link</a></p>
//
// And a reference structure will be populated as follows:
//
//     p.refs["ref"] = &reference{
//         link: "/url/",
//         title: "tooltip title",
//     }
//
// Alternatively, reference can contain information about a footnote. Consider
// this markdown:
//
//     Text needing a footnote.[^a]
//
//     [^a]: This is the note
//
// A reference structure will be populated as follows:
//
//     p.refs["a"] = &reference{
//         link: "a",
//         title: "This is the note",
//         noteID: <some positive int>,
//     }
//
// TODO: As you can see, it begs for splitting into two dedicated structures
// for refs and for footnotes.
type reference struct {
	link     []byte // link target; for footnotes, the footnote id
	title    []byte // link title; for footnotes, the footnote text
	noteID   int    // 0 if not a footnote ref
	hasBlock bool   // set for footnotes whose text continues on indented lines
	footnote *Node  // a link to the Item node within a list of footnotes

	text []byte // only gets populated by refOverride feature with Reference.Text
}
   535  
   536  func (r *reference) String() string {
   537  	return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}",
   538  		r.link, r.title, r.text, r.noteID, r.hasBlock)
   539  }
   540  
   541  // Check whether or not data starts with a reference link.
   542  // If so, it is parsed and stored in the list of references
   543  // (in the render struct).
   544  // Returns the number of bytes to skip to move past it,
   545  // or zero if the first line is not a reference.
   546  func isReference(p *Markdown, data []byte, tabSize int) int {
   547  	// up to 3 optional leading spaces
   548  	if len(data) < 4 {
   549  		return 0
   550  	}
   551  	i := 0
   552  	for i < 3 && data[i] == ' ' {
   553  		i++
   554  	}
   555  
   556  	noteID := 0
   557  
   558  	// id part: anything but a newline between brackets
   559  	if data[i] != '[' {
   560  		return 0
   561  	}
   562  	i++
   563  	if p.extensions&Footnotes != 0 {
   564  		if i < len(data) && data[i] == '^' {
   565  			// we can set it to anything here because the proper noteIds will
   566  			// be assigned later during the second pass. It just has to be != 0
   567  			noteID = 1
   568  			i++
   569  		}
   570  	}
   571  	idOffset := i
   572  	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
   573  		i++
   574  	}
   575  	if i >= len(data) || data[i] != ']' {
   576  		return 0
   577  	}
   578  	idEnd := i
   579  	// footnotes can have empty ID, like this: [^], but a reference can not be
   580  	// empty like this: []. Break early if it's not a footnote and there's no ID
   581  	if noteID == 0 && idOffset == idEnd {
   582  		return 0
   583  	}
   584  	// spacer: colon (space | tab)* newline? (space | tab)*
   585  	i++
   586  	if i >= len(data) || data[i] != ':' {
   587  		return 0
   588  	}
   589  	i++
   590  	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
   591  		i++
   592  	}
   593  	if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
   594  		i++
   595  		if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
   596  			i++
   597  		}
   598  	}
   599  	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
   600  		i++
   601  	}
   602  	if i >= len(data) {
   603  		return 0
   604  	}
   605  
   606  	var (
   607  		linkOffset, linkEnd   int
   608  		titleOffset, titleEnd int
   609  		lineEnd               int
   610  		raw                   []byte
   611  		hasBlock              bool
   612  	)
   613  
   614  	if p.extensions&Footnotes != 0 && noteID != 0 {
   615  		linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
   616  		lineEnd = linkEnd
   617  	} else {
   618  		linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
   619  	}
   620  	if lineEnd == 0 {
   621  		return 0
   622  	}
   623  
   624  	// a valid ref has been found
   625  
   626  	ref := &reference{
   627  		noteID:   noteID,
   628  		hasBlock: hasBlock,
   629  	}
   630  
   631  	if noteID > 0 {
   632  		// reusing the link field for the id since footnotes don't have links
   633  		ref.link = data[idOffset:idEnd]
   634  		// if footnote, it's not really a title, it's the contained text
   635  		ref.title = raw
   636  	} else {
   637  		ref.link = data[linkOffset:linkEnd]
   638  		ref.title = data[titleOffset:titleEnd]
   639  	}
   640  
   641  	// id matches are case-insensitive
   642  	id := string(bytes.ToLower(data[idOffset:idEnd]))
   643  
   644  	p.refs[id] = ref
   645  
   646  	return lineEnd
   647  }
   648  
   649  func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
   650  	// link: whitespace-free sequence, optionally between angle brackets
   651  	if data[i] == '<' {
   652  		i++
   653  	}
   654  	linkOffset = i
   655  	for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
   656  		i++
   657  	}
   658  	linkEnd = i
   659  	if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
   660  		linkOffset++
   661  		linkEnd--
   662  	}
   663  
   664  	// optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
   665  	for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
   666  		i++
   667  	}
   668  	if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
   669  		return
   670  	}
   671  
   672  	// compute end-of-line
   673  	if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
   674  		lineEnd = i
   675  	}
   676  	if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
   677  		lineEnd++
   678  	}
   679  
   680  	// optional (space|tab)* spacer after a newline
   681  	if lineEnd > 0 {
   682  		i = lineEnd + 1
   683  		for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
   684  			i++
   685  		}
   686  	}
   687  
   688  	// optional title: any non-newline sequence enclosed in '"() alone on its line
   689  	if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
   690  		i++
   691  		titleOffset = i
   692  
   693  		// look for EOL
   694  		for i < len(data) && data[i] != '\n' && data[i] != '\r' {
   695  			i++
   696  		}
   697  		if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
   698  			titleEnd = i + 1
   699  		} else {
   700  			titleEnd = i
   701  		}
   702  
   703  		// step back
   704  		i--
   705  		for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
   706  			i--
   707  		}
   708  		if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
   709  			lineEnd = titleEnd
   710  			titleEnd = i
   711  		}
   712  	}
   713  
   714  	return
   715  }
   716  
// scanFootnote extracts the text of a footnote definition whose body starts
// at data[i].
//
// The first bit of this logic is the same as Parser.listItem, but the rest
// is much simpler. This function simply finds the entire block and shifts it
// over by one tab if it is indeed a block (just returns the line if it's not).
//
// blockStart is the offset of the first line's text after leading spaces.
// blockEnd is the end of the section in the input buffer, and contents is the
// extracted text that was shifted over one tab. It will need to be rendered at
// the end of the document. hasBlock reports whether at least one indented
// continuation line was found.
//
// All results are zero when i == 0 or data is empty.
func scanFootnote(p *Markdown, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
	// i == 0 is rejected because the scans below look at data[i-1]
	if i == 0 || len(data) == 0 {
		return
	}

	// skip leading whitespace on first line
	for i < len(data) && data[i] == ' ' {
		i++
	}

	blockStart = i

	// find the end of the line
	// (testing data[i-1] leaves i one past the newline, so the newline is
	// part of the line)
	blockEnd = i
	for i < len(data) && data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[blockEnd:i])
	blockEnd = i

	// process the following lines
	containsBlankLine := false

gatherLines:
	for blockEnd < len(data) {
		i++

		// find the end of this line
		for i < len(data) && data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		// (blockEnd advances here, so blank lines that turn out to trail the
		// footnote are still counted as consumed)
		if p.isEmpty(data[blockEnd:i]) > 0 {
			containsBlankLine = true
			blockEnd = i
			continue
		}

		// n is the width of the indent to strip from this line
		n := 0
		if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
			// this is the end of the block.
			// we don't want to include this last line in the index.
			break gatherLines
		}

		// if there were blank lines before this one, insert a new one now
		if containsBlankLine {
			raw.WriteByte('\n')
			containsBlankLine = false
		}

		// get rid of that first tab, write to buffer
		raw.Write(data[blockEnd+n : i])
		hasBlock = true

		blockEnd = i
	}

	// make sure the extracted text ends with a newline
	if data[blockEnd-1] != '\n' {
		raw.WriteByte('\n')
	}

	contents = raw.Bytes()

	return
}
   796  
   797  //
   798  //
   799  // Miscellaneous helper functions
   800  //
   801  //
   802  
   803  // Test if a character is a punctuation symbol.
   804  // Taken from a private function in regexp in the stdlib.
   805  func ispunct(c byte) bool {
   806  	for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
   807  		if c == r {
   808  			return true
   809  		}
   810  	}
   811  	return false
   812  }
   813  
   814  // Test if a character is a whitespace character.
   815  func isspace(c byte) bool {
   816  	return ishorizontalspace(c) || isverticalspace(c)
   817  }
   818  
   819  // Test if a character is a horizontal whitespace character.
   820  func ishorizontalspace(c byte) bool {
   821  	return c == ' ' || c == '\t'
   822  }
   823  
   824  // Test if a character is a vertical character.
   825  func isverticalspace(c byte) bool {
   826  	return c == '\n' || c == '\r' || c == '\f' || c == '\v'
   827  }
   828  
   829  // Test if a character is letter.
   830  func isletter(c byte) bool {
   831  	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
   832  }
   833  
   834  // Test if a character is a letter or a digit.
   835  // TODO: check when this is looking for ASCII alnum and when it should use unicode
   836  func isalnum(c byte) bool {
   837  	return (c >= '0' && c <= '9') || isletter(c)
   838  }
   839  
   840  // Replace tab characters with spaces, aligning to the next TAB_SIZE column.
   841  // always ends output with a newline
   842  func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
   843  	// first, check for common cases: no tabs, or only tabs at beginning of line
   844  	i, prefix := 0, 0
   845  	slowcase := false
   846  	for i = 0; i < len(line); i++ {
   847  		if line[i] == '\t' {
   848  			if prefix == i {
   849  				prefix++
   850  			} else {
   851  				slowcase = true
   852  				break
   853  			}
   854  		}
   855  	}
   856  
   857  	// no need to decode runes if all tabs are at the beginning of the line
   858  	if !slowcase {
   859  		for i = 0; i < prefix*tabSize; i++ {
   860  			out.WriteByte(' ')
   861  		}
   862  		out.Write(line[prefix:])
   863  		return
   864  	}
   865  
   866  	// the slow case: we need to count runes to figure out how
   867  	// many spaces to insert for each tab
   868  	column := 0
   869  	i = 0
   870  	for i < len(line) {
   871  		start := i
   872  		for i < len(line) && line[i] != '\t' {
   873  			_, size := utf8.DecodeRune(line[i:])
   874  			i += size
   875  			column++
   876  		}
   877  
   878  		if i > start {
   879  			out.Write(line[start:i])
   880  		}
   881  
   882  		if i >= len(line) {
   883  			break
   884  		}
   885  
   886  		for {
   887  			out.WriteByte(' ')
   888  			column++
   889  			if column%tabSize == 0 {
   890  				break
   891  			}
   892  		}
   893  
   894  		i++
   895  	}
   896  }
   897  
   898  // Find if a line counts as indented or not.
   899  // Returns number of characters the indent is (0 = not indented).
   900  func isIndented(data []byte, indentSize int) int {
   901  	if len(data) == 0 {
   902  		return 0
   903  	}
   904  	if data[0] == '\t' {
   905  		return 1
   906  	}
   907  	if len(data) < indentSize {
   908  		return 0
   909  	}
   910  	for i := 0; i < indentSize; i++ {
   911  		if data[i] != ' ' {
   912  			return 0
   913  		}
   914  	}
   915  	return indentSize
   916  }
   917  
   918  // Create a url-safe slug for fragments
   919  func slugify(in []byte) []byte {
   920  	if len(in) == 0 {
   921  		return in
   922  	}
   923  	out := make([]byte, 0, len(in))
   924  	sym := false
   925  
   926  	for _, ch := range in {
   927  		if isalnum(ch) {
   928  			sym = false
   929  			out = append(out, ch)
   930  		} else if sym {
   931  			continue
   932  		} else {
   933  			out = append(out, '-')
   934  			sym = true
   935  		}
   936  	}
   937  	var a, b int
   938  	var ch byte
   939  	for a, ch = range out {
   940  		if ch != '-' {
   941  			break
   942  		}
   943  	}
   944  	for b = len(out) - 1; b > 0; b-- {
   945  		if out[b] != '-' {
   946  			break
   947  		}
   948  	}
   949  	return out[a : b+1]
   950  }
   951
View as plain text