...

Source file src/github.com/yuin/goldmark/renderer/html/html.go

Documentation: github.com/yuin/goldmark/renderer/html

     1  // Package html implements a renderer that outputs HTML.
     2  package html
     3  
     4  import (
     5  	"bytes"
     6  	"fmt"
     7  	"strconv"
     8  	"unicode"
     9  	"unicode/utf8"
    10  
    11  	"github.com/yuin/goldmark/ast"
    12  	"github.com/yuin/goldmark/renderer"
    13  	"github.com/yuin/goldmark/util"
    14  )
    15  
    16  // A Config struct has configurations for the HTML based renderers.
    17  type Config struct {
    18  	Writer              Writer
    19  	HardWraps           bool
    20  	EastAsianLineBreaks EastAsianLineBreaks
    21  	XHTML               bool
    22  	Unsafe              bool
    23  }
    24  
    25  // NewConfig returns a new Config with defaults.
    26  func NewConfig() Config {
    27  	return Config{
    28  		Writer:              DefaultWriter,
    29  		HardWraps:           false,
    30  		EastAsianLineBreaks: EastAsianLineBreaksNone,
    31  		XHTML:               false,
    32  		Unsafe:              false,
    33  	}
    34  }
    35  
    36  // SetOption implements renderer.NodeRenderer.SetOption.
    37  func (c *Config) SetOption(name renderer.OptionName, value interface{}) {
    38  	switch name {
    39  	case optHardWraps:
    40  		c.HardWraps = value.(bool)
    41  	case optEastAsianLineBreaks:
    42  		c.EastAsianLineBreaks = value.(EastAsianLineBreaks)
    43  	case optXHTML:
    44  		c.XHTML = value.(bool)
    45  	case optUnsafe:
    46  		c.Unsafe = value.(bool)
    47  	case optTextWriter:
    48  		c.Writer = value.(Writer)
    49  	}
    50  }
    51  
    52  // An Option interface sets options for HTML based renderers.
    53  type Option interface {
    54  	SetHTMLOption(*Config)
    55  }
    56  
    57  // TextWriter is an option name used in WithWriter.
    58  const optTextWriter renderer.OptionName = "Writer"
    59  
    60  type withWriter struct {
    61  	value Writer
    62  }
    63  
    64  func (o *withWriter) SetConfig(c *renderer.Config) {
    65  	c.Options[optTextWriter] = o.value
    66  }
    67  
    68  func (o *withWriter) SetHTMLOption(c *Config) {
    69  	c.Writer = o.value
    70  }
    71  
    72  // WithWriter is a functional option that allow you to set the given writer to
    73  // the renderer.
    74  func WithWriter(writer Writer) interface {
    75  	renderer.Option
    76  	Option
    77  } {
    78  	return &withWriter{writer}
    79  }
    80  
    81  // HardWraps is an option name used in WithHardWraps.
    82  const optHardWraps renderer.OptionName = "HardWraps"
    83  
    84  type withHardWraps struct {
    85  }
    86  
    87  func (o *withHardWraps) SetConfig(c *renderer.Config) {
    88  	c.Options[optHardWraps] = true
    89  }
    90  
    91  func (o *withHardWraps) SetHTMLOption(c *Config) {
    92  	c.HardWraps = true
    93  }
    94  
    95  // WithHardWraps is a functional option that indicates whether softline breaks
    96  // should be rendered as '<br>'.
    97  func WithHardWraps() interface {
    98  	renderer.Option
    99  	Option
   100  } {
   101  	return &withHardWraps{}
   102  }
   103  
   104  // EastAsianLineBreaks is an option name used in WithEastAsianLineBreaks.
   105  const optEastAsianLineBreaks renderer.OptionName = "EastAsianLineBreaks"
   106  
   107  // A EastAsianLineBreaks is a style of east asian line breaks.
   108  type EastAsianLineBreaks int
   109  
   110  const (
   111  	//EastAsianLineBreaksNone renders line breaks as it is.
   112  	EastAsianLineBreaksNone EastAsianLineBreaks = iota
   113  	// EastAsianLineBreaksSimple follows east_asian_line_breaks in Pandoc.
   114  	EastAsianLineBreaksSimple
   115  	// EastAsianLineBreaksCSS3Draft follows CSS text level3 "Segment Break Transformation Rules" with some enhancements.
   116  	EastAsianLineBreaksCSS3Draft
   117  )
   118  
   119  func (b EastAsianLineBreaks) softLineBreak(thisLastRune rune, siblingFirstRune rune) bool {
   120  	switch b {
   121  	case EastAsianLineBreaksNone:
   122  		return false
   123  	case EastAsianLineBreaksSimple:
   124  		return !(util.IsEastAsianWideRune(thisLastRune) && util.IsEastAsianWideRune(siblingFirstRune))
   125  	case EastAsianLineBreaksCSS3Draft:
   126  		return eastAsianLineBreaksCSS3DraftSoftLineBreak(thisLastRune, siblingFirstRune)
   127  	}
   128  	return false
   129  }
   130  
   131  func eastAsianLineBreaksCSS3DraftSoftLineBreak(thisLastRune rune, siblingFirstRune rune) bool {
   132  	// Implements CSS text level3 Segment Break Transformation Rules with some enhancements.
   133  	// References:
   134  	//   - https://www.w3.org/TR/2020/WD-css-text-3-20200429/#line-break-transform
   135  	//   - https://github.com/w3c/csswg-drafts/issues/5086
   136  
   137  	// Rule1:
   138  	//   If the character immediately before or immediately after the segment break is
   139  	//   the zero-width space character (U+200B), then the break is removed, leaving behind the zero-width space.
   140  	if thisLastRune == '\u200B' || siblingFirstRune == '\u200B' {
   141  		return false
   142  	}
   143  
   144  	// Rule2:
   145  	//   Otherwise, if the East Asian Width property of both the character before and after the segment break is
   146  	//   F, W, or H (not A), and neither side is Hangul, then the segment break is removed.
   147  	thisLastRuneEastAsianWidth := util.EastAsianWidth(thisLastRune)
   148  	siblingFirstRuneEastAsianWidth := util.EastAsianWidth(siblingFirstRune)
   149  	if (thisLastRuneEastAsianWidth == "F" ||
   150  		thisLastRuneEastAsianWidth == "W" ||
   151  		thisLastRuneEastAsianWidth == "H") &&
   152  		(siblingFirstRuneEastAsianWidth == "F" ||
   153  			siblingFirstRuneEastAsianWidth == "W" ||
   154  			siblingFirstRuneEastAsianWidth == "H") {
   155  		return unicode.Is(unicode.Hangul, thisLastRune) || unicode.Is(unicode.Hangul, siblingFirstRune)
   156  	}
   157  
   158  	// Rule3:
   159  	//   Otherwise, if either the character before or after the segment break belongs to
   160  	//   the space-discarding character set and it is a Unicode Punctuation (P*) or U+3000,
   161  	//   then the segment break is removed.
   162  	if util.IsSpaceDiscardingUnicodeRune(thisLastRune) ||
   163  		unicode.IsPunct(thisLastRune) ||
   164  		thisLastRune == '\u3000' ||
   165  		util.IsSpaceDiscardingUnicodeRune(siblingFirstRune) ||
   166  		unicode.IsPunct(siblingFirstRune) ||
   167  		siblingFirstRune == '\u3000' {
   168  		return false
   169  	}
   170  
   171  	// Rule4:
   172  	//   Otherwise, the segment break is converted to a space (U+0020).
   173  	return true
   174  }
   175  
   176  type withEastAsianLineBreaks struct {
   177  	eastAsianLineBreaksStyle EastAsianLineBreaks
   178  }
   179  
   180  func (o *withEastAsianLineBreaks) SetConfig(c *renderer.Config) {
   181  	c.Options[optEastAsianLineBreaks] = o.eastAsianLineBreaksStyle
   182  }
   183  
   184  func (o *withEastAsianLineBreaks) SetHTMLOption(c *Config) {
   185  	c.EastAsianLineBreaks = o.eastAsianLineBreaksStyle
   186  }
   187  
   188  // WithEastAsianLineBreaks is a functional option that indicates whether softline breaks
   189  // between east asian wide characters should be ignored.
   190  func WithEastAsianLineBreaks(e EastAsianLineBreaks) interface {
   191  	renderer.Option
   192  	Option
   193  } {
   194  	return &withEastAsianLineBreaks{e}
   195  }
   196  
   197  // XHTML is an option name used in WithXHTML.
   198  const optXHTML renderer.OptionName = "XHTML"
   199  
   200  type withXHTML struct {
   201  }
   202  
   203  func (o *withXHTML) SetConfig(c *renderer.Config) {
   204  	c.Options[optXHTML] = true
   205  }
   206  
   207  func (o *withXHTML) SetHTMLOption(c *Config) {
   208  	c.XHTML = true
   209  }
   210  
   211  // WithXHTML is a functional option indicates that nodes should be rendered in
   212  // xhtml instead of HTML5.
   213  func WithXHTML() interface {
   214  	Option
   215  	renderer.Option
   216  } {
   217  	return &withXHTML{}
   218  }
   219  
   220  // Unsafe is an option name used in WithUnsafe.
   221  const optUnsafe renderer.OptionName = "Unsafe"
   222  
   223  type withUnsafe struct {
   224  }
   225  
   226  func (o *withUnsafe) SetConfig(c *renderer.Config) {
   227  	c.Options[optUnsafe] = true
   228  }
   229  
   230  func (o *withUnsafe) SetHTMLOption(c *Config) {
   231  	c.Unsafe = true
   232  }
   233  
   234  // WithUnsafe is a functional option that renders dangerous contents
   235  // (raw htmls and potentially dangerous links) as it is.
   236  func WithUnsafe() interface {
   237  	renderer.Option
   238  	Option
   239  } {
   240  	return &withUnsafe{}
   241  }
   242  
   243  // A Renderer struct is an implementation of renderer.NodeRenderer that renders
   244  // nodes as (X)HTML.
   245  type Renderer struct {
   246  	Config
   247  }
   248  
   249  // NewRenderer returns a new Renderer with given options.
   250  func NewRenderer(opts ...Option) renderer.NodeRenderer {
   251  	r := &Renderer{
   252  		Config: NewConfig(),
   253  	}
   254  
   255  	for _, opt := range opts {
   256  		opt.SetHTMLOption(&r.Config)
   257  	}
   258  	return r
   259  }
   260  
   261  // RegisterFuncs implements NodeRenderer.RegisterFuncs .
   262  func (r *Renderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
   263  	// blocks
   264  
   265  	reg.Register(ast.KindDocument, r.renderDocument)
   266  	reg.Register(ast.KindHeading, r.renderHeading)
   267  	reg.Register(ast.KindBlockquote, r.renderBlockquote)
   268  	reg.Register(ast.KindCodeBlock, r.renderCodeBlock)
   269  	reg.Register(ast.KindFencedCodeBlock, r.renderFencedCodeBlock)
   270  	reg.Register(ast.KindHTMLBlock, r.renderHTMLBlock)
   271  	reg.Register(ast.KindList, r.renderList)
   272  	reg.Register(ast.KindListItem, r.renderListItem)
   273  	reg.Register(ast.KindParagraph, r.renderParagraph)
   274  	reg.Register(ast.KindTextBlock, r.renderTextBlock)
   275  	reg.Register(ast.KindThematicBreak, r.renderThematicBreak)
   276  
   277  	// inlines
   278  
   279  	reg.Register(ast.KindAutoLink, r.renderAutoLink)
   280  	reg.Register(ast.KindCodeSpan, r.renderCodeSpan)
   281  	reg.Register(ast.KindEmphasis, r.renderEmphasis)
   282  	reg.Register(ast.KindImage, r.renderImage)
   283  	reg.Register(ast.KindLink, r.renderLink)
   284  	reg.Register(ast.KindRawHTML, r.renderRawHTML)
   285  	reg.Register(ast.KindText, r.renderText)
   286  	reg.Register(ast.KindString, r.renderString)
   287  }
   288  
   289  func (r *Renderer) writeLines(w util.BufWriter, source []byte, n ast.Node) {
   290  	l := n.Lines().Len()
   291  	for i := 0; i < l; i++ {
   292  		line := n.Lines().At(i)
   293  		r.Writer.RawWrite(w, line.Value(source))
   294  	}
   295  }
   296  
   297  // GlobalAttributeFilter defines attribute names which any elements can have.
   298  var GlobalAttributeFilter = util.NewBytesFilter(
   299  	[]byte("accesskey"),
   300  	[]byte("autocapitalize"),
   301  	[]byte("autofocus"),
   302  	[]byte("class"),
   303  	[]byte("contenteditable"),
   304  	[]byte("dir"),
   305  	[]byte("draggable"),
   306  	[]byte("enterkeyhint"),
   307  	[]byte("hidden"),
   308  	[]byte("id"),
   309  	[]byte("inert"),
   310  	[]byte("inputmode"),
   311  	[]byte("is"),
   312  	[]byte("itemid"),
   313  	[]byte("itemprop"),
   314  	[]byte("itemref"),
   315  	[]byte("itemscope"),
   316  	[]byte("itemtype"),
   317  	[]byte("lang"),
   318  	[]byte("part"),
   319  	[]byte("role"),
   320  	[]byte("slot"),
   321  	[]byte("spellcheck"),
   322  	[]byte("style"),
   323  	[]byte("tabindex"),
   324  	[]byte("title"),
   325  	[]byte("translate"),
   326  )
   327  
   328  func (r *Renderer) renderDocument(
   329  	w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
   330  	// nothing to do
   331  	return ast.WalkContinue, nil
   332  }
   333  
   334  // HeadingAttributeFilter defines attribute names which heading elements can have.
   335  var HeadingAttributeFilter = GlobalAttributeFilter
   336  
   337  func (r *Renderer) renderHeading(
   338  	w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
   339  	n := node.(*ast.Heading)
   340  	if entering {
   341  		_, _ = w.WriteString("<h")
   342  		_ = w.WriteByte("0123456"[n.Level])
   343  		if n.Attributes() != nil {
   344  			RenderAttributes(w, node, HeadingAttributeFilter)
   345  		}
   346  		_ = w.WriteByte('>')
   347  	} else {
   348  		_, _ = w.WriteString("</h")
   349  		_ = w.WriteByte("0123456"[n.Level])
   350  		_, _ = w.WriteString(">\n")
   351  	}
   352  	return ast.WalkContinue, nil
   353  }
   354  
   355  // BlockquoteAttributeFilter defines attribute names which blockquote elements can have.
   356  var BlockquoteAttributeFilter = GlobalAttributeFilter.Extend(
   357  	[]byte("cite"),
   358  )
   359  
   360  func (r *Renderer) renderBlockquote(
   361  	w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
   362  	if entering {
   363  		if n.Attributes() != nil {
   364  			_, _ = w.WriteString("<blockquote")
   365  			RenderAttributes(w, n, BlockquoteAttributeFilter)
   366  			_ = w.WriteByte('>')
   367  		} else {
   368  			_, _ = w.WriteString("<blockquote>\n")
   369  		}
   370  	} else {
   371  		_, _ = w.WriteString("</blockquote>\n")
   372  	}
   373  	return ast.WalkContinue, nil
   374  }
   375  
   376  func (r *Renderer) renderCodeBlock(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
   377  	if entering {
   378  		_, _ = w.WriteString("<pre><code>")
   379  		r.writeLines(w, source, n)
   380  	} else {
   381  		_, _ = w.WriteString("</code></pre>\n")
   382  	}
   383  	return ast.WalkContinue, nil
   384  }
   385  
   386  func (r *Renderer) renderFencedCodeBlock(
   387  	w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
   388  	n := node.(*ast.FencedCodeBlock)
   389  	if entering {
   390  		_, _ = w.WriteString("<pre><code")
   391  		language := n.Language(source)
   392  		if language != nil {
   393  			_, _ = w.WriteString(" class=\"language-")
   394  			r.Writer.Write(w, language)
   395  			_, _ = w.WriteString("\"")
   396  		}
   397  		_ = w.WriteByte('>')
   398  		r.writeLines(w, source, n)
   399  	} else {
   400  		_, _ = w.WriteString("</code></pre>\n")
   401  	}
   402  	return ast.WalkContinue, nil
   403  }
   404  
   405  func (r *Renderer) renderHTMLBlock(
   406  	w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
   407  	n := node.(*ast.HTMLBlock)
   408  	if entering {
   409  		if r.Unsafe {
   410  			l := n.Lines().Len()
   411  			for i := 0; i < l; i++ {
   412  				line := n.Lines().At(i)
   413  				r.Writer.SecureWrite(w, line.Value(source))
   414  			}
   415  		} else {
   416  			_, _ = w.WriteString("<!-- raw HTML omitted -->\n")
   417  		}
   418  	} else {
   419  		if n.HasClosure() {
   420  			if r.Unsafe {
   421  				closure := n.ClosureLine
   422  				r.Writer.SecureWrite(w, closure.Value(source))
   423  			} else {
   424  				_, _ = w.WriteString("<!-- raw HTML omitted -->\n")
   425  			}
   426  		}
   427  	}
   428  	return ast.WalkContinue, nil
   429  }
   430  
   431  // ListAttributeFilter defines attribute names which list elements can have.
   432  var ListAttributeFilter = GlobalAttributeFilter.Extend(
   433  	[]byte("start"),
   434  	[]byte("reversed"),
   435  	[]byte("type"),
   436  )
   437  
   438  func (r *Renderer) renderList(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
   439  	n := node.(*ast.List)
   440  	tag := "ul"
   441  	if n.IsOrdered() {
   442  		tag = "ol"
   443  	}
   444  	if entering {
   445  		_ = w.WriteByte('<')
   446  		_, _ = w.WriteString(tag)
   447  		if n.IsOrdered() && n.Start != 1 {
   448  			fmt.Fprintf(w, " start=\"%d\"", n.Start)
   449  		}
   450  		if n.Attributes() != nil {
   451  			RenderAttributes(w, n, ListAttributeFilter)
   452  		}
   453  		_, _ = w.WriteString(">\n")
   454  	} else {
   455  		_, _ = w.WriteString("</")
   456  		_, _ = w.WriteString(tag)
   457  		_, _ = w.WriteString(">\n")
   458  	}
   459  	return ast.WalkContinue, nil
   460  }
   461  
   462  // ListItemAttributeFilter defines attribute names which list item elements can have.
   463  var ListItemAttributeFilter = GlobalAttributeFilter.Extend(
   464  	[]byte("value"),
   465  )
   466  
   467  func (r *Renderer) renderListItem(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
   468  	if entering {
   469  		if n.Attributes() != nil {
   470  			_, _ = w.WriteString("<li")
   471  			RenderAttributes(w, n, ListItemAttributeFilter)
   472  			_ = w.WriteByte('>')
   473  		} else {
   474  			_, _ = w.WriteString("<li>")
   475  		}
   476  		fc := n.FirstChild()
   477  		if fc != nil {
   478  			if _, ok := fc.(*ast.TextBlock); !ok {
   479  				_ = w.WriteByte('\n')
   480  			}
   481  		}
   482  	} else {
   483  		_, _ = w.WriteString("</li>\n")
   484  	}
   485  	return ast.WalkContinue, nil
   486  }
   487  
   488  // ParagraphAttributeFilter defines attribute names which paragraph elements can have.
   489  var ParagraphAttributeFilter = GlobalAttributeFilter
   490  
   491  func (r *Renderer) renderParagraph(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
   492  	if entering {
   493  		if n.Attributes() != nil {
   494  			_, _ = w.WriteString("<p")
   495  			RenderAttributes(w, n, ParagraphAttributeFilter)
   496  			_ = w.WriteByte('>')
   497  		} else {
   498  			_, _ = w.WriteString("<p>")
   499  		}
   500  	} else {
   501  		_, _ = w.WriteString("</p>\n")
   502  	}
   503  	return ast.WalkContinue, nil
   504  }
   505  
   506  func (r *Renderer) renderTextBlock(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
   507  	if !entering {
   508  		if n.NextSibling() != nil && n.FirstChild() != nil {
   509  			_ = w.WriteByte('\n')
   510  		}
   511  	}
   512  	return ast.WalkContinue, nil
   513  }
   514  
   515  // ThematicAttributeFilter defines attribute names which hr elements can have.
   516  var ThematicAttributeFilter = GlobalAttributeFilter.Extend(
   517  	[]byte("align"),   // [Deprecated]
   518  	[]byte("color"),   // [Not Standardized]
   519  	[]byte("noshade"), // [Deprecated]
   520  	[]byte("size"),    // [Deprecated]
   521  	[]byte("width"),   // [Deprecated]
   522  )
   523  
   524  func (r *Renderer) renderThematicBreak(
   525  	w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
   526  	if !entering {
   527  		return ast.WalkContinue, nil
   528  	}
   529  	_, _ = w.WriteString("<hr")
   530  	if n.Attributes() != nil {
   531  		RenderAttributes(w, n, ThematicAttributeFilter)
   532  	}
   533  	if r.XHTML {
   534  		_, _ = w.WriteString(" />\n")
   535  	} else {
   536  		_, _ = w.WriteString(">\n")
   537  	}
   538  	return ast.WalkContinue, nil
   539  }
   540  
   541  // LinkAttributeFilter defines attribute names which link elements can have.
   542  var LinkAttributeFilter = GlobalAttributeFilter.Extend(
   543  	[]byte("download"),
   544  	// []byte("href"),
   545  	[]byte("hreflang"),
   546  	[]byte("media"),
   547  	[]byte("ping"),
   548  	[]byte("referrerpolicy"),
   549  	[]byte("rel"),
   550  	[]byte("shape"),
   551  	[]byte("target"),
   552  )
   553  
   554  func (r *Renderer) renderAutoLink(
   555  	w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
   556  	n := node.(*ast.AutoLink)
   557  	if !entering {
   558  		return ast.WalkContinue, nil
   559  	}
   560  	_, _ = w.WriteString(`<a href="`)
   561  	url := n.URL(source)
   562  	label := n.Label(source)
   563  	if n.AutoLinkType == ast.AutoLinkEmail && !bytes.HasPrefix(bytes.ToLower(url), []byte("mailto:")) {
   564  		_, _ = w.WriteString("mailto:")
   565  	}
   566  	_, _ = w.Write(util.EscapeHTML(util.URLEscape(url, false)))
   567  	if n.Attributes() != nil {
   568  		_ = w.WriteByte('"')
   569  		RenderAttributes(w, n, LinkAttributeFilter)
   570  		_ = w.WriteByte('>')
   571  	} else {
   572  		_, _ = w.WriteString(`">`)
   573  	}
   574  	_, _ = w.Write(util.EscapeHTML(label))
   575  	_, _ = w.WriteString(`</a>`)
   576  	return ast.WalkContinue, nil
   577  }
   578  
   579  // CodeAttributeFilter defines attribute names which code elements can have.
   580  var CodeAttributeFilter = GlobalAttributeFilter
   581  
   582  func (r *Renderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
   583  	if entering {
   584  		if n.Attributes() != nil {
   585  			_, _ = w.WriteString("<code")
   586  			RenderAttributes(w, n, CodeAttributeFilter)
   587  			_ = w.WriteByte('>')
   588  		} else {
   589  			_, _ = w.WriteString("<code>")
   590  		}
   591  		for c := n.FirstChild(); c != nil; c = c.NextSibling() {
   592  			segment := c.(*ast.Text).Segment
   593  			value := segment.Value(source)
   594  			if bytes.HasSuffix(value, []byte("\n")) {
   595  				r.Writer.RawWrite(w, value[:len(value)-1])
   596  				r.Writer.RawWrite(w, []byte(" "))
   597  			} else {
   598  				r.Writer.RawWrite(w, value)
   599  			}
   600  		}
   601  		return ast.WalkSkipChildren, nil
   602  	}
   603  	_, _ = w.WriteString("</code>")
   604  	return ast.WalkContinue, nil
   605  }
   606  
   607  // EmphasisAttributeFilter defines attribute names which emphasis elements can have.
   608  var EmphasisAttributeFilter = GlobalAttributeFilter
   609  
   610  func (r *Renderer) renderEmphasis(
   611  	w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
   612  	n := node.(*ast.Emphasis)
   613  	tag := "em"
   614  	if n.Level == 2 {
   615  		tag = "strong"
   616  	}
   617  	if entering {
   618  		_ = w.WriteByte('<')
   619  		_, _ = w.WriteString(tag)
   620  		if n.Attributes() != nil {
   621  			RenderAttributes(w, n, EmphasisAttributeFilter)
   622  		}
   623  		_ = w.WriteByte('>')
   624  	} else {
   625  		_, _ = w.WriteString("</")
   626  		_, _ = w.WriteString(tag)
   627  		_ = w.WriteByte('>')
   628  	}
   629  	return ast.WalkContinue, nil
   630  }
   631  
   632  func (r *Renderer) renderLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
   633  	n := node.(*ast.Link)
   634  	if entering {
   635  		_, _ = w.WriteString("<a href=\"")
   636  		if r.Unsafe || !IsDangerousURL(n.Destination) {
   637  			_, _ = w.Write(util.EscapeHTML(util.URLEscape(n.Destination, true)))
   638  		}
   639  		_ = w.WriteByte('"')
   640  		if n.Title != nil {
   641  			_, _ = w.WriteString(` title="`)
   642  			r.Writer.Write(w, n.Title)
   643  			_ = w.WriteByte('"')
   644  		}
   645  		if n.Attributes() != nil {
   646  			RenderAttributes(w, n, LinkAttributeFilter)
   647  		}
   648  		_ = w.WriteByte('>')
   649  	} else {
   650  		_, _ = w.WriteString("</a>")
   651  	}
   652  	return ast.WalkContinue, nil
   653  }
   654  
   655  // ImageAttributeFilter defines attribute names which image elements can have.
   656  var ImageAttributeFilter = GlobalAttributeFilter.Extend(
   657  	[]byte("align"),
   658  	[]byte("border"),
   659  	[]byte("crossorigin"),
   660  	[]byte("decoding"),
   661  	[]byte("height"),
   662  	[]byte("importance"),
   663  	[]byte("intrinsicsize"),
   664  	[]byte("ismap"),
   665  	[]byte("loading"),
   666  	[]byte("referrerpolicy"),
   667  	[]byte("sizes"),
   668  	[]byte("srcset"),
   669  	[]byte("usemap"),
   670  	[]byte("width"),
   671  )
   672  
   673  func (r *Renderer) renderImage(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
   674  	if !entering {
   675  		return ast.WalkContinue, nil
   676  	}
   677  	n := node.(*ast.Image)
   678  	_, _ = w.WriteString("<img src=\"")
   679  	if r.Unsafe || !IsDangerousURL(n.Destination) {
   680  		_, _ = w.Write(util.EscapeHTML(util.URLEscape(n.Destination, true)))
   681  	}
   682  	_, _ = w.WriteString(`" alt="`)
   683  	_, _ = w.Write(nodeToHTMLText(n, source))
   684  	_ = w.WriteByte('"')
   685  	if n.Title != nil {
   686  		_, _ = w.WriteString(` title="`)
   687  		r.Writer.Write(w, n.Title)
   688  		_ = w.WriteByte('"')
   689  	}
   690  	if n.Attributes() != nil {
   691  		RenderAttributes(w, n, ImageAttributeFilter)
   692  	}
   693  	if r.XHTML {
   694  		_, _ = w.WriteString(" />")
   695  	} else {
   696  		_, _ = w.WriteString(">")
   697  	}
   698  	return ast.WalkSkipChildren, nil
   699  }
   700  
   701  func (r *Renderer) renderRawHTML(
   702  	w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
   703  	if !entering {
   704  		return ast.WalkSkipChildren, nil
   705  	}
   706  	if r.Unsafe {
   707  		n := node.(*ast.RawHTML)
   708  		l := n.Segments.Len()
   709  		for i := 0; i < l; i++ {
   710  			segment := n.Segments.At(i)
   711  			_, _ = w.Write(segment.Value(source))
   712  		}
   713  		return ast.WalkSkipChildren, nil
   714  	}
   715  	_, _ = w.WriteString("<!-- raw HTML omitted -->")
   716  	return ast.WalkSkipChildren, nil
   717  }
   718  
   719  func (r *Renderer) renderText(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
   720  	if !entering {
   721  		return ast.WalkContinue, nil
   722  	}
   723  	n := node.(*ast.Text)
   724  	segment := n.Segment
   725  	if n.IsRaw() {
   726  		r.Writer.RawWrite(w, segment.Value(source))
   727  	} else {
   728  		value := segment.Value(source)
   729  		r.Writer.Write(w, value)
   730  		if n.HardLineBreak() || (n.SoftLineBreak() && r.HardWraps) {
   731  			if r.XHTML {
   732  				_, _ = w.WriteString("<br />\n")
   733  			} else {
   734  				_, _ = w.WriteString("<br>\n")
   735  			}
   736  		} else if n.SoftLineBreak() {
   737  			if r.EastAsianLineBreaks != EastAsianLineBreaksNone && len(value) != 0 {
   738  				sibling := node.NextSibling()
   739  				if sibling != nil && sibling.Kind() == ast.KindText {
   740  					if siblingText := sibling.(*ast.Text).Text(source); len(siblingText) != 0 {
   741  						thisLastRune := util.ToRune(value, len(value)-1)
   742  						siblingFirstRune, _ := utf8.DecodeRune(siblingText)
   743  						if r.EastAsianLineBreaks.softLineBreak(thisLastRune, siblingFirstRune) {
   744  							_ = w.WriteByte('\n')
   745  						}
   746  					}
   747  				}
   748  			} else {
   749  				_ = w.WriteByte('\n')
   750  			}
   751  		}
   752  	}
   753  	return ast.WalkContinue, nil
   754  }
   755  
   756  func (r *Renderer) renderString(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
   757  	if !entering {
   758  		return ast.WalkContinue, nil
   759  	}
   760  	n := node.(*ast.String)
   761  	if n.IsCode() {
   762  		_, _ = w.Write(n.Value)
   763  	} else {
   764  		if n.IsRaw() {
   765  			r.Writer.RawWrite(w, n.Value)
   766  		} else {
   767  			r.Writer.Write(w, n.Value)
   768  		}
   769  	}
   770  	return ast.WalkContinue, nil
   771  }
   772  
   773  var dataPrefix = []byte("data-")
   774  
   775  // RenderAttributes renders given node's attributes.
   776  // You can specify attribute names to render by the filter.
   777  // If filter is nil, RenderAttributes renders all attributes.
   778  func RenderAttributes(w util.BufWriter, node ast.Node, filter util.BytesFilter) {
   779  	for _, attr := range node.Attributes() {
   780  		if filter != nil && !filter.Contains(attr.Name) {
   781  			if !bytes.HasPrefix(attr.Name, dataPrefix) {
   782  				continue
   783  			}
   784  		}
   785  		_, _ = w.WriteString(" ")
   786  		_, _ = w.Write(attr.Name)
   787  		_, _ = w.WriteString(`="`)
   788  		// TODO: convert numeric values to strings
   789  		_, _ = w.Write(util.EscapeHTML(attr.Value.([]byte)))
   790  		_ = w.WriteByte('"')
   791  	}
   792  }
   793  
   794  // A Writer interface writes textual contents to a writer.
   795  type Writer interface {
   796  	// Write writes the given source to writer with resolving references and unescaping
   797  	// backslash escaped characters.
   798  	Write(writer util.BufWriter, source []byte)
   799  
   800  	// RawWrite writes the given source to writer without resolving references and
   801  	// unescaping backslash escaped characters.
   802  	RawWrite(writer util.BufWriter, source []byte)
   803  
   804  	// SecureWrite writes the given source to writer with replacing insecure characters.
   805  	SecureWrite(writer util.BufWriter, source []byte)
   806  }
   807  
   808  var replacementCharacter = []byte("\ufffd")
   809  
   810  // A WriterConfig struct has configurations for the HTML based writers.
   811  type WriterConfig struct {
   812  	// EscapedSpace is an option that indicates that a '\' escaped half-space(0x20) should not be rendered.
   813  	EscapedSpace bool
   814  }
   815  
   816  // A WriterOption interface sets options for HTML based writers.
   817  type WriterOption func(*WriterConfig)
   818  
   819  // WithEscapedSpace is a WriterOption indicates that a '\' escaped half-space(0x20) should not be rendered.
   820  func WithEscapedSpace() WriterOption {
   821  	return func(c *WriterConfig) {
   822  		c.EscapedSpace = true
   823  	}
   824  }
   825  
   826  type defaultWriter struct {
   827  	WriterConfig
   828  }
   829  
   830  // NewWriter returns a new Writer.
   831  func NewWriter(opts ...WriterOption) Writer {
   832  	w := &defaultWriter{}
   833  	for _, opt := range opts {
   834  		opt(&w.WriterConfig)
   835  	}
   836  	return w
   837  }
   838  
   839  func escapeRune(writer util.BufWriter, r rune) {
   840  	if r < 256 {
   841  		v := util.EscapeHTMLByte(byte(r))
   842  		if v != nil {
   843  			_, _ = writer.Write(v)
   844  			return
   845  		}
   846  	}
   847  	_, _ = writer.WriteRune(util.ToValidRune(r))
   848  }
   849  
   850  func (d *defaultWriter) SecureWrite(writer util.BufWriter, source []byte) {
   851  	n := 0
   852  	l := len(source)
   853  	for i := 0; i < l; i++ {
   854  		if source[i] == '\u0000' {
   855  			_, _ = writer.Write(source[i-n : i])
   856  			n = 0
   857  			_, _ = writer.Write(replacementCharacter)
   858  			continue
   859  		}
   860  		n++
   861  	}
   862  	if n != 0 {
   863  		_, _ = writer.Write(source[l-n:])
   864  	}
   865  }
   866  
   867  func (d *defaultWriter) RawWrite(writer util.BufWriter, source []byte) {
   868  	n := 0
   869  	l := len(source)
   870  	for i := 0; i < l; i++ {
   871  		v := util.EscapeHTMLByte(source[i])
   872  		if v != nil {
   873  			_, _ = writer.Write(source[i-n : i])
   874  			n = 0
   875  			_, _ = writer.Write(v)
   876  			continue
   877  		}
   878  		n++
   879  	}
   880  	if n != 0 {
   881  		_, _ = writer.Write(source[l-n:])
   882  	}
   883  }
   884  
   885  func (d *defaultWriter) Write(writer util.BufWriter, source []byte) {
   886  	escaped := false
   887  	var ok bool
   888  	limit := len(source)
   889  	n := 0
   890  	for i := 0; i < limit; i++ {
   891  		c := source[i]
   892  		if escaped {
   893  			if util.IsPunct(c) {
   894  				d.RawWrite(writer, source[n:i-1])
   895  				n = i
   896  				escaped = false
   897  				continue
   898  			}
   899  			if d.EscapedSpace && c == ' ' {
   900  				d.RawWrite(writer, source[n:i-1])
   901  				n = i + 1
   902  				escaped = false
   903  				continue
   904  			}
   905  		}
   906  		if c == '\x00' {
   907  			d.RawWrite(writer, source[n:i])
   908  			d.RawWrite(writer, replacementCharacter)
   909  			n = i + 1
   910  			escaped = false
   911  			continue
   912  		}
   913  		if c == '&' {
   914  			pos := i
   915  			next := i + 1
   916  			if next < limit && source[next] == '#' {
   917  				nnext := next + 1
   918  				if nnext < limit {
   919  					nc := source[nnext]
   920  					// code point like #x22;
   921  					if nnext < limit && nc == 'x' || nc == 'X' {
   922  						start := nnext + 1
   923  						i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsHexDecimal)
   924  						if ok && i < limit && source[i] == ';' && i-start < 7 {
   925  							v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 16, 32)
   926  							d.RawWrite(writer, source[n:pos])
   927  							n = i + 1
   928  							escapeRune(writer, rune(v))
   929  							continue
   930  						}
   931  						// code point like #1234;
   932  					} else if nc >= '0' && nc <= '9' {
   933  						start := nnext
   934  						i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsNumeric)
   935  						if ok && i < limit && i-start < 8 && source[i] == ';' {
   936  							v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 10, 32)
   937  							d.RawWrite(writer, source[n:pos])
   938  							n = i + 1
   939  							escapeRune(writer, rune(v))
   940  							continue
   941  						}
   942  					}
   943  				}
   944  			} else {
   945  				start := next
   946  				i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsAlphaNumeric)
   947  				// entity reference
   948  				if ok && i < limit && source[i] == ';' {
   949  					name := util.BytesToReadOnlyString(source[start:i])
   950  					entity, ok := util.LookUpHTML5EntityByName(name)
   951  					if ok {
   952  						d.RawWrite(writer, source[n:pos])
   953  						n = i + 1
   954  						d.RawWrite(writer, entity.Characters)
   955  						continue
   956  					}
   957  				}
   958  			}
   959  			i = next - 1
   960  		}
   961  		if c == '\\' {
   962  			escaped = true
   963  			continue
   964  		}
   965  		escaped = false
   966  	}
   967  	d.RawWrite(writer, source[n:])
   968  }
   969  
   970  // DefaultWriter is a default instance of the Writer.
   971  var DefaultWriter = NewWriter()
   972  
   973  var bDataImage = []byte("data:image/")
   974  var bPng = []byte("png;")
   975  var bGif = []byte("gif;")
   976  var bJpeg = []byte("jpeg;")
   977  var bWebp = []byte("webp;")
   978  var bSvg = []byte("svg+xml;")
   979  var bJs = []byte("javascript:")
   980  var bVb = []byte("vbscript:")
   981  var bFile = []byte("file:")
   982  var bData = []byte("data:")
   983  
   984  func hasPrefix(s, prefix []byte) bool {
   985  	return len(s) >= len(prefix) && bytes.Equal(bytes.ToLower(s[0:len(prefix)]), bytes.ToLower(prefix))
   986  }
   987  
   988  // IsDangerousURL returns true if the given url seems a potentially dangerous url,
   989  // otherwise false.
   990  func IsDangerousURL(url []byte) bool {
   991  	if hasPrefix(url, bDataImage) && len(url) >= 11 {
   992  		v := url[11:]
   993  		if hasPrefix(v, bPng) || hasPrefix(v, bGif) ||
   994  			hasPrefix(v, bJpeg) || hasPrefix(v, bWebp) ||
   995  			hasPrefix(v, bSvg) {
   996  			return false
   997  		}
   998  		return true
   999  	}
  1000  	return hasPrefix(url, bJs) || hasPrefix(url, bVb) ||
  1001  		hasPrefix(url, bFile) || hasPrefix(url, bData)
  1002  }
  1003  
  1004  func nodeToHTMLText(n ast.Node, source []byte) []byte {
  1005  	var buf bytes.Buffer
  1006  	for c := n.FirstChild(); c != nil; c = c.NextSibling() {
  1007  		if s, ok := c.(*ast.String); ok && s.IsCode() {
  1008  			buf.Write(s.Text(source))
  1009  		} else if !c.HasChildren() {
  1010  			buf.Write(util.EscapeHTML(c.Text(source)))
  1011  			if t, ok := c.(*ast.Text); ok && t.SoftLineBreak() {
  1012  				buf.WriteByte('\n')
  1013  			}
  1014  		} else {
  1015  			buf.Write(nodeToHTMLText(c, source))
  1016  		}
  1017  	}
  1018  	return buf.Bytes()
  1019  }
  1020  

View as plain text