...

Source file src/github.com/yuin/goldmark/extension/typographer.go

Documentation: github.com/yuin/goldmark/extension

     1  package extension
     2  
     3  import (
     4  	"unicode"
     5  
     6  	"github.com/yuin/goldmark"
     7  	gast "github.com/yuin/goldmark/ast"
     8  	"github.com/yuin/goldmark/parser"
     9  	"github.com/yuin/goldmark/text"
    10  	"github.com/yuin/goldmark/util"
    11  )
    12  
// uncloseCounterKey is the parser.Context key under which getUnclosedCounter
// stores and looks up the unclosedCounter.
var uncloseCounterKey = parser.NewContextKey()
    14  
    15  type unclosedCounter struct {
    16  	Single int
    17  	Double int
    18  }
    19  
    20  func (u *unclosedCounter) Reset() {
    21  	u.Single = 0
    22  	u.Double = 0
    23  }
    24  
    25  func getUnclosedCounter(pc parser.Context) *unclosedCounter {
    26  	v := pc.Get(uncloseCounterKey)
    27  	if v == nil {
    28  		v = &unclosedCounter{}
    29  		pc.Set(uncloseCounterKey, v)
    30  	}
    31  	return v.(*unclosedCounter)
    32  }
    33  
    34  // TypographicPunctuation is a key of the punctuations that can be replaced with
    35  // typographic entities.
    36  type TypographicPunctuation int
    37  
    38  const (
    39  	// LeftSingleQuote is ' .
    40  	LeftSingleQuote TypographicPunctuation = iota + 1
    41  	// RightSingleQuote is ' .
    42  	RightSingleQuote
    43  	// LeftDoubleQuote is " .
    44  	LeftDoubleQuote
    45  	// RightDoubleQuote is " .
    46  	RightDoubleQuote
    47  	// EnDash is -- .
    48  	EnDash
    49  	// EmDash is --- .
    50  	EmDash
    51  	// Ellipsis is ... .
    52  	Ellipsis
    53  	// LeftAngleQuote is << .
    54  	LeftAngleQuote
    55  	// RightAngleQuote is >> .
    56  	RightAngleQuote
    57  	// Apostrophe is ' .
    58  	Apostrophe
    59  
    60  	typographicPunctuationMax
    61  )
    62  
// A TypographerConfig struct is a data structure that holds configuration of the
// Typographer extension.
type TypographerConfig struct {
	// Substitutions holds the replacement text for each punctuation, indexed
	// by TypographicPunctuation. The parser skips a substitution whose entry
	// is nil.
	Substitutions [][]byte
}
    68  
    69  func newDefaultSubstitutions() [][]byte {
    70  	replacements := make([][]byte, typographicPunctuationMax)
    71  	replacements[LeftSingleQuote] = []byte("&lsquo;")
    72  	replacements[RightSingleQuote] = []byte("&rsquo;")
    73  	replacements[LeftDoubleQuote] = []byte("&ldquo;")
    74  	replacements[RightDoubleQuote] = []byte("&rdquo;")
    75  	replacements[EnDash] = []byte("&ndash;")
    76  	replacements[EmDash] = []byte("&mdash;")
    77  	replacements[Ellipsis] = []byte("&hellip;")
    78  	replacements[LeftAngleQuote] = []byte("&laquo;")
    79  	replacements[RightAngleQuote] = []byte("&raquo;")
    80  	replacements[Apostrophe] = []byte("&rsquo;")
    81  
    82  	return replacements
    83  }
    84  
    85  // SetOption implements SetOptioner.
    86  func (b *TypographerConfig) SetOption(name parser.OptionName, value interface{}) {
    87  	switch name {
    88  	case optTypographicSubstitutions:
    89  		b.Substitutions = value.([][]byte)
    90  	}
    91  }
    92  
// A TypographerOption interface sets options for the TypographerParser.
type TypographerOption interface {
	// parser.Option is embedded so that a TypographerOption can also be used
	// as a parser option.
	parser.Option
	// SetTypographerOption applies this option to the given TypographerConfig.
	SetTypographerOption(*TypographerConfig)
}
    98  
// optTypographicSubstitutions is the parser option name under which the
// substitution table is stored.
const optTypographicSubstitutions parser.OptionName = "TypographicSubstitutions"

// TypographicSubstitutions is a list of the substitutions for the Typographer extension.
// It maps a punctuation key to its replacement text.
type TypographicSubstitutions map[TypographicPunctuation][]byte
   103  
// withTypographicSubstitutions is the option value returned by
// WithTypographicSubstitutions; it carries a complete substitution table.
type withTypographicSubstitutions struct {
	// value is the substitution table, indexed by TypographicPunctuation.
	value [][]byte
}

// SetParserOption stores the substitution table in the parser configuration
// under optTypographicSubstitutions.
func (o *withTypographicSubstitutions) SetParserOption(c *parser.Config) {
	c.Options[optTypographicSubstitutions] = o.value
}

// SetTypographerOption installs the substitution table on the given
// TypographerConfig, replacing whatever table it held before.
func (o *withTypographicSubstitutions) SetTypographerOption(p *TypographerConfig) {
	p.Substitutions = o.value
}
   115  
   116  // WithTypographicSubstitutions is a functional otpion that specify replacement text
   117  // for punctuations.
   118  func WithTypographicSubstitutions(values map[TypographicPunctuation][]byte) TypographerOption {
   119  	replacements := newDefaultSubstitutions()
   120  	for k, v := range values {
   121  		replacements[k] = v
   122  	}
   123  
   124  	return &withTypographicSubstitutions{replacements}
   125  }
   126  
   127  type typographerDelimiterProcessor struct {
   128  }
   129  
   130  func (p *typographerDelimiterProcessor) IsDelimiter(b byte) bool {
   131  	return b == '\'' || b == '"'
   132  }
   133  
// CanOpenCloser reports whether opener and closer are the same quote
// character.
func (p *typographerDelimiterProcessor) CanOpenCloser(opener, closer *parser.Delimiter) bool {
	return opener.Char == closer.Char
}

// OnMatch always returns nil: this processor never builds a node for a
// matched delimiter pair.
func (p *typographerDelimiterProcessor) OnMatch(consumes int) gast.Node {
	return nil
}

// defaultTypographerDelimiterProcessor is the single shared processor
// instance passed to parser.ScanDelimiter by the typographer parser.
var defaultTypographerDelimiterProcessor = &typographerDelimiterProcessor{}
   143  
   144  type typographerParser struct {
   145  	TypographerConfig
   146  }
   147  
   148  // NewTypographerParser return a new InlineParser that parses
   149  // typographer expressions.
   150  func NewTypographerParser(opts ...TypographerOption) parser.InlineParser {
   151  	p := &typographerParser{
   152  		TypographerConfig: TypographerConfig{
   153  			Substitutions: newDefaultSubstitutions(),
   154  		},
   155  	}
   156  	for _, o := range opts {
   157  		o.SetTypographerOption(&p.TypographerConfig)
   158  	}
   159  	return p
   160  }
   161  
   162  func (s *typographerParser) Trigger() []byte {
   163  	return []byte{'\'', '"', '-', '.', ',', '<', '>', '*', '['}
   164  }
   165  
   166  func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser.Context) gast.Node {
   167  	line, _ := block.PeekLine()
   168  	c := line[0]
   169  	if len(line) > 2 {
   170  		if c == '-' {
   171  			if s.Substitutions[EmDash] != nil && line[1] == '-' && line[2] == '-' { // ---
   172  				node := gast.NewString(s.Substitutions[EmDash])
   173  				node.SetCode(true)
   174  				block.Advance(3)
   175  				return node
   176  			}
   177  		} else if c == '.' {
   178  			if s.Substitutions[Ellipsis] != nil && line[1] == '.' && line[2] == '.' { // ...
   179  				node := gast.NewString(s.Substitutions[Ellipsis])
   180  				node.SetCode(true)
   181  				block.Advance(3)
   182  				return node
   183  			}
   184  			return nil
   185  		}
   186  	}
   187  	if len(line) > 1 {
   188  		if c == '<' {
   189  			if s.Substitutions[LeftAngleQuote] != nil && line[1] == '<' { // <<
   190  				node := gast.NewString(s.Substitutions[LeftAngleQuote])
   191  				node.SetCode(true)
   192  				block.Advance(2)
   193  				return node
   194  			}
   195  			return nil
   196  		} else if c == '>' {
   197  			if s.Substitutions[RightAngleQuote] != nil && line[1] == '>' { // >>
   198  				node := gast.NewString(s.Substitutions[RightAngleQuote])
   199  				node.SetCode(true)
   200  				block.Advance(2)
   201  				return node
   202  			}
   203  			return nil
   204  		} else if s.Substitutions[EnDash] != nil && c == '-' && line[1] == '-' { // --
   205  			node := gast.NewString(s.Substitutions[EnDash])
   206  			node.SetCode(true)
   207  			block.Advance(2)
   208  			return node
   209  		}
   210  	}
   211  	if c == '\'' || c == '"' {
   212  		before := block.PrecendingCharacter()
   213  		d := parser.ScanDelimiter(line, before, 1, defaultTypographerDelimiterProcessor)
   214  		if d == nil {
   215  			return nil
   216  		}
   217  		counter := getUnclosedCounter(pc)
   218  		if c == '\'' {
   219  			if s.Substitutions[Apostrophe] != nil {
   220  				// Handle decade abbrevations such as '90s
   221  				if d.CanOpen && !d.CanClose && len(line) > 3 &&
   222  					util.IsNumeric(line[1]) && util.IsNumeric(line[2]) && line[3] == 's' {
   223  					after := rune(' ')
   224  					if len(line) > 4 {
   225  						after = util.ToRune(line, 4)
   226  					}
   227  					if len(line) == 3 || util.IsSpaceRune(after) || util.IsPunctRune(after) {
   228  						node := gast.NewString(s.Substitutions[Apostrophe])
   229  						node.SetCode(true)
   230  						block.Advance(1)
   231  						return node
   232  					}
   233  				}
   234  				// special cases: 'twas, 'em, 'net
   235  				if len(line) > 1 && (unicode.IsPunct(before) || unicode.IsSpace(before)) &&
   236  					(line[1] == 't' || line[1] == 'e' || line[1] == 'n' || line[1] == 'l') {
   237  					node := gast.NewString(s.Substitutions[Apostrophe])
   238  					node.SetCode(true)
   239  					block.Advance(1)
   240  					return node
   241  				}
   242  				// Convert normal apostrophes. This is probably more flexible than necessary but
   243  				// converts any apostrophe in between two alphanumerics.
   244  				if len(line) > 1 && (unicode.IsDigit(before) || unicode.IsLetter(before)) &&
   245  					(unicode.IsLetter(util.ToRune(line, 1))) {
   246  					node := gast.NewString(s.Substitutions[Apostrophe])
   247  					node.SetCode(true)
   248  					block.Advance(1)
   249  					return node
   250  				}
   251  			}
   252  			if s.Substitutions[LeftSingleQuote] != nil && d.CanOpen && !d.CanClose {
   253  				nt := LeftSingleQuote
   254  				// special cases: Alice's, I'm, Don't, You'd
   255  				if len(line) > 1 && (line[1] == 's' || line[1] == 'm' || line[1] == 't' || line[1] == 'd') &&
   256  					(len(line) < 3 || util.IsPunct(line[2]) || util.IsSpace(line[2])) {
   257  					nt = RightSingleQuote
   258  				}
   259  				// special cases: I've, I'll, You're
   260  				if len(line) > 2 && ((line[1] == 'v' && line[2] == 'e') ||
   261  					(line[1] == 'l' && line[2] == 'l') || (line[1] == 'r' && line[2] == 'e')) &&
   262  					(len(line) < 4 || util.IsPunct(line[3]) || util.IsSpace(line[3])) {
   263  					nt = RightSingleQuote
   264  				}
   265  				if nt == LeftSingleQuote {
   266  					counter.Single++
   267  				}
   268  
   269  				node := gast.NewString(s.Substitutions[nt])
   270  				node.SetCode(true)
   271  				block.Advance(1)
   272  				return node
   273  			}
   274  			if s.Substitutions[RightSingleQuote] != nil {
   275  				// plural possesive and abbreviations: Smiths', doin'
   276  				if len(line) > 1 && unicode.IsSpace(util.ToRune(line, 0)) || unicode.IsPunct(util.ToRune(line, 0)) &&
   277  					(len(line) > 2 && !unicode.IsDigit(util.ToRune(line, 1))) {
   278  					node := gast.NewString(s.Substitutions[RightSingleQuote])
   279  					node.SetCode(true)
   280  					block.Advance(1)
   281  					return node
   282  				}
   283  			}
   284  			if s.Substitutions[RightSingleQuote] != nil && counter.Single > 0 {
   285  				isClose := d.CanClose && !d.CanOpen
   286  				maybeClose := d.CanClose && d.CanOpen && len(line) > 1 && unicode.IsPunct(util.ToRune(line, 1)) &&
   287  					(len(line) == 2 || (len(line) > 2 && util.IsPunct(line[2]) || util.IsSpace(line[2])))
   288  				if isClose || maybeClose {
   289  					node := gast.NewString(s.Substitutions[RightSingleQuote])
   290  					node.SetCode(true)
   291  					block.Advance(1)
   292  					counter.Single--
   293  					return node
   294  				}
   295  			}
   296  		}
   297  		if c == '"' {
   298  			if s.Substitutions[LeftDoubleQuote] != nil && d.CanOpen && !d.CanClose {
   299  				node := gast.NewString(s.Substitutions[LeftDoubleQuote])
   300  				node.SetCode(true)
   301  				block.Advance(1)
   302  				counter.Double++
   303  				return node
   304  			}
   305  			if s.Substitutions[RightDoubleQuote] != nil && counter.Double > 0 {
   306  				isClose := d.CanClose && !d.CanOpen
   307  				maybeClose := d.CanClose && d.CanOpen && len(line) > 1 && (unicode.IsPunct(util.ToRune(line, 1))) &&
   308  					(len(line) == 2 || (len(line) > 2 && util.IsPunct(line[2]) || util.IsSpace(line[2])))
   309  				if isClose || maybeClose {
   310  					// special case: "Monitor 21""
   311  					if len(line) > 1 && line[1] == '"' && unicode.IsDigit(before) {
   312  						return nil
   313  					}
   314  					node := gast.NewString(s.Substitutions[RightDoubleQuote])
   315  					node.SetCode(true)
   316  					block.Advance(1)
   317  					counter.Double--
   318  					return node
   319  				}
   320  			}
   321  		}
   322  	}
   323  	return nil
   324  }
   325  
   326  func (s *typographerParser) CloseBlock(parent gast.Node, pc parser.Context) {
   327  	getUnclosedCounter(pc).Reset()
   328  }
   329  
// typographer is the extension type that registers the typographer inline
// parser on a goldmark.Markdown.
type typographer struct {
	// options are passed to NewTypographerParser when the extension is applied.
	options []TypographerOption
}

// Typographer is an extension that replaces punctuations with typographic entities.
// It carries no options, so the default substitutions are used.
var Typographer = &typographer{}
   336  
   337  // NewTypographer returns a new Extender that replaces punctuations with typographic entities.
   338  func NewTypographer(opts ...TypographerOption) goldmark.Extender {
   339  	return &typographer{
   340  		options: opts,
   341  	}
   342  }
   343  
   344  func (e *typographer) Extend(m goldmark.Markdown) {
   345  	m.Parser().AddOptions(parser.WithInlineParsers(
   346  		util.Prioritized(NewTypographerParser(e.options...), 9999),
   347  	))
   348  }
   349  

View as plain text