...
1 package parser
2
3 import (
4 "bytes"
5 "regexp"
6
7 "github.com/yuin/goldmark/ast"
8 "github.com/yuin/goldmark/text"
9 "github.com/yuin/goldmark/util"
10 )
11
// rawHTMLParser is the inline parser for raw HTML: tags, comments,
// processing instructions, declarations and CDATA sections. It has no fields.
type rawHTMLParser struct{}

// defaultRawHTMLParser is the shared instance handed out by NewRawHTMLParser.
var defaultRawHTMLParser = &rawHTMLParser{}
16
17
18
19 func NewRawHTMLParser() InlineParser {
20 return defaultRawHTMLParser
21 }
22
23 func (s *rawHTMLParser) Trigger() []byte {
24 return []byte{'<'}
25 }
26
27 func (s *rawHTMLParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.Node {
28 line, _ := block.PeekLine()
29 if len(line) > 1 && util.IsAlphaNumeric(line[1]) {
30 return s.parseMultiLineRegexp(openTagRegexp, block, pc)
31 }
32 if len(line) > 2 && line[1] == '/' && util.IsAlphaNumeric(line[2]) {
33 return s.parseMultiLineRegexp(closeTagRegexp, block, pc)
34 }
35 if bytes.HasPrefix(line, openComment) {
36 return s.parseComment(block, pc)
37 }
38 if bytes.HasPrefix(line, openProcessingInstruction) {
39 return s.parseUntil(block, closeProcessingInstruction, pc)
40 }
41 if len(line) > 2 && line[1] == '!' && line[2] >= 'A' && line[2] <= 'Z' {
42 return s.parseUntil(block, closeDecl, pc)
43 }
44 if bytes.HasPrefix(line, openCDATA) {
45 return s.parseUntil(block, closeCDATA, pc)
46 }
47 return nil
48 }
49
// Regular expression fragments from which the tag matchers are assembled.
var (
	// tagnamePattern captures a tag name: an ASCII letter followed by any
	// number of ASCII letters, digits or hyphens.
	tagnamePattern = `([A-Za-z][A-Za-z0-9-]*)`

	// spaceOrOneNewline matches a space, a tab, or an optional line ending.
	spaceOrOneNewline = `(?:[ \t]|(?:\r\n|\n){0,1})`

	// attributePattern matches whitespace, an attribute name and an optional
	// value that is unquoted, single-quoted or double-quoted.
	attributePattern = `(?:[\r\n \t]+[a-zA-Z_:][a-zA-Z0-9:._-]*(?:[\r\n \t]*=[\r\n \t]*(?:[^\"'=<>` + "`" + `\x00-\x20]+|'[^']*'|"[^"]*"))?)`
)

// Compiled matchers, both anchored at the start of the input.
var (
	// openTagRegexp matches an open tag, including a self-closing "/>" form.
	openTagRegexp = regexp.MustCompile("^<" + tagnamePattern + attributePattern + `*` + spaceOrOneNewline + `*/?>`)

	// closeTagRegexp matches a closing tag such as "</a>".
	closeTagRegexp = regexp.MustCompile("^</" + tagnamePattern + spaceOrOneNewline + `*>`)
)
55
// Literal byte sequences that open or close the non-tag raw HTML constructs.
var (
	// Processing instruction: "<?" ... "?>".
	openProcessingInstruction = []byte("<?")

	closeProcessingInstruction = []byte("?>")

	// CDATA section: "<![CDATA[" ... "]]>".
	openCDATA = []byte("<![CDATA[")

	closeCDATA = []byte("]]>")

	// closeDecl ends a declaration.
	closeDecl = []byte(">")

	// emptyComment is the comment with no text at all.
	emptyComment = []byte("<!---->")

	// invalidComment1 is a comment whose text would start with ">".
	invalidComment1 = []byte("<!-->")

	// invalidComment2 is a comment whose text would start with "->".
	invalidComment2 = []byte("<!--->")

	// Comment delimiters.
	openComment = []byte("<!--")

	closeComment = []byte("-->")

	// doubleHyphen is the sequence searched for inside comment text.
	doubleHyphen = []byte("--")
)
67
68 func (s *rawHTMLParser) parseComment(block text.Reader, pc Context) ast.Node {
69 savedLine, savedSegment := block.Position()
70 node := ast.NewRawHTML()
71 line, segment := block.PeekLine()
72 if bytes.HasPrefix(line, emptyComment) {
73 node.Segments.Append(segment.WithStop(segment.Start + len(emptyComment)))
74 block.Advance(len(emptyComment))
75 return node
76 }
77 if bytes.HasPrefix(line, invalidComment1) || bytes.HasPrefix(line, invalidComment2) {
78 return nil
79 }
80 offset := len(openComment)
81 line = line[offset:]
82 for {
83 hindex := bytes.Index(line, doubleHyphen)
84 if hindex > -1 {
85 hindex += offset
86 }
87 index := bytes.Index(line, closeComment) + offset
88 if index > -1 && hindex == index {
89 if index == 0 || len(line) < 2 || line[index-offset-1] != '-' {
90 node.Segments.Append(segment.WithStop(segment.Start + index + len(closeComment)))
91 block.Advance(index + len(closeComment))
92 return node
93 }
94 }
95 if hindex > 0 {
96 break
97 }
98 node.Segments.Append(segment)
99 block.AdvanceLine()
100 line, segment = block.PeekLine()
101 offset = 0
102 if line == nil {
103 break
104 }
105 }
106 block.SetPosition(savedLine, savedSegment)
107 return nil
108 }
109
110 func (s *rawHTMLParser) parseUntil(block text.Reader, closer []byte, pc Context) ast.Node {
111 savedLine, savedSegment := block.Position()
112 node := ast.NewRawHTML()
113 for {
114 line, segment := block.PeekLine()
115 if line == nil {
116 break
117 }
118 index := bytes.Index(line, closer)
119 if index > -1 {
120 node.Segments.Append(segment.WithStop(segment.Start + index + len(closer)))
121 block.Advance(index + len(closer))
122 return node
123 }
124 node.Segments.Append(segment)
125 block.AdvanceLine()
126 }
127 block.SetPosition(savedLine, savedSegment)
128 return nil
129 }
130
131 func (s *rawHTMLParser) parseMultiLineRegexp(reg *regexp.Regexp, block text.Reader, pc Context) ast.Node {
132 sline, ssegment := block.Position()
133 if block.Match(reg) {
134 node := ast.NewRawHTML()
135 eline, esegment := block.Position()
136 block.SetPosition(sline, ssegment)
137 for {
138 line, segment := block.PeekLine()
139 if line == nil {
140 break
141 }
142 l, _ := block.Position()
143 start := segment.Start
144 if l == sline {
145 start = ssegment.Start
146 }
147 end := segment.Stop
148 if l == eline {
149 end = esegment.Start
150 }
151
152 node.Segments.Append(text.NewSegment(start, end))
153 if l == eline {
154 block.Advance(end - start)
155 break
156 }
157 block.AdvanceLine()
158 }
159 return node
160 }
161 return nil
162 }
163
View as plain text