1 package magic
2
3 import (
4 "bufio"
5 "bytes"
6 "strings"
7 "time"
8
9 "github.com/gabriel-vasile/mimetype/internal/charset"
10 "github.com/gabriel-vasile/mimetype/internal/json"
11 )
12
// Package-level detectors for text-based formats.
//
// Every value below is produced by a helper constructor (markup, xml,
// newXMLSig, ciPrefix, shebang, prefix) that is defined elsewhere in the
// package. phpPageF and phpScriptF are invoked in Php as
// f(raw []byte, limit uint32) returning a bool, so presumably all of these
// values share that shape — NOTE(review): confirm against the helpers.
// The exact matching rules (case folding, where a signature may appear, what
// the two newXMLSig arguments mean) live in those helpers, not here.
var (

	// HTML is the detector for HTML documents, built by markup from the
	// doctype and opening-tag signatures listed below.
	HTML = markup(
		[]byte("<!DOCTYPE HTML"),
		[]byte("<HTML"),
		[]byte("<HEAD"),
		[]byte("<SCRIPT"),
		[]byte("<IFRAME"),
		[]byte("<H1"),
		[]byte("<DIV"),
		[]byte("<FONT"),
		[]byte("<TABLE"),
		[]byte("<A"),
		[]byte("<STYLE"),
		[]byte("<TITLE"),
		[]byte("<B"),
		[]byte("<BODY"),
		[]byte("<BR"),
		[]byte("<P"),
	)

	// XML is the detector for XML documents, built by markup from the
	// XML declaration opener.
	XML = markup([]byte("<?XML"))

	// Owl2 is the detector for OWL 2 ontologies: an "Ontology" signature with
	// the W3C OWL namespace declaration.
	Owl2 = xml(newXMLSig("Ontology", `xmlns="http://www.w3.org/2002/07/owl#"`))

	// Rss is the detector for RSS feeds: an "rss" signature with no namespace
	// string.
	Rss = xml(newXMLSig("rss", ""))

	// Atom is the detector for Atom feeds: a "feed" signature with the Atom
	// namespace declaration.
	Atom = xml(newXMLSig("feed", `xmlns="http://www.w3.org/2005/Atom"`))

	// Kml is the detector for KML documents: a "kml" signature paired with
	// any of the OpenGIS 2.2 or Google Earth 2.0/2.1/2.2 namespace
	// declarations.
	Kml = xml(
		newXMLSig("kml", `xmlns="http://www.opengis.net/kml/2.2"`),
		newXMLSig("kml", `xmlns="http://www.opengis.net/kml/2.2"`),
		newXMLSig("kml", `xmlns="http://earth.google.com/kml/2.0"`),
		newXMLSig("kml", `xmlns="http://earth.google.com/kml/2.1"`),
		newXMLSig("kml", `xmlns="http://earth.google.com/kml/2.2"`),
	)

	// Xliff is the detector for XLIFF 1.2 documents.
	Xliff = xml(newXMLSig("xliff", `xmlns="urn:oasis:names:tc:xliff:document:1.2"`))

	// Collada is the detector for COLLADA documents.
	Collada = xml(newXMLSig("COLLADA", `xmlns="http://www.collada.org/2005/11/COLLADASchema"`))

	// Gml is the detector for GML documents. The first newXMLSig argument is
	// empty here; only the gml namespace declaration (three known variants)
	// is supplied.
	Gml = xml(
		newXMLSig("", `xmlns:gml="http://www.opengis.net/gml"`),
		newXMLSig("", `xmlns:gml="http://www.opengis.net/gml/3.2"`),
		newXMLSig("", `xmlns:gml="http://www.opengis.net/gml/3.3/exr"`),
	)

	// Gpx is the detector for GPX 1.1 documents.
	Gpx = xml(newXMLSig("gpx", `xmlns="http://www.topografix.com/GPX/1/1"`))

	// Tcx is the detector for Garmin Training Center Database (v2) documents.
	Tcx = xml(newXMLSig("TrainingCenterDatabase", `xmlns="http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2"`))

	// X3d is the detector for X3D documents: an "X3D" signature with the
	// xsd namespace declaration shown.
	X3d = xml(newXMLSig("X3D", `xmlns:xsd="http://www.w3.org/2001/XMLSchema-instance"`))

	// Amf is the detector for AMF documents: an "amf" signature with no
	// namespace string.
	Amf = xml(newXMLSig("amf", ""))

	// Threemf is the detector for 3MF model documents.
	Threemf = xml(newXMLSig("model", `xmlns="http://schemas.microsoft.com/3dmanufacturing/core/2015/02"`))

	// Xfdf is the detector for Adobe XFDF documents.
	Xfdf = xml(newXMLSig("xfdf", `xmlns="http://ns.adobe.com/xfdf/"`))

	// VCard is the detector for vCard files; the opening line is listed with
	// both LF and CRLF line endings.
	VCard = ciPrefix([]byte("BEGIN:VCARD\n"), []byte("BEGIN:VCARD\r\n"))

	// ICalendar is the detector for iCalendar files; the opening line is
	// listed with both LF and CRLF line endings.
	ICalendar = ciPrefix([]byte("BEGIN:VCALENDAR\n"), []byte("BEGIN:VCALENDAR\r\n"))
	// phpPageF holds the PHP open-tag signatures ("<?PHP" and the short tag
	// followed by LF, CR or a space). It is consulted first by Php.
	phpPageF = ciPrefix(
		[]byte("<?PHP"),
		[]byte("<?\n"),
		[]byte("<?\r"),
		[]byte("<? "),
	)
	// phpScriptF holds the interpreter paths for PHP command-line scripts. It
	// is the fallback used by Php when phpPageF does not match.
	phpScriptF = shebang(
		[]byte("/usr/local/bin/php"),
		[]byte("/usr/bin/php"),
		[]byte("/usr/bin/env php"),
	)

	// Js is the detector for Node.js scripts, built from the node/nodejs
	// interpreter paths below.
	Js = shebang(
		[]byte("/bin/node"),
		[]byte("/usr/bin/node"),
		[]byte("/bin/nodejs"),
		[]byte("/usr/bin/nodejs"),
		[]byte("/usr/bin/env node"),
		[]byte("/usr/bin/env nodejs"),
	)

	// Lua is the detector for Lua scripts, built from the interpreter paths
	// below.
	Lua = shebang(
		[]byte("/usr/bin/lua"),
		[]byte("/usr/local/bin/lua"),
		[]byte("/usr/bin/env lua"),
	)

	// Perl is the detector for Perl scripts, built from the interpreter paths
	// below.
	Perl = shebang(
		[]byte("/usr/bin/perl"),
		[]byte("/usr/bin/env perl"),
	)

	// Python is the detector for Python scripts, built from the interpreter
	// paths below.
	Python = shebang(
		[]byte("/usr/bin/python"),
		[]byte("/usr/local/bin/python"),
		[]byte("/usr/bin/env python"),
	)

	// Tcl is the detector for Tcl scripts; tcl, tclsh and wish interpreter
	// paths are all listed.
	Tcl = shebang(
		[]byte("/usr/bin/tcl"),
		[]byte("/usr/local/bin/tcl"),
		[]byte("/usr/bin/env tcl"),
		[]byte("/usr/bin/tclsh"),
		[]byte("/usr/local/bin/tclsh"),
		[]byte("/usr/bin/env tclsh"),
		[]byte("/usr/bin/wish"),
		[]byte("/usr/local/bin/wish"),
		[]byte("/usr/bin/env wish"),
	)

	// Rtf is the detector for Rich Text Format files, built by prefix from
	// the "{\rtf1" header.
	Rtf = prefix([]byte("{\\rtf1"))
)
126
127
128
129
130
131 func Text(raw []byte, limit uint32) bool {
132
133 if cset := charset.FromBOM(raw); cset != "" {
134 return true
135 }
136
137 for _, b := range raw {
138 if b <= 0x08 ||
139 b == 0x0B ||
140 0x0E <= b && b <= 0x1A ||
141 0x1C <= b && b <= 0x1F {
142 return false
143 }
144 }
145 return true
146 }
147
148
149 func Php(raw []byte, limit uint32) bool {
150 if res := phpPageF(raw, limit); res {
151 return res
152 }
153 return phpScriptF(raw, limit)
154 }
155
156
157 func JSON(raw []byte, limit uint32) bool {
158 raw = trimLWS(raw)
159
160
161 if len(raw) < 2 || (raw[0] != '[' && raw[0] != '{') {
162 return false
163 }
164 parsed, err := json.Scan(raw)
165
166 if limit == 0 || len(raw) < int(limit) {
167 return err == nil
168 }
169
170
171 return parsed == len(raw) && len(raw) > 0
172 }
173
174
175
176
177
178
179 func GeoJSON(raw []byte, limit uint32) bool {
180 raw = trimLWS(raw)
181 if len(raw) == 0 {
182 return false
183 }
184
185 if raw[0] != '{' {
186 return false
187 }
188
189 s := []byte(`"type"`)
190 si, sl := bytes.Index(raw, s), len(s)
191
192 if si == -1 {
193 return false
194 }
195
196
197
198 if si+sl == len(raw) {
199 return false
200 }
201
202 raw = raw[si+sl:]
203
204 raw = trimLWS(raw)
205
206 if len(raw) == 0 || raw[0] != ':' {
207 return false
208 }
209
210 raw = trimLWS(raw[1:])
211
212 geoJSONTypes := [][]byte{
213 []byte(`"Feature"`),
214 []byte(`"FeatureCollection"`),
215 []byte(`"Point"`),
216 []byte(`"LineString"`),
217 []byte(`"Polygon"`),
218 []byte(`"MultiPoint"`),
219 []byte(`"MultiLineString"`),
220 []byte(`"MultiPolygon"`),
221 []byte(`"GeometryCollection"`),
222 }
223 for _, t := range geoJSONTypes {
224 if bytes.HasPrefix(raw, t) {
225 return true
226 }
227 }
228
229 return false
230 }
231
232
233
234
235 func NdJSON(raw []byte, limit uint32) bool {
236 lCount, hasObjOrArr := 0, false
237 sc := bufio.NewScanner(dropLastLine(raw, limit))
238 for sc.Scan() {
239 l := sc.Bytes()
240
241 if l = trimRWS(trimLWS(l)); len(l) == 0 {
242 continue
243 }
244 _, err := json.Scan(l)
245 if err != nil {
246 return false
247 }
248 if l[0] == '[' || l[0] == '{' {
249 hasObjOrArr = true
250 }
251 lCount++
252 }
253
254 return lCount > 1 && hasObjOrArr
255 }
256
257
258
259 func HAR(raw []byte, limit uint32) bool {
260 s := []byte(`"log"`)
261 si, sl := bytes.Index(raw, s), len(s)
262
263 if si == -1 {
264 return false
265 }
266
267
268
269 if si+sl == len(raw) {
270 return false
271 }
272
273 raw = raw[si+sl:]
274
275 raw = trimLWS(raw)
276
277 if len(raw) == 0 || raw[0] != ':' {
278 return false
279 }
280
281 raw = trimLWS(raw[1:])
282
283 harJSONTypes := [][]byte{
284 []byte(`"version"`),
285 []byte(`"creator"`),
286 []byte(`"entries"`),
287 }
288 for _, t := range harJSONTypes {
289 si := bytes.Index(raw, t)
290 if si > -1 {
291 return true
292 }
293 }
294
295 return false
296 }
297
298
// Svg reports whether raw contains the byte sequence "<svg" at any position.
// The comparison is case-sensitive. limit is not used.
func Svg(raw []byte, limit uint32) bool {
	const openTag = "<svg"
	return bytes.Index(raw, []byte(openTag)) >= 0
}
302
303
// Srt reports whether in looks like a SubRip subtitle file.
//
// Only the first cue is examined, line by line:
//   - line 1 must be exactly "1";
//   - line 2 must be a 29-byte time range such as
//     "00:02:16,612 --> 00:02:19,376", using a comma before the milliseconds,
//     with a start that is not after the end;
//   - line 3 must exist and must not be empty.
//
// The second parameter is ignored.
func Srt(in []byte, _ uint32) bool {
	lines := bufio.NewScanner(bytes.NewReader(in))

	// Cue counter.
	if !lines.Scan() || lines.Text() != "1" {
		return false
	}

	// Cue timing.
	if !lines.Scan() {
		return false
	}
	timing := lines.Text()

	// Two 12-byte timestamps joined by the 5-byte " --> " give 29 bytes.
	// A period is rejected up front because commas are swapped for periods
	// below purely so that time.Parse accepts the fractional seconds.
	if len(timing) != 29 || strings.Contains(timing, ".") {
		return false
	}

	bounds := strings.Split(strings.ReplaceAll(timing, ",", "."), " --> ")
	if len(bounds) != 2 {
		return false
	}

	var stamps [2]time.Time
	for i, b := range bounds {
		parsed, err := time.Parse("15:04:05.000", b)
		if err != nil {
			return false
		}
		stamps[i] = parsed
	}
	if stamps[0].After(stamps[1]) {
		return false
	}

	// Subtitle text: there has to be a non-empty line after the timing.
	if !lines.Scan() {
		return false
	}
	return len(lines.Bytes()) != 0
}
351
352
353
// Vtt reports whether raw starts with the WebVTT file signature.
//
// An optional UTF-8 byte order mark (EF BB BF) may come first. The ASCII
// string "WEBVTT" must follow, and it must either end the input or be
// followed by a line feed, carriage return, space or tab. limit is not used.
func Vtt(raw []byte, limit uint32) bool {
	// Drop at most one leading byte order mark.
	rest := bytes.TrimPrefix(raw, []byte{0xEF, 0xBB, 0xBF})

	sig := []byte("WEBVTT")
	if !bytes.HasPrefix(rest, sig) {
		return false
	}

	// Signature alone, with nothing after it.
	if len(rest) == len(sig) {
		return true
	}

	// Otherwise the signature has to be terminated by whitespace.
	switch rest[len(sig)] {
	case '\n', '\r', ' ', '\t':
		return true
	}
	return false
}
376
View as plain text