1
2
3
4
5
6
7 package bson
8
9 import (
10 "bytes"
11 "encoding/hex"
12 "encoding/json"
13 "fmt"
14 "math"
15 "os"
16 "path"
17 "reflect"
18 "strconv"
19 "strings"
20 "testing"
21 "unicode"
22 "unicode/utf8"
23
24 "github.com/google/go-cmp/cmp"
25 "go.mongodb.org/mongo-driver/bson/primitive"
26 "go.mongodb.org/mongo-driver/internal/assert"
27 "go.mongodb.org/mongo-driver/internal/require"
28 )
29
// testCase mirrors the top-level JSON object of a single BSON corpus file.
type testCase struct {
	// Description is decoded from the "description" key.
	Description string `json:"description"`
	// BsonType is decoded from "bson_type"; the tests compare it against
	// type-tag strings such as "0x01", "0x02" and "0x13".
	BsonType string `json:"bson_type"`
	// TestKey is decoded from "test_key"; it is nil when the key is absent.
	// The double ("0x01") normalization helpers use it as the document key.
	TestKey *string `json:"test_key"`
	// Valid lists the round-trip cases from the "valid" array.
	Valid []validityTestCase `json:"valid"`
	// DecodeErrors lists BSON inputs from "decodeErrors" that must fail to decode.
	DecodeErrors []decodeErrorTestCase `json:"decodeErrors"`
	// ParseErrors lists string inputs from "parseErrors" that must fail to parse.
	ParseErrors []parseErrorTestCase `json:"parseErrors"`
	// Deprecated is decoded from "deprecated"; nil when the key is absent.
	Deprecated *bool `json:"deprecated"`
}
39
// validityTestCase is one entry of a corpus file's "valid" array. Pointer
// fields are nil when the corresponding JSON key is absent.
type validityTestCase struct {
	// Description names the case and is used as the subtest name.
	Description string `json:"description"`
	// CanonicalBson is the hex-encoded canonical BSON document.
	CanonicalBson string `json:"canonical_bson"`
	// CanonicalExtJSON is the canonical extended JSON form of the document.
	CanonicalExtJSON string `json:"canonical_extjson"`
	// RelaxedExtJSON is the optional relaxed extended JSON form.
	RelaxedExtJSON *string `json:"relaxed_extjson"`
	// DegenerateBSON is an optional hex-encoded BSON document that must
	// re-encode to the canonical BSON.
	DegenerateBSON *string `json:"degenerate_bson"`
	// DegenerateExtJSON is an optional extended JSON form that must
	// re-encode to the canonical extended JSON.
	DegenerateExtJSON *string `json:"degenerate_extjson"`
	// ConvertedBSON is decoded from "converted_bson".
	ConvertedBSON *string `json:"converted_bson"`
	// ConvertedExtJSON is decoded from "converted_extjson"; when present it
	// is used as an additional fuzz seed.
	ConvertedExtJSON *string `json:"converted_extjson"`
	// Lossy, when set to true, makes the tests skip comparing BSON produced
	// from extended JSON against the canonical BSON.
	Lossy *bool `json:"lossy"`
}
51
// decodeErrorTestCase is one entry of a corpus file's "decodeErrors" array.
type decodeErrorTestCase struct {
	// Description names the case and is used as the subtest name.
	Description string `json:"description"`
	// Bson is the hex-encoded BSON input that is expected to fail decoding.
	Bson string `json:"bson"`
}
56
// parseErrorTestCase is one entry of a corpus file's "parseErrors" array.
type parseErrorTestCase struct {
	// Description names the case and is used as the subtest name.
	Description string `json:"description"`
	// String is the textual input that is expected to fail parsing.
	String string `json:"string"`
}
61
// dataDir is the directory, relative to this package, that is scanned for
// BSON corpus ".json" files.
const dataDir = "../testdata/bson-corpus/"
63
// findJSONFilesInDir returns the base names of the regular entries in dir
// whose extension is ".json". Sub-directories are skipped. The result is an
// empty, non-nil slice when nothing matches; a directory read failure is
// returned as-is with a nil slice.
func findJSONFilesInDir(dir string) ([]string, error) {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return nil, err
	}

	names := []string{}
	for i := range entries {
		if e := entries[i]; !e.IsDir() && path.Ext(e.Name()) == ".json" {
			names = append(names, e.Name())
		}
	}

	return names, nil
}
82
83
84 func seedExtJSON(f *testing.F, extJSON string, extJSONType string, desc string) {
85 jbytes, err := jsonToBytes(extJSON, extJSONType, desc)
86 if err != nil {
87 f.Fatalf("failed to convert JSON to bytes: %v", err)
88 }
89
90 f.Add(jbytes)
91 }
92
93
94
95 func seedTestCase(f *testing.F, tcase *testCase) {
96 for _, vtc := range tcase.Valid {
97 seedExtJSON(f, vtc.CanonicalExtJSON, "canonical", vtc.Description)
98
99
100 if vtc.RelaxedExtJSON != nil {
101 seedExtJSON(f, *vtc.RelaxedExtJSON, "relaxed", vtc.Description)
102 }
103
104
105 if vtc.DegenerateExtJSON != nil {
106 seedExtJSON(f, *vtc.DegenerateExtJSON, "degenerate", vtc.Description)
107 }
108
109
110 if vtc.ConvertedExtJSON != nil {
111 seedExtJSON(f, *vtc.ConvertedExtJSON, "converted", vtc.Description)
112 }
113 }
114 }
115
116
117
118 func seedBSONCorpus(f *testing.F) {
119 fileNames, err := findJSONFilesInDir(dataDir)
120 if err != nil {
121 f.Fatalf("failed to find JSON files in directory %q: %v", dataDir, err)
122 }
123
124 for _, fileName := range fileNames {
125 filePath := path.Join(dataDir, fileName)
126
127 file, err := os.Open(filePath)
128 if err != nil {
129 f.Fatalf("failed to open file %q: %v", filePath, err)
130 }
131
132 var tcase testCase
133 if err := json.NewDecoder(file).Decode(&tcase); err != nil {
134 f.Fatal(err)
135 }
136
137 seedTestCase(f, &tcase)
138 }
139 }
140
// needsEscapedUnicode reports whether bsonType is one of the type tags
// ("0x02", "0x0D", "0x0E", "0x0F") whose expected JSON must have its \uXXXX
// escapes rewritten by unescapeUnicode.
func needsEscapedUnicode(bsonType string) bool {
	switch bsonType {
	case "0x02", "0x0D", "0x0E", "0x0F":
		return true
	default:
		return false
	}
}

// unescapeUnicode rewrites the \uXXXX escape sequences in s.
//
// When needsEscapedUnicode(bsonType) is false, s is returned unchanged.
// Otherwise each six-byte \uXXXX sequence is decoded: runes below ' ' are
// re-emitted as a lowercase \uXXXX escape and all other runes are written
// literally. A backslash that is not followed by 'u' (including a backslash
// at the very end of s) is copied through on its own. Bytes above the ASCII
// range are decoded as UTF-8 and copied through.
//
// It returns "" when an escape cannot be decoded, which includes a \u escape
// that is cut short by the end of s.
func unescapeUnicode(s, bsonType string) string {
	if !needsEscapedUnicode(bsonType) {
		return s
	}

	var b strings.Builder
	b.Grow(len(s))

	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == '\\':
			// Bounds-check before peeking: a trailing backslash used to
			// index past the end of s.
			if i+1 >= len(s) || s[i+1] != 'u' {
				b.WriteByte(c)
				continue
			}
			// A truncated escape is malformed, like any other escape that
			// fails to decode below.
			if i+6 > len(s) {
				return ""
			}

			us := s[i : i+6]
			// Quote doubles the backslash; undoing that for the leading
			// `\\u` lets Unquote interpret the sequence as an escape.
			u, err := strconv.Unquote(strings.Replace(strconv.Quote(us), `\\u`, `\u`, 1))
			if err != nil {
				return ""
			}
			for _, r := range u {
				if r < ' ' {
					fmt.Fprintf(&b, `\u%04x`, r)
				} else {
					b.WriteRune(r)
				}
			}
			// Skip the remaining five bytes of the escape; the loop's own
			// increment consumes the sixth.
			i += 5
		case c > unicode.MaxASCII:
			r, size := utf8.DecodeRuneInString(s[i:])
			b.WriteRune(r)
			i += size - 1
		default:
			b.WriteByte(c)
		}
	}

	return b.String()
}
187
// formatDouble renders f the way the corpus tests expect a double to appear:
// "Infinity", "-Infinity" and "NaN" for the non-finite values, otherwise the
// shortest 'G' formatting with ".0" appended when the result contains neither
// an exponent marker nor a decimal point.
func formatDouble(f float64) string {
	switch {
	case math.IsInf(f, 1):
		return "Infinity"
	case math.IsInf(f, -1):
		return "-Infinity"
	case math.IsNaN(f):
		return "NaN"
	}

	out := strconv.FormatFloat(f, 'G', -1, 64)
	if strings.ContainsAny(out, "E.") {
		return out
	}
	return out + ".0"
}
207
208 func normalizeCanonicalDouble(t *testing.T, key string, cEJ string) string {
209
210 cEJMap := make(map[string]map[string]string)
211 err := json.Unmarshal([]byte(cEJ), &cEJMap)
212 require.NoError(t, err)
213
214
215 expectedString := cEJMap[key]["$numberDouble"]
216 expectedFloat, err := strconv.ParseFloat(expectedString, 64)
217 require.NoError(t, err)
218
219
220 return fmt.Sprintf(`{"%s":{"$numberDouble":"%s"}}`, key, formatDouble(expectedFloat))
221 }
222
223 func normalizeRelaxedDouble(t *testing.T, key string, rEJ string) string {
224
225 rEJMap := make(map[string]float64)
226 err := json.Unmarshal([]byte(rEJ), &rEJMap)
227 if err != nil {
228 return normalizeCanonicalDouble(t, key, rEJ)
229 }
230
231
232 expectedFloat := rEJMap[key]
233
234
235 return fmt.Sprintf(`{"%s":%s}`, key, formatDouble(expectedFloat))
236 }
237
238
239 func bsonToNative(t *testing.T, b []byte, bType, testDesc string) D {
240 var doc D
241 err := Unmarshal(b, &doc)
242 expectNoError(t, err, fmt.Sprintf("%s: decoding %s BSON", testDesc, bType))
243 return doc
244 }
245
246
247
248 func nativeToBSON(t *testing.T, cB []byte, doc D, testDesc, bType, docSrcDesc string) {
249 actual, err := Marshal(doc)
250 expectNoError(t, err, fmt.Sprintf("%s: encoding %s BSON", testDesc, bType))
251
252 if diff := cmp.Diff(cB, actual); diff != "" {
253 t.Errorf("%s: 'native_to_bson(%s) = cB' failed (-want, +got):\n-%v\n+%v\n",
254 testDesc, docSrcDesc, cB, actual)
255 t.FailNow()
256 }
257 }
258
259
260 func jsonToNative(ej, ejType, testDesc string) (D, error) {
261 var doc D
262 if err := UnmarshalExtJSON([]byte(ej), ejType != "relaxed", &doc); err != nil {
263 return nil, fmt.Errorf("%s: decoding %s extended JSON: %w", testDesc, ejType, err)
264 }
265 return doc, nil
266 }
267
268
269 func jsonToBytes(ej, ejType, testDesc string) ([]byte, error) {
270 native, err := jsonToNative(ej, ejType, testDesc)
271 if err != nil {
272 return nil, err
273 }
274
275 b, err := Marshal(native)
276 if err != nil {
277 return nil, fmt.Errorf("%s: encoding %s BSON: %w", testDesc, ejType, err)
278 }
279
280 return b, nil
281 }
282
283
284 func nativeToJSON(t *testing.T, ej string, doc D, testDesc, ejType, ejShortName, docSrcDesc string) {
285 actualEJ, err := MarshalExtJSON(doc, ejType != "relaxed", true)
286 expectNoError(t, err, fmt.Sprintf("%s: encoding %s extended JSON", testDesc, ejType))
287
288 if diff := cmp.Diff(ej, string(actualEJ)); diff != "" {
289 t.Errorf("%s: 'native_to_%s_extended_json(%s) = %s' failed (-want, +got):\n%s\n",
290 testDesc, ejType, docSrcDesc, ejShortName, diff)
291 t.FailNow()
292 }
293 }
294
295 func runTest(t *testing.T, file string) {
296 filepath := path.Join(dataDir, file)
297 content, err := os.ReadFile(filepath)
298 require.NoError(t, err)
299
300
301 file = file[:len(file)-5]
302 testName := "bson_corpus--" + file
303
304 t.Run(testName, func(t *testing.T) {
305 var test testCase
306 require.NoError(t, json.Unmarshal(content, &test))
307
308 t.Run("valid", func(t *testing.T) {
309 for _, v := range test.Valid {
310 t.Run(v.Description, func(t *testing.T) {
311
312 cB, err := hex.DecodeString(v.CanonicalBson)
313 expectNoError(t, err, fmt.Sprintf("%s: reading canonical BSON", v.Description))
314
315
316 var compactEJ bytes.Buffer
317 require.NoError(t, json.Compact(&compactEJ, []byte(v.CanonicalExtJSON)))
318 cEJ := unescapeUnicode(compactEJ.String(), test.BsonType)
319 if test.BsonType == "0x01" {
320 cEJ = normalizeCanonicalDouble(t, *test.TestKey, cEJ)
321 }
322
323
324 doc := bsonToNative(t, cB, "canonical", v.Description)
325
326
327 nativeToBSON(t, cB, doc, v.Description, "canonical", "bson_to_native(cB)")
328
329
330 nativeToJSON(t, cEJ, doc, v.Description, "canonical", "cEJ", "bson_to_native(cB)")
331
332
333 if v.RelaxedExtJSON != nil {
334 var compactEJ bytes.Buffer
335 require.NoError(t, json.Compact(&compactEJ, []byte(*v.RelaxedExtJSON)))
336 rEJ := unescapeUnicode(compactEJ.String(), test.BsonType)
337 if test.BsonType == "0x01" {
338 rEJ = normalizeRelaxedDouble(t, *test.TestKey, rEJ)
339 }
340
341 nativeToJSON(t, rEJ, doc, v.Description, "relaxed", "rEJ", "bson_to_native(cB)")
342
343
344 doc, err = jsonToNative(rEJ, "relaxed", v.Description)
345 require.NoError(t, err)
346
347
348 nativeToJSON(t, rEJ, doc, v.Description, "relaxed", "eJR", "json_to_native(rEJ)")
349 }
350
351
352 doc, err = jsonToNative(cEJ, "canonical", v.Description)
353 require.NoError(t, err)
354
355
356 nativeToJSON(t, cEJ, doc, v.Description, "canonical", "cEJ", "json_to_native(cEJ)")
357
358
359 if v.Lossy == nil || !*v.Lossy {
360 nativeToBSON(t, cB, doc, v.Description, "canonical", "json_to_native(cEJ)")
361 }
362
363
364 if v.DegenerateBSON != nil {
365 dB, err := hex.DecodeString(*v.DegenerateBSON)
366 expectNoError(t, err, fmt.Sprintf("%s: reading degenerate BSON", v.Description))
367
368 doc = bsonToNative(t, dB, "degenerate", v.Description)
369
370
371 nativeToBSON(t, cB, doc, v.Description, "degenerate", "bson_to_native(dB)")
372 }
373
374
375 if v.DegenerateExtJSON != nil {
376 var compactEJ bytes.Buffer
377 require.NoError(t, json.Compact(&compactEJ, []byte(*v.DegenerateExtJSON)))
378 dEJ := unescapeUnicode(compactEJ.String(), test.BsonType)
379 if test.BsonType == "0x01" {
380 dEJ = normalizeCanonicalDouble(t, *test.TestKey, dEJ)
381 }
382
383 doc, err = jsonToNative(dEJ, "degenerate canonical", v.Description)
384 require.NoError(t, err)
385
386
387 nativeToJSON(t, cEJ, doc, v.Description, "degenerate canonical", "cEJ", "json_to_native(dEJ)")
388
389
390 if v.Lossy == nil || !*v.Lossy {
391 nativeToBSON(t, cB, doc, v.Description, "canonical", "json_to_native(dEJ)")
392 }
393 }
394 })
395 }
396 })
397
398 t.Run("decode error", func(t *testing.T) {
399 for _, d := range test.DecodeErrors {
400 t.Run(d.Description, func(t *testing.T) {
401 b, err := hex.DecodeString(d.Bson)
402 expectNoError(t, err, d.Description)
403
404 var doc D
405 err = Unmarshal(b, &doc)
406
407
408
409
410 for _, elem := range doc {
411 value := reflect.ValueOf(elem.Value)
412 invalidString := (value.Kind() == reflect.String) && !utf8.ValidString(value.String())
413 dbPtr, ok := elem.Value.(primitive.DBPointer)
414 invalidDBPtr := ok && !utf8.ValidString(dbPtr.DB)
415
416 if invalidString || invalidDBPtr {
417 expectNoError(t, err, d.Description)
418 return
419 }
420 }
421
422 expectError(t, err, fmt.Sprintf("%s: expected decode error", d.Description))
423 })
424 }
425 })
426
427 t.Run("parse error", func(t *testing.T) {
428 for _, p := range test.ParseErrors {
429 t.Run(p.Description, func(t *testing.T) {
430 s := unescapeUnicode(p.String, test.BsonType)
431 if test.BsonType == "0x13" {
432 s = fmt.Sprintf(`{"decimal128": {"$numberDecimal": "%s"}}`, s)
433 }
434
435 switch test.BsonType {
436 case "0x00", "0x05", "0x13":
437 var doc D
438 err := UnmarshalExtJSON([]byte(s), true, &doc)
439
440 if strings.Contains(p.Description, "Null") {
441 _, err = Marshal(doc)
442 }
443 expectError(t, err, fmt.Sprintf("%s: expected parse error", p.Description))
444 default:
445 t.Errorf("Update test to check for parse errors for type %s", test.BsonType)
446 t.Fail()
447 }
448 })
449 }
450 })
451 })
452 }
453
454 func Test_BsonCorpus(t *testing.T) {
455 jsonFiles, err := findJSONFilesInDir(dataDir)
456 if err != nil {
457 t.Fatalf("error finding JSON files in %s: %v", dataDir, err)
458 }
459
460 for _, file := range jsonFiles {
461 runTest(t, file)
462 }
463 }
464
// expectNoError fails t immediately, with a message prefixed by desc, when
// err is non-nil. It is a no-op for a nil err.
func expectNoError(t *testing.T, err error, desc string) {
	// Mark this as a helper so failures are attributed to the caller's line.
	t.Helper()
	if err != nil {
		t.Fatalf("%s: Unexpected error: %v", desc, err)
	}
}
472
// expectError fails t immediately, with a message prefixed by desc, when err
// is nil. It is a no-op for a non-nil err.
func expectError(t *testing.T, err error, desc string) {
	if err != nil {
		return
	}

	t.Helper()
	t.Fatalf("%s: Expected error", desc)
}
480
481 func TestRelaxedUUIDValidation(t *testing.T) {
482 testCases := []struct {
483 description string
484 canonicalExtJSON string
485 degenerateExtJSON string
486 expectedErr string
487 }{
488 {
489 "valid uuid",
490 "{\"x\" : { \"$binary\" : {\"base64\" : \"c//SZESzTGmQ6OfR38A11A==\", \"subType\" : \"04\"}}}",
491 "{\"x\" : { \"$uuid\" : \"73ffd264-44b3-4c69-90e8-e7d1dfc035d4\"}}",
492 "",
493 },
494 {
495 "invalid uuid--no hyphens",
496 "",
497 "{\"x\" : { \"$uuid\" : \"73ffd26444b34c6990e8e7d1dfc035d4\"}}",
498 "$uuid value does not follow RFC 4122 format regarding length and hyphens",
499 },
500 {
501 "invalid uuid--trailing hyphens",
502 "",
503 "{\"x\" : { \"$uuid\" : \"73ffd264-44b3-4c69-90e8-e7d1dfc035--\"}}",
504 "$uuid value does not follow RFC 4122 format regarding length and hyphens",
505 },
506 {
507 "invalid uuid--malformed hex",
508 "",
509 "{\"x\" : { \"$uuid\" : \"q3@fd26l-44b3-4c69-90e8-e7d1dfc035d4\"}}",
510 "$uuid value does not follow RFC 4122 format regarding hex bytes: encoding/hex: invalid byte: U+0071 'q'",
511 },
512 }
513
514 for _, tc := range testCases {
515 t.Run(tc.description, func(t *testing.T) {
516
517 cEJ := ""
518 if tc.canonicalExtJSON != "" {
519 var compactCEJ bytes.Buffer
520 require.NoError(t, json.Compact(&compactCEJ, []byte(tc.canonicalExtJSON)))
521 cEJ = unescapeUnicode(compactCEJ.String(), "0x05")
522 }
523
524
525 var compactDEJ bytes.Buffer
526 require.NoError(t, json.Compact(&compactDEJ, []byte(tc.degenerateExtJSON)))
527 dEJ := unescapeUnicode(compactDEJ.String(), "0x05")
528
529
530 var doc D
531 err := UnmarshalExtJSON([]byte(dEJ), true, &doc)
532
533 if tc.expectedErr != "" {
534 assert.Equal(t, tc.expectedErr, err.Error(), "expected error %v, got %v", tc.expectedErr, err)
535 } else {
536 assert.Nil(t, err, "expected no error, got error: %v", err)
537
538
539 nativeToJSON(t, cEJ, doc, tc.description, "degenerate canonical", "cEJ", "json_to_native(dEJ)")
540 }
541 })
542 }
543
544 }
545
View as plain text