...

Source file src/go.mongodb.org/mongo-driver/bson/bson_corpus_spec_test.go

Documentation: go.mongodb.org/mongo-driver/bson

     1  // Copyright (C) MongoDB, Inc. 2017-present.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License"); you may
     4  // not use this file except in compliance with the License. You may obtain
     5  // a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
     6  
     7  package bson
     8  
     9  import (
    10  	"bytes"
    11  	"encoding/hex"
    12  	"encoding/json"
    13  	"fmt"
    14  	"math"
    15  	"os"
    16  	"path"
    17  	"reflect"
    18  	"strconv"
    19  	"strings"
    20  	"testing"
    21  	"unicode"
    22  	"unicode/utf8"
    23  
    24  	"github.com/google/go-cmp/cmp"
    25  	"go.mongodb.org/mongo-driver/bson/primitive"
    26  	"go.mongodb.org/mongo-driver/internal/assert"
    27  	"go.mongodb.org/mongo-driver/internal/require"
    28  )
    29  
// testCase is the decoded form of one BSON corpus JSON file. Each field maps to the top-level
// JSON key named in its struct tag:
//   - Description: the file's "description" value.
//   - BsonType: the file's type tag as a string such as "0x01"; runTest branches on it.
//   - TestKey: optional; runTest dereferences it as the document key when normalizing doubles
//     (BsonType "0x01").
//   - Valid, DecodeErrors, ParseErrors: the cases run by runTest's "valid", "decode error" and
//     "parse error" subtests respectively.
//   - Deprecated: optional; decoded but not read by the code in this file.
type testCase struct {
	Description  string                `json:"description"`
	BsonType     string                `json:"bson_type"`
	TestKey      *string               `json:"test_key"`
	Valid        []validityTestCase    `json:"valid"`
	DecodeErrors []decodeErrorTestCase `json:"decodeErrors"`
	ParseErrors  []parseErrorTestCase  `json:"parseErrors"`
	Deprecated   *bool                 `json:"deprecated"`
}
    39  
// validityTestCase is one entry of a corpus file's "valid" array. Pointer fields are optional
// and are nil when the key is absent from the JSON.
//   - CanonicalBson / DegenerateBSON: hex-encoded BSON documents (decoded with hex.DecodeString).
//   - CanonicalExtJSON / RelaxedExtJSON / DegenerateExtJSON / ConvertedExtJSON: extended JSON
//     strings for the same value.
//   - ConvertedBSON: decoded but not read by the code in this file.
//   - Lossy: when set and true, runTest skips comparing BSON re-encoded from extended JSON
//     against CanonicalBson.
type validityTestCase struct {
	Description       string  `json:"description"`
	CanonicalBson     string  `json:"canonical_bson"`
	CanonicalExtJSON  string  `json:"canonical_extjson"`
	RelaxedExtJSON    *string `json:"relaxed_extjson"`
	DegenerateBSON    *string `json:"degenerate_bson"`
	DegenerateExtJSON *string `json:"degenerate_extjson"`
	ConvertedBSON     *string `json:"converted_bson"`
	ConvertedExtJSON  *string `json:"converted_extjson"`
	Lossy             *bool   `json:"lossy"`
}
    51  
// decodeErrorTestCase is one entry of a corpus file's "decodeErrors" array. Bson is a
// hex-encoded BSON document that runTest passes to Unmarshal and expects to be rejected.
type decodeErrorTestCase struct {
	Description string `json:"description"`
	Bson        string `json:"bson"`
}
    56  
// parseErrorTestCase is one entry of a corpus file's "parseErrors" array. String is the text
// that runTest feeds to UnmarshalExtJSON (after wrapping it in a $numberDecimal document for
// type "0x13") and expects to be rejected.
type parseErrorTestCase struct {
	Description string `json:"description"`
	String      string `json:"string"`
}
    61  
// dataDir is the relative path of the directory that is scanned for BSON corpus JSON files.
const dataDir = "../testdata/bson-corpus/"
    63  
    64  func findJSONFilesInDir(dir string) ([]string, error) {
    65  	files := make([]string, 0)
    66  
    67  	entries, err := os.ReadDir(dir)
    68  	if err != nil {
    69  		return nil, err
    70  	}
    71  
    72  	for _, entry := range entries {
    73  		if entry.IsDir() || path.Ext(entry.Name()) != ".json" {
    74  			continue
    75  		}
    76  
    77  		files = append(files, entry.Name())
    78  	}
    79  
    80  	return files, nil
    81  }
    82  
    83  // seedExtJSON will add the byte representation of the "extJSON" string to the fuzzer's coprus.
    84  func seedExtJSON(f *testing.F, extJSON string, extJSONType string, desc string) {
    85  	jbytes, err := jsonToBytes(extJSON, extJSONType, desc)
    86  	if err != nil {
    87  		f.Fatalf("failed to convert JSON to bytes: %v", err)
    88  	}
    89  
    90  	f.Add(jbytes)
    91  }
    92  
    93  // seedTestCase will add the byte representation for each "extJSON" string of each valid test case to the fuzzer's
    94  // corpus.
    95  func seedTestCase(f *testing.F, tcase *testCase) {
    96  	for _, vtc := range tcase.Valid {
    97  		seedExtJSON(f, vtc.CanonicalExtJSON, "canonical", vtc.Description)
    98  
    99  		// Seed the relaxed extended JSON.
   100  		if vtc.RelaxedExtJSON != nil {
   101  			seedExtJSON(f, *vtc.RelaxedExtJSON, "relaxed", vtc.Description)
   102  		}
   103  
   104  		// Seed the degenerate extended JSON.
   105  		if vtc.DegenerateExtJSON != nil {
   106  			seedExtJSON(f, *vtc.DegenerateExtJSON, "degenerate", vtc.Description)
   107  		}
   108  
   109  		// Seed the converted extended JSON.
   110  		if vtc.ConvertedExtJSON != nil {
   111  			seedExtJSON(f, *vtc.ConvertedExtJSON, "converted", vtc.Description)
   112  		}
   113  	}
   114  }
   115  
   116  // seedBSONCorpus will unmarshal the data from "testdata/bson-corpus" into a slice of "testCase" structs and then
   117  // marshal the "*_extjson" field of each "validityTestCase" into a slice of bytes to seed the fuzz corpus.
   118  func seedBSONCorpus(f *testing.F) {
   119  	fileNames, err := findJSONFilesInDir(dataDir)
   120  	if err != nil {
   121  		f.Fatalf("failed to find JSON files in directory %q: %v", dataDir, err)
   122  	}
   123  
   124  	for _, fileName := range fileNames {
   125  		filePath := path.Join(dataDir, fileName)
   126  
   127  		file, err := os.Open(filePath)
   128  		if err != nil {
   129  			f.Fatalf("failed to open file %q: %v", filePath, err)
   130  		}
   131  
   132  		var tcase testCase
   133  		if err := json.NewDecoder(file).Decode(&tcase); err != nil {
   134  			f.Fatal(err)
   135  		}
   136  
   137  		seedTestCase(f, &tcase)
   138  	}
   139  }
   140  
   141  func needsEscapedUnicode(bsonType string) bool {
   142  	return bsonType == "0x02" || bsonType == "0x0D" || bsonType == "0x0E" || bsonType == "0x0F"
   143  }
   144  
   145  func unescapeUnicode(s, bsonType string) string {
   146  	if !needsEscapedUnicode(bsonType) {
   147  		return s
   148  	}
   149  
   150  	newS := ""
   151  
   152  	for i := 0; i < len(s); i++ {
   153  		c := s[i]
   154  		switch c {
   155  		case '\\':
   156  			switch s[i+1] {
   157  			case 'u':
   158  				us := s[i : i+6]
   159  				u, err := strconv.Unquote(strings.Replace(strconv.Quote(us), `\\u`, `\u`, 1))
   160  				if err != nil {
   161  					return ""
   162  				}
   163  				for _, r := range u {
   164  					if r < ' ' {
   165  						newS += fmt.Sprintf(`\u%04x`, r)
   166  					} else {
   167  						newS += string(r)
   168  					}
   169  				}
   170  				i += 5
   171  			default:
   172  				newS += string(c)
   173  			}
   174  		default:
   175  			if c > unicode.MaxASCII {
   176  				r, size := utf8.DecodeRune([]byte(s[i:]))
   177  				newS += string(r)
   178  				i += size - 1
   179  			} else {
   180  				newS += string(c)
   181  			}
   182  		}
   183  	}
   184  
   185  	return newS
   186  }
   187  
   188  func formatDouble(f float64) string {
   189  	var s string
   190  	if math.IsInf(f, 1) {
   191  		s = "Infinity"
   192  	} else if math.IsInf(f, -1) {
   193  		s = "-Infinity"
   194  	} else if math.IsNaN(f) {
   195  		s = "NaN"
   196  	} else {
   197  		// Print exactly one decimalType place for integers; otherwise, print as many are necessary to
   198  		// perfectly represent it.
   199  		s = strconv.FormatFloat(f, 'G', -1, 64)
   200  		if !strings.ContainsRune(s, 'E') && !strings.ContainsRune(s, '.') {
   201  			s += ".0"
   202  		}
   203  	}
   204  
   205  	return s
   206  }
   207  
   208  func normalizeCanonicalDouble(t *testing.T, key string, cEJ string) string {
   209  	// Unmarshal string into map
   210  	cEJMap := make(map[string]map[string]string)
   211  	err := json.Unmarshal([]byte(cEJ), &cEJMap)
   212  	require.NoError(t, err)
   213  
   214  	// Parse the float contained by the map.
   215  	expectedString := cEJMap[key]["$numberDouble"]
   216  	expectedFloat, err := strconv.ParseFloat(expectedString, 64)
   217  	require.NoError(t, err)
   218  
   219  	// Normalize the string
   220  	return fmt.Sprintf(`{"%s":{"$numberDouble":"%s"}}`, key, formatDouble(expectedFloat))
   221  }
   222  
   223  func normalizeRelaxedDouble(t *testing.T, key string, rEJ string) string {
   224  	// Unmarshal string into map
   225  	rEJMap := make(map[string]float64)
   226  	err := json.Unmarshal([]byte(rEJ), &rEJMap)
   227  	if err != nil {
   228  		return normalizeCanonicalDouble(t, key, rEJ)
   229  	}
   230  
   231  	// Parse the float contained by the map.
   232  	expectedFloat := rEJMap[key]
   233  
   234  	// Normalize the string
   235  	return fmt.Sprintf(`{"%s":%s}`, key, formatDouble(expectedFloat))
   236  }
   237  
   238  // bsonToNative decodes the BSON bytes (b) into a native Document
   239  func bsonToNative(t *testing.T, b []byte, bType, testDesc string) D {
   240  	var doc D
   241  	err := Unmarshal(b, &doc)
   242  	expectNoError(t, err, fmt.Sprintf("%s: decoding %s BSON", testDesc, bType))
   243  	return doc
   244  }
   245  
   246  // nativeToBSON encodes the native Document (doc) into canonical BSON and compares it to the expected
   247  // canonical BSON (cB)
   248  func nativeToBSON(t *testing.T, cB []byte, doc D, testDesc, bType, docSrcDesc string) {
   249  	actual, err := Marshal(doc)
   250  	expectNoError(t, err, fmt.Sprintf("%s: encoding %s BSON", testDesc, bType))
   251  
   252  	if diff := cmp.Diff(cB, actual); diff != "" {
   253  		t.Errorf("%s: 'native_to_bson(%s) = cB' failed (-want, +got):\n-%v\n+%v\n",
   254  			testDesc, docSrcDesc, cB, actual)
   255  		t.FailNow()
   256  	}
   257  }
   258  
   259  // jsonToNative decodes the extended JSON string (ej) into a native Document
   260  func jsonToNative(ej, ejType, testDesc string) (D, error) {
   261  	var doc D
   262  	if err := UnmarshalExtJSON([]byte(ej), ejType != "relaxed", &doc); err != nil {
   263  		return nil, fmt.Errorf("%s: decoding %s extended JSON: %w", testDesc, ejType, err)
   264  	}
   265  	return doc, nil
   266  }
   267  
   268  // jsonToBytes decodes the extended JSON string (ej) into canonical BSON and then encodes it into a byte slice.
   269  func jsonToBytes(ej, ejType, testDesc string) ([]byte, error) {
   270  	native, err := jsonToNative(ej, ejType, testDesc)
   271  	if err != nil {
   272  		return nil, err
   273  	}
   274  
   275  	b, err := Marshal(native)
   276  	if err != nil {
   277  		return nil, fmt.Errorf("%s: encoding %s BSON: %w", testDesc, ejType, err)
   278  	}
   279  
   280  	return b, nil
   281  }
   282  
   283  // nativeToJSON encodes the native Document (doc) into an extended JSON string
   284  func nativeToJSON(t *testing.T, ej string, doc D, testDesc, ejType, ejShortName, docSrcDesc string) {
   285  	actualEJ, err := MarshalExtJSON(doc, ejType != "relaxed", true)
   286  	expectNoError(t, err, fmt.Sprintf("%s: encoding %s extended JSON", testDesc, ejType))
   287  
   288  	if diff := cmp.Diff(ej, string(actualEJ)); diff != "" {
   289  		t.Errorf("%s: 'native_to_%s_extended_json(%s) = %s' failed (-want, +got):\n%s\n",
   290  			testDesc, ejType, docSrcDesc, ejShortName, diff)
   291  		t.FailNow()
   292  	}
   293  }
   294  
   295  func runTest(t *testing.T, file string) {
   296  	filepath := path.Join(dataDir, file)
   297  	content, err := os.ReadFile(filepath)
   298  	require.NoError(t, err)
   299  
   300  	// Remove ".json" from filename.
   301  	file = file[:len(file)-5]
   302  	testName := "bson_corpus--" + file
   303  
   304  	t.Run(testName, func(t *testing.T) {
   305  		var test testCase
   306  		require.NoError(t, json.Unmarshal(content, &test))
   307  
   308  		t.Run("valid", func(t *testing.T) {
   309  			for _, v := range test.Valid {
   310  				t.Run(v.Description, func(t *testing.T) {
   311  					// get canonical BSON
   312  					cB, err := hex.DecodeString(v.CanonicalBson)
   313  					expectNoError(t, err, fmt.Sprintf("%s: reading canonical BSON", v.Description))
   314  
   315  					// get canonical extended JSON
   316  					var compactEJ bytes.Buffer
   317  					require.NoError(t, json.Compact(&compactEJ, []byte(v.CanonicalExtJSON)))
   318  					cEJ := unescapeUnicode(compactEJ.String(), test.BsonType)
   319  					if test.BsonType == "0x01" {
   320  						cEJ = normalizeCanonicalDouble(t, *test.TestKey, cEJ)
   321  					}
   322  
   323  					/*** canonical BSON round-trip tests ***/
   324  					doc := bsonToNative(t, cB, "canonical", v.Description)
   325  
   326  					// native_to_bson(bson_to_native(cB)) = cB
   327  					nativeToBSON(t, cB, doc, v.Description, "canonical", "bson_to_native(cB)")
   328  
   329  					// native_to_canonical_extended_json(bson_to_native(cB)) = cEJ
   330  					nativeToJSON(t, cEJ, doc, v.Description, "canonical", "cEJ", "bson_to_native(cB)")
   331  
   332  					// native_to_relaxed_extended_json(bson_to_native(cB)) = rEJ (if rEJ exists)
   333  					if v.RelaxedExtJSON != nil {
   334  						var compactEJ bytes.Buffer
   335  						require.NoError(t, json.Compact(&compactEJ, []byte(*v.RelaxedExtJSON)))
   336  						rEJ := unescapeUnicode(compactEJ.String(), test.BsonType)
   337  						if test.BsonType == "0x01" {
   338  							rEJ = normalizeRelaxedDouble(t, *test.TestKey, rEJ)
   339  						}
   340  
   341  						nativeToJSON(t, rEJ, doc, v.Description, "relaxed", "rEJ", "bson_to_native(cB)")
   342  
   343  						/*** relaxed extended JSON round-trip tests (if exists) ***/
   344  						doc, err = jsonToNative(rEJ, "relaxed", v.Description)
   345  						require.NoError(t, err)
   346  
   347  						// native_to_relaxed_extended_json(json_to_native(rEJ)) = rEJ
   348  						nativeToJSON(t, rEJ, doc, v.Description, "relaxed", "eJR", "json_to_native(rEJ)")
   349  					}
   350  
   351  					/*** canonical extended JSON round-trip tests ***/
   352  					doc, err = jsonToNative(cEJ, "canonical", v.Description)
   353  					require.NoError(t, err)
   354  
   355  					// native_to_canonical_extended_json(json_to_native(cEJ)) = cEJ
   356  					nativeToJSON(t, cEJ, doc, v.Description, "canonical", "cEJ", "json_to_native(cEJ)")
   357  
   358  					// native_to_bson(json_to_native(cEJ)) = cb (unless lossy)
   359  					if v.Lossy == nil || !*v.Lossy {
   360  						nativeToBSON(t, cB, doc, v.Description, "canonical", "json_to_native(cEJ)")
   361  					}
   362  
   363  					/*** degenerate BSON round-trip tests (if exists) ***/
   364  					if v.DegenerateBSON != nil {
   365  						dB, err := hex.DecodeString(*v.DegenerateBSON)
   366  						expectNoError(t, err, fmt.Sprintf("%s: reading degenerate BSON", v.Description))
   367  
   368  						doc = bsonToNative(t, dB, "degenerate", v.Description)
   369  
   370  						// native_to_bson(bson_to_native(dB)) = cB
   371  						nativeToBSON(t, cB, doc, v.Description, "degenerate", "bson_to_native(dB)")
   372  					}
   373  
   374  					/*** degenerate JSON round-trip tests (if exists) ***/
   375  					if v.DegenerateExtJSON != nil {
   376  						var compactEJ bytes.Buffer
   377  						require.NoError(t, json.Compact(&compactEJ, []byte(*v.DegenerateExtJSON)))
   378  						dEJ := unescapeUnicode(compactEJ.String(), test.BsonType)
   379  						if test.BsonType == "0x01" {
   380  							dEJ = normalizeCanonicalDouble(t, *test.TestKey, dEJ)
   381  						}
   382  
   383  						doc, err = jsonToNative(dEJ, "degenerate canonical", v.Description)
   384  						require.NoError(t, err)
   385  
   386  						// native_to_canonical_extended_json(json_to_native(dEJ)) = cEJ
   387  						nativeToJSON(t, cEJ, doc, v.Description, "degenerate canonical", "cEJ", "json_to_native(dEJ)")
   388  
   389  						// native_to_bson(json_to_native(dEJ)) = cB (unless lossy)
   390  						if v.Lossy == nil || !*v.Lossy {
   391  							nativeToBSON(t, cB, doc, v.Description, "canonical", "json_to_native(dEJ)")
   392  						}
   393  					}
   394  				})
   395  			}
   396  		})
   397  
   398  		t.Run("decode error", func(t *testing.T) {
   399  			for _, d := range test.DecodeErrors {
   400  				t.Run(d.Description, func(t *testing.T) {
   401  					b, err := hex.DecodeString(d.Bson)
   402  					expectNoError(t, err, d.Description)
   403  
   404  					var doc D
   405  					err = Unmarshal(b, &doc)
   406  
   407  					// The driver unmarshals invalid UTF-8 strings without error. Loop over the unmarshalled elements
   408  					// and assert that there was no error if any of the string or DBPointer values contain invalid UTF-8
   409  					// characters.
   410  					for _, elem := range doc {
   411  						value := reflect.ValueOf(elem.Value)
   412  						invalidString := (value.Kind() == reflect.String) && !utf8.ValidString(value.String())
   413  						dbPtr, ok := elem.Value.(primitive.DBPointer)
   414  						invalidDBPtr := ok && !utf8.ValidString(dbPtr.DB)
   415  
   416  						if invalidString || invalidDBPtr {
   417  							expectNoError(t, err, d.Description)
   418  							return
   419  						}
   420  					}
   421  
   422  					expectError(t, err, fmt.Sprintf("%s: expected decode error", d.Description))
   423  				})
   424  			}
   425  		})
   426  
   427  		t.Run("parse error", func(t *testing.T) {
   428  			for _, p := range test.ParseErrors {
   429  				t.Run(p.Description, func(t *testing.T) {
   430  					s := unescapeUnicode(p.String, test.BsonType)
   431  					if test.BsonType == "0x13" {
   432  						s = fmt.Sprintf(`{"decimal128": {"$numberDecimal": "%s"}}`, s)
   433  					}
   434  
   435  					switch test.BsonType {
   436  					case "0x00", "0x05", "0x13":
   437  						var doc D
   438  						err := UnmarshalExtJSON([]byte(s), true, &doc)
   439  						// Null bytes are validated when marshaling to BSON
   440  						if strings.Contains(p.Description, "Null") {
   441  							_, err = Marshal(doc)
   442  						}
   443  						expectError(t, err, fmt.Sprintf("%s: expected parse error", p.Description))
   444  					default:
   445  						t.Errorf("Update test to check for parse errors for type %s", test.BsonType)
   446  						t.Fail()
   447  					}
   448  				})
   449  			}
   450  		})
   451  	})
   452  }
   453  
   454  func Test_BsonCorpus(t *testing.T) {
   455  	jsonFiles, err := findJSONFilesInDir(dataDir)
   456  	if err != nil {
   457  		t.Fatalf("error finding JSON files in %s: %v", dataDir, err)
   458  	}
   459  
   460  	for _, file := range jsonFiles {
   461  		runTest(t, file)
   462  	}
   463  }
   464  
   465  func expectNoError(t *testing.T, err error, desc string) {
   466  	if err != nil {
   467  		t.Helper()
   468  		t.Errorf("%s: Unepexted error: %v", desc, err)
   469  		t.FailNow()
   470  	}
   471  }
   472  
   473  func expectError(t *testing.T, err error, desc string) {
   474  	if err == nil {
   475  		t.Helper()
   476  		t.Errorf("%s: Expected error", desc)
   477  		t.FailNow()
   478  	}
   479  }
   480  
   481  func TestRelaxedUUIDValidation(t *testing.T) {
   482  	testCases := []struct {
   483  		description       string
   484  		canonicalExtJSON  string
   485  		degenerateExtJSON string
   486  		expectedErr       string
   487  	}{
   488  		{
   489  			"valid uuid",
   490  			"{\"x\" : { \"$binary\" : {\"base64\" : \"c//SZESzTGmQ6OfR38A11A==\", \"subType\" : \"04\"}}}",
   491  			"{\"x\" : { \"$uuid\" : \"73ffd264-44b3-4c69-90e8-e7d1dfc035d4\"}}",
   492  			"",
   493  		},
   494  		{
   495  			"invalid uuid--no hyphens",
   496  			"",
   497  			"{\"x\" : { \"$uuid\" : \"73ffd26444b34c6990e8e7d1dfc035d4\"}}",
   498  			"$uuid value does not follow RFC 4122 format regarding length and hyphens",
   499  		},
   500  		{
   501  			"invalid uuid--trailing hyphens",
   502  			"",
   503  			"{\"x\" : { \"$uuid\" : \"73ffd264-44b3-4c69-90e8-e7d1dfc035--\"}}",
   504  			"$uuid value does not follow RFC 4122 format regarding length and hyphens",
   505  		},
   506  		{
   507  			"invalid uuid--malformed hex",
   508  			"",
   509  			"{\"x\" : { \"$uuid\" : \"q3@fd26l-44b3-4c69-90e8-e7d1dfc035d4\"}}",
   510  			"$uuid value does not follow RFC 4122 format regarding hex bytes: encoding/hex: invalid byte: U+0071 'q'",
   511  		},
   512  	}
   513  
   514  	for _, tc := range testCases {
   515  		t.Run(tc.description, func(t *testing.T) {
   516  			// get canonical extended JSON (if provided)
   517  			cEJ := ""
   518  			if tc.canonicalExtJSON != "" {
   519  				var compactCEJ bytes.Buffer
   520  				require.NoError(t, json.Compact(&compactCEJ, []byte(tc.canonicalExtJSON)))
   521  				cEJ = unescapeUnicode(compactCEJ.String(), "0x05")
   522  			}
   523  
   524  			// get degenerate extended JSON
   525  			var compactDEJ bytes.Buffer
   526  			require.NoError(t, json.Compact(&compactDEJ, []byte(tc.degenerateExtJSON)))
   527  			dEJ := unescapeUnicode(compactDEJ.String(), "0x05")
   528  
   529  			// convert dEJ to native doc
   530  			var doc D
   531  			err := UnmarshalExtJSON([]byte(dEJ), true, &doc)
   532  
   533  			if tc.expectedErr != "" {
   534  				assert.Equal(t, tc.expectedErr, err.Error(), "expected error %v, got %v", tc.expectedErr, err)
   535  			} else {
   536  				assert.Nil(t, err, "expected no error, got error: %v", err)
   537  
   538  				// Marshal doc into extended JSON and compare with cEJ
   539  				nativeToJSON(t, cEJ, doc, tc.description, "degenerate canonical", "cEJ", "json_to_native(dEJ)")
   540  			}
   541  		})
   542  	}
   543  
   544  }
   545  

View as plain text