...

Text file src/github.com/google/flatbuffers/tests/fuzz_test.cpp

Documentation: github.com/google/flatbuffers/tests

     1#include "fuzz_test.h"
     2
     3#include <algorithm>
     4
     5#include "flatbuffers/flatbuffers.h"
     6#include "flatbuffers/idl.h"
     7#include "test_assert.h"
     8
     9namespace flatbuffers {
    10namespace tests {
    11namespace {
    12
    13// Include simple random number generator to ensure results will be the
    14// same cross platform.
    15// http://en.wikipedia.org/wiki/Park%E2%80%93Miller_random_number_generator
    16uint32_t lcg_seed = 48271;
    17uint32_t lcg_rand() {
    18  return lcg_seed =
    19             (static_cast<uint64_t>(lcg_seed) * 279470273UL) % 4294967291UL;
    20}
    21void lcg_reset() { lcg_seed = 48271; }
    22
    23template<typename T>
    24static void CompareTableFieldValue(flatbuffers::Table *table,
    25                                   flatbuffers::voffset_t voffset, T val) {
    26  T read = table->GetField(voffset, static_cast<T>(0));
    27  TEST_EQ(read, val);
    28}
    29
    30}  // namespace
    31
    32// Low level stress/fuzz test: serialize/deserialize a variety of
    33// different kinds of data in different combinations
    34void FuzzTest1() {
    35  // Values we're testing against: chosen to ensure no bits get chopped
    36  // off anywhere, and also be different from eachother.
    37  const uint8_t bool_val = true;
    38  const int8_t char_val = -127;  // 0x81
    39  const uint8_t uchar_val = 0xFF;
    40  const int16_t short_val = -32222;  // 0x8222;
    41  const uint16_t ushort_val = 0xFEEE;
    42  const int32_t int_val = 0x83333333;
    43  const uint32_t uint_val = 0xFDDDDDDD;
    44  const int64_t long_val = 0x8444444444444444LL;
    45  const uint64_t ulong_val = 0xFCCCCCCCCCCCCCCCULL;
    46  const float float_val = 3.14159f;
    47  const double double_val = 3.14159265359;
    48
    49  const int test_values_max = 11;
    50  const flatbuffers::voffset_t fields_per_object = 4;
    51  const int num_fuzz_objects = 10000;  // The higher, the more thorough :)
    52
    53  flatbuffers::FlatBufferBuilder builder;
    54
    55  lcg_reset();  // Keep it deterministic.
    56
    57  flatbuffers::uoffset_t objects[num_fuzz_objects];
    58
    59  // Generate num_fuzz_objects random objects each consisting of
    60  // fields_per_object fields, each of a random type.
    61  for (int i = 0; i < num_fuzz_objects; i++) {
    62    auto start = builder.StartTable();
    63    for (flatbuffers::voffset_t f = 0; f < fields_per_object; f++) {
    64      int choice = lcg_rand() % test_values_max;
    65      auto off = flatbuffers::FieldIndexToOffset(f);
    66      switch (choice) {
    67        case 0: builder.AddElement<uint8_t>(off, bool_val, 0); break;
    68        case 1: builder.AddElement<int8_t>(off, char_val, 0); break;
    69        case 2: builder.AddElement<uint8_t>(off, uchar_val, 0); break;
    70        case 3: builder.AddElement<int16_t>(off, short_val, 0); break;
    71        case 4: builder.AddElement<uint16_t>(off, ushort_val, 0); break;
    72        case 5: builder.AddElement<int32_t>(off, int_val, 0); break;
    73        case 6: builder.AddElement<uint32_t>(off, uint_val, 0); break;
    74        case 7: builder.AddElement<int64_t>(off, long_val, 0); break;
    75        case 8: builder.AddElement<uint64_t>(off, ulong_val, 0); break;
    76        case 9: builder.AddElement<float>(off, float_val, 0); break;
    77        case 10: builder.AddElement<double>(off, double_val, 0); break;
    78      }
    79    }
    80    objects[i] = builder.EndTable(start);
    81  }
    82  builder.PreAlign<flatbuffers::largest_scalar_t>(0);  // Align whole buffer.
    83
    84  lcg_reset();  // Reset.
    85
    86  uint8_t *eob = builder.GetCurrentBufferPointer() + builder.GetSize();
    87
    88  // Test that all objects we generated are readable and return the
    89  // expected values. We generate random objects in the same order
    90  // so this is deterministic.
    91  for (int i = 0; i < num_fuzz_objects; i++) {
    92    auto table = reinterpret_cast<flatbuffers::Table *>(eob - objects[i]);
    93    for (flatbuffers::voffset_t f = 0; f < fields_per_object; f++) {
    94      int choice = lcg_rand() % test_values_max;
    95      flatbuffers::voffset_t off = flatbuffers::FieldIndexToOffset(f);
    96      switch (choice) {
    97        case 0: CompareTableFieldValue(table, off, bool_val); break;
    98        case 1: CompareTableFieldValue(table, off, char_val); break;
    99        case 2: CompareTableFieldValue(table, off, uchar_val); break;
   100        case 3: CompareTableFieldValue(table, off, short_val); break;
   101        case 4: CompareTableFieldValue(table, off, ushort_val); break;
   102        case 5: CompareTableFieldValue(table, off, int_val); break;
   103        case 6: CompareTableFieldValue(table, off, uint_val); break;
   104        case 7: CompareTableFieldValue(table, off, long_val); break;
   105        case 8: CompareTableFieldValue(table, off, ulong_val); break;
   106        case 9: CompareTableFieldValue(table, off, float_val); break;
   107        case 10: CompareTableFieldValue(table, off, double_val); break;
   108      }
   109    }
   110  }
   111}
   112
   113// High level stress/fuzz test: generate a big schema and
   114// matching json data in random combinations, then parse both,
   115// generate json back from the binary, and compare with the original.
   116void FuzzTest2() {
   117  lcg_reset();  // Keep it deterministic.
   118
   119  const int num_definitions = 30;
   120  const int num_struct_definitions = 5;  // Subset of num_definitions.
   121  const int fields_per_definition = 15;
   122  const int instances_per_definition = 5;
   123  const int deprecation_rate = 10;  // 1 in deprecation_rate fields will
   124                                    // be deprecated.
   125
   126  std::string schema = "namespace test;\n\n";
   127
   128  struct RndDef {
   129    std::string instances[instances_per_definition];
   130
   131    // Since we're generating schema and corresponding data in tandem,
   132    // this convenience function adds strings to both at once.
   133    static void Add(RndDef (&definitions_l)[num_definitions],
   134                    std::string &schema_l, const int instances_per_definition_l,
   135                    const char *schema_add, const char *instance_add,
   136                    int definition) {
   137      schema_l += schema_add;
   138      for (int i = 0; i < instances_per_definition_l; i++)
   139        definitions_l[definition].instances[i] += instance_add;
   140    }
   141  };
   142
   143  // clang-format off
   144  #define AddToSchemaAndInstances(schema_add, instance_add) \
   145    RndDef::Add(definitions, schema, instances_per_definition, \
   146                schema_add, instance_add, definition)
   147
   148  #define Dummy() \
   149    RndDef::Add(definitions, schema, instances_per_definition, \
   150                "byte", "1", definition)
   151  // clang-format on
   152
   153  RndDef definitions[num_definitions];
   154
   155  // We are going to generate num_definitions, the first
   156  // num_struct_definitions will be structs, the rest tables. For each
   157  // generate random fields, some of which may be struct/table types
   158  // referring to previously generated structs/tables.
   159  // Simultanenously, we generate instances_per_definition JSON data
   160  // definitions, which will have identical structure to the schema
   161  // being generated. We generate multiple instances such that when creating
   162  // hierarchy, we get some variety by picking one randomly.
   163  for (int definition = 0; definition < num_definitions; definition++) {
   164    std::string definition_name = "D" + flatbuffers::NumToString(definition);
   165
   166    bool is_struct = definition < num_struct_definitions;
   167
   168    AddToSchemaAndInstances(
   169        ((is_struct ? "struct " : "table ") + definition_name + " {\n").c_str(),
   170        "{\n");
   171
   172    for (int field = 0; field < fields_per_definition; field++) {
   173      const bool is_last_field = field == fields_per_definition - 1;
   174
   175      // Deprecate 1 in deprecation_rate fields. Only table fields can be
   176      // deprecated.
   177      // Don't deprecate the last field to avoid dangling commas in JSON.
   178      const bool deprecated =
   179          !is_struct && !is_last_field && (lcg_rand() % deprecation_rate == 0);
   180
   181      std::string field_name = "f" + flatbuffers::NumToString(field);
   182      AddToSchemaAndInstances(("  " + field_name + ":").c_str(),
   183                              deprecated ? "" : (field_name + ": ").c_str());
   184      // Pick random type:
   185      auto base_type = static_cast<flatbuffers::BaseType>(
   186          lcg_rand() % (flatbuffers::BASE_TYPE_UNION + 1));
   187      switch (base_type) {
   188        case flatbuffers::BASE_TYPE_STRING:
   189          if (is_struct) {
   190            Dummy();  // No strings in structs.
   191          } else {
   192            AddToSchemaAndInstances("string", deprecated ? "" : "\"hi\"");
   193          }
   194          break;
   195        case flatbuffers::BASE_TYPE_VECTOR:
   196          if (is_struct) {
   197            Dummy();  // No vectors in structs.
   198          } else {
   199            AddToSchemaAndInstances("[ubyte]",
   200                                    deprecated ? "" : "[\n0,\n1,\n255\n]");
   201          }
   202          break;
   203        case flatbuffers::BASE_TYPE_NONE:
   204        case flatbuffers::BASE_TYPE_UTYPE:
   205        case flatbuffers::BASE_TYPE_STRUCT:
   206        case flatbuffers::BASE_TYPE_UNION:
   207          if (definition) {
   208            // Pick a random previous definition and random data instance of
   209            // that definition.
   210            int defref = lcg_rand() % definition;
   211            int instance = lcg_rand() % instances_per_definition;
   212            AddToSchemaAndInstances(
   213                ("D" + flatbuffers::NumToString(defref)).c_str(),
   214                deprecated ? ""
   215                           : definitions[defref].instances[instance].c_str());
   216          } else {
   217            // If this is the first definition, we have no definition we can
   218            // refer to.
   219            Dummy();
   220          }
   221          break;
   222        case flatbuffers::BASE_TYPE_BOOL:
   223          AddToSchemaAndInstances(
   224              "bool", deprecated ? "" : (lcg_rand() % 2 ? "true" : "false"));
   225          break;
   226        case flatbuffers::BASE_TYPE_ARRAY:
   227          if (!is_struct) {
   228            AddToSchemaAndInstances(
   229                "ubyte",
   230                deprecated ? "" : "255");  // No fixed-length arrays in tables.
   231          } else {
   232            AddToSchemaAndInstances("[int:3]", deprecated ? "" : "[\n,\n,\n]");
   233          }
   234          break;
   235        default:
   236          // All the scalar types.
   237          schema += flatbuffers::TypeName(base_type);
   238
   239          if (!deprecated) {
   240            // We want each instance to use its own random value.
   241            for (int inst = 0; inst < instances_per_definition; inst++)
   242              definitions[definition].instances[inst] +=
   243                  flatbuffers::IsFloat(base_type)
   244                      ? flatbuffers::NumToString<double>(lcg_rand() % 128)
   245                            .c_str()
   246                      : flatbuffers::NumToString<int>(lcg_rand() % 128).c_str();
   247          }
   248      }
   249      AddToSchemaAndInstances(deprecated ? "(deprecated);\n" : ";\n",
   250                              deprecated      ? ""
   251                              : is_last_field ? "\n"
   252                                              : ",\n");
   253    }
   254    AddToSchemaAndInstances("}\n\n", "}");
   255  }
   256
   257  schema += "root_type D" + flatbuffers::NumToString(num_definitions - 1);
   258  schema += ";\n";
   259
   260  flatbuffers::Parser parser;
   261
   262  // Will not compare against the original if we don't write defaults
   263  parser.builder_.ForceDefaults(true);
   264
   265  // Parse the schema, parse the generated data, then generate text back
   266  // from the binary and compare against the original.
   267  TEST_EQ(parser.Parse(schema.c_str()), true);
   268
   269  const std::string &json =
   270      definitions[num_definitions - 1].instances[0] + "\n";
   271
   272  TEST_EQ(parser.Parse(json.c_str()), true);
   273
   274  std::string jsongen;
   275  parser.opts.indent_step = 0;
   276  auto result = GenText(parser, parser.builder_.GetBufferPointer(), &jsongen);
   277  TEST_NULL(result);
   278
   279  if (jsongen != json) {
   280    // These strings are larger than a megabyte, so we show the bytes around
   281    // the first bytes that are different rather than the whole string.
   282    size_t len = std::min(json.length(), jsongen.length());
   283    for (size_t i = 0; i < len; i++) {
   284      if (json[i] != jsongen[i]) {
   285        i -= std::min(static_cast<size_t>(10), i);  // show some context;
   286        size_t end = std::min(len, i + 20);
   287        for (; i < end; i++)
   288          TEST_OUTPUT_LINE("at %d: found \"%c\", expected \"%c\"\n",
   289                           static_cast<int>(i), jsongen[i], json[i]);
   290        break;
   291      }
   292    }
   293    TEST_NOTNULL(nullptr);  //-V501 (this comment suppresses CWE-570 warning)
   294  }
   295
   296  // clang-format off
   297  #ifdef FLATBUFFERS_TEST_VERBOSE
   298    TEST_OUTPUT_LINE("%dk schema tested with %dk of json\n",
   299                     static_cast<int>(schema.length() / 1024),
   300                     static_cast<int>(json.length() / 1024));
   301  #endif
   302  // clang-format on
   303}
   304
   305}  // namespace tests
   306}  // namespace flatbuffers

View as plain text