...

Text file src/github.com/google/flatbuffers/tests/fuzzer/flatbuffers_scalar_fuzzer.cc

Documentation: github.com/google/flatbuffers/tests/fuzzer

     1/*
     2 * Copyright 2014 Google Inc. All rights reserved.
     3 *
     4 * Licensed under the Apache License, Version 2.0 (the "License");
     5 * you may not use this file except in compliance with the License.
     6 * You may obtain a copy of the License at
     7 *
     8 *     http://www.apache.org/licenses/LICENSE-2.0
     9 *
    10 * Unless required by applicable law or agreed to in writing, software
    11 * distributed under the License is distributed on an "AS IS" BASIS,
    12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13 * See the License for the specific language governing permissions and
    14 * limitations under the License.
    15 */
    16
    17#include <assert.h>
    18#include <stddef.h>
    19#include <stdint.h>
    20
    21#include <algorithm>
    22#include <clocale>
    23#include <memory>
    24#include <regex>
    25#include <string>
    26
    27#include "flatbuffers/idl.h"
    28#include "test_init.h"
    29
    30static constexpr size_t kMinInputLength = 1;
    31static constexpr size_t kMaxInputLength = 3000;
    32
    33static constexpr uint8_t flags_scalar_type = 0x0F;  // type of scalar value
    34static constexpr uint8_t flags_quotes_kind = 0x10;  // quote " or '
    35// reserved for future: json {named} or [unnamed]
    36// static constexpr uint8_t flags_json_bracer = 0x20;
    37
    38// Find all 'subj' sub-strings and replace first character of sub-string.
    39// BreakSequence("testest","tes", 'X') -> "XesXest".
    40// BreakSequence("xxx","xx", 'Y') -> "YYx".
    41static void BreakSequence(std::string &s, const char *subj, char repl) {
    42  size_t pos = 0;
    43  while (pos = s.find(subj, pos), pos != std::string::npos) {
    44    s.at(pos) = repl;
    45    pos++;
    46  }
    47}
    48
    49// Remove all leading and trailing symbols matched with pattern set.
    50// StripString("xy{xy}y", "xy") -> "{xy}"
    51static std::string StripString(const std::string &s, const char *pattern,
    52                               size_t *pos = nullptr) {
    53  if (pos) *pos = 0;
    54  // leading
    55  auto first = s.find_first_not_of(pattern);
    56  if (std::string::npos == first) return "";
    57  if (pos) *pos = first;
    58  // trailing
    59  auto last = s.find_last_not_of(pattern);
    60  assert(last < s.length());
    61  assert(first <= last);
    62  return s.substr(first, last - first + 1);
    63}
    64
    65class RegexMatcher {
    66 protected:
    67  virtual bool MatchNumber(const std::string &input) const = 0;
    68
    69 public:
    70  virtual ~RegexMatcher() = default;
    71
    72  struct MatchResult {
    73    size_t pos{ 0 };
    74    size_t len{ 0 };
    75    bool res{ false };
    76    bool quoted{ false };
    77  };
    78
    79  MatchResult Match(const std::string &input) const {
    80    MatchResult r;
    81    // strip leading and trailing "spaces" accepted by flatbuffer
    82    auto test = StripString(input, "\t\r\n ", &r.pos);
    83    r.len = test.size();
    84    // check quotes
    85    if (test.size() >= 2) {
    86      auto fch = test.front();
    87      auto lch = test.back();
    88      r.quoted = (fch == lch) && (fch == '\'' || fch == '\"');
    89      if (r.quoted) {
    90        // remove quotes for regex test
    91        test = test.substr(1, test.size() - 2);
    92      }
    93    }
    94    // Fast check:
    95    if (test.empty()) return r;
    96    // A string with a valid scalar shouldn't have non-ascii or non-printable
    97    // symbols.
    98    for (auto c : test) {
    99      if ((c < ' ') || (c > '~')) return r;
   100    }
   101    // Check with regex
   102    r.res = MatchNumber(test);
   103    return r;
   104  }
   105
   106  bool MatchRegexList(const std::string &input,
   107                      const std::vector<std::regex> &re_list) const {
   108    auto str = StripString(input, " ");
   109    if (str.empty()) return false;
   110    for (auto &re : re_list) {
   111      std::smatch match;
   112      if (std::regex_match(str, match, re)) return true;
   113    }
   114    return false;
   115  }
   116};
   117
   118class IntegerRegex : public RegexMatcher {
   119 protected:
   120  bool MatchNumber(const std::string &input) const override {
   121    static const std::vector<std::regex> re_list = {
   122      std::regex{ R"(^[-+]?[0-9]+$)", std::regex_constants::optimize },
   123
   124      std::regex{ R"(^[-+]?0[xX][0-9a-fA-F]+$)",
   125                  std::regex_constants::optimize }
   126    };
   127    return MatchRegexList(input, re_list);
   128  }
   129
   130 public:
   131  IntegerRegex() = default;
   132  virtual ~IntegerRegex() = default;
   133};
   134
   135class UIntegerRegex : public RegexMatcher {
   136 protected:
   137  bool MatchNumber(const std::string &input) const override {
   138    static const std::vector<std::regex> re_list = {
   139      std::regex{ R"(^[+]?[0-9]+$)", std::regex_constants::optimize },
   140      std::regex{ R"(^[+]?0[xX][0-9a-fA-F]+$)",
   141                  std::regex_constants::optimize },
   142      // accept -0 number
   143      std::regex{ R"(^[-](?:0[xX])?0+$)", std::regex_constants::optimize }
   144    };
   145    return MatchRegexList(input, re_list);
   146  }
   147
   148 public:
   149  UIntegerRegex() = default;
   150  virtual ~UIntegerRegex() = default;
   151};
   152
   153class BooleanRegex : public IntegerRegex {
   154 protected:
   155  bool MatchNumber(const std::string &input) const override {
   156    if (input == "true" || input == "false") return true;
   157    return IntegerRegex::MatchNumber(input);
   158  }
   159
   160 public:
   161  BooleanRegex() = default;
   162  virtual ~BooleanRegex() = default;
   163};
   164
   165class FloatRegex : public RegexMatcher {
   166 protected:
   167  bool MatchNumber(const std::string &input) const override {
   168    static const std::vector<std::regex> re_list = {
   169      // hex-float
   170      std::regex{
   171          R"(^[-+]?0[xX](?:(?:[.][0-9a-fA-F]+)|(?:[0-9a-fA-F]+[.][0-9a-fA-F]*)|(?:[0-9a-fA-F]+))[pP][-+]?[0-9]+$)",
   172          std::regex_constants::optimize },
   173      // dec-float
   174      std::regex{
   175          R"(^[-+]?(?:(?:[.][0-9]+)|(?:[0-9]+[.][0-9]*)|(?:[0-9]+))(?:[eE][-+]?[0-9]+)?$)",
   176          std::regex_constants::optimize },
   177
   178      std::regex{ R"(^[-+]?(?:nan|inf|infinity)$)",
   179                  std::regex_constants::optimize | std::regex_constants::icase }
   180    };
   181    return MatchRegexList(input, re_list);
   182  }
   183
   184 public:
   185  FloatRegex() = default;
   186  virtual ~FloatRegex() = default;
   187};
   188
   189class ScalarReferenceResult {
   190 private:
   191  ScalarReferenceResult(const char *_type, RegexMatcher::MatchResult _matched)
   192      : type(_type), matched(_matched) {}
   193
   194 public:
   195  // Decode scalar type and check if the input string satisfies the scalar type.
   196  static ScalarReferenceResult Check(uint8_t code, const std::string &input) {
   197    switch (code) {
   198      case 0x0: return { "double", FloatRegex().Match(input) };
   199      case 0x1: return { "float", FloatRegex().Match(input) };
   200      case 0x2: return { "int8", IntegerRegex().Match(input) };
   201      case 0x3: return { "int16", IntegerRegex().Match(input) };
   202      case 0x4: return { "int32", IntegerRegex().Match(input) };
   203      case 0x5: return { "int64", IntegerRegex().Match(input) };
   204      case 0x6: return { "uint8", UIntegerRegex().Match(input) };
   205      case 0x7: return { "uint16", UIntegerRegex().Match(input) };
   206      case 0x8: return { "uint32", UIntegerRegex().Match(input) };
   207      case 0x9: return { "uint64", UIntegerRegex().Match(input) };
   208      case 0xA: return { "bool", BooleanRegex().Match(input) };
   209      default: return { "float", FloatRegex().Match(input) };
   210    };
   211  }
   212
   213  const char *type;
   214  const RegexMatcher::MatchResult matched;
   215};
   216
   217bool Parse(flatbuffers::Parser &parser, const std::string &json,
   218           std::string *_text) {
   219  auto done = parser.ParseJson(json.c_str());
   220  if (done) {
   221    TEST_NULL(GenText(parser, parser.builder_.GetBufferPointer(), _text));
   222  } else {
   223    *_text = parser.error_;
   224  }
   225  return done;
   226}
   227
// Definition of the static OneTimeTestInit instance (a utility for the test
// run).
   229OneTimeTestInit OneTimeTestInit::one_time_init_;
   230
   231// llvm std::regex have problem with stack overflow, limit maximum length.
   232// ./scalar_fuzzer -max_len=3000
   233extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
   234  // Reserve one byte for Parser flags and one byte for repetition counter.
   235  if (size < 3) return 0;
   236  const uint8_t flags = data[0];
   237  // normalize to ascii alphabet
   238  const int extra_rep_number =
   239      std::max(5, (data[1] > '0' ? (data[1] - '0') : 0));
   240  data += 2;
   241  size -= 2;  // bypass
   242
   243  // Guarantee 0-termination.
   244  const std::string original(reinterpret_cast<const char *>(data), size);
   245  auto input = std::string(original.c_str());  // until '\0'
   246  if (input.size() < kMinInputLength || input.size() > kMaxInputLength)
   247    return 0;
   248
   249  // Break comments in json to avoid complexity with regex matcher.
   250  // The string " 12345 /* text */" will be accepted if insert it to string
   251  // expression: "table X { Y: " + " 12345 /* text */" + "; }.
   252  // But strings like this will complicate regex matcher.
   253  // We reject this by transform "/* text */ 12345" to "@* text */ 12345".
   254  BreakSequence(input, "//", '@');  // "//" -> "@/"
   255  BreakSequence(input, "/*", '@');  // "/*" -> "@*"
   256  // { "$schema: "text" } is exceptional case.
   257  // This key:value ignored by the parser. Numbers can not have $.
   258  BreakSequence(input, "$schema", '@');  // "$schema" -> "@schema"
   259  // Break all known scalar functions (todo: add them to regex?):
   260  for (auto f : { "deg", "rad", "sin", "cos", "tan", "asin", "acos", "atan" }) {
   261    BreakSequence(input, f, '_');  // ident -> ident
   262  }
   263
   264  // Extract type of scalar from 'flags' and check if the input string satisfies
   265  // the scalar type.
   266  const auto ref_res =
   267      ScalarReferenceResult::Check(flags & flags_scalar_type, input);
   268  auto &recheck = ref_res.matched;
   269
   270  // Create parser
   271  flatbuffers::IDLOptions opts;
   272  opts.force_defaults = true;
   273  opts.output_default_scalars_in_json = true;
   274  opts.indent_step = -1;
   275  opts.strict_json = true;
   276
   277  flatbuffers::Parser parser(opts);
   278  auto schema =
   279      "table X { Y: " + std::string(ref_res.type) + "; } root_type X;";
   280  TEST_EQ_FUNC(parser.Parse(schema.c_str()), true);
   281
   282  // The fuzzer can adjust the number repetition if a side-effects have found.
   283  // Each test should pass at least two times to ensure that the parser doesn't
   284  // have any hidden-states or locale-depended effects.
   285  for (auto cnt = 0; cnt < (extra_rep_number + 2); cnt++) {
   286    // Each even run (0,2,4..) will test locale independed code.
   287    auto use_locale = !!OneTimeTestInit::test_locale() && (0 == (cnt % 2));
   288    // Set new locale.
   289    if (use_locale) {
   290      FLATBUFFERS_ASSERT(setlocale(LC_ALL, OneTimeTestInit::test_locale()));
   291    }
   292
   293    // Parse original input as-is.
   294    auto orig_scalar = "{\"Y\" : " + input + "}";
   295    std::string orig_back;
   296    auto orig_done = Parse(parser, orig_scalar, &orig_back);
   297
   298    if (recheck.res != orig_done) {
   299      // look for "does not fit" or "doesn't fit" or "out of range"
   300      auto not_fit =
   301          (true == recheck.res)
   302              ? ((orig_back.find("does not fit") != std::string::npos) ||
   303                 (orig_back.find("out of range") != std::string::npos))
   304              : false;
   305
   306      if (false == not_fit) {
   307        TEST_OUTPUT_LINE("Stage 1 failed: Parser(%d) != Regex(%d)", orig_done,
   308                         recheck.res);
   309        TEST_EQ_STR(orig_back.c_str(),
   310                    input.substr(recheck.pos, recheck.len).c_str());
   311        TEST_EQ_FUNC(orig_done, recheck.res);
   312      }
   313    }
   314
   315    // Try to make quoted string and test it.
   316    std::string qouted_input;
   317    if (true == recheck.quoted) {
   318      // we can't simply remove quotes, they may be nested "'12'".
   319      // Original string "\'12\'" converted to "'12'".
   320      // The string can be an invalid string by JSON rules, but after quotes
   321      // removed can transform to valid.
   322      assert(recheck.len >= 2);
   323    } else {
   324      const auto quote = (flags & flags_quotes_kind) ? '\"' : '\'';
   325      qouted_input = input;  // copy
   326      qouted_input.insert(recheck.pos + recheck.len, 1, quote);
   327      qouted_input.insert(recheck.pos, 1, quote);
   328    }
   329
   330    // Test quoted version of the string
   331    if (!qouted_input.empty()) {
   332      auto fix_scalar = "{\"Y\" : " + qouted_input + "}";
   333      std::string fix_back;
   334      auto fix_done = Parse(parser, fix_scalar, &fix_back);
   335
   336      if (orig_done != fix_done) {
   337        TEST_OUTPUT_LINE("Stage 2 failed: Parser(%d) != Regex(%d)", fix_done,
   338                         orig_done);
   339        TEST_EQ_STR(fix_back.c_str(), orig_back.c_str());
   340      }
   341      if (orig_done) { TEST_EQ_STR(fix_back.c_str(), orig_back.c_str()); }
   342      TEST_EQ_FUNC(fix_done, orig_done);
   343    }
   344
   345    // Create new parser and test default value
   346    if (true == orig_done) {
   347      flatbuffers::Parser def_parser(opts);  // re-use options
   348      auto def_schema = "table X { Y: " + std::string(ref_res.type) + " = " +
   349                        input + "; } root_type X;" +
   350                        "{}";  // <- with empty json {}!
   351
   352      auto def_done = def_parser.Parse(def_schema.c_str());
   353      if (false == def_done) {
   354        TEST_OUTPUT_LINE("Stage 3.1 failed with _error = %s",
   355                         def_parser.error_.c_str());
   356        FLATBUFFERS_ASSERT(false);
   357      }
   358      // Compare with print.
   359      std::string ref_string, def_string;
   360      FLATBUFFERS_ASSERT(!GenText(
   361          parser, parser.builder_.GetBufferPointer(), &ref_string));
   362      FLATBUFFERS_ASSERT(!GenText(
   363          def_parser, def_parser.builder_.GetBufferPointer(), &def_string));
   364      if (ref_string != def_string) {
   365        TEST_OUTPUT_LINE("Stage 3.2 failed: '%s' != '%s'", def_string.c_str(),
   366                         ref_string.c_str());
   367        FLATBUFFERS_ASSERT(false);
   368      }
   369    }
   370
   371    // Restore locale.
   372    if (use_locale) { FLATBUFFERS_ASSERT(setlocale(LC_ALL, "C")); }
   373  }
   374  return 0;
   375}

View as plain text