Add a multi-file input mode for test fuzzers

The test_fuzzers.py script is very slow, especially on CI. Add a mode to the test fuzzers in which the fuzzer binary accepts many files on the command line and tests each of them in turn. This is hundreds of times faster, as it avoids all of the overhead from fork/exec. It has the downside that you can't tell which input caused a crash, so the old mode is retained behind the --one-at-a-time option for debugging work.
author: Jack Lloyd <[email protected]> 2018-12-23 18:14:52 -0500
committer: Jack Lloyd <[email protected]> 2018-12-23 18:14:52 -0500
commit: ef2c3d7d01ffdeb1b29c439b9ec0348302170e00 (patch)
tree: bfa831022451bda44fd290f284bb3363b9c6a6af /src
parent: b914ec97ebe4dd207ab15cbc6f65256c3b147b08 (diff)
3 files changed, 105 insertions, 24 deletions
diff --git a/src/fuzzer/fuzzers.h b/src/fuzzer/fuzzers.h
index 91a8b8cdc..8248a4f58 100644
--- a/src/fuzzer/fuzzers.h
+++ b/src/fuzzer/fuzzers.h
@@ -72,10 +72,48 @@ inline Botan::RandomNumberGenerator& fuzzer_rng()
    #error "Build configured for AFL but not being compiled by AFL compiler"
 #endif
 
+#if defined(BOTAN_FUZZER_IS_TEST)
+
+#include <fstream>
+
+namespace {
+
+int fuzz_files(char* files[]) // runs each path of a null-terminated list through LLVMFuzzerTestOneInput; always returns 0
+   {
+   for(size_t i = 0; files[i]; ++i)
+      {
+      std::ifstream in(files[i], std::ios::binary); // corpus inputs are raw bytes, so avoid text-mode translation
+
+      if(in.good()) // a file that cannot be opened is skipped
+         {
+         std::vector<uint8_t> buf(max_fuzzer_input_size); // at most max_fuzzer_input_size bytes are read per file
+         in.read(reinterpret_cast<char*>(buf.data()), buf.size());
+         const size_t got = static_cast<size_t>(in.gcount()); // byte count of the read on this stream (not std::cin)
+         buf.resize(got);
+         buf.shrink_to_fit(); // ask the vector to release the unused tail of the allocation
+
+         LLVMFuzzerTestOneInput(buf.data(), got);
+         }
+      }
+
+   return 0;
+   }
+
+}
+
+#endif
+
 int main(int argc, char* argv[])
    {
    LLVMFuzzerInitialize(&argc, &argv);
 
+#if defined(BOTAN_FUZZER_IS_TEST)
+   if(argc > 1)
+      {
+      return fuzz_files(&argv[1]);
+      }
+#endif
+
 #if defined(__AFL_LOOP)
    while(__AFL_LOOP(1000))
 #endif
diff --git a/src/scripts/ci_build.py b/src/scripts/ci_build.py
index 9f6a01e4c..1244d4b47 100755
--- a/src/scripts/ci_build.py
+++ b/src/scripts/ci_build.py
@@ -419,6 +419,7 @@ def main(args=None):
             'src/scripts/website.py',
             'src/scripts/bench.py',
             'src/scripts/test_python.py',
+            'src/scripts/test_fuzzers.py',
             'src/scripts/test_cli.py',
             'src/scripts/python_unittests.py',
             'src/scripts/python_unittests_unix.py']
diff --git a/src/scripts/test_fuzzers.py b/src/scripts/test_fuzzers.py
index 167f6dc41..75d98abe0 100755
--- a/src/scripts/test_fuzzers.py
+++ b/src/scripts/test_fuzzers.py
@@ -5,7 +5,7 @@
 import sys
 import os
 import subprocess
-import optparse
+import optparse # pylint: disable=deprecated-module
 import stat
 import multiprocessing
 import time
@@ -37,7 +37,17 @@ def run_fuzzer(args):
     corpus_fd.close()
     return (corpus_file, fuzzer_proc.returncode, stdout.decode('ascii'), stderr.decode('ascii'))
 
+def run_fuzzer_many_files(fuzzer_bin, corpus_files): # run fuzzer_bin once over all of corpus_files
+    fuzzer_proc = subprocess.Popen([fuzzer_bin] + corpus_files, stdin=None, # corpus paths are passed as arguments
+                                   stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
+    (stdout, stderr) = fuzzer_proc.communicate() # waits for the process to exit and collects both streams
+    return (fuzzer_proc.returncode, stdout.decode('ascii'), stderr.decode('ascii')) # (exit code, stdout, stderr)
+
 def main(args=None):
+    #pylint: disable=too-many-branches
+    #pylint: disable=too-many-statements
+    #pylint: disable=too-many-locals
+
     if args is None:
         args = sys.argv
 
@@ -48,12 +58,19 @@ def main(args=None):
     parser.add_option('--gdb', action='store_true',
                       help='Run under GDB and capture backtraces')
 
+    parser.add_option('--one-at-a-time', action='store_true', default=False,
+                      help='Test one corpus input at a time')
+
     (options, args) = parser.parse_args(args)
 
     if len(args) != 3:
         parser.print_usage()
         return 1
 
+    if options.gdb and not options.one_at_a_time:
+        print("Option --gdb requires --one-at-a-time")
+        return 1
+
     corpus_dir = args[1]
     fuzzer_dir = args[2]
 
@@ -96,46 +113,71 @@ def main(args=None):
     stderr_count = 0
     stdout_count = 0
 
-    gdb_commands = None
+    if options.one_at_a_time:
+        pool = multiprocessing.Pool(multiprocessing.cpu_count() * 2)
+        chunk_size = 32 # arbitrary
+
+        run_fuzzer_func = run_fuzzer_gdb if options.gdb else run_fuzzer
+
+        for fuzzer in sorted(list(fuzzers_with_corpus)):
+            fuzzer_bin = os.path.join(fuzzer_dir, fuzzer)
+            corpus_subdir = os.path.join(corpus_dir, fuzzer)
+            corpus_files = [os.path.join(corpus_subdir, l) for l in sorted(list(os.listdir(corpus_subdir)))]
+
+            # We have to do this hack because multiprocessing's Pool.map doesn't support
+            # passing any initial arguments, just the single iteratable
+            map_args = [(fuzzer_bin, f) for f in corpus_files]
+
+            start = time.time()
+
+            for result in pool.map(run_fuzzer_func, map_args, chunk_size):
+                (corpus_file, retcode, stdout, stderr) = result
 
-    pool = multiprocessing.Pool(multiprocessing.cpu_count() * 2)
-    chunk_size = 32 # arbitrary
+                if retcode != 0:
+                    print("Fuzzer %s crashed with input %s returncode %d" % (fuzzer, corpus_file, retcode))
+                    crash_count += 1
 
-    run_fuzzer_func = run_fuzzer_gdb if options.gdb else run_fuzzer
+                if stdout:
+                    print("Fuzzer %s produced stdout on input %s:\n%s" % (fuzzer, corpus_file, stdout))
+                    stdout_count += 1
 
-    for fuzzer in sorted(list(fuzzers_with_corpus)):
-        fuzzer_bin = os.path.join(fuzzer_dir, fuzzer)
-        corpus_subdir = os.path.join(corpus_dir, fuzzer)
-        corpus_files = [os.path.join(corpus_subdir, l) for l in sorted(list(os.listdir(corpus_subdir)))]
+                if stderr:
+                    print("Fuzzer %s produced stderr on input %s:\n%s" % (fuzzer, corpus_file, stderr))
+                    stderr_count += 1
 
-        # We have to do this hack because multiprocessing's Pool.map doesn't support
-        # passing any initial arguments, just the single iteratable
-        map_args = [(fuzzer_bin, f) for f in corpus_files]
+            duration = time.time() - start
+            print("Tested fuzzer %s with %d test cases, %d crashes in %.02f seconds" % (
+                fuzzer, len(corpus_files), crash_count, duration))
+            sys.stdout.flush()
+    else:
+        for fuzzer in sorted(list(fuzzers_with_corpus)):
+            fuzzer_bin = os.path.join(fuzzer_dir, fuzzer)
+            corpus_subdir = os.path.join(corpus_dir, fuzzer)
+            corpus_files = [os.path.join(corpus_subdir, l) for l in sorted(list(os.listdir(corpus_subdir)))]
 
-        start = time.time()
+            start = time.time()
 
-        for result in pool.map(run_fuzzer_func, map_args, chunk_size):
-            (corpus_file, retcode, stdout, stderr) = result
+            (retcode, stdout, stderr) = run_fuzzer_many_files(fuzzer_bin, corpus_files)
 
             if retcode != 0:
-                print("Fuzzer %s crashed with input %s returncode %d" % (fuzzer, corpus_file, retcode))
+                print("Fuzzer %s crashed returncode %d" % (fuzzer, retcode))
                 crash_count += 1
 
-            if len(stdout) != 0:
-                print("Fuzzer %s produced stdout on input %s:\n%s" % (fuzzer, corpus_file, stdout))
+            if stdout:
+                print("Fuzzer %s produced stdout:\n%s" % (fuzzer, stdout))
                 stdout_count += 1
 
-            if len(stderr) != 0:
-                print("Fuzzer %s produced stderr on input %s:\n%s" % (fuzzer, corpus_file, stderr))
+            if stderr:
+                print("Fuzzer %s produced stderr:\n%s" % (fuzzer, stderr))
                 stderr_count += 1
 
-        duration = time.time() - start
-        print("Tested fuzzer %s with %d test cases, %d crashes in %.02f seconds" % (fuzzer, len(corpus_files), crash_count, duration))
-        sys.stdout.flush()
+            duration = time.time() - start
 
+            print("Tested fuzzer %s with %d test cases, %d crashes in %.02f seconds" % (
+                fuzzer, len(corpus_files), crash_count, duration))
 
     if crash_count > 0 or stderr_count > 0 or stdout_count > 0:
-        print("Ran fuzzer tests, %d crashes %d stdout %d stderr" % (crash_count, stdout_count, stderr_count, duration))
+        print("Ran fuzzer tests, %d crashes %d stdout %d stderr" % (crash_count, stdout_count, stderr_count))
         return 2
     return 0
author	Jack Lloyd <[email protected]>	2018-12-23 18:14:52 -0500
committer	Jack Lloyd <[email protected]>	2018-12-23 18:14:52 -0500
commit	ef2c3d7d01ffdeb1b29c439b9ec0348302170e00 (patch)
tree	bfa831022451bda44fd290f284bb3363b9c6a6af /src
parent	b914ec97ebe4dd207ab15cbc6f65256c3b147b08 (diff)