diff options
author | Jack Lloyd <[email protected]> | 2018-12-23 18:14:52 -0500 |
---|---|---|
committer | Jack Lloyd <[email protected]> | 2018-12-23 18:14:52 -0500 |
commit | ef2c3d7d01ffdeb1b29c439b9ec0348302170e00 (patch) | |
tree | bfa831022451bda44fd290f284bb3363b9c6a6af /src | |
parent | b914ec97ebe4dd207ab15cbc6f65256c3b147b08 (diff) |
Add a multi-file input mode for test fuzzers
The test_fuzzers.py script is very slow, especially on CI. Add a mode
to the test fuzzers where the fuzzer binary accepts many files on the
command line and tests each of them in turn. This is hundreds of times
faster, as it avoids all overhead from fork/exec.
It has the downside that you can't tell which input caused a crash, so
retain the old mode behind the --one-at-a-time option for debugging work.
Diffstat (limited to 'src')
-rw-r--r-- | src/fuzzer/fuzzers.h | 38 | ||||
-rwxr-xr-x | src/scripts/ci_build.py | 1 | ||||
-rwxr-xr-x | src/scripts/test_fuzzers.py | 90 |
3 files changed, 105 insertions, 24 deletions
diff --git a/src/fuzzer/fuzzers.h b/src/fuzzer/fuzzers.h index 91a8b8cdc..8248a4f58 100644 --- a/src/fuzzer/fuzzers.h +++ b/src/fuzzer/fuzzers.h @@ -72,10 +72,48 @@ inline Botan::RandomNumberGenerator& fuzzer_rng() #error "Build configured for AFL but not being compiled by AFL compiler" #endif +#if defined(BOTAN_FUZZER_IS_TEST) + +#include <fstream> + +namespace { + +int fuzz_files(char* files[]) + { + for(size_t i = 0; files[i]; ++i) + { + std::ifstream in(files[i]); + + if(in.good()) + { + std::vector<uint8_t> buf(max_fuzzer_input_size); + in.read((char*)buf.data(), buf.size()); + const size_t got = std::cin.gcount(); + buf.resize(got); + buf.shrink_to_fit(); + + LLVMFuzzerTestOneInput(buf.data(), got); + } + } + + return 0; + } + +} + +#endif + int main(int argc, char* argv[]) { LLVMFuzzerInitialize(&argc, &argv); +#if defined(BOTAN_FUZZER_IS_TEST) + if(argc > 1) + { + return fuzz_files(&argv[1]); + } +#endif + #if defined(__AFL_LOOP) while(__AFL_LOOP(1000)) #endif diff --git a/src/scripts/ci_build.py b/src/scripts/ci_build.py index 9f6a01e4c..1244d4b47 100755 --- a/src/scripts/ci_build.py +++ b/src/scripts/ci_build.py @@ -419,6 +419,7 @@ def main(args=None): 'src/scripts/website.py', 'src/scripts/bench.py', 'src/scripts/test_python.py', + 'src/scripts/test_fuzzers.py', 'src/scripts/test_cli.py', 'src/scripts/python_unittests.py', 'src/scripts/python_unittests_unix.py'] diff --git a/src/scripts/test_fuzzers.py b/src/scripts/test_fuzzers.py index 167f6dc41..75d98abe0 100755 --- a/src/scripts/test_fuzzers.py +++ b/src/scripts/test_fuzzers.py @@ -5,7 +5,7 @@ import sys import os import subprocess -import optparse +import optparse # pylint: disable=deprecated-module import stat import multiprocessing import time @@ -37,7 +37,17 @@ def run_fuzzer(args): corpus_fd.close() return (corpus_file, fuzzer_proc.returncode, stdout.decode('ascii'), stderr.decode('ascii')) +def run_fuzzer_many_files(fuzzer_bin, corpus_files): + fuzzer_proc = subprocess.Popen([fuzzer_bin] + 
corpus_files, stdin=None, + stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) + (stdout, stderr) = fuzzer_proc.communicate() + return (fuzzer_proc.returncode, stdout.decode('ascii'), stderr.decode('ascii')) + def main(args=None): + #pylint: disable=too-many-branches + #pylint: disable=too-many-statements + #pylint: disable=too-many-locals + if args is None: args = sys.argv @@ -48,12 +58,19 @@ def main(args=None): parser.add_option('--gdb', action='store_true', help='Run under GDB and capture backtraces') + parser.add_option('--one-at-a-time', action='store_true', default=False, + help='Test one corpus input at a time') + (options, args) = parser.parse_args(args) if len(args) != 3: parser.print_usage() return 1 + if options.gdb and not options.one_at_a_time: + print("Option --gdb requires --one-at-a-time") + return 1 + corpus_dir = args[1] fuzzer_dir = args[2] @@ -96,46 +113,71 @@ def main(args=None): stderr_count = 0 stdout_count = 0 - gdb_commands = None + if options.one_at_a_time: + pool = multiprocessing.Pool(multiprocessing.cpu_count() * 2) + chunk_size = 32 # arbitrary + + run_fuzzer_func = run_fuzzer_gdb if options.gdb else run_fuzzer + + for fuzzer in sorted(list(fuzzers_with_corpus)): + fuzzer_bin = os.path.join(fuzzer_dir, fuzzer) + corpus_subdir = os.path.join(corpus_dir, fuzzer) + corpus_files = [os.path.join(corpus_subdir, l) for l in sorted(list(os.listdir(corpus_subdir)))] + + # We have to do this hack because multiprocessing's Pool.map doesn't support + # passing any initial arguments, just the single iteratable + map_args = [(fuzzer_bin, f) for f in corpus_files] + + start = time.time() + + for result in pool.map(run_fuzzer_func, map_args, chunk_size): + (corpus_file, retcode, stdout, stderr) = result - pool = multiprocessing.Pool(multiprocessing.cpu_count() * 2) - chunk_size = 32 # arbitrary + if retcode != 0: + print("Fuzzer %s crashed with input %s returncode %d" % (fuzzer, corpus_file, retcode)) + crash_count += 1 - run_fuzzer_func 
= run_fuzzer_gdb if options.gdb else run_fuzzer + if stdout: + print("Fuzzer %s produced stdout on input %s:\n%s" % (fuzzer, corpus_file, stdout)) + stdout_count += 1 - for fuzzer in sorted(list(fuzzers_with_corpus)): - fuzzer_bin = os.path.join(fuzzer_dir, fuzzer) - corpus_subdir = os.path.join(corpus_dir, fuzzer) - corpus_files = [os.path.join(corpus_subdir, l) for l in sorted(list(os.listdir(corpus_subdir)))] + if stderr: + print("Fuzzer %s produced stderr on input %s:\n%s" % (fuzzer, corpus_file, stderr)) + stderr_count += 1 - # We have to do this hack because multiprocessing's Pool.map doesn't support - # passing any initial arguments, just the single iteratable - map_args = [(fuzzer_bin, f) for f in corpus_files] + duration = time.time() - start + print("Tested fuzzer %s with %d test cases, %d crashes in %.02f seconds" % ( + fuzzer, len(corpus_files), crash_count, duration)) + sys.stdout.flush() + else: + for fuzzer in sorted(list(fuzzers_with_corpus)): + fuzzer_bin = os.path.join(fuzzer_dir, fuzzer) + corpus_subdir = os.path.join(corpus_dir, fuzzer) + corpus_files = [os.path.join(corpus_subdir, l) for l in sorted(list(os.listdir(corpus_subdir)))] - start = time.time() + start = time.time() - for result in pool.map(run_fuzzer_func, map_args, chunk_size): - (corpus_file, retcode, stdout, stderr) = result + (retcode, stdout, stderr) = run_fuzzer_many_files(fuzzer_bin, corpus_files) if retcode != 0: - print("Fuzzer %s crashed with input %s returncode %d" % (fuzzer, corpus_file, retcode)) + print("Fuzzer %s crashed returncode %d" % (fuzzer, retcode)) crash_count += 1 - if len(stdout) != 0: - print("Fuzzer %s produced stdout on input %s:\n%s" % (fuzzer, corpus_file, stdout)) + if stdout: + print("Fuzzer %s produced stdout:\n%s" % (fuzzer, stdout)) stdout_count += 1 - if len(stderr) != 0: - print("Fuzzer %s produced stderr on input %s:\n%s" % (fuzzer, corpus_file, stderr)) + if stderr: + print("Fuzzer %s produced stderr:\n%s" % (fuzzer, stderr)) stderr_count += 1 - 
duration = time.time() - start - print("Tested fuzzer %s with %d test cases, %d crashes in %.02f seconds" % (fuzzer, len(corpus_files), crash_count, duration)) - sys.stdout.flush() + duration = time.time() - start + print("Tested fuzzer %s with %d test cases, %d crashes in %.02f seconds" % ( + fuzzer, len(corpus_files), crash_count, duration)) if crash_count > 0 or stderr_count > 0 or stdout_count > 0: - print("Ran fuzzer tests, %d crashes %d stdout %d stderr" % (crash_count, stdout_count, stderr_count, duration)) + print("Ran fuzzer tests, %d crashes %d stdout %d stderr" % (crash_count, stdout_count, stderr_count)) return 2 return 0 |