#!/usr/bin/python3 # This file is part of the SV-Benchmarks collection of verification tasks: # https://github.com/sosy-lab/sv-benchmarks # # SPDX-FileCopyrightText: 2011-2020 The SV-Benchmarks Community # # SPDX-License-Identifier: Apache-2.0 # This script iterates over all tasks and performs the following check: # For each file with existing ".c"- and ".i"-file # we generate the goto-cc program (i.e., intermediate representation of cbmc) # and compare the goto-cc programs with goto-diff. # Finding a difference implies a wrong preprocessing. import argparse import fnmatch import glob import yaml import os import shutil import subprocess import tempfile # tasks to ignore, with reason why TASKS_TO_IGNORE = { "floats-esbmc-regression/trunc_nondet_2.i": "(platform-dependent types)", "*pthread*/*": "(platform-dependent types)", "goblint-regression/*": "(platform-dependent types)", } # categories to be excluded ... (with reason and debug information) CATEGORIES_TO_IGNORE = { "ConcurrencySafety-Main": "(platform-dependent types)", "ConcurrencySafety-NoOverflows": "(platform-dependent types)", "ConcurrencySafety-MemSafety": "(platform-dependent types)", "NoDataRace-Main": "(platform-dependent types)", "SoftwareSystems-OpenBSD-MemSafety": "(only custom includes, no system headers, complicated build process)", "SoftwareSystems-SQLite-MemSafety": "(complicated build process, requires patched version of cilly)", } # categories to be excluded, if option "skip-large" is used ... (with reason and debug information) LARGE_CATEGORIES = { "SoftwareSystems-DeviceDriversLinux64-ReachSafety": "(only custom includes, no system headers, checking takes too much time)", "SoftwareSystems-DeviceDriversLinux64-MemSafety": "(only custom includes, no system headers, checking takes too much time)", "SoftwareSystems-DeviceDriversLinux64Large-ReachSafety": "(only custom includes, no system headers, checking takes too much time)", } # no original source available, there are only preprocessed files. TASKS_ONLY_PREPROCESSED = [ 'ddv-machzwd/', 'aws-c-common/', # for LDV: there is a related .cil.c file, but it doesn't necessarily match at all 'ldv-linux-3.0/', 'ldv-regression/', 'ldv-linux-3.14-races/linux-3.14--drivers--media--platform--marvell-ccic--cafe_ccic.ko.cil-1.i', 'loops/s3.i', # this single file is special 'Juliet_Test/', # for Juliet there are c files, but each was preprocessed into two tasks, one valid and one invalid 'combinations/' ] CBMC_GIT_PATH = "../cbmc.git/" DATA_MODELS = {"ILP32": "32", "LP64": "64"} def fail(*msg): print("CRITICAL ERROR:", *msg) exit(1) def expand(s): return sorted(glob.glob(s)) def get_tasks_from_set(setFile): with open(setFile, "r") as fp: for line in fp: if not line.startswith("#"): # ignore comments for task in expand(line.strip()): yield task def build_goto_cc(): """ build goto-cc and goto-diff if not available, and then set PATH to find it """ if not shutil.which("goto-cc") or not shutil.which("goto-diff"): if not os.path.exists(CBMC_GIT_PATH + "src/goto-cc/goto-cc"): subprocess.check_call(["git", "clone", "--depth=1", "http://github.com/diffblue/cbmc.git", CBMC_GIT_PATH]) subprocess.check_call(["make", "-j2", "minisat2-download"], cwd=CBMC_GIT_PATH + "src") subprocess.check_call(["make", "-j2", "CXX=g++-5", "goto-diff.dir", "goto-cc.dir"], cwd=CBMC_GIT_PATH + "src") cwd = os.getcwd() os.environ['PATH'] = ":".join([ cwd + "/" + CBMC_GIT_PATH + "src/goto-cc", cwd + "/" + CBMC_GIT_PATH + "src/goto-diff", os.environ['PATH'] ]) def execute_goto_cc(args, bits, orig, taskfile): """ convert both preprocessed and non-preprocessed files into goto-cc intermediate language and compare them """ with tempfile.NamedTemporaryFile(prefix="compare_orig_", suffix=".out") as origoutfile, \ tempfile.NamedTemporaryFile(prefix="compare_task_", suffix=".out") as taskoutfile: origout = origoutfile.name taskout = taskoutfile.name subprocess.check_call(["goto-cc", "-m" + bits, orig, "-o", origout]) subprocess.check_call(["goto-cc", "-m" + bits, taskfile, "-o", taskout]) stdout, stderr = subprocess.Popen(["goto-diff", "--verbosity", "2", "-u", origout, taskout], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() if len(stderr) > 0: print(stderr.decode('utf-8').strip()) if len(stdout) > 0: if args.SHOW_DIFF: subprocess.call(["goto-diff", "-u", origout, taskout]) reason_to_ignore = get_reason_if_ignored(taskfile) if reason_to_ignore: print("WARNING: Difference on", taskfile, "detected (ignored)", reason_to_ignore) else: print("ERROR: Difference on", taskfile, "detected") if args.KEEP_GOING: global EC EC = 1 else: exit(1) def get_setfiles(args): for setfileWildcard in args.setfiles: setfiles = expand(setfileWildcard) if setfiles: for setfile in setfiles: yield setfile else: fail("Could not find a matching set file for", setfileWildcard) def get_inputfiles_from_yml(yml, taskfile): # check whether the input_basename exists (nobody should ever use "null" as a filename!) inputFiles = yml['input_files'] if not inputFiles: fail("No input files defined in", taskfile) elif isinstance(inputFiles, list): return inputFiles else: return [inputFiles] # always wrap as list def get_bits_from_yml(yml, taskfile): data_model = yml.get("options", {}).get("data_model") if not data_model: fail("No data model defined in", taskfile) if not data_model in DATA_MODELS: fail("Unknown data model", data_model, "defined in", taskfile) return DATA_MODELS[data_model] def get_orig_filename(taskfile): """ try to find a matching source file (.c) for a preprocessed task """ if taskfile.endswith('.c.i'): orig = taskfile[:-2] # remove ending ".i" if taskfile.startswith('ldv-memsafety/memleaks'): taskfile = taskfile.replace('memleaks', 'memleaks-notpreprocessed/memleaks') orig = taskfile[:-2] + ".c" # replace .i with .c (or .c with .c) if os.path.exists(orig): return orig else: fail("No original source of", taskfile, "found at", orig) def get_reason_if_ignored(taskfile): for pattern, reason in TASKS_TO_IGNORE.items(): if fnmatch.fnmatch(taskfile, pattern): return reason return None # parse comand line options and set default values parser = argparse.ArgumentParser() parser.add_argument("-k", "--keep-going", dest="KEEP_GOING", action="store_true", help="do not exit after error") parser.add_argument("-v", "--diff", dest="SHOW_DIFF", action="store_true", help="show the changes for the preprocessed files") parser.add_argument("--skip-large", dest="SKIP_LARGE", action="store_true", help="ignore large benchmark sets (see internal list of large categories)") parser.add_argument(dest="setfiles", type=str, nargs='*', default=["*.set"], help='set files to be analysed') parser.add_argument("-d", "--directory", dest="directories", type=str, action='append', default=None, help='directories with files to be analysed') args = parser.parse_args() build_goto_cc() EC=0 if args.directories: print("Analyzing only files from the following directories:\n " + "\n ".join(args.directories)) for directory in args.directories: if not os.path.isdir(directory): print("Directory '{}' does not exist.".format(directory)) for setfile in get_setfiles(args): setname = os.path.basename(setfile)[:-4] # remove ending ".set" # skip some sets, like LDV (too big) or Concurrency (pthread headers are very platform dependent) if setname in CATEGORIES_TO_IGNORE: print("Skipping category", setname, CATEGORIES_TO_IGNORE[setname]) continue if args.SKIP_LARGE and setname in LARGE_CATEGORIES: print("Skipping category", setname, LARGE_CATEGORIES[setname]) continue print("Processing category", setname) i = 0 for taskfile in get_tasks_from_set(setfile): if args.directories and os.path.dirname(taskfile) not in args.directories: continue if taskfile.endswith(".yml"): with open(taskfile, 'r') as yamlfile: yml = yaml.safe_load(yamlfile) inputFiles = get_inputfiles_from_yml(yml, taskfile) bits = get_bits_from_yml(yml, taskfile) if len(inputFiles) == 1: taskfile = os.path.join(os.path.dirname(taskfile), inputFiles[0]) else: print("ignoring task consisting of multiple sourcefiles", inputFiles) continue if not os.path.exists(taskfile): fail("No task file", taskfile, "found") # no original source available if taskfile.startswith(tuple(TASKS_ONLY_PREPROCESSED)): continue # try to find a matching source file for a preprocessed task orig = get_orig_filename(taskfile) if orig == taskfile: continue # taskfile was already a .c-file i += 1 if i % 10 == 0: print("Processing file", i, "of category", setname) # now we have found all required files and start with the actual check: execute_goto_cc(args, bits, orig, taskfile) exit(EC)