#!/usr/bin/env python3 # This file is part of the SV-Benchmarks collection of verification tasks: # https://github.com/sosy-lab/sv-benchmarks # # SPDX-FileCopyrightText: 2011-2020 The SV-Benchmarks Community # # SPDX-License-Identifier: Apache-2.0 import argparse import collections import fnmatch import functools import glob import hashlib import logging import multiprocessing import os import re import sys from typing import List, Tuple try: import yaml except ImportError: yaml = None LOG_FORMAT = "%(levelname)-7s %(message)s" try: import coloredlogs coloredlogs.install(fmt=LOG_FORMAT, level='INFO') except ImportError: logging.basicConfig(format=LOG_FORMAT, level='INFO') README_PATTERN = re.compile('^readme(\.(txt|md))?$', re.I) BENCHMARK_PATTERN = re.compile('^.*\.yml$') EXPECTED_FILE_PATTERN = re.compile( '^(.*\.(c|h|i|yml)|(readme|license([-.].*)?|.*\.error_trace)(\.(txt|md))?|Makefile|.gitignore)$', re.I) CONFIG_KEYS = set(["Architecture", "Description"]) PROPERTIES = set(["def-behavior", "no-overflow", "no-data-race", "termination", "unreach-call", "valid-deref", "valid-free", "valid-memcleanup", "valid-memsafety", "valid-memtrack", "coverage-error-call", "coverage-branches", "coverage-conditions", "coverage-statements", "unreach-call-a", "unreach-call-b"]) # multiple properties for eca-rers2018-files for i in range(100): PROPERTIES.add("unreach-call-%d" % i) # properties of ldv-multiproperty PROPERTIES |= { "unreach-call-alloc_irq", "unreach-call-alloc_spinlock", "unreach-call-alloc_usb_lock", "unreach-call-arch_io", "unreach-call-block_genhd", "unreach-call-block_queue", "unreach-call-block_request", "unreach-call-drivers_base_class", "unreach-call-fs_char_dev", "unreach-call-fs_sysfs", "unreach-call-kernel_locking_mutex", "unreach-call-kernel_locking_rwlock", "unreach-call-kernel_locking_spinlock", "unreach-call-kernel_module", "unreach-call-kernel_rcu_srcu", "unreach-call-kernel_rcu_update_lock", "unreach-call-kernel_rcu_update_lock_bh", "unreach-call-kernel_rcu_update_lock_sched", "unreach-call-kernel_sched_completion", "unreach-call-lib_find_bit", "unreach-call-lib_idr", "unreach-call-mmc_sdio_func", "unreach-call-net_register", "unreach-call-net_rtnetlink", "unreach-call-net_sock", "unreach-call-usb_coherent", "unreach-call-usb_dev", "unreach-call-usb_gadget", "unreach-call-usb_register", "unreach-call-usb_urb", } DATA_MODELS = {"ILP32": 32, "LP64": 64} # Ignore regression # as long as no yml-task definitions exist for the tasks in these directories IGNORED_DIRECTORIES = set(["properties", "regression"]) """Directories which are completely ignored by this script""" UNUSED_DIRECTORIES = set(["ldv-multiproperty", "regression"]) """Directories which expected to contain tasks that are not included in any category""" EXPECTED_SUBDIRECTORIES = set(["model", "todo", "properties", "original"]) """Directories that can appear inside directories with tasks but contain other files""" LINE_DIRECTIVE = re.compile('^#(line| [0-9]+) ') PREPROCESSOR_DIRECTIVE = re.compile('^ *#(define|include)') KNOWN_DIRECTORY_PROBLEMS = [ # TODO Please fix ("ldv-linux-4.2-rc1", "missing readme"), ("ldv-memsafety", "missing readme"), ("ldv-races", "missing readme"), ("ldv-validator-v0.8", "missing readme"), ("Juliet_Test", "unexpected subdirectory Juliet_Test_Suite_v1.3_for_C_Cpp"), ("termination-memory-alloca", "BradleyMannaSipma-2005CAV-Fig1-alloca_unknown-termination.c has no known verdict"), ("termination-memory-alloca", "BradleyMannaSipma-2005CAV-Fig1-alloca_unknown-termination.c.i has no known verdict"), ("termination-memory-alloca", "BradleyMannaSipma-2005CAV-Fig1-modified-alloca_unknown-termination.c has no known verdict"), ("termination-memory-alloca", "BradleyMannaSipma-2005CAV-Fig1-modified-alloca_unknown-termination.c.i has no known verdict"), ("termination-memory-alloca", "BradleyMannaSipma-2005ICALP-Fig1-alloca_unknown-termination.c has no known verdict"), ("termination-memory-alloca", "BradleyMannaSipma-2005ICALP-Fig1-alloca_unknown-termination.c.i has no known verdict"), ("termination-memory-alloca", "HarrisLalNoriRajamani-2010SAS-Fig2-alloca_unknown-termination.c has no known verdict"), ("termination-memory-alloca", "HarrisLalNoriRajamani-2010SAS-Fig2-alloca_unknown-termination.c.i has no known verdict"), ("termination-memory-alloca", "LarrazOliverasRodriguez-CarbonellRubio-2013FMCAD-Fig1-alloca_unknown-termination.c has no known verdict"), ("termination-memory-alloca", "LarrazOliverasRodriguez-CarbonellRubio-2013FMCAD-Fig1-alloca_unknown-termination.c.i has no known verdict"), ("ldv-memsafety", "unexpected subdirectory memleaks-notpreprocessed"), ("eca-rers2018", "unexpected file RERS_18_solutions_dot_petri.csv"), ("eca-rers2018", "unexpected file createYml.py"), ("nla-digbench-scaling", "unexpected file generate.py"), ("combinations", "unexpected file generate-tasks.py"), # historical ("ntdrivers", "missing readme"), ("ntdrivers-simplified", "missing readme"), ("openbsd-6.2", "unexpected subdirectory sources"), ("openbsd-6.2", "unexpected file prepreprocess.py"), ("aws-c-common", "unexpected file patch.diff"), ("aws-c-common", "unexpected file s_buf_belongs_to_pool-fix.diff"), ("aws-c-common", "unexpected file s_swap-fix.diff"), ("aws-c-common", "unexpected file s_remove_node-fix.diff"), ("aws-c-common", "unexpected file aws_byte_cursor_read-fix.diff"), ("aws-c-common", "unexpected file overflow-fix-1.diff"), ("aws-c-common", "unexpected file overflow-fix-2.diff"), ("aws-c-common", "unexpected file makeall"), ("aws-c-common", "unexpected file Makefile.sv-benchmarks"), ("aws-c-common", "unexpected file yml.sh"), ("aws-c-common", "unexpected file negate_assertion.sh"), ("xcsp", "unexpected file create_from_xmls.py"), ("xcsp", "unexpected file xcsp3_cpp_parser"), ("xcsp", "unexpected file xcsp3_cpp_parser.license"), ] KNOWN_BENCHMARK_FILE_PROBLEMS = [ ("forester-heap/dll-rb-cnstr_1-2.yml", "has expected undefined behavior but also a verdict for some other property"), ("forester-heap/sll-01-2.yml", "has expected undefined behavior but also a verdict for some other property"), ("forester-heap/sll-rb-cnstr_1-2.yml", "has expected undefined behavior but also a verdict for some other property"), ("heap-manipulation/tree-2.yml", "has expected undefined behavior but also a verdict for some other property"), ("list-ext-properties/list-ext.yml", "has expected undefined behavior but also a verdict for some other property"), ("list-ext-properties/list-ext_flag.yml", "has expected undefined behavior but also a verdict for some other property"), ("termination-crafted/NonTermination3-1.yml", "has expected undefined behavior but also a verdict for some other property"), ("termination-numeric/Binomial.yml", "has expected undefined behavior but also a verdict for some other property"), ("termination-numeric/TerminatorRec02.yml", "has expected undefined behavior but also a verdict for some other property"), ("termination-memory-alloca/Avery-2006FLOPS-Tabel1_true-alloca.yml", "has unknown property alloca"), ("termination-memory-alloca/aviad_true-alloca.yml", "has unknown property alloca"), ] KNOWN_SET_PROBLEMS = [ ] KNOWN_GLOBAL_PROBLEMS = [ ] class CheckFailed(Exception): pass class Checks(object): """Collections of checks that should be implemented in methods named "check*" in subclasses.""" def __init__(self, name, known_problems, quiet=False): super(Checks, self).__init__() self.name = name self._known_problems = known_problems self._quiet = quiet self._errors = False self._warnings = False def run(self): """Run all checks of this instance. Raise an exception if any check fails.""" attrs = [getattr(self, a) for a in dir(self)] tests = [a for a in attrs if callable(a) and a.__name__.startswith('check')] for test in tests: try: test() except CheckFailed: self._errors = True if not self._quiet and not (self._errors or self._warnings) and self.name: logging.info("%s: OK", self.name) if self._errors: raise CheckFailed() def error(self, msg, *args): """Mark the current check as failed.""" msg = msg % args error_id = (self.name, msg) if self.name else msg if error_id in self._known_problems: self._warnings = True log = logging.warning else: self._errors = True log = logging.error if self.name: log("%s: %s", self.name, msg) else: log("%s", msg) class DirectoryChecks(Checks): """Checks for a directory in the repository, e.g. about what files are in it. Also executes specific checks for each benchmark file in the directory.""" def __init__(self, path, all_patterns, *args, **kwargs): super(DirectoryChecks, self).__init__(known_problems=KNOWN_DIRECTORY_PROBLEMS, *args, **kwargs) self.path = path self.content = os.listdir(path) self.all_patterns = all_patterns def run(self): ok = True try: super(DirectoryChecks, self).run() except CheckFailed: ok = False for entry in self.content: if BENCHMARK_PATTERN.match(entry): dir_and_name = os.path.join(self.name, entry) try: TaskDefinitionFileChecks( path=os.path.join(self.path, entry), name=dir_and_name, contained_in_category=self.all_patterns.match(dir_and_name) ).run() except CheckFailed: ok = False if not ok: raise CheckFailed() def check_has_benchmarks(self): for entry in self.content: if BENCHMARK_PATTERN.match(entry): return self.error("contains no benchmark files") def check_has_subdirectories(self): for entry in self.content: if os.path.isdir(os.path.join(self.path, entry)) and not entry in EXPECTED_SUBDIRECTORIES: self.error("unexpected subdirectory %s", entry) def check_has_unexpected_file(self): for entry in self.content: if (not EXPECTED_FILE_PATTERN.match(entry) and not os.path.isdir(os.path.join(self.path, entry))): self.error("unexpected file %s", entry) def check_has_readme(self): for entry in self.content: if README_PATTERN.match(entry): return self.error("missing readme") def check_has_Makefile(self): if not 'Makefile' in self.content: self.error("missing Makefile") def check_files_contained_in_category(self): if self.name in UNUSED_DIRECTORIES: return for entry in self.content: if BENCHMARK_PATTERN.match(entry)\ and not self.all_patterns.match(os.path.join(self.name, entry)): self.error("%s is not contained in any category", entry) class FileChecks(Checks): """Checks about the content of a file.""" def __init__(self, path, *args, **kwargs): super(FileChecks, self).__init__(known_problems=KNOWN_SET_PROBLEMS, quiet=True, *args, **kwargs) with open(path, 'r') as f: self.lines = f.readlines() def check_file_has_no_windows_line_ending(self): if any('\r' in line for line in self.lines): self.error("Windows line endings") def check_file_has_no_VERIFIER_ASSUME_call(self): if any('__VERIFIER_assume' in line for line in self.lines): self.error( "Deprecated call '__VERIFIER_assume' found. " "Please replace with: if(!COND) { abort(); }" ) def check_file_has_no_VERIFIER_ERROR_call(self): if any('__VERIFIER_error' in line for line in self.lines): self.error( "Deprecated call '__VERIFIER_error' found. " "Please replace it by applying the following three steps:\n" "\n" "1. Insert the next two lines to the top of your c-file\n" " #include \n" " void reach_error() { assert(0); }\n" "\n" "2. Replace any '__VERIFIER_error()' by 'reach_error()'\n" "3. Create a preprocessed file (.i)-file and refer to that in the " "corresponding .yml file\n" ) class TaskDefinitionFileChecks(FileChecks): """Checks about the content of a single task definition .yml file.""" def __init__(self, path, contained_in_category, *args, **kwargs): super(TaskDefinitionFileChecks, self).__init__(path, *args, **kwargs) self.path = path self.directory = os.path.dirname(path) self.filename = os.path.basename(self.name) self.contained_in_category = contained_in_category if yaml: with open(self.path) as f: self.content = yaml.safe_load(f) def check_format_version(self): if not 'format_version' in self.content: self.error("has no format_version") elif not (self.content['format_version'] and type(self.content['format_version']) is str): self.error("has invalid format version") def check_input_files(self): if not self.content: return None input_files = self._get_input_files() properties_and_verdicts = self._get_properties() ok = True for f in input_files: f_path = os.path.join(self.directory, f) if not os.path.exists(f_path): self.error("references inaccessible file: " + f_path) else: try: InputFileChecks( path=f_path, name=f_path, contained_in_category=self.contained_in_category, properties_and_verdicts=properties_and_verdicts, ).run() except CheckFailed: ok = False if not ok: raise CheckFailed() def check_properties(self): prop_and_verdict = self._get_properties() PropertiesChecks( properties=prop_and_verdict, contained_in_category=self.contained_in_category, name=self.name, ).run() def check_language(self): if not self.content: return language = self.content.get("options", {}).get("language") if language != "C": self.error("unexpected language %s", language) def check_data_model(self): if not self.content: return data_model = self.content.get("options", {}).get("data_model") if not data_model: self.error("missing declaration of data_model") return if not data_model in DATA_MODELS: self.error("unknown data_model %s", data_model) return makefile_path = os.path.join(self.directory, "Makefile") if os.path.exists(makefile_path): archs = self._get_architecture_from_makefile(makefile_path) if len(archs) > 1: self.error("multiple architecture declarations in %s", makefile_path) arch = int(next(iter(archs), "32")) if DATA_MODELS.get(data_model) != arch: self.error( "Makefile of directory %s declares %d bit, but task has data model %s", self.directory, arch, data_model, ) def _get_input_files(self) -> list: if 'input_files' not in self.content: self.error("has no input file definition") return [] input_files = self.content['input_files'] if not input_files: self.error("has no input file definition") return [] if type(input_files) is not list: input_files = [input_files] return input_files def _get_properties(self) -> List[Tuple[str, str]]: """Return list of tuples (property, verdict) present in the task definition.""" if not "properties" in self.content or not self.content["properties"]: self.error("No properties") # Return empty dict instead of None so that calling check stops gracefully return dict() props = [] for prop_def in self.content['properties']: if type(prop_def) is not dict\ or 'property_file' not in prop_def: self.error("invalid property definition: %s" % prop_def) else: props.append(prop_def) prop_and_verdict = list() for prop_def in props: # Strip directories and ".prp" suffix prop = os.path.basename(prop_def['property_file']) if prop.endswith('.prp'): prop = prop[:-4] if 'expected_verdict' in prop_def: verdict = prop_def['expected_verdict'] else: verdict = None if "subproperty" in prop_def: prop = prop_def["subproperty"] prop_and_verdict.append((prop, verdict)) return prop_and_verdict @classmethod @functools.lru_cache() # avoid opening Makefile for each task def _get_architecture_from_makefile(cls, makefile_path): with open(makefile_path) as makefile: archs = [line.split(" ")[-1] for line in makefile if "CC.Arch" in line] return archs class PropertiesChecks(Checks): """Checks about the properties of task definitions.""" def __init__(self, properties, contained_in_category, *args, **kwargs): super(PropertiesChecks, self).__init__( known_problems=KNOWN_BENCHMARK_FILE_PROBLEMS, quiet=True, *args, **kwargs ) self.prop_and_verdict = properties self.prop_names = [prop for prop, verdict in properties] def check_no_unknown_property(self): [self.error("has unknown property " + p) for p in self.prop_names if p not in PROPERTIES] def check_no_duplicate_properties(self): counts = collections.Counter((prop for prop, verdict in self.prop_and_verdict)) [self.error("has duplicate property " + p) for p, c in counts.items() if c > 1] def check_no_multiple_memsafety_verdicts(self): distinct_prop_count = self.prop_names.count("valid-deref")\ + self.prop_names.count("valid-free")\ + self.prop_names.count("valid-memtrack")\ + self.prop_names.count("valid-memsafety") if distinct_prop_count > 1: self.error("has verdicts for multiple memsafety properties") def check_no_contradicting_verdicts(self): # Properties may also have no verdict (None), i.e., (not violates) != fulfills. Thus we need both methods prop_to_verdict = dict(self.prop_and_verdict) def has_prop(prop): return prop in prop_to_verdict def violates(prop): return prop in prop_to_verdict and prop_to_verdict[prop] is False def fulfills(prop): return prop in prop_to_verdict and prop_to_verdict[prop] is True if ( violates("valid-deref") or violates("valid-free") or violates("no-overflow") or violates("no-data-race") or violates("def-behavior") ): if ( any(fulfills(p) for p in prop_to_verdict) or len([p for p in prop_to_verdict if violates(p)]) > 1 ): self.error( "has expected undefined behavior but also a verdict for some other property") if violates("unreach-call") and fulfills("valid-memcleanup"): # calling the error function aborts the program, and if there is still any # allocated memory this would violate memcleanup. # We think this is probable (though not guaranteed), so we issue a warning. self.error("has reachable error location but claims to have no memory leaks (this is not necessarily wrong but should be checked)") if has_prop("coverage-error-call") and not violates("unreach-call"): self.error("claims that coverage-error-call is possible but has no reachable error location") if violates("termination") and has_prop("coverage-branches"): self.error("does not terminate but claims to have coverage-branches") def check_no_invalid_verdicts(self): for prop, verdict in self.prop_and_verdict: if prop.startswith("coverage-") and verdict is not None: self.error("has verdict for property " + prop) class InputFileChecks(FileChecks): """Checks about the contents of a single benchmark input file.""" def __init__( self, path, contained_in_category, properties_and_verdicts, *args, **kwargs ): super(InputFileChecks, self).__init__(path, *args, **kwargs) self.path = path self.filename = os.path.basename(self.path) self.contained_in_category = contained_in_category self.prop_and_verdict = properties_and_verdicts def check_file_has_no_line_directive(self): if any(LINE_DIRECTIVE.match(line) for line in self.lines): self.error( "line directives from preprocessor present, " "please use 'cpp -P' for preprocessing") def check_unreach_call_tasks_have_verifier_error(self): if not "unreach-call" in dict(self.prop_and_verdict): return if not self.contained_in_category: # Some such files have calls to reach_error inside #include return if not any("reach_error" in line for line in self.lines if not "void reach_error" in line): self.error("has property unreach-call, but does not call reach_error") def check_no_include_or_define(self): if not self.contained_in_category: return if any(PREPROCESSOR_DIRECTIVE.match(line) for line in self.lines): self.error("#define or #include statement present, please add preprocessed version") class SetFileChecks(Checks): """Checks about the .set files that define categories.""" def __init__(self, path, *args, **kwargs): super(SetFileChecks, self).__init__(known_problems=KNOWN_SET_PROBLEMS, *args, **kwargs) self.path = path self.base_path = os.path.dirname(path) self.category = os.path.basename(path) if self.category.endswith(".set"): self.category = self.category[:-4] self.patterns = list(read_set_file(path)) self.matched_files = [file for pattern in self.patterns for file in glob.iglob(os.path.join(self.base_path, pattern))] def check_all_patterns_match_files(self): for pattern in self.patterns: first_match = next(glob.iglob(os.path.join(self.base_path, pattern)), None) if not first_match: self.error("Pattern <%s> does not match anything.", pattern) def check_patterns_match_only_expected_files(self): unexpected_files = [ file for file in self.matched_files if not BENCHMARK_PATTERN.match(os.path.basename(file))] if unexpected_files: self.error("includes files %s that do have unexpected file names", unexpected_files) def read_set_file(path): with open(path) as f: for line in f: line = line.strip() if line and not line[0] == '#': yield line class GlobalChecks(Checks): def __init__(self, all_matched_files, base_path, *args, **kwargs): super(GlobalChecks, self).__init__( name=None, known_problems=KNOWN_GLOBAL_PROBLEMS, *args, **kwargs) self.all_matched_files = all_matched_files self.base_path = base_path def check_no_duplicate_filenames(self): all_filenames = collections.defaultdict(list) for file in self.all_matched_files: all_filenames[os.path.basename(file).lower()].append(file) for filename, files in all_filenames.items(): if len(files) > 1: self.error( "files %s have names that are easy to confuse", ", ".join(os.path.relpath(file, self.base_path) for file in files)) def check_no_duplicate_files(self): sizes = collections.defaultdict(list) for filename in self.all_matched_files: sizes[os.path.getsize(filename)].append(filename) for file_candidates in sizes.values(): if len(file_candidates) > 1: hashes = collections.defaultdict(list) for filename in file_candidates: hashes[hash_file(filename)].append(filename) for files in hashes.values(): if len(files) > 1: self.error( "files %s have the same content", ", ".join(os.path.relpath(file, self.base_path) for file in sorted(files))) def hash_file(filename, hash_alg=hashlib.sha1, block_size_factor=100000): with open(filename, 'rb') as f: hasher = hash_alg() block_size = hasher.block_size * block_size_factor while True: block = f.read(block_size) if not block: break hasher.update(block) return hasher.hexdigest() def _check_known_errors_consistent(main_dir): for i, _ in KNOWN_SET_PROBLEMS + KNOWN_DIRECTORY_PROBLEMS + KNOWN_BENCHMARK_FILE_PROBLEMS: path = os.path.join(main_dir, i) assert os.path.exists(path), "Whitelisted file doesn't exist: %s" % path def _run_directory_checks(directory, all_patterns, entry): try: DirectoryChecks(directory, all_patterns, entry).run() except CheckFailed: return False, set() else: return True, set() def _run_set_file_checks(set_file, all_patterns, entry): try: check = SetFileChecks(set_file, entry) check.run() except CheckFailed: return False, check.matched_files else: return True, check.matched_files def _check_benchmark_entry(entry, main_directory, all_patterns): path = os.path.join(main_directory, entry) if not (entry[0] == "." or entry == "bin" or entry.endswith("-todo")): if os.path.isdir(path) and not entry in IGNORED_DIRECTORIES: return _run_directory_checks(path, all_patterns, entry) elif entry.endswith(".set"): return _run_set_file_checks(path, all_patterns, entry) logging.debug("%s: skipped", entry) return True, set() def main(num_processes): if not yaml: logging.warning("Missing python-yaml, not all checks can be executed") main_directory = os.path.relpath(os.path.dirname(__file__) or '.') _check_known_errors_consistent(main_directory) entries = sorted(os.listdir(main_directory)) all_patterns_re = ( fnmatch.translate(pattern) for entry in entries if entry.endswith(".set") for pattern in read_set_file(os.path.join(main_directory, entry))) all_patterns = re.compile("^(" + "|".join(all_patterns_re) + ")$") check_func = functools.partial( _check_benchmark_entry, main_directory=main_directory, all_patterns=all_patterns ) with multiprocessing.Pool(num_processes) as p: check_results, matched_file_sets = zip(*p.map(check_func, entries)) ok = all(check_results) all_matched_files = set(f for f_set in matched_file_sets for f in f_set) try: GlobalChecks(all_matched_files, main_directory).run() except CheckFailed: ok = False if not ok: sys.exit(1) sys.exit(0) def _parse_arguments(args): parser = argparse.ArgumentParser() parser.add_argument( "--num-processes", type=int, default=4, help="Number of concurrent processes to use", ) return parser.parse_args(args) if __name__ == "__main__": args = _parse_arguments(sys.argv[1:]) main(num_processes=args.num_processes)