diff --git a/BUILD b/BUILD index 4300013..ad67678 100644 --- a/BUILD +++ b/BUILD @@ -1,4 +1,4 @@ -load("@bazel_python//:bazel_python.bzl", "bazel_python_interpreter") +load("@bazel_python//:bazel_python.bzl", "bazel_python_coverage_report", "bazel_python_interpreter") bazel_python_interpreter( name = "bazel_python_venv", @@ -36,21 +36,10 @@ genrule( visibility = ["//:__subpackages__"], ) -# For generating the coverage report. -sh_binary( +bazel_python_coverage_report( name = "coverage_report", - srcs = ["coverage_report.sh"], - deps = [":_dummy_coverage_report"], -) - -# This is only to get bazel_python_venv as a data dependency for -# coverage_report above. For some reason, this doesn't work if we directly put -# it on the sh_binary. This is a known issue: -# https://github.com/bazelbuild/bazel/issues/1147#issuecomment-428698802 -sh_library( - name = "_dummy_coverage_report", - srcs = ["coverage_report.sh"], - data = ["//:bazel_python_venv"], + code_paths = ["pysyrenn/*/*.py"], + test_paths = ["pysyrenn/*/tests/*"], ) # For wheel-ifying the Python code. diff --git a/Makefile b/Makefile index 2c65029..803bdb1 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ .PHONY: pysyrenn_coverage start_server acas_lines_experiment \ integrated_gradients_experiment linearity_hypothesis_experiment \ exactline_experiments toy_examples_experiment acas_planes_experiment \ - model_checking_experiment netpatch_experiment plane_experiments \ + model_checking_experiment plane_experiments \ all_experiments pysyrenn_coverage: @@ -40,12 +40,8 @@ model_checking_experiment: @echo "Running the Bounded Model Checking experiment" bazel run experiments:model_checking -netpatch_experiment: - @echo "Running the Network Patching experiment" - bazel run experiments:netpatch - plane_experiments: toy_examples_experiment acas_planes_experiment \ - model_checking_experiment netpatch_experiment + model_checking_experiment # Run experiments from [1] and [2] all_experiments: exactline_experiments plane_experiments diff --git a/README.md b/README.md index e073a12..09f4be5 100644 --- a/README.md +++ b/README.md @@ -6,63 +6,36 @@ one- and two-dimensional input "restriction domains of interest." In particular, this repository contains the code described and utilized in the papers: -**"Computing Linear Restrictions of Neural Networks" ([1])** - -[Conference on Neural Information Processing Systems (NeurIPS) -2019](https://neurips.cc/Conferences/2019) - -Links: -[Paper](https://papers.nips.cc/paper/9562-computing-linear-restrictions-of-neural-networks), -[Slides](https://zenodo.org/record/3520104), -[Poster](https://zenodo.org/record/3520102) -``` -@incollection{sotoudeh:linear_restrictions, - title = {Computing Linear Restrictions of Neural Networks}, - author = {Sotoudeh, Matthew and Thakur, Aditya V}, - booktitle = {Advances in Neural Information Processing Systems 32}, - editor = {H. Wallach and H. Larochelle and A. Beygelzimer and F. d\textquotesingle Alch\'{e}-Buc and E. Fox and R. Garnett}, - pages = {14132--14143}, - year = {2019}, - publisher = {Curran Associates, Inc.}, - url = {http://papers.nips.cc/paper/9562-computing-linear-restrictions-of-neural-networks.pdf} -} -``` - -**"A Symbolic Neural Network Representation and its Application to -Understanding, Verifying, and Patching Networks" ([2])** - -Links: [Preprint](https://arxiv.org/abs/1908.06223) -``` -@article{sotoudeh:symbolic_networks, - author = {Matthew Sotoudeh and Aditya V. 
Thakur}, - title = {A Symbolic Neural Network Representation and its Application to - Understanding, Verifying, and Patching Networks}, - journal = {CoRR}, - volume = {abs/1908.06223}, - year = {2019}, - url = {https://arxiv.org/abs/1908.06223}, - archivePrefix = {arXiv}, - eprint = {1908.06223}, -} -``` - -**"Correcting Deep Neural Networks with Small, Generalizing Patches ([3])** - -[NeurIPS 2019 Workshop on Safety and Robustness in Decision -Making](https://sites.google.com/view/neurips19-safe-robust-workshop) - -Links: [Paper](https://drive.google.com/file/d/0B3mY6u_lryzdNTFaZnkzUzhuRDNnZG9rdDV5aDA2WFpBOWhN/view) -``` -@inproceedings{sotoudeh:correcting_dnns_srdm19, - title={Correcting Deep Neural Networks with Small, Generalizing Patches}, - author={Sotoudeh, Matthew and Thakur, Aditya V}, - booktitle={NeurIPS 2019 Workshop on Safety and Robustness in Decision Making}, - year={2019} -} -``` - -We will refer to these as ``[1]``, ``[2]``, and ``[3]`` respectively in -comments and code. +1. **Computing Linear Restrictions of Neural Networks**, Conference on Neural + Information Processing Systems (NeurIPS) 2019 + + Links: + [Paper](https://papers.nips.cc/paper/9562-computing-linear-restrictions-of-neural-networks), + [Slides](https://zenodo.org/record/3520104), + [Poster](https://zenodo.org/record/3520102), + [BibTeX](http://thakur.cs.ucdavis.edu/bibliography/NeurIPS2019.html) +2. **A Symbolic Neural Network Representation and its Application to + Understanding, Verifying, and Patching Networks** + + Links: [arXiv](https://arxiv.org/abs/1908.06223) +3. **Correcting Deep Neural Networks with Small, Generalizing Patches**, + NeurIPS 2019 Workshop on Safety and Robustness in Decision Making + + Links: + [Paper](https://drive.google.com/file/d/0B3mY6u_lryzdNTFaZnkzUzhuRDNnZG9rdDV5aDA2WFpBOWhN/view), + [BibTeX](http://thakur.cs.ucdavis.edu/bibliography/SRDM2019.html) +4. **SyReNN: A Tool for Analyzing Deep Neural Networks**, Tools and Algorithms + for the Construction and Analysis of Systems (TACAS), 2021 + + Links: + [Paper](http://dx.doi.org/10.1007/978-3-030-72013-1_15), + [BibTeX](http://thakur.cs.ucdavis.edu/bibliography/TACAS2021.html) +5. **Provable Repair of Deep Neural Networks**, Programming Language Design and + Implementation (PLDI), 2021 + + Links: + [Preprint](https://arxiv.org/abs/2104.04413), + [BibTeX](http://thakur.cs.ucdavis.edu/bibliography/PLDI2021.html) ## Notation and Naming In the papers, we often use mathematical notation that can be hard/impossible @@ -135,7 +108,8 @@ issues with Bazel and generated file permissions. **NOTE:** Docker-in-docker does not seem to currently be working. This means some builds will fail, although you should be able to run `make start_server`, -`bazel test //pysyrenn/...`, and `bazel test //syrenn_server/...`. +`bazel test //pysyrenn/...`, and `bazel test //syrenn_server/...`. See [this +issue](https://github.com/95616ARG/SyReNN/issues/7) for a suggested fix. 
### Configuration There are two major things to configure before using the code: diff --git a/WORKSPACE b/WORKSPACE index 09c71ae..e48cbef 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -5,7 +5,7 @@ load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") git_repository( name = "bazel_python", - commit = "5edcf3b97c4c77b654af177bfa27558d9b88b52f", + commit = "f99ab8738dced7257c97dc719457f50a601ed84c", remote = "https://github.com/95616ARG/bazel_python.git", ) diff --git a/coverage_report.sh b/coverage_report.sh deleted file mode 100755 index 87bdf6c..0000000 --- a/coverage_report.sh +++ /dev/null @@ -1,29 +0,0 @@ -COVTEMP=$PWD/coverage_tmp -rm -rf $COVTEMP -mkdir $COVTEMP - -source bazel_python_venv_installed/bin/activate - -# Go to the main workspace directory and run the coverage-report. -pushd $BUILD_WORKSPACE_DIRECTORY - -# We find all .cov files, which should be generated by tests/helpers.py -cov_zips=$(ls bazel-out/*/testlogs/pysyrenn/*/tests/*/test.outputs/outputs.zip) -i=1 -for cov_zip in $cov_zips -do - echo $cov_zip - unzip -p $cov_zip coverage.cov > $COVTEMP/$i.cov - i=$((i+1)) -done - -# Remove old files -rm -rf .coverage htmlcov - -# Then we build a new .coverage as well as export to HTML -python3 -m coverage combine $COVTEMP/*.cov -python3 -m coverage html pysyrenn/*/*.py - -# Remove temporaries and go back to where Bazel started us. -rm -r $COVTEMP -popd diff --git a/experiments/BUILD b/experiments/BUILD index beef753..3310228 100644 --- a/experiments/BUILD +++ b/experiments/BUILD @@ -90,16 +90,6 @@ py_binary( ], ) -py_binary( - name = "netpatch", - srcs = ["netpatch.py"], - deps = [ - ":acas_planes", - ":polar_image", - "//pysyrenn", - ], -) - py_binary( name = "toy_examples", srcs = ["toy_examples.py"], diff --git a/experiments/experiment.py b/experiments/experiment.py index 1448dd4..4004f2e 100644 --- a/experiments/experiment.py +++ b/experiments/experiment.py @@ -10,7 +10,7 @@ import numpy as np from PIL import Image import imageio -from pysyrenn import Network, MaskingNetwork +from pysyrenn import Network from syrenn_proto import syrenn_pb2 as syrenn_pb class Experiment: @@ -318,7 +318,7 @@ def write_pb(path, pb_serialized): elif artifact_type == "matplotlib": filename += ".pdf" artifact.savefig(filename) - elif artifact_type in ("network", "masking_network"): + elif artifact_type in "network": filename += ".pb" write_pb(filename, artifact.serialize()) elif artifact_type == "svg": @@ -378,9 +378,6 @@ def read_pb(path, pb_type): if artifact["type"] == "network": return Network.deserialize( read_pb(artifact["path"], syrenn_pb.Network)) - if artifact["type"] == "masking_network": - return MaskingNetwork.deserialize( - read_pb(artifact["path"], syrenn_pb.MaskingNetwork)) raise NotImplementedError @staticmethod diff --git a/experiments/model_checking.py b/experiments/model_checking.py index 18b4bc8..6055db4 100644 --- a/experiments/model_checking.py +++ b/experiments/model_checking.py @@ -1,5 +1,6 @@ """Methods for performing bounded model checking on neural net controllers. 
""" +from collections import defaultdict import itertools from timeit import default_timer as timer import numpy as np @@ -97,7 +98,7 @@ def transition_plane(cls, model, network, plane, transition_partitions): """ A_ub, b_ub = cls.facet_enumeration(plane) resulting_planes = [] - for pre_plane in transition_partitions: + for pre_plane in transition_partitions.possibly_intersecting(plane): pre_intersection = cls.compute_intersection(pre_plane, plane) if pre_intersection: actions = network.compute(pre_intersection) @@ -121,13 +122,15 @@ def in_h_rep(plane, faces): value = product.T + faces[:, -1] # (n_points x n_constraints) return np.all(value <= 0) - def run_for_model(self, model_name, timeout_minutes): + def run_for_model(self, model_name, timeout_minutes, + eval_network=None, write_name=None): """Runs the BMC for a particular model. """ network = self.load_network("vrl_%s" % model_name) + eval_network = eval_network or network model = VRLModel(model_name) - out_file = self.begin_csv("%s/data" % model_name, + out_file = self.begin_csv(write_name or "%s/data" % model_name, ["Step", "Cumulative Time", "Counter Example?"]) safe = model.safe_set(as_box=False) @@ -140,9 +143,9 @@ def run_for_model(self, model_name, timeout_minutes): safe_transformed = network.transform_planes(disjunctive_safe, compute_preimages=True, include_post=False) - safe_transitions = [] - for transformed_plane in safe_transformed: - safe_transitions.extend(transformed_plane) + safe_transitions = TransformerLUT([ + vpolytope for upolytope in safe_transformed + for vpolytope in upolytope]) time_taken += (timer() - start_time) planes = [model.init_set(as_vertices=True)] @@ -153,12 +156,13 @@ def run_for_model(self, model_name, timeout_minutes): new_planes = [] for plane in planes: new_planes.extend( - self.transition_plane(model, network, plane, + self.transition_plane(model, eval_network, plane, safe_transitions)) new_planes = list(map(np.array, new_planes)) print("Before removing in init_x/y:", len(new_planes)) - new_planes = [plane for plane in new_planes + new_planes = [np.array(plane) for plane in new_planes if not self.in_h_rep(plane, init)] + print("After removing:", len(new_planes)) found_bad_state = False for plane in new_planes: if not self.in_h_rep(plane, safe): @@ -194,5 +198,70 @@ def analyze(self): """ return False +class TransformerLUT: + """Stores a set of polytopes. + + Optimized to quickly over-approximate the set of polytopes which may + intersect with a given polytope. + """ + def __init__(self, polytopes=None): + """Initializes the TransformerLUT. + """ + polytopes = polytopes or [] + self.polytope_lut = defaultdict(set) + self.all_polytopes = set() + if polytopes: + self.initialize_stats(polytopes) + else: + self.grid_lb = np.array([-1., -1.]) + self.grid_ub = np.array([1., 1.]) + self.grid_delta = np.array([1., 1.]) + for polytope in polytopes: + self.register_polytope(polytope) + + def initialize_stats(self, polytopes): + """Sets the partitioning used by the TransformerLUT hash table. 
+ """ + self.grid_lb = np.array([np.inf, np.inf]) + self.grid_ub = np.array([-np.inf, -np.inf]) + deltas = [] + self.grid_delta = np.array([np.inf, np.inf]) + for polytope in polytopes: + box_lb = np.min(polytope, axis=0) + box_ub = np.max(polytope, axis=0) + self.grid_lb = np.minimum(self.grid_lb, box_lb) + self.grid_ub = np.maximum(self.grid_ub, box_ub) + deltas.append(box_ub - box_lb) + deltas = np.array(deltas) + self.grid_delta = np.percentile(deltas, 25, axis=0) + + def keys_for(self, polytope): + """Returns the keys in the hash table corresponding to @polytope. + """ + keys = set() + box_lb = np.min(polytope, axis=0) + box_ub = np.max(polytope, axis=0) + min_key = np.floor((box_lb - self.grid_lb) / self.grid_delta).astype(int) + max_key = np.ceil((box_ub - self.grid_lb) / self.grid_delta).astype(int) + for x in range(min_key[0], max_key[0] + 1): + for y in range(min_key[1], max_key[1] + 1): + yield (x, y) + + def register_polytope(self, polytope): + """Adds @polytope to the TransformerLUT. + """ + polytope = tuple(map(tuple, polytope)) + self.all_polytopes.add(polytope) + for key in self.keys_for(polytope): + self.polytope_lut[key].add(polytope) + + def possibly_intersecting(self, polytope): + """Returns a superset of the polytopes which may intersect @polytope. + """ + polytopes = set() + for key in self.keys_for(polytope): + polytopes |= self.polytope_lut[key] + return sorted(polytopes) + if __name__ == "__main__": ModelCheckingExperiment("model_checking").main() diff --git a/experiments/netpatch.py b/experiments/netpatch.py deleted file mode 100644 index 88fa95d..0000000 --- a/experiments/netpatch.py +++ /dev/null @@ -1,222 +0,0 @@ -"""Methods for patching the ACAS Xu network with ExactLine. -""" -import numpy as np -from pysyrenn import NetPatcher, PlanesClassifier -from experiments.acas_planes import ACASPlanesExperiment -from polar_image import PolarImage - -class NetPatchExperiment(ACASPlanesExperiment): - """Experiment that attempts to apply 3 patches to the ACAS Xu network. - - NOTE: Region of interest is specified in experiment/acas_lines.py. - """ - def spec_pockets(self, inputs): - """Spec function for getting rid of the "pockets" of SR/SL. - """ - labels = np.argmax(self.network.compute(inputs), axis=1) - inputs = self.reset(inputs) - velocity = inputs[0][-1] - assert np.isclose(velocity, 150) or np.isclose(velocity, 200) - is_generalized = np.isclose(velocity, 200) - if not is_generalized: - for i, point in enumerate(inputs): - # Change strong right to weak right. - if 1.0 < point[1] < 1.5 and 2750 < point[0] < 3750 and labels[i] == 4: - labels[i] = 2 - # Change strong left to weak left. - if -1.5 < point[1] < -1.1 and 2250 < point[0] < 3500 and labels[i] == 3: - labels[i] = 1 - else: - for i, point in enumerate(inputs): - # Change strong right to weak right. - if 0.94 < point[1] < 2.0 and 2750 < point[0] < 4500 and labels[i] == 4: - labels[i] = 2 - # Change strong left to weak left. - if -2 < point[1] < -1.05 and 2250 < point[0] < 4500 and labels[i] == 3: - labels[i] = 1 - return labels - - def spec_bands(self, inputs): - """Spec function for getting rid of the "back bands" behind the plane. - """ - labels = np.argmax(self.network.compute(inputs), axis=1) - inputs = self.reset(inputs) - velocity = inputs[0][-1] - assert np.isclose(velocity, 150) or np.isclose(velocity, 200) - is_generalized = np.isclose(velocity, 200) - # Fixes the "back-bands." 
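Editor's note on the `TransformerLUT` added to `experiments/model_checking.py` above: it buckets each V-polytope's bounding box into a coarse grid so that `possibly_intersecting` only returns registered polytopes sharing a grid cell with the query. A minimal usage sketch follows; the import path is an assumption (the class is defined in `experiments/model_checking.py`), and the polytopes here are illustrative.

```python
# Sketch only (not part of the patch): exercises the TransformerLUT added
# above. The import path is an assumption.
import numpy as np
from experiments.model_checking import TransformerLUT

# Three 2D V-polytopes (arrays of vertices).
square = np.array([[0., 0.], [1., 0.], [1., 1.], [0., 1.]])
triangle = np.array([[0.5, 0.5], [1.5, 0.5], [1.0, 1.5]])
far_box = np.array([[10., 10.], [11., 10.], [11., 11.], [10., 11.]])

lut = TransformerLUT([square, triangle, far_box])

# possibly_intersecting() returns a superset of the truly-intersecting
# polytopes (per its docstring): anything whose bounding box shares a grid
# cell with the query. Here far_box lives in distant cells and is filtered
# out, while square and triangle are returned as candidates.
query = np.array([[0.25, 0.25], [0.75, 0.25], [0.5, 0.75]])
candidates = lut.possibly_intersecting(query)
print(len(candidates), "of 3 registered polytopes are candidates")
```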
- if not is_generalized: - for i, point in enumerate(inputs): - if np.degrees(point[1]) > 0 and labels[i] == 1: - # Change weak left to weak right. - labels[i] = 2 - else: - for i, point in enumerate(inputs): - dist = point[0] - rad = point[1] - if (2.0 < rad < np.radians(+180) and - 0 <= dist <= 5000 and - not (950 <= dist <= 1100 and 2 <= rad <= 2.55) and - labels[i] == 1): - # Change weak left to weak right. - labels[i] = 2 - return labels - - def spec_symmetry(self, inputs): - """Spec function for making the SL/SR boundary symmetric. - """ - labels = np.argmax(self.network.compute(inputs), axis=1) - inputs = self.reset(inputs) - for i, point in enumerate(inputs): - if np.degrees(point[1]) > 0 and labels[i] == 3: - labels[i] = 4 - return labels - - def run_for_spec(self, spec_name, spec_fn, layer_index, steps): - """Patches the network for a given spec and stores the patched network. - - We also record: - 1. The spec for *both* the patching and generalizing regions. - 2. The polytopes from the SyReNN partitioning (as these are the same - for the patched network) for *both* the patching and generalizing - regions. - 3. The patched networks for all iterations of the patching algorithm. - """ - # First, we use NetPatcher.from_spec_function to extract the desired - # constraints for the patched region. - patcher = NetPatcher.from_spec_function( - self.network, layer_index, self.patch_region, spec_fn) - key = "%s/spec" % spec_name - patch_spec = (patcher.inputs, patcher.labels) - self.record_artifact(patch_spec, key, "pickle") - # Then get the SyReNN. - syrenn = self.network.transform_plane(self.patch_region, - compute_preimages=True, - include_post=False) - # We only need the preimages. - key = "%s/syrenn" % spec_name - self.record_artifact(syrenn, key, "pickle") - - # Then we do the same for the generalized region. - generalized_patcher = NetPatcher.from_spec_function( - self.network, layer_index, - self.generalized_patch_region, spec_fn) - generalized_patch_spec = (generalized_patcher.inputs, - generalized_patcher.labels) - key = "%s/spec_generalized" % spec_name - self.record_artifact(generalized_patch_spec, key, "pickle") - # Then get the SyReNN. - syrenn = self.network.transform_plane(self.generalized_patch_region, - compute_preimages=True, - include_post=False) - # We only need the preimages. - key = "%s/syrenn_generalized" % spec_name - self.record_artifact(syrenn, key, "pickle") - - # Now, we do the actual patching and record the patched networks. - patcher.compute(steps=steps) - data = self.begin_csv("%s/data" % spec_name, ["steps", "network", "time"]) - for step, patched_net in enumerate(patcher.intermediates): - key = "%s/step_%03d" % (spec_name, step) - self.record_artifact(patched_net, key, "masking_network") - self.write_csv(data, { - "steps": step, - "network": key, - "time": patcher.times[step] - }) - - def run(self): - """Patches the network for a variety of specs and records the results. - """ - # We only ever use one network for this Experiment, so we pre-load the - # network, input helpers, and patch regions. 
- self.network = self.load_network("acas_1_1") - input_helpers = self.load_input_data("acas") - self.process = input_helpers["process"] - self.reset = input_helpers["reset"] - - self.patch_region = self.process(self.region_of_interest()) - - self.own_velocity += 50 - self.intruder_velocity += 50 - self.generalized_patch_region = self.process(self.region_of_interest()) - - specs = [ - ("spec_pockets", self.spec_pockets, 10), - ("spec_bands", self.spec_bands, 10), - ("spec_symmetry", self.spec_symmetry, 12) - ][:int(input("Number of specs (1-3): "))] - steps = int(input("Number of steps (per spec): ")) - self.record_artifact(list(zip(*specs))[0], "specs", "pickle") - for spec_name, spec_fn, layer in specs: - self.run_for_spec(spec_name, spec_fn, layer, steps) - - def plot_from_pre_syrenn(self, pre_syrenn, network, reset, key): - """Given the SyReNN partitioning of an ACAS network, plot it. - - Note that @pre_syrenn should be *just* the partitioning, i.e. - polytopes, not the post-images. - """ - post_syrenn = list(map(network.compute, pre_syrenn)) - syrenn = list(zip(pre_syrenn, post_syrenn)) - classifier = PlanesClassifier.from_syrenn([syrenn]) - - polytopes, labels = classifier.compute()[0] - polytopes = [reset(polytope)[:, :2] for polytope in polytopes] - color_labels = list(map(self.color, labels)) - - max_rho = np.sqrt(max(self.min_x**2, self.max_x**2) + - max(self.min_y**2, self.max_y**2)) - - polar_plot = PolarImage((2*max_rho, 2*max_rho), (1001, 1001)) - polar_plot.plot_polygons(polytopes, color_labels, 30) - polar_plot.circle_frame(max_rho, "#ffffff") - self.record_artifact(polar_plot.image, key, "rgb_image") - - def compute_percent_met(self, network, spec): - """Computes the percent of @spec is met by @network. - - @spec should be a tuple (processed-inputs, corresponding-labels). - """ - inputs, labels = spec - real_labels = np.argmax(network.compute(inputs), axis=1) - return np.count_nonzero(labels == real_labels) / len(labels) - - def analyze(self): - """Plots the patched networks and records constraints met. - """ - specs = self.read_artifact("specs") - reset = self.load_input_data("acas")["reset"] - for spec in specs: - patch_met = self.begin_csv("%s/patch_met" % spec, - ["Step", "Percent Met"]) - patch_spec = self.read_artifact("%s/spec" % spec) - patch_syrenn = self.read_artifact("%s/syrenn" % spec) - - gen_met = self.begin_csv("%s/gen_met" % spec, - ["Step", "Percent Met"]) - gen_spec = self.read_artifact("%s/spec_generalized" % spec) - gen_syrenn = self.read_artifact( - "%s/syrenn_generalized" % spec) - - data = self.read_csv("%s/data" % spec) - for step_data in data: - steps = int(step_data["steps"]) - network = self.read_artifact(step_data["network"]) - print("Analyzing for step:", steps) - # First, compute how many of the specs are met. - pct_met = self.compute_percent_met(network, patch_spec) - self.write_csv(patch_met, - {"Step": steps, "Percent Met": pct_met}) - pct_met = self.compute_percent_met(network, gen_spec) - self.write_csv(gen_met, - {"Step": steps, "Percent Met": pct_met}) - # Then, plot them. 
- self.plot_from_pre_syrenn(patch_syrenn, network, reset, - "%s/patch_%03d" % (spec, steps)) - self.plot_from_pre_syrenn(gen_syrenn, network, reset, - "%s/gen_%03d" % (spec, steps)) - return True - -if __name__ == "__main__": - NetPatchExperiment("netpatch").main() diff --git a/pip_info/__metadata__.py b/pip_info/__metadata__.py index a08fc75..d87a6f9 100644 --- a/pip_info/__metadata__.py +++ b/pip_info/__metadata__.py @@ -1,4 +1,4 @@ -__version__ = "1.0" +__version__ = "1.1" __title__ = "pysyrenn" __description__ = "Symbolic Representations for Neural Networks" __uri__ = "https://github.com/95616ARG/SyReNN" diff --git a/pysyrenn/frontend/BUILD b/pysyrenn/frontend/BUILD index c52a68a..fc3de99 100644 --- a/pysyrenn/frontend/BUILD +++ b/pysyrenn/frontend/BUILD @@ -144,6 +144,7 @@ py_library( ], ) +# NOTE: .deserialize(...) requires you also include :network. py_library( name = "concat_layer", srcs = ["concat_layer.py"], diff --git a/pysyrenn/frontend/concat_layer.py b/pysyrenn/frontend/concat_layer.py index a64cad1..38b066d 100644 --- a/pysyrenn/frontend/concat_layer.py +++ b/pysyrenn/frontend/concat_layer.py @@ -20,6 +20,14 @@ def serialize(self): full_name = "CONCAT_ALONG_{}".format(self.name) return transformer_pb.ConcatLayerData.ConcatAlong.Value(full_name) + @classmethod + def deserialize(cls, serialized): + if serialized == 1: + return ConcatAlong.CHANNELS + if serialized == 2: + return ConcatAlong.FLAT + raise NotImplementedError + # pylint: disable=abstract-method class ConcatLayer(NetworkLayer): """Represents a concat layer in a network. @@ -68,3 +76,15 @@ def serialize(self): ]) serialized.concat_data.concat_along = self.concat_along.serialize() return serialized + + @classmethod + def deserialize(cls, serialized): + """Deserializes the layer. + """ + if serialized.WhichOneof("layer_data") == "concat_data": + from pysyrenn.frontend.network import Network + layers = Network.deserialize_layers(serialized.concat_data.layers) + concat_along = ConcatAlong.deserialize( + serialized.concat_data.concat_along) + return cls(layers, concat_along) + return None diff --git a/pysyrenn/frontend/conv2d_layer.py b/pysyrenn/frontend/conv2d_layer.py index a6bb444..f5ad6e8 100644 --- a/pysyrenn/frontend/conv2d_layer.py +++ b/pysyrenn/frontend/conv2d_layer.py @@ -16,8 +16,12 @@ def __init__(self, window_data, filter_weights, biases): self.filter_weights = torch.tensor(filter_weights, dtype=torch.float32) self.biases = torch.tensor(biases, dtype=torch.float32) - def compute(self, inputs): + def compute(self, inputs, jacobian=False): """Computes the 2D convolution given an input vector. + + If @jacobian=True, it only computes the homogeneous portion (i.e., does + not add biases). This can be used to compute the product of the + Jacobian of the layer with its inputs. 
""" is_np = isinstance(inputs, np.ndarray) if is_np: @@ -31,6 +35,8 @@ def compute(self, inputs): # ERAN gives us HWIcOc, but Pytorch takes OcIcHW filters = self.filter_weights.permute((3, 2, 0, 1)) biases = self.biases + if jacobian: + biases = torch.zeros_like(biases) output = torch.nn.functional.conv2d(inputs, filters, biases, self.window_data.strides, @@ -39,7 +45,7 @@ def compute(self, inputs): output = output.permute((0, 2, 3, 1)) output = output.reshape((output_batches, -1)) if is_np: - return output.numpy() + return output.detach().numpy() return output def serialize(self): diff --git a/pysyrenn/frontend/fullyconnected_layer.py b/pysyrenn/frontend/fullyconnected_layer.py index 1b27224..6ee8dbe 100644 --- a/pysyrenn/frontend/fullyconnected_layer.py +++ b/pysyrenn/frontend/fullyconnected_layer.py @@ -16,13 +16,19 @@ def __init__(self, weights, biases): if biases is not None: self.biases = torch.tensor(biases, dtype=torch.float32) - def compute(self, inputs): + def compute(self, inputs, jacobian=False): """Returns the output of the layer on @inputs. + + If @jacobian=True, it only computes the homogeneous portion (i.e., does + not add biases). This can be used to compute the product of the + Jacobian of the layer with its inputs. """ is_np = isinstance(inputs, np.ndarray) if is_np: inputs = torch.tensor(inputs, dtype=torch.float32) - output = torch.mm(inputs, self.weights) + self.biases + output = torch.mm(inputs, self.weights) + if not jacobian: + output += self.biases if is_np: return output.numpy() return output diff --git a/pysyrenn/frontend/maxpool_layer.py b/pysyrenn/frontend/maxpool_layer.py index b2641b7..684cb99 100644 --- a/pysyrenn/frontend/maxpool_layer.py +++ b/pysyrenn/frontend/maxpool_layer.py @@ -15,8 +15,12 @@ def __init__(self, window_data): assert len(window_data.window_shape) == 2 self.window_data = window_data - def compute(self, inputs): + def compute(self, inputs, return_indices=False): """Computes the MaxPool given an input vector. + + If @return_indices=True, it returns a tuple (output, indices) where + @indices specifies which input index each output came from. It can + later be used in self.from_indices. """ is_np = isinstance(inputs, np.ndarray) if is_np: @@ -30,10 +34,47 @@ def compute(self, inputs): output = torch.nn.functional.max_pool2d(inputs, self.window_data.window_shape, self.window_data.strides, - self.window_data.padding) + self.window_data.padding, + return_indices=return_indices) + if return_indices: + output, indices = output # Pytorch gives us NCHW, ERAN uses NHWC output = output.permute((0, 2, 3, 1)) output = output.reshape((output_batches, -1)) + if is_np: + if return_indices: + return output.numpy(), indices.numpy() + return output.numpy() + if return_indices: + return output, indices + return output + + def from_indices(self, inputs, indices): + """Computes the MaxPool given an input vector and indices of the maxes. + + This can be used to implement a Decoupled DNN, where you want to do + effectively a MaxPool, but use the maxes computed based on a different + input vector. 
+ """ + is_np = isinstance(inputs, np.ndarray) + if is_np: + inputs = torch.tensor(inputs, dtype=torch.float32) + indices = torch.tensor(indices, dtype=torch.long) + + # ERAN gives us NHWC + inputs = self.window_data.unflatten_inputs(inputs) + output_batches = inputs.shape[0] + # PyTorch takes NCHW + inputs = inputs.permute((0, 3, 1, 2)) + + # https://discuss.pytorch.org/t/maxpool2d-indexing-order/8281/3 + inputs = torch.flatten(inputs, 2) + output = torch.gather(inputs, 2, torch.flatten(indices, 2)) + output = output.view(indices.shape) + + output = output.permute((0, 2, 3, 1)) + output = output.reshape((output_batches, -1)) + if is_np: return output.numpy() return output diff --git a/pysyrenn/frontend/tests/BUILD b/pysyrenn/frontend/tests/BUILD index 2f89338..ed7babd 100644 --- a/pysyrenn/frontend/tests/BUILD +++ b/pysyrenn/frontend/tests/BUILD @@ -1,16 +1,10 @@ -py_library( - name = "helpers", - srcs = ["helpers.py"], - visibility = ["//:__subpackages__"], -) - py_test( name = "relu_layer", size = "small", srcs = ["relu_layer.py"], deps = [ - ":helpers", "//pysyrenn/frontend:relu_layer", + "@bazel_python//:pytest_helper", ], ) @@ -19,8 +13,8 @@ py_test( size = "small", srcs = ["hard_tanh_layer.py"], deps = [ - ":helpers", "//pysyrenn/frontend:hard_tanh_layer", + "@bazel_python//:pytest_helper", ], ) @@ -29,8 +23,8 @@ py_test( size = "small", srcs = ["argmax_layer.py"], deps = [ - ":helpers", "//pysyrenn/frontend:argmax_layer", + "@bazel_python//:pytest_helper", ], ) @@ -39,8 +33,8 @@ py_test( size = "small", srcs = ["fullyconnected_layer.py"], deps = [ - ":helpers", "//pysyrenn/frontend:fullyconnected_layer", + "@bazel_python//:pytest_helper", ], ) @@ -49,8 +43,8 @@ py_test( size = "small", srcs = ["normalize_layer.py"], deps = [ - ":helpers", "//pysyrenn/frontend:normalize_layer", + "@bazel_python//:pytest_helper", ], ) @@ -59,9 +53,9 @@ py_test( size = "small", srcs = ["averagepool_layer.py"], deps = [ - ":helpers", "//pysyrenn/frontend:averagepool_layer", "//pysyrenn/frontend:strided_window_data", + "@bazel_python//:pytest_helper", ], ) @@ -70,9 +64,9 @@ py_test( size = "small", srcs = ["maxpool_layer.py"], deps = [ - ":helpers", "//pysyrenn/frontend:maxpool_layer", "//pysyrenn/frontend:strided_window_data", + "@bazel_python//:pytest_helper", ], ) @@ -81,9 +75,9 @@ py_test( size = "small", srcs = ["conv2d_layer.py"], deps = [ - ":helpers", "//pysyrenn/frontend:conv2d_layer", "//pysyrenn/frontend:strided_window_data", + "@bazel_python//:pytest_helper", ], ) @@ -92,13 +86,14 @@ py_test( size = "small", srcs = ["concat_layer.py"], deps = [ - ":helpers", "//pysyrenn/frontend:averagepool_layer", "//pysyrenn/frontend:concat_layer", "//pysyrenn/frontend:conv2d_layer", "//pysyrenn/frontend:fullyconnected_layer", + "//pysyrenn/frontend:network", "//pysyrenn/frontend:normalize_layer", "//pysyrenn/frontend:strided_window_data", + "@bazel_python//:pytest_helper", ], ) @@ -107,8 +102,8 @@ py_test( size = "small", srcs = ["strided_window_data.py"], deps = [ - ":helpers", "//pysyrenn/frontend:strided_window_data", + "@bazel_python//:pytest_helper", ], ) @@ -118,12 +113,12 @@ py_test( srcs = ["network.py"], data = ["@onnx_squeezenet//:all"], deps = [ - ":helpers", "//pysyrenn/frontend:argmax_layer", "//pysyrenn/frontend:fullyconnected_layer", "//pysyrenn/frontend:network", "//pysyrenn/frontend:relu_layer", "//pysyrenn/frontend:transformer_client", + "@bazel_python//:pytest_helper", ], ) @@ -132,8 +127,8 @@ py_test( size = "small", srcs = ["layer.py"], deps = [ - ":helpers", 
"//pysyrenn/frontend:layer", + "@bazel_python//:pytest_helper", ], ) @@ -142,10 +137,10 @@ py_test( size = "small", srcs = ["transformer_client.py"], deps = [ - ":helpers", "//pysyrenn/frontend:fullyconnected_layer", "//pysyrenn/frontend:network", "//pysyrenn/frontend:relu_layer", "//pysyrenn/frontend:transformer_client", + "@bazel_python//:pytest_helper", ], ) diff --git a/pysyrenn/frontend/tests/argmax_layer.py b/pysyrenn/frontend/tests/argmax_layer.py index 1fbb928..3b215ef 100644 --- a/pysyrenn/frontend/tests/argmax_layer.py +++ b/pysyrenn/frontend/tests/argmax_layer.py @@ -2,7 +2,7 @@ """ import numpy as np import torch -from helpers import main +from external.bazel_python.pytest_helper import main from pysyrenn.frontend.argmax_layer import ArgMaxLayer def test_compute(): diff --git a/pysyrenn/frontend/tests/averagepool_layer.py b/pysyrenn/frontend/tests/averagepool_layer.py index ae9b185..7774544 100644 --- a/pysyrenn/frontend/tests/averagepool_layer.py +++ b/pysyrenn/frontend/tests/averagepool_layer.py @@ -2,7 +2,7 @@ """ import numpy as np import torch -from helpers import main +from external.bazel_python.pytest_helper import main from pysyrenn.frontend.strided_window_data import StridedWindowData from pysyrenn.frontend.averagepool_layer import AveragePoolLayer diff --git a/pysyrenn/frontend/tests/concat_layer.py b/pysyrenn/frontend/tests/concat_layer.py index 0527534..e86c95b 100644 --- a/pysyrenn/frontend/tests/concat_layer.py +++ b/pysyrenn/frontend/tests/concat_layer.py @@ -2,7 +2,7 @@ """ import numpy as np import torch -from helpers import main +from external.bazel_python.pytest_helper import main from pysyrenn.frontend.strided_window_data import StridedWindowData from pysyrenn.frontend.fullyconnected_layer import FullyConnectedLayer from pysyrenn.frontend.normalize_layer import NormalizeLayer @@ -114,8 +114,8 @@ def test_compute_invalid(): def test_serialize(): """Tests that the Concat layer correctly serializes itself. """ - n_inputs = 1025 - fullyconnected_outputs = 2046 + n_inputs = 125 + fullyconnected_outputs = 246 weights = np.random.uniform(size=(n_inputs, fullyconnected_outputs)) biases = np.random.uniform(size=(fullyconnected_outputs)) @@ -130,7 +130,6 @@ def test_serialize(): serialized = concat_layer.serialize() assert serialized.WhichOneof("layer_data") == "concat_data" - assert (serialized.concat_data.concat_along == transformer_pb.ConcatLayerData.ConcatAlong.Value( "CONCAT_ALONG_FLAT")) @@ -138,4 +137,24 @@ def test_serialize(): assert serialized.concat_data.layers[0] == fullyconnected_layer.serialize() assert serialized.concat_data.layers[1] == normalize_layer.serialize() + # TODO: This does not check that deserialized.input_layers was done + # correctly, but that should be the case as long as their deserialize + # methods work (tested in their respective files). + deserialized = ConcatLayer.deserialize(serialized) + assert deserialized.concat_along == ConcatAlong.FLAT + + serialized.relu_data.SetInParent() + deserialized = ConcatLayer.deserialize(serialized) + assert deserialized is None + + concat_layer.concat_along = ConcatAlong.CHANNELS + deserialized = ConcatLayer.deserialize(concat_layer.serialize()) + assert deserialized.concat_along == ConcatAlong.CHANNELS + + try: + ConcatAlong.deserialize(5) + assert False, "Should have errored on unrecognized serialization." 
+ except NotImplementedError: + pass + main(__name__, __file__) diff --git a/pysyrenn/frontend/tests/conv2d_layer.py b/pysyrenn/frontend/tests/conv2d_layer.py index 672f002..a3ef5d6 100644 --- a/pysyrenn/frontend/tests/conv2d_layer.py +++ b/pysyrenn/frontend/tests/conv2d_layer.py @@ -2,7 +2,7 @@ """ import numpy as np import torch -from helpers import main +from external.bazel_python.pytest_helper import main from pysyrenn.frontend.strided_window_data import StridedWindowData from pysyrenn.frontend.conv2d_layer import Conv2DLayer @@ -34,6 +34,8 @@ def test_compute(): stride, pad, out_channels) conv2d_layer = Conv2DLayer(window_data, filters, biases) assert np.allclose(conv2d_layer.compute(inputs), true_outputs) + assert np.allclose(conv2d_layer.compute(inputs, jacobian=True), + np.zeros_like(true_outputs)) torch_inputs = torch.FloatTensor(inputs) torch_outputs = conv2d_layer.compute(torch_inputs).numpy() diff --git a/pysyrenn/frontend/tests/fullyconnected_layer.py b/pysyrenn/frontend/tests/fullyconnected_layer.py index 4bf55e3..f13e5fa 100644 --- a/pysyrenn/frontend/tests/fullyconnected_layer.py +++ b/pysyrenn/frontend/tests/fullyconnected_layer.py @@ -2,7 +2,7 @@ """ import numpy as np import torch -from helpers import main +from external.bazel_python.pytest_helper import main from pysyrenn.frontend.fullyconnected_layer import FullyConnectedLayer def test_compute(): diff --git a/pysyrenn/frontend/tests/hard_tanh_layer.py b/pysyrenn/frontend/tests/hard_tanh_layer.py index 2867980..1f72e1a 100644 --- a/pysyrenn/frontend/tests/hard_tanh_layer.py +++ b/pysyrenn/frontend/tests/hard_tanh_layer.py @@ -2,7 +2,7 @@ """ import numpy as np import torch -from helpers import main +from external.bazel_python.pytest_helper import main from pysyrenn.frontend.hard_tanh_layer import HardTanhLayer def test_compute(): diff --git a/pysyrenn/frontend/tests/helpers.py b/pysyrenn/frontend/tests/helpers.py deleted file mode 100644 index 9499efa..0000000 --- a/pysyrenn/frontend/tests/helpers.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Helper methods for the other tests. -""" -import sys -import numpy as np -import pytest -import coverage -import os -# We need to do this here, otherwise it won't catch method/class declarations. -# Also, helpers imports should be before all other local imports. -cov = coverage.Coverage(data_file="%s/coverage.cov" % os.environ["TEST_UNDECLARED_OUTPUTS_DIR"]) -cov.start() - -def main(script_name, file_name): - """Test runner that supports Bazel test and the coverage_report.sh script. - - Tests should import heplers before importing any other local scripts, then - call main(__file__) after declaring their tests. 
- """ - if script_name != "__main__": - return - exit_code = pytest.main([file_name, "-s"]) - cov.stop() - cov.save() - sys.exit(exit_code) diff --git a/pysyrenn/frontend/tests/layer.py b/pysyrenn/frontend/tests/layer.py index c48f707..0d2bf49 100644 --- a/pysyrenn/frontend/tests/layer.py +++ b/pysyrenn/frontend/tests/layer.py @@ -1,6 +1,6 @@ """Tests the methods in relu_layer.py """ -from helpers import main +from external.bazel_python.pytest_helper import main from pysyrenn.frontend.layer import NetworkLayer def test_stubs(): diff --git a/pysyrenn/frontend/tests/maxpool_layer.py b/pysyrenn/frontend/tests/maxpool_layer.py index 1e6334c..a4dd801 100644 --- a/pysyrenn/frontend/tests/maxpool_layer.py +++ b/pysyrenn/frontend/tests/maxpool_layer.py @@ -2,7 +2,7 @@ """ import numpy as np import torch -from helpers import main +from external.bazel_python.pytest_helper import main from pysyrenn.frontend.strided_window_data import StridedWindowData from pysyrenn.frontend.maxpool_layer import MaxPoolLayer @@ -25,10 +25,19 @@ def test_compute(): (2, 2), (2, 2), (0, 0), channels) maxpool_layer = MaxPoolLayer(window_data) assert np.allclose(maxpool_layer.compute(inputs), true_outputs) + output, indices = maxpool_layer.compute(inputs, return_indices=True) + assert np.allclose(output, true_outputs) + # TODO: Actually check true_indices itself. + assert np.allclose(maxpool_layer.from_indices(inputs, indices), output) torch_inputs = torch.FloatTensor(inputs) torch_outputs = maxpool_layer.compute(torch_inputs).numpy() assert np.allclose(torch_outputs, true_outputs) + torch_outputs, torch_indices = maxpool_layer.compute(torch_inputs, + return_indices=True) + assert np.allclose(torch_outputs.numpy(), true_outputs) + torch_outputs = maxpool_layer.from_indices(torch_inputs, indices) + assert np.allclose(torch_outputs.numpy(), true_outputs) def test_serialize(): """Tests that the MaxPool layer correctly [de]serializes itself. 
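Editor's note on the additions exercised by the tests above: `MaxPoolLayer.compute(..., return_indices=True)` records which input entry each pooled output came from, `from_indices(...)` replays that selection on a different input (the "Decoupled DNN" use case), and `jacobian=True` on `FullyConnectedLayer`/`Conv2DLayer` computes only the bias-free, homogeneous part of the layer. A minimal standalone sketch is below; the `StridedWindowData(input_shape, window_shape, strides, padding, out_channels)` argument order is assumed from its usage in the tests.

```python
# Sketch only (not part of the patch). StridedWindowData's argument order is
# assumed from the test files above; imports mirror those test files.
import numpy as np
from pysyrenn.frontend.strided_window_data import StridedWindowData
from pysyrenn.frontend.maxpool_layer import MaxPoolLayer
from pysyrenn.frontend.fullyconnected_layer import FullyConnectedLayer

height, width, channels = 4, 4, 1
window_data = StridedWindowData((height, width, channels),
                                (2, 2), (2, 2), (0, 0), channels)
maxpool = MaxPoolLayer(window_data)

x = np.random.uniform(size=(1, height * width * channels))
y = np.random.uniform(size=(1, height * width * channels))

# Record which entries were the maxes for x, then apply that same selection
# pattern to a different input y.
out_x, indices = maxpool.compute(x, return_indices=True)
out_y = maxpool.from_indices(y, indices)
assert out_x.shape == out_y.shape

# jacobian=True drops the biases, i.e. it computes just the linear part.
weights = np.random.uniform(size=(3, 2))
biases = np.random.uniform(size=(2,))
fc = FullyConnectedLayer(weights, biases)
z = np.random.uniform(size=(5, 3))
assert np.allclose(fc.compute(z, jacobian=True),
                   z.astype(np.float32) @ weights.astype(np.float32),
                   atol=1e-5)
```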
diff --git a/pysyrenn/frontend/tests/network.py b/pysyrenn/frontend/tests/network.py index f1da61d..b394235 100644 --- a/pysyrenn/frontend/tests/network.py +++ b/pysyrenn/frontend/tests/network.py @@ -3,7 +3,7 @@ import numpy as np import torch import pytest -from helpers import main +from external.bazel_python.pytest_helper import main from pysyrenn.frontend.network import Network from pysyrenn.frontend.conv2d_layer import Conv2DLayer from pysyrenn.frontend.fullyconnected_layer import FullyConnectedLayer diff --git a/pysyrenn/frontend/tests/normalize_layer.py b/pysyrenn/frontend/tests/normalize_layer.py index 9545a2f..8eca3b9 100644 --- a/pysyrenn/frontend/tests/normalize_layer.py +++ b/pysyrenn/frontend/tests/normalize_layer.py @@ -2,7 +2,7 @@ """ import numpy as np import torch -from helpers import main +from external.bazel_python.pytest_helper import main from pysyrenn.frontend.normalize_layer import NormalizeLayer def test_compute(): diff --git a/pysyrenn/frontend/tests/relu_layer.py b/pysyrenn/frontend/tests/relu_layer.py index 252605b..a85b099 100644 --- a/pysyrenn/frontend/tests/relu_layer.py +++ b/pysyrenn/frontend/tests/relu_layer.py @@ -2,7 +2,7 @@ """ import numpy as np import torch -from helpers import main +from external.bazel_python.pytest_helper import main from pysyrenn.frontend.relu_layer import ReluLayer def test_compute(): diff --git a/pysyrenn/frontend/tests/strided_window_data.py b/pysyrenn/frontend/tests/strided_window_data.py index 8ce4237..8b29428 100644 --- a/pysyrenn/frontend/tests/strided_window_data.py +++ b/pysyrenn/frontend/tests/strided_window_data.py @@ -2,7 +2,7 @@ """ import numpy as np import torch -from helpers import main +from external.bazel_python.pytest_helper import main from pysyrenn.frontend.strided_window_data import StridedWindowData def test_out_shape(): diff --git a/pysyrenn/frontend/tests/transformer_client.py b/pysyrenn/frontend/tests/transformer_client.py index 0cc988b..be686c4 100644 --- a/pysyrenn/frontend/tests/transformer_client.py +++ b/pysyrenn/frontend/tests/transformer_client.py @@ -2,7 +2,7 @@ """ import numpy as np import torch -from helpers import main +from external.bazel_python.pytest_helper import main from pysyrenn.frontend import transformer_client from pysyrenn.frontend.network import Network from pysyrenn.frontend.conv2d_layer import Conv2DLayer diff --git a/pysyrenn/helpers/BUILD b/pysyrenn/helpers/BUILD index 5e97de8..8b7edc2 100644 --- a/pysyrenn/helpers/BUILD +++ b/pysyrenn/helpers/BUILD @@ -41,22 +41,10 @@ py_library( name = "netpatch", srcs = ["netpatch.py"], visibility = [":__subpackages__"], - deps = [ - ":masking_network", - "//pysyrenn/frontend:fullyconnected_layer", - "//pysyrenn/frontend:network", - "//pysyrenn/frontend:relu_layer", - ], ) py_library( name = "masking_network", srcs = ["masking_network.py"], visibility = [":__subpackages__"], - deps = [ - "//pysyrenn/frontend:fullyconnected_layer", - "//pysyrenn/frontend:network", - "//pysyrenn/frontend:relu_layer", - "//syrenn_proto:syrenn_py_grpc", - ], ) diff --git a/pysyrenn/helpers/README.md b/pysyrenn/helpers/README.md index a4747b2..92dfc30 100644 --- a/pysyrenn/helpers/README.md +++ b/pysyrenn/helpers/README.md @@ -9,9 +9,7 @@ compute desired properties of networks. points along a set of two-dimensional subspaces of the input space. - [IntegratedGradients](integrated_gradients.py) supports exactly computing integrated gradients for a network and baseline/image pairs. 
-- [NetPatcher](netpatch.py) uses an interval-MAX-SMT solver and "MaskingNetwork" - architecture to "patch" neural networks to have particular classifications. ## Documentation Primary usage examples are available in [../experiments](../experiments) along -with the test cases in [tests](tests). \ No newline at end of file +with the test cases in [tests](tests). diff --git a/pysyrenn/helpers/masking_network.py b/pysyrenn/helpers/masking_network.py index 28001ef..ca287a5 100644 --- a/pysyrenn/helpers/masking_network.py +++ b/pysyrenn/helpers/masking_network.py @@ -1,104 +1,10 @@ -"""Methods for interacting with Masking Networks. -""" -import numpy as np -from pysyrenn.frontend.network import Network -from pysyrenn.frontend.fullyconnected_layer import FullyConnectedLayer -from pysyrenn.frontend.relu_layer import ReluLayer -import syrenn_proto.syrenn_pb2 as transformer_pb +"""Stub for MaskingNetwork.""" class MaskingNetwork: - """Implements a Masking Network. - - Currently supports: - - Arbitrary Layers as long as the activation and values parameters are - equal up to that layer. - - Once the activation and values parameters differ, only FullyConnected and - ReLU layers are supported (note that other layers can be supported, but - it would require extending the .compute method and are not needed for the - ACAS networks). - """ + """Stub for MaskingNetwork.""" def __init__(self, activation_layers, value_layers): - """Constructs the new PatchedNetwork. - - @activation_layers is a list of layers defining the values of the - activation vectors. - @value_layers is a list of layers defining the values of the value - vectors. - - Note that: - 1. To match the notation in the paper, you should consider the ith - activation layer and the ith value layer being ``intertwined`` into - a single layer that produces an (activation, value) tuple. - 2. To that end, the number, types, and output sizes of the layers in - @activation_layers and @values_layers should match. - 3. ReluLayers are interpreted as MReLU layers. - """ - self.activation_layers = activation_layers - self.value_layers = value_layers - - self.n_layers = len(activation_layers) - assert self.n_layers == len(value_layers) - - try: - self.differ_index = next( - l for l in range(self.n_layers) - if activation_layers[l] is not value_layers[l] - ) - except StopIteration: - self.differ_index = len(value_layers) - - def compute(self, inputs): - """Computes the output of the Masking Network on @inputs. - - @inputs should be a Numpy array of inputs. - """ - # Up to differ_index, the values and activation vectors are the same. - pre_network = Network(self.activation_layers[:self.differ_index]) - mid_inputs = pre_network.compute(inputs) - # Now we have to actually separately handle the masking when - # activations != values. - activation_vector = mid_inputs - value_vector = mid_inputs - for layer_index in range(self.differ_index, self.n_layers): - activation_layer = self.activation_layers[layer_index] - value_layer = self.value_layers[layer_index] - if isinstance(activation_layer, FullyConnectedLayer): - activation_vector = activation_layer.compute(activation_vector) - value_vector = value_layer.compute(value_vector) - elif isinstance(activation_layer, ReluLayer): - mask = np.maximum(np.sign(activation_vector), 0.0) - value_vector *= mask - activation_vector *= mask - else: - raise NotImplementedError - return value_vector - - def serialize(self): - """Serializes the MaskingNetwork to the Protobuf format. 
- - Notably, the value_net only includes layers after differ_index. - """ - serialized = transformer_pb.MaskingNetwork() - serialized.activation_layers.extend([ - layer.serialize() for layer in self.activation_layers - ]) - serialized.value_layers.extend([ - layer.serialize() - for layer in self.value_layers[self.differ_index:] - ]) - serialized.differ_index = self.differ_index - return serialized - - @classmethod - def deserialize(cls, serialized): - """Deserializes the MaskingNetwork from the Protobuf format. - """ - activation_layers = serialized.activation_layers - activation_layers = Network.deserialize_layers(activation_layers) - - value_layers = serialized.value_layers - value_layers = Network.deserialize_layers(value_layers) - - differ_index = serialized.differ_index - value_layers = activation_layers[:differ_index] + value_layers - return cls(activation_layers, value_layers) + """Warns that MaskingNetworks are no longer in this package.""" + print("Patching and MaskingNetworks are no longer part of pysyrenn.") + print("We are working to provide a new patching-specific package.") + print("\nIn the meantime, please use pysyrenn==1.0") + raise NotImplementedError diff --git a/pysyrenn/helpers/netpatch.py b/pysyrenn/helpers/netpatch.py index bcdca55..c027ad1 100644 --- a/pysyrenn/helpers/netpatch.py +++ b/pysyrenn/helpers/netpatch.py @@ -1,405 +1,10 @@ -"""Methods for patching networks with SyReNN. -""" -import itertools -from timeit import default_timer as timer -import numpy as np -from tqdm import tqdm -from pysyrenn.frontend import Network, FullyConnectedLayer -from pysyrenn.helpers.masking_network import MaskingNetwork +"""Stubs for netpatch.""" class NetPatcher: - """Helper for patching a neural network with SyReNN. - """ + """Stubs for NetPatcher.""" def __init__(self, network, layer_index, inputs, labels): - """Initializes a new NetPatcher. - - This initializer assumes that you want to patch finitely many points - --- if that is not the case, use NetPatcher.from_planes or - NetPatcher.from_spec_function instead. - - @network should be the Network to patch. - @layer_index should be index of the FullyConnected layer in the Network - to patch. - @inputs should be a list of the input points to patch against, while - @labels should be a list of their corresponding labels. - """ - self.network = network - self.layer_index = layer_index - self.inputs = np.array(inputs) - self.labels = labels - original_layer = network.layers[layer_index] - # intermediates[i] is the MaskingNetwork after i patching steps, so - # intermediates[0] is the original network. - self.intermediates = [self.construct_patched(original_layer)] - # times[i] is the time taken to run the ith iteration. - self.times = [0.0] - - @classmethod - def from_planes(cls, network, layer_index, planes, labels): - """Constructs a NetPatcher to patch 2D regions. - - @planes should be a list of input 2D planes (Numpy arrays of their - vertices in counter-clockwise order) - @labels a list of the corresponding desired labels (integers). - - Internally, SyReNN is used to lower the problem to that of finitely - many points. - - NOTE: This function requires one to have a particularly precise - representation of the desired network output; in most cases, - NetPatcher.from_spec_function is more useful (see below). 
- """ - transformed = network.transform_planes(planes, - compute_preimages=True, - include_post=False) - all_inputs = [] - all_labels = [] - for upolytope, label in zip(transformed, labels): - # include_post=False so the upolytope is just a list of Numpy - # arrays. - points = [] - for vertices in upolytope: - points.extend(vertices) - # Remove duplicate points. - points = list(set(map(tuple, points))) - all_inputs.extend(points) - all_labels.extend([label for i in range(len(points))]) - all_inputs, indices = np.unique(all_inputs, return_index=True, axis=0) - all_labels = np.array(all_labels)[indices] - return cls(network, layer_index, all_inputs, all_labels) - - @classmethod - def from_spec_function(cls, network, layer_index, - region_plane, spec_function): - """Constructs a NetPatcher for an input region and "Spec Function." - - @region_plane should be a single plane (Numpy array of - counter-clockwise vertices) that defines the "region of interest" - to patch over. - @spec_function should take a set of input points (Numpy array) and - return the desired corresponding labels (list/Numpy array of ints). - - Here we use a slightly in-exact algorithm; we get all partition - endpoints using SyReNN, then use those for the NetPatcher. - - If the @spec_function classifies all points on a linear partition the - same way, then this exactly encodes the corresponding problem for the - NetPatcher (i.e., if the NetPatcher reports all constraints met then - the patched network exactly matches the @spec_function). - - If the @spec_function classifies some points on a linear partition - differently than others, the encoding may not be exact (i.e., - NetPatcher may report all constraints met even when some input has a - different output in the patched network than the @spec_function). - However, in practice, this works *very* well and is significantly more - efficient than computing the exact encoding and resulting patches. - """ - upolytope = network.transform_plane(region_plane, - compute_preimages=True, - include_post=False) - inputs = [] - for polytope in upolytope: - inputs.extend(list(polytope)) - inputs = np.unique(np.array(inputs), axis=0) - labels = spec_function(inputs) - return cls(network, layer_index, inputs, labels) - - @staticmethod - def linearize_network(inputs, network): - """Linearizes a network for a set of inputs. - - For each input in @inputs (n_inputs x in_dims), the linearization of - the network around that input point is computed and returned as a - FullyConnectedLayer. - - Linearization formula about point P (see: Wikipedia): - - f(x) = f(P) + nabla-f(p) * (x - p) - = (f(p) - nabla-f(p) * p) + (nabla-f(p) * x) - W = nabla-f(p), B = f(p) - (nabla-f(p) * p) - """ - if not network.layers: - return [FullyConnectedLayer(np.eye(inputs.shape[1]), - np.zeros(shape=(inputs.shape[1]))) - for i in range(len(inputs))] - - biases = network.compute(inputs) - # We get (output_dims x n_inputs x mid_dims) then reshape to - # (n_inputs x mid_dims x output_dims) - normals = np.array([ - network.compute_gradients(inputs, output_i) - for output_i in range(biases.shape[1]) - ]).transpose((1, 2, 0)) - biases -= np.einsum("imo,im->io", normals, inputs) - linearized = [FullyConnectedLayer(normal, bias) - for normal, bias in zip(normals, biases)] - return linearized - - def linearize_around(self, inputs, network, layer_index): - """Linearizes a network before/after a certain layer. 
- - We return a 3-tuple, (pre_lins, mid_inputs, post_lins) satisfying: - - 1) For all x in the same linear region as x_i: - Network(x) = PostLin_i(Layer_l(PreLin_i(x))) - 2) For all x_i: - mid_inputs_i = PreLin_i(x_i) - - pre_lins and post_lins are lists of FullyConnectedLayers, one per - input, and mid_inputs is a Numpy array. - """ - pre_network = Network(network.layers[:layer_index]) - pre_linearized = self.linearize_network(inputs, pre_network) - - mid_inputs = pre_network.compute(inputs) - pre_post_inputs = network.layers[layer_index].compute(mid_inputs) - post_network = Network(network.layers[(layer_index + 1):]) - post_linearized = self.linearize_network(pre_post_inputs, post_network) - return pre_linearized, mid_inputs, post_linearized - - @staticmethod - def compute_intervals(inputs_to_patch_layer, labels, - patch_layer, post_linearized): - """Computes weight bounds that cause the desired classifications. - - @input_to_patch_layer should be a list of inputs that are ready to be - fed into @patch_layer. - @labels should have one integer label (desired classification) for each - input in @inputs_to_patch_layer. - @patch_layer should be the FullyConnectedLayer to patch. - @post_linearized should be the rest of the network linearized after - @patch_layer (a list of AffineLayers). - """ - in_dims, mid_dims = patch_layer.weights.shape - # (n_inputs, mid_dims, out_dims) - linearized_normals = np.array([linearized.weights.numpy() - for linearized in post_linearized]) - n_inputs, mid_dims, out_dims = linearized_normals.shape - # (n_inputs, out_dims) - linearized_biases = np.array([linearized.biases.numpy() - for linearized in post_linearized]) - - # Compute the outputs of the unmodified network. - # Final Shape: (n_inputs, n_outputs) - original_outputs = patch_layer.compute(inputs_to_patch_layer) - original_outputs = np.einsum("imo,im->io", - linearized_normals, original_outputs) - original_outputs += linearized_biases - - # For each (input_image, weight) pair, we find the derivative with - # respect to that weight. - # Shape: (n_inputs, in_dims, mid_dims, out_dims) - weight_derivatives = np.einsum("ik,imo->ikmo", - inputs_to_patch_layer, - linearized_normals) - # Add the biases to get: (n_inputs, in_dims + 1, mid_dims, out_dims) - bias_derivatives = linearized_normals.reshape((n_inputs, 1, - mid_dims, out_dims)) - weight_derivatives = np.concatenate((weight_derivatives, - bias_derivatives), 1) - # Transpose to: (n_inputs, out_dims, in_dims + 1, mid_dims) - weight_derivatives = weight_derivatives.transpose((0, 3, 1, 2)) - - # Initial weight bounds, we will fill this in the loop below. 
- weight_bounds = np.zeros(shape=(n_inputs, 2, in_dims + 1, mid_dims)) - weight_bounds[:, 0, :, :] = -np.Infinity - weight_bounds[:, 1, :, :] = +np.Infinity - for input_index in tqdm(range(n_inputs), desc="Computing Bounds"): - label = labels[input_index] - for other_label in range(out_dims): - if other_label == label: - continue - is_met = (original_outputs[input_index, label] > - original_outputs[input_index, other_label]) - # At this point, for each weight w, we effectively have two - # functions in terms of the weight delta 'dw': - # y1 = a1*dw + o1, y2 = a2*dw + o2 - # We're interested first in their intersection: - # a1*dw + o1 = a2*dw + o2 => dw = (o2 - o1) / (a1 - a2) - a_delta = (weight_derivatives[input_index, label] - - weight_derivatives[input_index, other_label]) - o_delta = (original_outputs[input_index, label] - - original_outputs[input_index, other_label]) - # Shape: (in_dims + 1, mid_dims) - intersections = -o_delta / a_delta - - # First, we deal with the weights that have non-NaN - # intersections. - finite_is, finite_js = np.isfinite(intersections).nonzero() - finite_bounds = intersections[finite_is, finite_js] - if is_met: - is_upper_bound = finite_bounds >= 0.0 - else: - is_upper_bound = finite_bounds <= 0.0 - - # First, update the upper bounds. - upper_bounds = finite_bounds[is_upper_bound] - upper_is = finite_is[is_upper_bound] - upper_js = finite_js[is_upper_bound] - bounds_slice = (input_index, 1, upper_is, upper_js) - weight_bounds[bounds_slice] = np.minimum( - weight_bounds[bounds_slice], upper_bounds) - # Then, update the lower bounds. - lower_bounds = finite_bounds[~is_upper_bound] - lower_is = finite_is[~is_upper_bound] - lower_js = finite_js[~is_upper_bound] - bounds_slice = (input_index, 0, lower_is, lower_js) - weight_bounds[bounds_slice] = np.maximum( - weight_bounds[bounds_slice], lower_bounds) - - # Finally, if it is unmet and there are non-finite bounds, - # NaN-ify those weights. - if not is_met: - nan_is, nan_js = (~np.isfinite(intersections)).nonzero() - weight_bounds[input_index, :, nan_is, nan_js] = np.nan - weight_bounds = weight_bounds.transpose((2, 3, 0, 1)) - to_nanify = weight_bounds[:, :, :, 0] > weight_bounds[:, :, :, 1] - weight_bounds[to_nanify, :] = np.nan - # Return (in_dims + 1, mid_dims, n_inputs, {min, max}) - return weight_bounds - - @staticmethod - def interval_MAX_SMT(intervals): - """Computes the MAX-SMT for a set of intervals. - - @intervals should be a Numpy array of shape: - (n_intervals, {lower, upper}). - - The return value is a 3-tuple: (lower, upper, n_met) such that any - value x satisfying lower <= x <= upper will maximize the MAX-SMT - problem by meeting n_met constraints. - """ - lower_indices = np.argsort(intervals[:, 0]) - lower_sorted = intervals[lower_indices, 0] - - upper_indices = np.argsort(intervals[:, 1]) - upper_sorted = intervals[upper_indices, 1] - - best_lower, best_upper = 0, 0 - upper_i = 0 - best_met = -1 - n_met = 0 - for lower_i, lower in enumerate(lower_sorted): - # First, we update upper -- everything in this loop is an interval - # we were meeting before but not anymore. - while upper_sorted[upper_i] < lower: - n_met -= 1 - upper_i += 1 - # We now meet the interval that this lower is from. - n_met += 1 - if n_met > best_met: - best_lower, best_upper = lower, upper_sorted[upper_i] - best_met = n_met - elif (len(lower_sorted) - lower_i) < (best_met - n_met): - # Each iteration adds *at most* 1 to n_met. 
For us to even have - # a chance of updating best_met, then, we will have to do at - # least (best_met - n_met) more iterations. - break - return best_lower, best_upper, best_met - - def propose_patch(self, weight_bounds, learn_rate=1.0): - """Proposes a weight-patch for the patch_layer based on @weight_bounds. - - @weight_bounds should be of shape: - (in_dims, mid_dims, n_inputs, {min,max}) - """ - in_dims, mid_dims, _, _ = weight_bounds.shape - - best_index = (None, None) - best_constraints = -1 - best_delta = 0.0 - indices = itertools.product(range(in_dims), range(mid_dims)) - for in_dim, mid_dim in tqdm(indices, total=(in_dims * mid_dims), - desc="Computing Patch"): - bounds = weight_bounds[in_dim, mid_dim, :, :] - # We focus on the bounds that are non-NaN - non_nan_bounds = bounds[~np.isnan(bounds[:, 0])] - if len(non_nan_bounds) < best_constraints: - continue - lower, upper, n_met = self.interval_MAX_SMT(non_nan_bounds) - - if n_met <= best_constraints: - continue - best_constraints = n_met - best_index = (in_dim, mid_dim) - - if lower <= 0.0 <= upper: - best_delta = 0.0 - else: - # True if the interval suggests to increase the weight. - is_increase = lower > 0.0 - # If the interval suggests to increase the weight, suggest a - # delta slightly above lower. Otherwise, suggest one slightly - # below upper. Either way, we're trying to stay as close to 0 - # as possible. - ratio = 0.1 if is_increase else 0.9 - best_delta = lower + (ratio * (upper - lower)) - if not np.isfinite(best_delta): - eps = 0.1 - if is_increase: # => upper == np.Infinity - assert np.isfinite(lower + eps) - best_delta = lower + eps - elif upper < 0.0: # => lower == -np.Infinity - assert np.isfinite(upper - eps) - best_delta = upper - eps - else: - assert False - assert np.isfinite(best_delta) - print("Would be satisfying", best_constraints, "constraints.") - print("Updating weight", best_index) - best_delta *= learn_rate - return best_index, best_delta, best_constraints - - def construct_patched(self, patched_layer): - """Constructs a MaskingNetwork given the patched layer. - """ - activation_layers = self.network.layers - value_layers = activation_layers.copy() - value_layers[self.layer_index] = patched_layer - return MaskingNetwork(activation_layers, value_layers) - - def compute(self, steps): - """Performs the Network patching for @steps steps. - - Returns the patched network as an instance of MaskingNetwork. - Intermediate networks (i.e., the network after i steps) are stored in - self.intermediates. - """ - if len(self.intermediates) > steps: - return self.intermediates[steps] - - # Linearize the network around this layer. - _, layer_inputs, lin_post = self.linearize_around( - self.inputs, self.network, self.layer_index) - # We allow restarting from a partially-patched state. - start_step = len(self.intermediates) - start_net = self.intermediates[start_step - 1] - # Keep track of the latest version of the weights/biases as we patch - # it. - layer = start_net.value_layers[self.layer_index] - weights = layer.weights.numpy().copy() - biases = layer.biases.numpy().copy() - # We routinely divide by zero and utilize NaN values; Numpy's warning - # are not particularly useful here. - np.seterr(divide="ignore", invalid="ignore") - for step in range(start_step, steps + 1): - print("Step:", step) - start_time = timer() - # First, we compute the interval on each weight that makes each - # constraint met. 
- bounds = self.compute_intervals(layer_inputs, self.labels, - layer, lin_post) - # Then, we use those intervals to compute the optimal single-weight - # change. - index, delta, _ = self.propose_patch(bounds) - # Finally, we make the update and save the new MaskingNetwork. - if index[0] < weights.shape[0]: - weights[index] += delta - else: - assert index[0] == weights.shape[0] - biases[index[1]] += delta - layer = FullyConnectedLayer(weights.copy(), biases.copy()) - self.intermediates.append(self.construct_patched(layer)) - self.times.append(timer() - start_time) - np.seterr(divide="warn", invalid="warn") - return self.intermediates[steps] + """Warns that NetPatcher is no longer in this package.""" + print("Patching and MaskingNetworks are no longer part of pysyrenn.") + print("We are working to provide a new patching-specific package.") + print("\nIn the meantime, please use pysyrenn==1.0") + raise NotImplementedError diff --git a/pysyrenn/helpers/tests/BUILD b/pysyrenn/helpers/tests/BUILD index 2800717..cf9581d 100644 --- a/pysyrenn/helpers/tests/BUILD +++ b/pysyrenn/helpers/tests/BUILD @@ -4,8 +4,8 @@ py_test( srcs = ["classify_lines.py"], deps = [ "//pysyrenn/frontend", - "//pysyrenn/frontend/tests:helpers", "//pysyrenn/helpers:classify_lines", + "@bazel_python//:pytest_helper", ], ) @@ -15,8 +15,8 @@ py_test( srcs = ["classify_planes.py"], deps = [ "//pysyrenn/frontend", - "//pysyrenn/frontend/tests:helpers", "//pysyrenn/helpers:classify_planes", + "@bazel_python//:pytest_helper", ], ) @@ -26,29 +26,7 @@ py_test( srcs = ["integrated_gradients.py"], deps = [ "//pysyrenn/frontend", - "//pysyrenn/frontend/tests:helpers", "//pysyrenn/helpers:integrated_gradients", - ], -) - -py_test( - name = "masking_network", - size = "small", - srcs = ["masking_network.py"], - deps = [ - "//pysyrenn/frontend", - "//pysyrenn/frontend/tests:helpers", - "//pysyrenn/helpers:masking_network", - ], -) - -py_test( - name = "netpatch", - size = "small", - srcs = ["netpatch.py"], - deps = [ - "//pysyrenn/frontend", - "//pysyrenn/frontend/tests:helpers", - "//pysyrenn/helpers:netpatch", + "@bazel_python//:pytest_helper", ], ) diff --git a/pysyrenn/helpers/tests/classify_lines.py b/pysyrenn/helpers/tests/classify_lines.py index c574404..1845987 100644 --- a/pysyrenn/helpers/tests/classify_lines.py +++ b/pysyrenn/helpers/tests/classify_lines.py @@ -3,7 +3,7 @@ import numpy as np import torch import pytest -from pysyrenn.frontend.tests.helpers import main +from external.bazel_python.pytest_helper import main from pysyrenn.frontend import Network, ReluLayer from pysyrenn.helpers.classify_lines import LinesClassifier diff --git a/pysyrenn/helpers/tests/classify_planes.py b/pysyrenn/helpers/tests/classify_planes.py index 2ec01cd..a0bba01 100644 --- a/pysyrenn/helpers/tests/classify_planes.py +++ b/pysyrenn/helpers/tests/classify_planes.py @@ -3,7 +3,7 @@ import numpy as np import torch import pytest -from pysyrenn.frontend.tests.helpers import main +from external.bazel_python.pytest_helper import main from pysyrenn.frontend import Network, ReluLayer from pysyrenn.helpers.classify_planes import PlanesClassifier diff --git a/pysyrenn/helpers/tests/integrated_gradients.py b/pysyrenn/helpers/tests/integrated_gradients.py index 4f66c18..99450cd 100644 --- a/pysyrenn/helpers/tests/integrated_gradients.py +++ b/pysyrenn/helpers/tests/integrated_gradients.py @@ -3,7 +3,7 @@ import numpy as np import torch import pytest -from pysyrenn.frontend.tests.helpers import main +from external.bazel_python.pytest_helper import main from 
pysyrenn.frontend import Network, ReluLayer from pysyrenn.helpers.integrated_gradients import IntegratedGradients diff --git a/pysyrenn/helpers/tests/masking_network.py b/pysyrenn/helpers/tests/masking_network.py deleted file mode 100644 index 34b553c..0000000 --- a/pysyrenn/helpers/tests/masking_network.py +++ /dev/null @@ -1,102 +0,0 @@ -"""Tests the methods in masking_network.py -""" -import numpy as np -import torch -import pytest -from pysyrenn.frontend.tests.helpers import main -from pysyrenn.frontend import ReluLayer, FullyConnectedLayer, ArgMaxLayer -from pysyrenn.helpers.masking_network import MaskingNetwork - -def test_compute(): - """Tests that it works for a simple example. - """ - activation_layers = [ - FullyConnectedLayer(np.eye(2), np.ones(shape=(2,))), - ReluLayer(), - FullyConnectedLayer(2.0 * np.eye(2), np.zeros(shape=(2,))), - ReluLayer(), - ] - value_layers = activation_layers[:2] + [ - FullyConnectedLayer(3.0 * np.eye(2), np.zeros(shape=(2,))), - ReluLayer(), - ] - network = MaskingNetwork(activation_layers, value_layers) - assert network.differ_index == 2 - output = network.compute([[-2.0, 1.0]]) - assert np.allclose(output, [[0.0, 6.0]]) - -def test_nodiffer(): - """Tests the it works if activation and value layers are identical. - """ - activation_layers = [ - FullyConnectedLayer(np.eye(2), np.ones(shape=(2,))), - ReluLayer(), - FullyConnectedLayer(2.0 * np.eye(2), np.zeros(shape=(2,))), - ReluLayer(), - ] - value_layers = activation_layers - network = MaskingNetwork(activation_layers, value_layers) - assert network.differ_index == 4 - output = network.compute([[-2.0, 1.0]]) - assert np.allclose(output, [[0.0, 4.0]]) - -def test_bad_layer(): - """Tests that non-ReLU/FullyConnected layers only after differ_index fail. - """ - # It should work if it's before the differ_index. - activation_layers = [ - FullyConnectedLayer(np.eye(2), np.ones(shape=(2,))), - ReluLayer(), - FullyConnectedLayer(2.0 * np.eye(2), np.zeros(shape=(2,))), - ArgMaxLayer(), - ] - value_layers = activation_layers - network = MaskingNetwork(activation_layers, value_layers) - assert network.differ_index == 4 - output = network.compute([[-2.0, 1.0]]) - assert np.allclose(output, [[1.0]]) - # But not after the differ_index. - activation_layers = [ - FullyConnectedLayer(np.eye(2), np.ones(shape=(2,))), - ReluLayer(), - FullyConnectedLayer(2.0 * np.eye(2), np.zeros(shape=(2,))), - ArgMaxLayer(), - ] - value_layers = activation_layers[:2] + [ - FullyConnectedLayer(3.0 * np.eye(2), np.zeros(shape=(2,))), - ReluLayer(), - ] - network = MaskingNetwork(activation_layers, value_layers) - assert network.differ_index == 2 - try: - output = network.compute([[-2.0, 1.0]]) - assert False - except NotImplementedError: - pass - -def test_serialization(): - """Tests that it correctly (de)serializes. 
- """ - activation_layers = [ - FullyConnectedLayer(np.eye(2), np.ones(shape=(2,))), - ReluLayer(), - FullyConnectedLayer(2.0 * np.eye(2), np.zeros(shape=(2,))), - ReluLayer(), - ] - value_layers = activation_layers[:2] + [ - FullyConnectedLayer(3.0 * np.eye(2), np.zeros(shape=(2,))), - ReluLayer(), - ] - network = MaskingNetwork(activation_layers, value_layers) - serialized = network.serialize() - assert all(serialized == layer.serialize() - for serialized, layer in zip(serialized.activation_layers, - activation_layers)) - assert all(serialized == layer.serialize() - for serialized, layer in zip(serialized.value_layers, - value_layers[2:])) - assert serialized.differ_index == 2 - - assert MaskingNetwork.deserialize(serialized).serialize() == serialized - -main(__name__, __file__) diff --git a/pysyrenn/helpers/tests/netpatch.py b/pysyrenn/helpers/tests/netpatch.py deleted file mode 100644 index 7677f86..0000000 --- a/pysyrenn/helpers/tests/netpatch.py +++ /dev/null @@ -1,161 +0,0 @@ -"""Tests the methods in netpatch.py -""" -import numpy as np -import torch -import pytest -from pysyrenn.frontend.tests.helpers import main -from pysyrenn.frontend import Network, ReluLayer, FullyConnectedLayer -from pysyrenn.helpers.netpatch import NetPatcher - -def test_already_good(): - """Point-wise test case where all constraints are already met. - - We want to make sure that it doesn't change any weights unnecessarily. - """ - network = Network([ - FullyConnectedLayer(np.eye(2), np.zeros(shape=(2,))), - ReluLayer(), - ]) - layer_index = 0 - points = [[1.0, 0.5], [2.0, -0.5], [4.0, 5.0]] - labels = [0, 0, 1] - patcher = NetPatcher(network, layer_index, points, labels) - patched = patcher.compute(steps=1) - patched_layer = patched.value_layers[0] - assert np.allclose(patched_layer.weights.numpy(), np.eye(2)) - assert np.allclose(patched_layer.biases.numpy(), np.zeros(shape=(2,))) - -def test_one_point(): - """Point-wise test case with only one constraint. - - This leads to weight bounds that are unbounded-on-one-side. - """ - # Where the weight is too big. - network = Network([ - FullyConnectedLayer(np.eye(2), np.zeros(shape=(2,))), - ReluLayer(), - FullyConnectedLayer(np.array([[1.0, 0.0], [1.0, 0.0]]), - np.array([0.0, 0.0])), - ]) - layer_index = 0 - points = [[1.0, 0.5]] - labels = [1] - patcher = NetPatcher(network, layer_index, points, labels) - patched = patcher.compute(steps=1) - assert np.argmax(patched.compute(points)) == 1 - - # Where the weight is too small. - network = Network([ - FullyConnectedLayer(np.eye(2), np.array([0.0, 0.0])), - ReluLayer(), - FullyConnectedLayer(np.array([[1.0, 0.0], [1.0, 0.0]]), np.array([0.0, 2.0])), - ]) - layer_index = 0 - points = [[1.0, 0.0]] - labels = [0] - patcher = NetPatcher(network, layer_index, points, labels) - patched = patcher.compute(steps=1) - assert np.argmax(patched.compute(points)) == 0 - -def test_optimal(): - """Point-wise test case that the greedy algorithm can solve in 1 step. - - All it needs to do is triple the second component. - """ - network = Network([ - FullyConnectedLayer(np.eye(2), np.zeros(shape=(2,))), - ReluLayer(), - ]) - layer_index = 0 - points = [[1.0, 0.5], [2.0, -0.5], [5.0, 4.0]] - labels = [1, 0, 1] - patcher = NetPatcher(network, layer_index, points, labels) - patched = patcher.compute(steps=1) - assert patched.differ_index == 0 - assert np.count_nonzero( - np.argmax(patched.compute(points), axis=1) == labels) == 3 - # Ensure it doesn't re-do work its already done. 
- assert patcher.compute(steps=1) == patched - -def test_from_planes(): - """Test case to load key points from a set of labeled 2D polytopes. - """ - if not Network.has_connection(): - pytest.skip("No server connected.") - - network = Network([ - ReluLayer(), - FullyConnectedLayer(np.eye(2), np.zeros(shape=(2,))) - ]) - layer_index = 1 - planes = [ - np.array([[-1.0, -3.0], [-0.5, -3.0], [-0.5, 9.0], [-1.0, 9.0]]), - np.array([[8.0, -2.0], [16.0, -2.0], [16.0, 6.0], [8.0, 6.0]]), - ] - labels = [1, 0] - patcher = NetPatcher.from_planes(network, layer_index, planes, labels) - assert patcher.network is network - assert patcher.layer_index is layer_index - assert len(patcher.inputs) == (4 + 2) + (4 + 2) - true_key_points = list(planes[0]) - true_key_points += [np.array([-1.0, 0.0]), np.array([-0.5, 0.0])] - true_key_points += list(planes[1]) - true_key_points += [np.array([8.0, 0.0]), np.array([16.0, 0.0])] - true_labels = ([1] * 6) + ([0] * 6) - for true_point, true_label in zip(true_key_points, true_labels): - try: - i = next(i for i, point in enumerate(patcher.inputs) - if np.allclose(point, true_point)) - except StopIteration: - assert False - assert true_label == patcher.labels[i] - -def test_from_spec(): - """Test case to load key points from a spec function. - """ - if not Network.has_connection(): - pytest.skip("No server connected.") - - network = Network([ - ReluLayer(), - FullyConnectedLayer(np.eye(2), np.zeros(shape=(2,))) - ]) - layer_index = 1 - region_of_interest = np.array([ - [0.5, -3.0], - [1.0, -3.0], - [1.0, 9.0], - [0.5, 9.0], - ]) - spec_fn = lambda i: np.isclose(i[:, 0], 1.0).astype(np.float32) - patcher = NetPatcher.from_spec_function(network, layer_index, - region_of_interest, spec_fn) - assert patcher.network is network - assert patcher.layer_index is layer_index - assert len(patcher.inputs) == (4 + 2) - true_key_points = list(region_of_interest) - true_key_points += [np.array([0.5, 0.0]), np.array([1.0, 0.0])] - true_labels = [0, 1, 1, 0, 0, 1] - for true_point, true_label in zip(true_key_points, true_labels): - try: - i = next(i for i, point in enumerate(patcher.inputs) - if np.allclose(point, true_point)) - except StopIteration: - assert False - assert true_label == patcher.labels[i] - -def test_interval_MAX_SMT(): - """Tests the interval MAX SMT (linear sweep) implementation. - """ - intervals = np.array([ - (0.0, 1.0), - (-1.0, 0.5), - (-2.0, 3.0), - (3.5, 4.0), - (5.0, 6.0), - ]) - lower, upper, n_meeting = NetPatcher.interval_MAX_SMT(intervals) - assert (lower, upper) == (0.0, 0.5) - assert n_meeting == 3 - -main(__name__, __file__) diff --git a/requirements.txt b/requirements.txt index 77eabe0..4e5eba6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,7 @@ coverage==4.5.4 cairosvg==2.5.1 pytest==5.2.0 imageio==2.5.0 -Pillow==7.1.0 +Pillow==8.1.1 matplotlib==3.1.1 svgwrite==1.3.1 # PyClipper dependencies. 
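For anyone who still depends on the removed patching helpers, the interval MAX-SMT sweep that NetPatcher used (and that the deleted test just above exercises) is small enough to reproduce on its own. The following is a minimal sketch, assuming NumPy and intervals given as an (n, 2) array of [lower, upper] pairs; the function name is illustrative and is not part of the current pysyrenn API:

import numpy as np

def interval_max_smt(intervals):
    """Returns (lower, upper, n_met): any x in [lower, upper] meets n_met intervals."""
    lowers = np.sort(intervals[:, 0])
    uppers = np.sort(intervals[:, 1])
    best_lower, best_upper, best_met = 0.0, 0.0, -1
    n_met, upper_i = 0, 0
    for lower in lowers:
        # Intervals whose upper bound lies below this lower bound are no longer met.
        while uppers[upper_i] < lower:
            n_met -= 1
            upper_i += 1
        # We now meet the interval this lower bound came from.
        n_met += 1
        if n_met > best_met:
            best_lower, best_upper, best_met = lower, uppers[upper_i], n_met
    return best_lower, best_upper, best_met

# Mirrors the deleted test case above: the optimal window is (0.0, 0.5), meeting 3 intervals.
intervals = np.array([(0.0, 1.0), (-1.0, 0.5), (-2.0, 3.0), (3.5, 4.0), (5.0, 6.0)])
print(interval_max_smt(intervals))

Any single-weight delta chosen inside the returned interval satisfies the maximum number of point-wise constraints, which is how the removed propose_patch selected its greedy update.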
diff --git a/third_party/eran_bmc/BUILD b/third_party/eran_bmc/BUILD new file mode 100644 index 0000000..853992c --- /dev/null +++ b/third_party/eran_bmc/BUILD @@ -0,0 +1,9 @@ +sh_binary( + name = "experiment", + srcs = ["experiment.sh"], + data = [ + "experiment.py", + "//models:vrl_models", + "//third_party/eran_preconditions:eran_image.tgz", + ], +) diff --git a/third_party/eran_bmc/README.md b/third_party/eran_bmc/README.md new file mode 100644 index 0000000..efc052b --- /dev/null +++ b/third_party/eran_bmc/README.md @@ -0,0 +1,18 @@ +# ERAN-BMC +This directory includes a Dockerfile and code to demonstrate imprecision when +using the [ERAN project](https://github.com/eth-sri/eran) for Bounded Model +Checking. + +We currently only test the [DeepPoly](https://doi.org/10.1145/3290354) domain +on the Pendulum model. Other domains may be implemented in the future. + +# Building & Running +1. Install Bazel 1.1.0, Docker (tested with version 18.09.6, build 481bc77), + and any prerequisites mentioned in [../../README.md](../../README.md). +2. Ensure that the ``runexec`` line in [../../.bazelrc](../../.bazelrc) is + commented out (we run ``runexec`` directly in the Docker containers). +3. From the root of this repository, run ``bazel run + third_party/eran_bmc:experiment``. This target will build the Docker + container then run the experiment; you should see the output ``ERAN reported + initLB unsafe after the first step.``, which indicates ERAN found a spurious + counterexample before it could verify any steps. diff --git a/third_party/eran_bmc/experiment.py b/third_party/eran_bmc/experiment.py new file mode 100644 index 0000000..43dff2e --- /dev/null +++ b/third_party/eran_bmc/experiment.py @@ -0,0 +1,215 @@ +"""Demonstrates imprecision when using ERAN for BMC. + +Code adapted from the ERAN project: https://github.com/eth-sri/eran +""" +import sys +sys.path.insert(0, "../ELINA/python_interface/") +sys.path.insert(0, ".") +from PIL import Image +import math +import numpy as np +import matplotlib +import matplotlib.image +import os +from eran import ERAN +from fppoly import * +from elina_coeff import * +from elina_linexpr0 import * +from read_net_file import * +from analyzer import * +import tensorflow as tf +import csv +import time +import argparse +from timeit import default_timer as timer +from tqdm import tqdm + +args = { + "complete": False, + "timeout_lp": 1, + "timeout_milp": 1, + "use_area_heuristic": True, +} + + +def main(): + """Runs the ERAN analysis on the pendulum_continuous model. + + This happens in a few steps: + 1. ERAN doesn't specifically support HardTanh layers, so we translate the + HardTanh into an equivalent set of ReLU layers using convert_htanh(). + 2. We then read the controller network into an ERAN model. + 3. We use ERAN/DeepPoly to extract an abstract value describing the + network's behavior over the initial set. Module float imprecision + handling, this abstract value is basically two affine transform A and B + such that Ax <= f(x) <= Bx for all x in the initial set. + 4. We compute Ax and Bx for a particular point (0.35, 0.35) right on the + edge of the initial set, and show that the range determined by DeepPoly + (even after applying a concrete HardTanh at the end) is wide enough to + mark that point as unsafe even on the first iteration. + """ + # (1) Translate the model into an equivalent one without HardTanh. 
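+    # convert_htanh() uses the identity (for HardTanh clamping its output to [-1, 1]):
+    #     HardTanh(x) = -ReLU(-ReLU(x + 1) + 2) + 1
+    # e.g. x = -2 gives -ReLU(2) + 1 = -1, x = 0 gives -ReLU(1) + 1 = 0, and
+    # x = 2 gives -ReLU(-1) + 1 = 1, so only Affine and ReLU layers remain.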
+ with_htanh_filename = sys.argv[1] + no_htanh_filename = "/ovol/pendulum_continuous.no_htanh.eran" + convert_htanh(with_htanh_filename, no_htanh_filename) + # (2) Read it into ERAN. + num_pixels = 2 + model, _, _, _ = read_net(no_htanh_filename, num_pixels, False) + eran = ERAN(model) + + # (3) Extract an abstract value over the initial set. + # (3a) Load model and init set into ERAN. + initLB = np.array([-0.35, -0.35]) + initUB = np.array([0.35, 0.35]) + + nn = layers() + nn.specLB = initLB + nn.specUB = initUB + execute_list = eran.optimizer.get_deeppoly(initLB, initUB) + # NOTE: 9 is just a placeholder specnumber to tell it we're using + # ACAS. + analyzer = Analyzer(execute_list, nn, "deeppoly", args["timeout_lp"], + args["timeout_milp"], 9, args["use_area_heuristic"]) + + # (3b) Perform the analysis and extract the abstract values. + element, _, _ = analyzer.get_abstract0() + + lexpr = get_lexpr_for_output_neuron(analyzer.man, element, 0) + uexpr = get_uexpr_for_output_neuron(analyzer.man, element, 0) + + lexpr = np.array(extract_from_expr(lexpr)) + uexpr = np.array(extract_from_expr(uexpr)) + + # (3c) Extract the output range for initLB based on the abstract value. + lower_bound, upper_bound = compute_output_range(initLB, lexpr, uexpr) + + # Apply extra knowledge that -1 <= lower_bound <= upper_bound <= 1. + lower_bound = max(lower_bound, -1.) + upper_bound = min(upper_bound, 1.) + + post_lower, post_upper = post_bounds(initLB, lower_bound, upper_bound) + post_lower, post_upper = post_lower.flatten(), post_upper.flatten() + + lower_safe = np.min(post_lower) >= -0.35 + upper_safe = np.max(post_upper) <= 0.35 + is_safe = lower_safe and upper_safe + print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") + if not is_safe: + print("ERAN reported initLB unsafe after the first step.") + else: + print("Something changed; ERAN used to report initLB unsafe, but now" + "it says it's safe.") + print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") + + elina_abstract0_free(analyzer.man, element) + +def convert_htanh(with_htanh_filename, no_htanh_filename): + """Converts a network using HardTanh to one using only ReLUs. + + @with_htanh_filename is the path to the ERAN file describing the network + using HardTanh, while @no_htanh_filename is the path to the ERAN file this + function should write the ReLU-only version to. + """ + # y = ReLU(x + 1) + # z = ReLU(-y + 2) + with open(with_htanh_filename, "r") as original_network: + with open(no_htanh_filename, "w") as no_htanh_network: + in_hard_tanh = False + weights = None + biases = None + for line in original_network: + if line.strip() == "HardTanh": + in_hard_tanh = True + no_htanh_network.write("ReLU\n") + elif in_hard_tanh and not weights: + weights = line + no_htanh_network.write(line) + elif in_hard_tanh and not biases: + # HTanh(x) = -ReLU(-ReLU(x + 1) + 2) + 1 + assert "," not in line + bias = float(line.strip("\n[]")) + no_htanh_network.write("[{}]\n".format(bias + 1.0)) + no_htanh_network.write("ReLU\n") + no_htanh_network.write("[[-1.0]]\n") + no_htanh_network.write("[2.0]\n") + no_htanh_network.write("Affine\n") + no_htanh_network.write("[[-1.0]]\n") + no_htanh_network.write("[1.0]\n") + in_hard_tanh = False + else: + no_htanh_network.write(line) + +def compute_output_range(point, lexpr, uexpr): + """Computes the range of possible outputs at @point. + + lexpr[:, 0] gives lower bounds on the values of A while lexpr[:, 1] gives + upper bounds on the values of A. Similarly for uexpr and B. 
+ + Together, they form A, B such that Ax <= f(x) <= Bx. + + This function computes a lower bound on Ax and an upper bound on Bx. + """ + lower_bound = np.min(lexpr[-1, :]) + for i in range(point.size): + assert np.sign(lexpr[i, 0]) == np.sign(lexpr[i, 1]) + if np.sign(lexpr[i, 0]) == np.sign(point[i]): + lower_bound += point[i] * np.min(lexpr[i, :]) + else: + lower_bound += point[i] * np.max(lexpr[i, :]) + upper_bound = np.max(uexpr[-1, :]) + for i in range(point.size): + assert np.sign(uexpr[i, 0]) == np.sign(uexpr[i, 1]) + if np.sign(uexpr[i, 0]) == np.sign(point[i]): + upper_bound += point[i] * np.max(uexpr[i, :]) + else: + upper_bound += point[i] * np.min(uexpr[i, :]) + return lower_bound, upper_bound + +def post_bounds(original_state, action_lower_bound, action_upper_bound): + """Finds the tightest bounds on the post-state given bounds on the action. + + A, B are environment descriptions for the Pendulum model. See + ../../experiments/vrl_models.py for more details. Notably, B is positive so + we get a lower bound by multiplying action_lower_bound and an upper bound + by multiplying action_upper_bound. + """ + A = 0.01 * np.array([[0., 1.], [10.0/1.0, 0.]]) + B = 0.01 * 15.0 * np.array([[0.], [1.0]]) + delta_B_lower = action_lower_bound * B + delta_B_upper = action_upper_bound * B + original_state = np.array([original_state]).transpose() + delta_lower = (np.matmul(A, original_state) + delta_B_lower) + delta_upper = (np.matmul(A, original_state) + delta_B_upper) + post_lower = original_state + delta_lower + post_upper = original_state + delta_upper + return post_lower, post_upper + +def extract_from_expr(expr, coeffs=2): + """Helper method to extract a vector from the ERAN internal representation. + + It returns a vector of size (n + 1, 2), where the last row is the bias and + vec[:, 0] = inf, vec[:, 1] = sup for the coefficients (used to handle + floating point imprecision). + """ + coefficients = [] + for i in range(coeffs): + coeff = elina_linexpr0_coeffref(expr, i) + assert coeff.contents.discr == 1 + interval = coeff.contents.val.interval.contents + assert interval.inf.contents.discr == 0 + assert interval.sup.contents.discr == 0 + inf = interval.inf.contents.val.dbl + sup = interval.sup.contents.val.dbl + coefficients.append([inf, sup]) + cst = elina_linexpr0_cstref(expr) + assert cst.contents.discr == 1 + interval = cst.contents.val.interval.contents + assert interval.inf.contents.discr == 0 + assert interval.sup.contents.discr == 0 + inf = interval.inf.contents.val.dbl + sup = interval.sup.contents.val.dbl + coefficients.append([inf, sup]) + return np.array(coefficients) + +if __name__ == "__main__": + main() diff --git a/third_party/eran_bmc/experiment.sh b/third_party/eran_bmc/experiment.sh new file mode 100755 index 0000000..4d73dc8 --- /dev/null +++ b/third_party/eran_bmc/experiment.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +builddir=$BUILD_WORKING_DIRECTORY +model="$PWD/models/vrl/eran/pendulum_continuous.eran" +experiment_py="$PWD/third_party/eran_bmc/experiment.py" +image_file="$PWD/third_party/eran_preconditions/eran_image.tgz" + +# Copy everything to the local directory and load the Docker image. +cp $model pendulum_continuous.eran +cp $experiment_py experiment.py +docker load -i $image_file + +# Run the experiment and save results to local_outdir. +rm -rf local_outdir +mkdir local_outdir + +# The Docker container has a habit of messing with __pycache__ permissions in +# /ivol, so we keep /ivol read-only and only give it permission to write to +# /ovol. 
+docker run --rm -t -i \ + -v $PWD:/ivol:ro \ + -v $PWD/local_outdir:/ovol:rw \ + -v /sys/fs/cgroup:/sys/fs/cgroup:rw \ + -w /eran/tf_verify \ + eran_preconditions \ + runexec --no-container --walltimelimit 172800 --cores 0-15 --memlimit 17179869184 --output /dev/stdout --input - -- \ + python3 /ivol/experiment.py /ivol/pendulum_continuous.eran /ovol diff --git a/third_party/marabou_model_checking/.gitignore b/third_party/marabou_model_checking/.gitignore new file mode 100644 index 0000000..aa1ec1e --- /dev/null +++ b/third_party/marabou_model_checking/.gitignore @@ -0,0 +1 @@ +*.tgz diff --git a/third_party/marabou_model_checking/BUILD b/third_party/marabou_model_checking/BUILD new file mode 100644 index 0000000..c5b7f31 --- /dev/null +++ b/third_party/marabou_model_checking/BUILD @@ -0,0 +1,26 @@ +genrule( + name = "build_marabou_docker", + srcs = [ + "Dockerfile", + "bmc.cpp", + "Makefile", + ], + outs = ["marabou_bmc.tgz"], + cmd = """ + cp $(location Dockerfile) Dockerfile + cp $(location bmc.cpp) bmc.cpp + cp $(location Makefile) Makefile + docker build --force-rm -t marabou_bmc . + docker save -o marabou_bmc.tgz marabou_bmc + cp marabou_bmc.tgz $@ + """, +) + +sh_binary( + name = "experiment", + srcs = ["experiment.sh"], + data = [ + ":marabou_bmc.tgz", + "//third_party/reluplex_model_checking:specs.tgz", + ], +) diff --git a/third_party/marabou_model_checking/Dockerfile b/third_party/marabou_model_checking/Dockerfile new file mode 100644 index 0000000..3daa0a6 --- /dev/null +++ b/third_party/marabou_model_checking/Dockerfile @@ -0,0 +1,31 @@ +FROM ubuntu:18.04 + +LABEL maintainer="masotoud@ucdavis.edu" +LABEL autodelete="True" + +RUN apt-get update && apt-get install -y \ + wget unzip build-essential + +# Install benchexec. +RUN wget https://github.com/sosy-lab/benchexec/releases/download/2.0/benchexec_2.0-1_all.deb -O benchexec.deb +RUN apt install -y --install-recommends ./benchexec.deb + +# Get the Marabou source. +WORKDIR / +RUN wget https://github.com/NeuralNetworkVerification/Marabou/archive/0a6b4638f0df700876d5a04e7e6661a34a649231.zip -O marabou.zip +RUN unzip marabou.zip -d marabou +RUN rm marabou.zip +RUN mv marabou/**/* marabou +RUN rm -r marabou/Marabou-* + +# Build Marabou. +WORKDIR /marabou +RUN make + +# Build our executable. +WORKDIR /marabou/cpp_interface_example +COPY Makefile Makefile +COPY bmc.cpp bmc.cpp +RUN make + +RUN rm -rf /var/lib/apt/lists/* diff --git a/third_party/marabou_model_checking/Makefile b/third_party/marabou_model_checking/Makefile new file mode 100644 index 0000000..cceb5c2 --- /dev/null +++ b/third_party/marabou_model_checking/Makefile @@ -0,0 +1,51 @@ +# Based on +# https://github.com/NeuralNetworkVerification/Marabou/blob/cav_artifact/cpp_interface_example/Makefile +ROOT_DIR = .. + +include $(ROOT_DIR)/Places.mk + +SUBDIRS += \ + +LOCAL_INCLUDES += \ + .. 
\ + $(BASIS_FACTORIZATION_DIR) \ + $(CONFIGURATION_DIR) \ + $(ENGINE_DIR) \ + $(INPUT_PARSER_DIR) \ + +LINK_FLAGS += \ + +LOCAL_LIBRARIES += \ + +CFLAGS += \ + +include $(BASIS_FACTORIZATION_DIR)/Sources.mk +include $(COMMON_DIR)/Sources.mk +include $(COMMON_REAL_DIR)/Sources.mk +include $(CONFIGURATION_DIR)/Sources.mk +include $(ENGINE_DIR)/Sources.mk +include $(ENGINE_REAL_DIR)/Sources.mk +include $(INPUT_PARSER_DIR)/Sources.mk + +vpath %.cpp $(BASIS_FACTORIZATION_DIR) +vpath %.cpp $(COMMON_DIR) +vpath %.cpp $(COMMON_REAL_DIR) +vpath %.cpp $(CONFIGURATION_DIR) +vpath %.cpp $(ENGINE_DIR) +vpath %.cpp $(ENGINE_REAL_DIR) +vpath %.cpp $(INPUT_PARSER_DIR) + +SOURCES += \ + bmc.cpp \ + +TARGET = bmc.elf + +include $(ROOT_DIR)/Rules.mk + +# +# Local Variables: +# compile-command: "make -C . " +# tags-file-name: "../TAGS" +# c-basic-offset: 4 +# End: +# diff --git a/third_party/marabou_model_checking/README.md b/third_party/marabou_model_checking/README.md new file mode 100644 index 0000000..9d0123b --- /dev/null +++ b/third_party/marabou_model_checking/README.md @@ -0,0 +1,43 @@ +# Marabou-BMC +Code to use Marabou for bounded-model-checking. This is the Marabou +counter-part to [../reluplex_model_checking](../reluplex_model_checking). + +# Building & Running +1. Install Bazel 1.1.0, Docker (tested with version 18.09.6, build 481bc77), + and any prerequisites mentioned in [../../README.md](../../README.md). +2. Ensure that the ``runexec`` line in [../../.bazelrc](../../.bazelrc) is + commented out (we run ``runexec`` directly in the Docker containers). +3. From the root of this repository, run ``bazel run + third_party/marabou_model_checking:experiment``. This target will build the + Docker container then run the experiment; once finished, results will be + copied into a ``.tgz`` file in this directory. + +# Notes +## Timeouts +The duration of the experiment can be controlled in ``experiment.sh`` by +changing the variable ``sh_timeout`` in ``run_for_model``. Please note that +there is some overhead, so if you wish to see how many steps it can verify in, +say, an hour, you should set the timeout to something closer to 1.5 hours. + +## Results +Results will be written to a ``.tgz`` file in this directory with one CSV for +each of the three models. + +The CSV needs significant pre-processing before its results can be compared to +that of the paper: + +1. Verification for each step is broken into 16 different specs, because both + our unsafe region and initial region are non-convex polytopes (each is, + however, a union of 4 convex polytopes). Therefore, the times from each of + the 16 specs per step should be added together to get the overall time of + each step. +2. Each step must be verified individually with Marabou, so the time needed to + verify that _all steps up to i_ are safe, you need the cumulative time of + each individual step up to i. Thus, cumulative time should be taken by + essentially "adding down the columns" of the generated CSV. + +The basic takeaways are: (1) 16 rows in the CSV -> 1 timestep and (2) you need +to take the cumulative sum down the time column. + +This script cannot be safely early-quitted, instead please use the +``sh_timeout`` variable as described above. diff --git a/third_party/marabou_model_checking/bmc.cpp b/third_party/marabou_model_checking/bmc.cpp new file mode 100644 index 0000000..e690d86 --- /dev/null +++ b/third_party/marabou_model_checking/bmc.cpp @@ -0,0 +1,300 @@ +/********************* */ +/*! 
\file main.cpp +** \verbatim +** Top contributors (to current version): +** Guy Katz +** This file is part of the Reluplex project. +** Copyright (c) 2016-2017 by the authors listed in the file AUTHORS +** (in the top-level source directory) and their institutional affiliations. +** All rights reserved. See the file COPYING in the top-level source +** directory for licensing information.\endverbatim +**/ + +#include +#include +#include +#include +#include + +#include "Engine.h" +#include "FloatUtils.h" +#include "InputQuery.h" +#include "ReluConstraint.h" +#include "TimeUtils.h" +#include "ReluplexError.h" + +using VecMatrix = std::vector>; +VecMatrix readMatrix(std::ifstream *file) { + int rows = -1, cols = -1; + (*file) >> rows >> cols; + VecMatrix mat(rows, std::vector(cols)); + for (int i = 0; i < rows; i++) { + for (int j = 0; j < cols; j++) { + (*file) >> mat[i][j]; + } + } + return mat; +} + +struct Index +{ + Index(unsigned layer, unsigned node) + : layer(layer), node(node) + { + } + + unsigned layer; + unsigned node; + + bool operator<( const Index &other ) const + { + if (layer != other.layer) + return layer < other.layer; + if (node != other.node) + return node < other.node; + return false; + } +}; + +int main(int argc, char **argv) { + if (argc != 5) { + std::cerr << "Usage: " + << argv[0] << " " + << std::endl; + return 1; + } + + char *netPath = argv[1]; + char *specPath = argv[2]; + unsigned steps = std::atoi(argv[3]); + + std::ofstream out_file(argv[4]); + out_file << netPath << "," << specPath << "," << steps << "," << std::flush; + + std::ifstream net(netPath); + std::vector layer_types; + std::vector layer_weights; + std::vector> layer_biases; + + unsigned weighted_nodes = 0, + relu_pairs = 0; + + while (layer_types.empty() || layer_types.back() != 'T') { + char layer_type = '\0'; + while (!(layer_type == 'A' || layer_type == 'T' || layer_type == 'R')) { + net >> layer_type; + } + layer_types.push_back(layer_type); + switch (layer_type) { + case 'A': + layer_weights.push_back(readMatrix(&net)); + layer_biases.push_back(readMatrix(&net).front()); + weighted_nodes += layer_biases.back().size(); + break; + case 'T': + layer_weights.push_back(readMatrix(&net)); + layer_weights.push_back(readMatrix(&net)); + weighted_nodes += layer_weights.back().size(); + break; + case 'R': + relu_pairs += layer_biases.back().size(); + break; + } + } + + std::ifstream spec(specPath); + VecMatrix inputConstraints = readMatrix(&spec); + VecMatrix safeConstraints = readMatrix(&spec); + VecMatrix outputConstraints = readMatrix(&spec); + spec.close(); + + const unsigned net_inputs = 2; + + unsigned tableau_size = + // 1. Input vars appear once. + net_inputs + + // 2. Each weighted node has a var *PER STEP*. + (steps * weighted_nodes) + + // 3. Each pre-ReLU var has a post-ReLU pair var. + (steps * relu_pairs) + + // 4. One aux var per input constraint. + inputConstraints.size() + + // 5. One aux var per output safe constraint *PER STEP*. + ((steps - 1) * safeConstraints.size()) + + // 6. One aux var per output constraint *AT THE LAST STEP*. + outputConstraints.size(); + // NOTE: Used to have a single extra for the constants. + + InputQuery inputQuery; + inputQuery.setNumberOfVariables(tableau_size); + + unsigned running_index = 0; + + Map nodeToVars; + + // First, add the input vars. + for (unsigned i = 0; i < net_inputs; i++) { + nodeToVars[Index(0, i)] = running_index++; + // NOTE: This *ASSUMES* these are valid bounds on the input state! 
+ inputQuery.setLowerBound(nodeToVars[Index(0, i)], -100.0); + inputQuery.setUpperBound(nodeToVars[Index(0, i)], +100.0); + } + + unsigned layer = 1; + std::vector transition_layers; + + for (unsigned step = 0; step < steps; step++) { + unsigned input_layer = layer - 1; + unsigned w_i = 0, b_i = 0; + for (char layer_type : layer_types) { + if (layer_type == 'A') { + auto &weights = layer_weights[w_i++]; + auto &biases = layer_biases[b_i++]; + // Weights is (n_inputs x n_outputs), biases is (n_outputs) + for (unsigned output = 0; output < biases.size(); output++) { + nodeToVars[Index(layer, output)] = running_index++; + unsigned var = nodeToVars[Index(layer, output)]; + inputQuery.setLowerBound(var, FloatUtils::negativeInfinity()); + inputQuery.setUpperBound(var, FloatUtils::infinity()); + + Equation node_equation; + node_equation.addAddend(-1., var); + for (unsigned input = 0; input < weights.size(); input++) { + unsigned from_var = nodeToVars[Index(layer - 1, input)]; + node_equation.addAddend(weights[input][output], from_var); + } + node_equation.setScalar(-biases[output]); + inputQuery.addEquation(node_equation); + } + } else if (layer_type == 'T') { + auto &transition_A = layer_weights[w_i++]; + auto &transition_B = layer_weights[w_i++]; + // transition_A is (out_state x in_state), transition_B is (state x action) + for (unsigned output = 0; output < transition_A.size(); output++) { + nodeToVars[Index(layer, output)] = running_index++; + unsigned var = nodeToVars[Index(layer, output)]; + inputQuery.setLowerBound(var, FloatUtils::negativeInfinity()); + inputQuery.setUpperBound(var, FloatUtils::infinity()); + + Equation node_equation; + node_equation.addAddend(-1., var); + + for (unsigned state_input = 0; state_input < transition_A.front().size(); state_input++) { + unsigned from_var = nodeToVars[Index(input_layer, state_input)]; + if (state_input == output) { + node_equation.addAddend( + 1.0 + transition_A[output][state_input], from_var); + } else { + node_equation.addAddend( + transition_A[output][state_input], from_var); + } + } + for (unsigned action_input = 0; action_input < transition_B.front().size(); action_input++) { + unsigned from_var = nodeToVars[Index(layer - 1, action_input)]; + node_equation.addAddend( + transition_B[output][action_input], from_var); + } + node_equation.setScalar(0.0); + inputQuery.addEquation(node_equation); + } + transition_layers.push_back(layer); + } else if (layer_type == 'R') { + unsigned last_nodes = layer_biases[w_i - 1].size(); + for (unsigned output = 0; output < last_nodes; output++) { + nodeToVars[Index(layer, output)] = running_index++; + unsigned preNode = nodeToVars[Index(layer - 1, output)]; + unsigned postNode = nodeToVars[Index(layer, output)]; + ReluConstraint *relu_pair = new ReluConstraint(preNode, postNode); + inputQuery.addPiecewiseLinearConstraint(relu_pair); + inputQuery.setLowerBound(postNode, 0.0); + inputQuery.setUpperBound(postNode, FloatUtils::infinity()); + } + } + layer++; + } + } + + // Set constraints for inputs. 
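+    // Each spec row is read as [a_1, ..., a_n, b] and encodes the halfspace
+    // a_1*x_1 + ... + a_n*x_n + b <= 0: the auxiliary variable introduced for
+    // the row is constrained (by the equation below) to equal a.x + b, and its
+    // upper bound of 0 enforces the inequality. The same pattern is reused for
+    // the safe and output constraints in the loop that follows.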
+ for (unsigned i = 0; i < inputConstraints.size(); i++) { + unsigned constraint_index = running_index++; + inputQuery.setUpperBound(constraint_index, 0.0); + Equation input_equation; + input_equation.addAddend(-1., constraint_index); + for (unsigned int j = 0; j < inputConstraints[i].size() - 1; j++) { + input_equation.addAddend( + inputConstraints[i][j], nodeToVars[Index(0, j)]); + } + input_equation.setScalar(-inputConstraints[i].back()); + inputQuery.addEquation(input_equation); + } + + for (unsigned l = 0; l < transition_layers.size(); l++) { + auto &layer = transition_layers[l]; + if (l < (transition_layers.size() - 1)) { + // Set constraints for non-final outputs. + for (unsigned i = 0; i < safeConstraints.size(); i++) { + unsigned constraint_index = running_index++; + inputQuery.setUpperBound(constraint_index, 0.0); + Equation output_equation; + output_equation.addAddend(-1., constraint_index); + for (unsigned int j = 0; j < safeConstraints[i].size() - 1; j++) { + output_equation.addAddend( + safeConstraints[i][j], nodeToVars[Index(layer, j)]); + } + output_equation.setScalar(-safeConstraints[i].back()); + inputQuery.addEquation(output_equation); + } + } else { + // Set constraints for final outputs. + for (unsigned i = 0; i < outputConstraints.size(); i++) { + unsigned constraint_index = running_index++; + inputQuery.setUpperBound(constraint_index, 0.0); + Equation output_equation; + output_equation.addAddend(-1., constraint_index); + for (unsigned int j = 0; j < outputConstraints[i].size() - 1; j++) { + output_equation.addAddend( + outputConstraints[i][j], + nodeToVars[Index(layer, j)]); + } + output_equation.setScalar(-outputConstraints[i].back()); + inputQuery.addEquation(output_equation); + } + } + } + + timespec start = TimeUtils::sampleMicro(); + timespec end; + + try { + Engine engine; + if (engine.processInputQuery(inputQuery) && engine.solve()) { + engine.extractSolution(inputQuery); + + out_file << "SAT"; + printf("Solution found!\n\n"); + } + else { + out_file << "UNS"; + printf("Can't solve!\n"); + } + } catch (ReluplexError &e) { + printf("Error: %d\n", e.getCode()); + } + + end = TimeUtils::sampleMicro(); + + unsigned microPassed = TimeUtils::timePassed(start, end); + unsigned seconds = microPassed / 1000000; + unsigned minutes = seconds / 60; + unsigned hours = minutes / 60; + out_file << "," << (((double)microPassed) / 1000000) << std::endl; + out_file.close(); + + printf("Total run time: %llu micro (%02u:%02u:%02u)\n", + TimeUtils::timePassed(start, end), + hours, + minutes - (hours * 60), + seconds - (minutes * 60)); + + return 0; +} diff --git a/third_party/marabou_model_checking/experiment.sh b/third_party/marabou_model_checking/experiment.sh new file mode 100755 index 0000000..8cbb72e --- /dev/null +++ b/third_party/marabou_model_checking/experiment.sh @@ -0,0 +1,73 @@ +#!/bin/bash + +image_file=$PWD/third_party/marabou_model_checking/marabou_bmc.tgz +specs_tgz=$PWD/third_party/reluplex_model_checking/specs.tgz +builddir=$BUILD_WORKING_DIRECTORY +outdir=$builddir/third_party/marabou_model_checking +local_outdir=$PWD/local_results + +docker load -i $image_file +# First, extract all of the specs (we assume their names). +tar -xzf $specs_tgz +models="pendulum_continuous satelite quadcopter" + +function run_for_model { + model=$1 + # This is the amount of overall-wallclock time (in seconds) to spend per + # spec. NOTE: This should be greater than the desired experiment timeout, + # as there is overhead in starting the scripts. 
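+    # The default below (90 minutes) matches the README's guidance of setting
+    # this somewhat above the intended verification budget (e.g., ~1.5 hours
+    # for a one-hour experiment).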
+ sh_timeout=$((90*60)) + # https://stackoverflow.com/questions/17066250 + SECONDS=0 + net_file="$model/stepnet.nnet" + result_csv="$local_outdir/$model.csv" + mkdir -p $(dirname $result_csv) + echo "Model,Spec,Steps,Result,NON-CUMULATIVE Time" > $result_csv + for steps in {1..100} + do + echo "Verifying step: $steps" + for i in {0..3} + do + for j in {0..3} + do + echo "NONE" > $local_outdir/bmc_output + spec_file="$model/0"$i"_0"$j"_spec.nspec" + echo "Running for spec: $spec_file" + # Docker file-permissions get a bit ugly, so we want to have a + # clean separation between input/output directories. + docker run --rm -t -i \ + -v $PWD:/ivol:ro \ + -v $local_outdir:/ovol:rw \ + -v /sys/fs/cgroup:/sys/fs/cgroup:rw \ + -w /marabou marabou_bmc \ + runexec --no-container --walltimelimit 172800 --cores 0-15 --memlimit 17179869184 --output /dev/stdout --input - -- \ + ./cpp_interface_example/bmc.elf \ + /ivol/$model/stepnet.nnet /ivol/$spec_file $steps /ovol/bmc_output \ + > /dev/null + cat $local_outdir/bmc_output >> $result_csv + if [ "$SECONDS" -gt "$sh_timeout" ]; then + echo "Timeout. Finishing model..." + return + fi + done + done + done +} + +rm -rf $local_outdir + +for model in $models +do + run_for_model $model +done + +# https://crunchify.com/shell-script-append-timestamp-to-file-name/ +filename="$PWD/results_$(date "+%Y.%m.%d-%H.%M.%S").tgz" + +# https://stackoverflow.com/questions/939982 +cd $local_outdir +tar -zcvf $filename *.csv +rm -rf $local_outdir + +# Move the results to the user-visible location. +cp $filename $outdir
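The README above explains how to post-process the per-model CSVs this script leaves in the results .tgz: the 16 spec rows (4 initial-set polytopes x 4 unsafe-region polytopes) belonging to one step must be summed, and the per-step totals must then be accumulated to obtain the time to verify all steps up to i. Below is a minimal sketch of that aggregation, assuming pandas (not a pinned dependency of this repository) and the "Model,Spec,Steps,Result,NON-CUMULATIVE Time" header written by run_for_model; runs killed by the timeout leave a literal "NONE" placeholder row, which is dropped here:

import pandas as pd

df = pd.read_csv("pendulum_continuous.csv")
# Rows left as the "NONE" placeholder (killed runs) have no time value; drop them.
df = df.dropna(subset=["NON-CUMULATIVE Time"])
# Sum the (up to) 16 spec rows that make up each BMC step...
per_step = df.groupby("Steps")["NON-CUMULATIVE Time"].sum()
# ...then accumulate: entry i is the total time to verify all steps up to i.
print(per_step.cumsum())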