diff --git a/CMakeLists_Template b/CMakeLists_Template index 001384b..3988f8e 100644 --- a/CMakeLists_Template +++ b/CMakeLists_Template @@ -8,47 +8,47 @@ set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -frtti -fexceptions") -# Find LLVM +# find llvm set(LLVM_DIR "/path/to/custom/llvm_home/llvm-project/build/lib/cmake/llvm") find_package(LLVM REQUIRED CONFIG) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") -# Include LLVM headers +# include llvm headers include_directories(${LLVM_INCLUDE_DIRS}) link_directories(${LLVM_LIBRARY_DIRS}) -# Find Z3 +# find z3 find_package(Z3 REQUIRED) include_directories(${Z3_INCLUDE_DIRS}) add_definitions(${Z3_DEFINITIONS}) -# Set the path to Alive2 +# set the path to alive2 set(ALIVE2_DIR "/path/to/alive2") -# Include Alive2 headers +# include alive2 headers include_directories(${ALIVE2_DIR}) -# Add your source files +# add source files add_executable(translation_validator src/main.cpp) -# Link against LLVM, Z3, and Alive2 +# link against llvm, z3, and alive2 target_link_libraries(translation_validator PRIVATE - # note, you may need to change `.dylib` to `.so` or `.a` depending on your OS + # note: you may need to change `.dylib` to `.so` or `.a` depending on your OS /path/to/custom/llvm_home/llvm-project/build/lib/libLLVMCore.dylib /path/to/custom/llvm_home/llvm-project/build/lib/libLLVMSupport.dylib /path/to/custom/llvm_home/llvm-project/build/lib/libLLVMIRReader.dylib /path/to/custom/llvm_home/llvm-project/build/lib/libLLVMBitReader.dylib /path/to/custom/llvm_home/llvm-project/build/lib/libLLVMMC.dylib /path/to/custom/llvm_home/llvm-project/build/lib/libLLVMOption.dylib - /path/to/custom/llvm_home/llvm-project/build/lib/libLLVMAnalysis.dylib # For TargetLibraryInfo - /path/to/custom/llvm_home/llvm-project/build/lib/libLLVMTarget.dylib # For Triple - /path/to/custom/llvm_home/llvm-project/build/lib/libLLVMTargetParser.dylib # For Triple parsing + /path/to/custom/llvm_home/llvm-project/build/lib/libLLVMAnalysis.dylib # for `TargetLibraryInfo` + /path/to/custom/llvm_home/llvm-project/build/lib/libLLVMTarget.dylib # for `Triple` + /path/to/custom/llvm_home/llvm-project/build/lib/libLLVMTargetParser.dylib # for `Triple` parsing ${Z3_LIBRARIES} ${ALIVE2_DIR}/build/libir.a ${ALIVE2_DIR}/build/libsmt.a ${ALIVE2_DIR}/build/libutil.a ${ALIVE2_DIR}/build/libtools.a - ${ALIVE2_DIR}/build/libllvm_util.a # For llvm_util::initializer + ${ALIVE2_DIR}/build/libllvm_util.a # for `llvm_util::initializer` ) diff --git a/README.md b/README.md index 0830de3..84d5f15 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,22 @@ ## translation validator +### overview +this tool compares the source function in cpp with the target function in rust, and verifies whether the rust function is a correct translation/semantic equivalent of the cpp function using alive2 in llvm ir level. + +**ps**. you could find all source files to be verified in the `examples/source` directory. + ### prerequisites -- follow the instructions in [alive2](https://github.com/AliveToolkit/alive2) to build and configure alive2, also the specific llvm with RTTI and exceptions turned on. - - I basically build the llvm from source (i.e., the latest main branch) and build the alive2 upon it. +- follow the instructions in [alive2](https://github.com/AliveToolkit/alive2) to build and configure alive2, and also the specific llvm version. + - you should build the llvm from source (i.e., the latest main branch), with RTTI and exceptions turned on. -- based on the provided `CMakeLists_Template`, create your own `CMakeLists.txt` by replacing the placeholder for the paths. +- based on the provided `CMakeLists_Template`, create your own `CMakeLists.txt` by replacing the placeholder for the paths, you may need to change the `.dylib` to `.so` or `.a` depending on your OS. -### build -```bash -bash scripts/build.sh -``` +### build and run +through `make build`, `make run`, or `make build_and_run`. note: a `compile_commands.json` will be automatically generated in the build directory and moved to the root directory, this is generally used by clangd for code navigation, you may need to reload the window to make it work. -### run -```bash -bash scripts/run.sh -``` +### generate the ir files +either through `make generate_ir` or `make clean_and_generate_ir`. + +**note**: the source file, e.g., `examples/source/add.rs`, will be converted to `add_rs.ll` and put in the `examples/ir/add/add_rs.ll` path, check the `scripts/src2ir.py` for more details. diff --git a/examples/source/access_array/access_array.cpp b/examples/source/access_array/access_array.cpp index ebb0c2f..796d2cc 100644 --- a/examples/source/access_array/access_array.cpp +++ b/examples/source/access_array/access_array.cpp @@ -1,4 +1,4 @@ -int access_array(int index) { +int access_array(unsigned int index) { int arr[10] {}; // to preserve the memory layout as rust's @@ -13,9 +13,9 @@ int access_array(int index) { arr[8] = 8; arr[9] = 9; - if (index < 0 || index >= 10) { - return -1; - } - - return arr[index]; + // a subtle unsigned integer overflow "feature" in cpp, + // i.e., implicitly wraps around. + // e.g., when considering the input `index` as 0xfffffffc (4294967292, -4), + // this function is more *defined* than the rust (panic) version. + return arr[10 + index]; } diff --git a/examples/source/access_array/access_array.rs b/examples/source/access_array/access_array.rs index a5d8df7..1cc57e8 100644 --- a/examples/source/access_array/access_array.rs +++ b/examples/source/access_array/access_array.rs @@ -1,9 +1,5 @@ -pub fn access_array(index: i32) -> i32 { +pub fn access_array(index: u32) -> i32 { let arr = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; - if index < 0 || index >= 10 { - return -1; - } - - arr[index as usize] + arr[10 + index as usize] } diff --git a/examples/source/div_by_zero/div_by_zero.cpp b/examples/source/div_by_zero/div_by_zero.cpp index 8956613..b1aeae8 100644 --- a/examples/source/div_by_zero/div_by_zero.cpp +++ b/examples/source/div_by_zero/div_by_zero.cpp @@ -1,3 +1,5 @@ -short div(short a, short b) { - return a / b; +/// ub will be detected by alive2, this is also considered semantically +/// different from the rust version. +int div_by_zero(int a, int b) { + return a / (b - b); } diff --git a/examples/source/div_by_zero/div_by_zero.rs b/examples/source/div_by_zero/div_by_zero.rs index 1c1ae72..0284c4d 100644 --- a/examples/source/div_by_zero/div_by_zero.rs +++ b/examples/source/div_by_zero/div_by_zero.rs @@ -1,3 +1,5 @@ -pub fn div(a: i16, b: i16) -> i16 { - a / b +/// the source cpp module is ub, and the function attrs are different +/// so the verifier fails to compare the two even after switching the order. +pub fn div_by_zero(a: i32, b: i32) -> i32 { + a / (b - b) } diff --git a/examples/source/use_uninit/use_uninit.cpp b/examples/source/use_uninit/use_uninit.cpp index 9d5e7b6..765c145 100644 --- a/examples/source/use_uninit/use_uninit.cpp +++ b/examples/source/use_uninit/use_uninit.cpp @@ -1,3 +1,4 @@ +/// ub, order will be switched during verification. int use_uninit() { int arr[2]; arr[1] = 1030; diff --git a/scripts/build.sh b/scripts/build.sh index 25c4646..728b9d9 100644 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -1,28 +1,28 @@ #!/bin/bash -# Enable stricter error handling +# enable stricter error handling set -euo pipefail -# Remove existing build directory and compile_commands.json +# remove existing build directory and compile_commands.json rm -rf build rm -f compile_commands.json -# Create new build directory and change to it +# create new build directory and change to it mkdir build && cd build || exit 1 -# Run CMake +# run cmake if ! cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ..; then echo "[build.sh] cmake configuration failed" exit 1 fi -# Run make +# run make if ! make; then echo "[build.sh] build failed" exit 1 fi -# Move compile_commands.json to root directory +# move `compile_commands.json` to root directory if [ -f compile_commands.json ]; then mv compile_commands.json .. echo "[build.sh] moved \`compile_commands.json\` to root directory" diff --git a/scripts/run.sh b/scripts/run.sh index 999f8ae..b3c5eae 100644 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -1,17 +1,15 @@ #!/bin/bash -# Enable stricter error handling +# enable stricter error handling set -euo pipefail -# Define the path to your executable -# Replace 'your_executable' with the actual name of your program EXECUTABLE="./build/translation_validator" -# Check if the executable exists +# check if the executable exists if [ ! -f "$EXECUTABLE" ]; then - echo "Error: Executable not found. Did you build the project?" + echo "error: executable not found, did you build the project?" exit 1 fi -# Run the executable +# run the executable "$EXECUTABLE" diff --git a/src/Comparer.h b/src/Comparer.h index 58a8906..d81e123 100644 --- a/src/Comparer.h +++ b/src/Comparer.h @@ -8,15 +8,10 @@ #include "llvm/Support/CommandLine.h" #include "llvm_util/compare.h" +#include "Printer.h" + class Comparer { public: - struct ComparisonResult { - bool success; - std::string cpp_name; - std::string rust_name; - std::string error_message; - }; - Comparer(llvm::Module &cpp_module, llvm::Module &rust_module, llvm::cl::opt &cpp_pattern, llvm::cl::opt &rust_pattern, @@ -25,63 +20,101 @@ class Comparer { rust_module_(&rust_module), cpp_pattern_(cpp_pattern), rust_pattern_(rust_pattern), - verifier_(verifier) {} + verifier_(verifier), + printer_(std::cout) {} - // Compare all functions and return results - std::vector compare() { - std::vector results; + /// compare the source function in `cpp_module` with the target function + /// in `rust_module`. + /// note: currently only supports one-to-one comparison. + ComparisonResult compare() { + std::vector cpp_funcs {}; + std::vector rust_funcs {}; for (auto &cpp_func : *cpp_module_) { - if (should_skip_function(cpp_func)) continue; - - auto result = find_and_compare_matching_function(cpp_func); - if (result.has_value()) { - results.push_back(std::move(*result)); + if (should_skip_function(cpp_func, cpp_pattern_)) { + continue; + } + cpp_funcs.push_back(&cpp_func); + } + for (auto &rust_func : *rust_module_) { + if (should_skip_function(rust_func, rust_pattern_)) { + continue; } + rust_funcs.push_back(&rust_func); } - return results; - } + if (auto cpp_empty = check_empty(cpp_funcs)) { + return *cpp_empty; + } + if (auto rust_empty = check_empty(rust_funcs)) { + return *rust_empty; + } + if (auto cpp_multiple = check_multiple(cpp_funcs)) { + return *cpp_multiple; + } + if (auto rust_multiple = check_multiple(rust_funcs)) { + return *rust_multiple; + } - private: - bool should_skip_function(const llvm::Function &func) const { - return func.isDeclaration() || - !func.getName().starts_with(cpp_pattern_); - } + auto cpp_func = cpp_funcs[0]; + auto rust_func = rust_funcs[0]; - std::optional find_and_compare_matching_function( - llvm::Function &cpp_func) { - for (auto &rust_func : *rust_module_) { - if (rust_func.isDeclaration()) continue; - if (!rust_func.getName().starts_with(rust_pattern_)) continue; + bool success { false }; + try { + success = verifier_.compareFunctions(*cpp_func, *rust_func); + } catch (const std::exception &e) { + printer_.print_error("comparer", e.what()); + return ComparisonResult { + .success = false, + .error_message = e.what() + }; + } - ComparisonResult result; - result.cpp_name = cpp_func.getName().str(); - result.rust_name = rust_func.getName().str(); + return ComparisonResult { + .success = success, + .cpp_name = cpp_func->getName().str(), + .rust_name = rust_func->getName().str(), + .error_message = success ? "" : "functions are not semantically equivalent" + }; + } - try { - result.success = - verifier_.compareFunctions(cpp_func, rust_func); - if (!result.success) { - result.error_message = "Functions are not equivalent"; - } - return result; - } catch (const std::exception &e) { - result.success = false; - result.error_message = e.what(); - return result; - } +private: + /// check if the function should be skipped + auto should_skip_function(const llvm::Function &func, + const std::string &pattern) -> bool const { + return func.isDeclaration() || !func.getName().starts_with(pattern); + } + + auto check_empty(const std::vector &funcs) + -> std::optional { + if (funcs.empty()) { + printer_.print_error("comparer", "no functions found"); + return ComparisonResult { + .success = false, + .error_message = "no functions found" + }; } + return std::nullopt; + } - return std::nullopt; // No matching Rust function found + auto check_multiple(const std::vector &funcs) + -> std::optional { + if (funcs.size() > 1) { + printer_.print_error("comparer", "multiple functions found"); + return ComparisonResult { + .success = false, + .error_message = "multiple functions found" + }; + } + return std::nullopt; } -private: llvm::Module *cpp_module_; llvm::Module *rust_module_; std::string cpp_pattern_; std::string rust_pattern_; llvm_util::Verifier &verifier_; + Printer printer_; }; #endif // COMPARER_H diff --git a/src/Printer.h b/src/Printer.h index b2af261..6f7c27c 100644 --- a/src/Printer.h +++ b/src/Printer.h @@ -11,18 +11,34 @@ #define BOLD_BLUE "\033[1;34m" #define RESET_COLOR "\033[0m" +struct ComparisonResult { + bool success; + std::string cpp_name; + std::string rust_name; + std::string error_message; +}; + class Printer { public: explicit Printer(std::ostream &os) : os(os) {} /// print verification summary void print_summary(const llvm_util::Verifier &verifier, + const ComparisonResult &result, const std::string &verifier_output = "") const { if (!verifier_output.empty()) { os << verifier_output << std::endl; } os << BOLD_BLUE << "========================================\n"; + os << "COMPARING:\n" + << " " << result.cpp_name << BOLD_GREEN << " (source)" << BOLD_BLUE + << " <-> " << result.rust_name << BOLD_GREEN << " (target)" + << RESET_COLOR << "\n"; + if (!result.error_message.empty()) { + os << BOLD_RED << " error: " << result.error_message << "\n"; + } + os << BOLD_BLUE; os << "SUMMARY:\n" << " " << BOLD_GREEN << verifier.num_correct << " correct translations\n" << " " << BOLD_RED << verifier.num_unsound << " incorrect translations\n" diff --git a/src/main.cpp b/src/main.cpp index a51c590..63409e9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,4 +1,3 @@ -#include #include #include "llvm_util/compare.h" @@ -40,12 +39,7 @@ namespace { /// the patterns (i.e., function prefix) to match cpp and rust functions /// in the provided ir files. -/// note that this could be changed by manually specifying the `-cpp-pattern` -/// and -/// `-rust-pattern` options on the command line when running the translation -/// validator. -/// -/// ps. the default values are `_Z` for cpp and `_ZN` for rust. +/// ps. the values are `_Z` for cpp and `_ZN` for rust. llvm::cl::opt opt_cpp_pattern { "cpp-pattern", llvm::cl::desc("pattern to match cpp functions in ir file"), llvm::cl::init(CPP_MANGLING_PREFIX) @@ -116,7 +110,7 @@ int main(int argc, char *argv[]) { opt_rust_pattern, verifier }; auto results = comparer.compare(); - // check for source undefined behavior + // check for potential source undefined behavior std::string verifier_output { verifier_buffer.str() }; if (verifier_output.find(SRC_UB_PROMPT) != std::string::npos) { printer.print_src_ub_prompt(); @@ -125,15 +119,15 @@ int main(int argc, char *argv[]) { smt_initializer, std::cout }; - // Switch the order of modules + // switch the order of modules Comparer reversed_comparer { *rust_module, *cpp_module, opt_rust_pattern, opt_cpp_pattern, reversed_verifier }; auto reversed_results = reversed_comparer.compare(); - printer.print_summary(reversed_verifier); + printer.print_summary(reversed_verifier, reversed_results); return reversed_verifier.num_errors > 0; } else { - printer.print_summary(verifier, verifier_output); + printer.print_summary(verifier, results, verifier_output); return verifier.num_errors > 0; } }