Skip to content

Commit

Permalink
finalize project checkpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
xzhseh committed Oct 25, 2024
1 parent 6cf8431 commit be273cc
Show file tree
Hide file tree
Showing 12 changed files with 152 additions and 107 deletions.
24 changes: 12 additions & 12 deletions CMakeLists_Template
Original file line number Diff line number Diff line change
Expand Up @@ -8,47 +8,47 @@ set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -frtti -fexceptions")

# Find LLVM
# find llvm
set(LLVM_DIR "/path/to/custom/llvm_home/llvm-project/build/lib/cmake/llvm")
find_package(LLVM REQUIRED CONFIG)
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")

# Include LLVM headers
# include llvm headers
include_directories(${LLVM_INCLUDE_DIRS})
link_directories(${LLVM_LIBRARY_DIRS})

# Find Z3
# find z3
find_package(Z3 REQUIRED)
include_directories(${Z3_INCLUDE_DIRS})
add_definitions(${Z3_DEFINITIONS})

# Set the path to Alive2
# set the path to alive2
set(ALIVE2_DIR "/path/to/alive2")

# Include Alive2 headers
# include alive2 headers
include_directories(${ALIVE2_DIR})

# Add your source files
# add source files
add_executable(translation_validator src/main.cpp)

# Link against LLVM, Z3, and Alive2
# link against llvm, z3, and alive2
target_link_libraries(translation_validator PRIVATE
# note, you may need to change `.dylib` to `.so` or `.a` depending on your OS
# note: you may need to change `.dylib` to `.so` or `.a` depending on your OS
/path/to/custom/llvm_home/llvm-project/build/lib/libLLVMCore.dylib
/path/to/custom/llvm_home/llvm-project/build/lib/libLLVMSupport.dylib
/path/to/custom/llvm_home/llvm-project/build/lib/libLLVMIRReader.dylib
/path/to/custom/llvm_home/llvm-project/build/lib/libLLVMBitReader.dylib
/path/to/custom/llvm_home/llvm-project/build/lib/libLLVMMC.dylib
/path/to/custom/llvm_home/llvm-project/build/lib/libLLVMOption.dylib
/path/to/custom/llvm_home/llvm-project/build/lib/libLLVMAnalysis.dylib # For TargetLibraryInfo
/path/to/custom/llvm_home/llvm-project/build/lib/libLLVMTarget.dylib # For Triple
/path/to/custom/llvm_home/llvm-project/build/lib/libLLVMTargetParser.dylib # For Triple parsing
/path/to/custom/llvm_home/llvm-project/build/lib/libLLVMAnalysis.dylib # for `TargetLibraryInfo`
/path/to/custom/llvm_home/llvm-project/build/lib/libLLVMTarget.dylib # for `Triple`
/path/to/custom/llvm_home/llvm-project/build/lib/libLLVMTargetParser.dylib # for `Triple` parsing

${Z3_LIBRARIES}
${ALIVE2_DIR}/build/libir.a
${ALIVE2_DIR}/build/libsmt.a
${ALIVE2_DIR}/build/libutil.a
${ALIVE2_DIR}/build/libtools.a
${ALIVE2_DIR}/build/libllvm_util.a # For llvm_util::initializer
${ALIVE2_DIR}/build/libllvm_util.a # for `llvm_util::initializer`
)
25 changes: 14 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
## translation validator

### overview
this tool compares the source function in cpp with the target function in rust, and verifies whether the rust function is a correct translation/semantic equivalent of the cpp function using alive2 in llvm ir level.

**ps**. you could find all source files to be verified in the `examples/source` directory.

### prerequisites
- follow the instructions in [alive2](https://github.com/AliveToolkit/alive2) to build and configure alive2, also the specific llvm with RTTI and exceptions turned on.
- I basically build the llvm from source (i.e., the latest main branch) and build the alive2 upon it.
- follow the instructions in [alive2](https://github.com/AliveToolkit/alive2) to build and configure alive2, and also the specific llvm version.
- you should build the llvm from source (i.e., the latest main branch), with RTTI and exceptions turned on.

- based on the provided `CMakeLists_Template`, create your own `CMakeLists.txt` by replacing the placeholder for the paths.
- based on the provided `CMakeLists_Template`, create your own `CMakeLists.txt` by replacing the placeholder for the paths, you may need to change the `.dylib` to `.so` or `.a` depending on your OS.

### build
```bash
bash scripts/build.sh
```
### build and run
through `make build`, `make run`, or `make build_and_run`.

note: a `compile_commands.json` will be automatically generated in the build directory and moved to the root directory, this is generally used by clangd for code navigation, you may need to reload the window to make it work.

### run
```bash
bash scripts/run.sh
```
### generate the ir files
either through `make generate_ir` or `make clean_and_generate_ir`.

**note**: the source file, e.g., `examples/source/add.rs`, will be converted to `add_rs.ll` and put in the `examples/ir/add/add_rs.ll` path, check the `scripts/src2ir.py` for more details.
12 changes: 6 additions & 6 deletions examples/source/access_array/access_array.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
int access_array(int index) {
int access_array(unsigned int index) {
int arr[10] {};

// to preserve the memory layout as rust's
Expand All @@ -13,9 +13,9 @@ int access_array(int index) {
arr[8] = 8;
arr[9] = 9;

if (index < 0 || index >= 10) {
return -1;
}

return arr[index];
// a subtle unsigned integer overflow "feature" in cpp,
// i.e., implicitly wraps around.
// e.g., when considering the input `index` as 0xfffffffc (4294967292, -4),
// this function is more *defined* than the rust (panic) version.
return arr[10 + index];
}
8 changes: 2 additions & 6 deletions examples/source/access_array/access_array.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
pub fn access_array(index: i32) -> i32 {
pub fn access_array(index: u32) -> i32 {
let arr = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];

if index < 0 || index >= 10 {
return -1;
}

arr[index as usize]
arr[10 + index as usize]
}
6 changes: 4 additions & 2 deletions examples/source/div_by_zero/div_by_zero.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
short div(short a, short b) {
return a / b;
/// ub will be detected by alive2, this is also considered semantically
/// different from the rust version.
int div_by_zero(int a, int b) {
return a / (b - b);
}
6 changes: 4 additions & 2 deletions examples/source/div_by_zero/div_by_zero.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
pub fn div(a: i16, b: i16) -> i16 {
a / b
/// the source cpp module is ub, and the function attrs are different
/// so the verifier fails to compare the two even after switching the order.
pub fn div_by_zero(a: i32, b: i32) -> i32 {
a / (b - b)
}
1 change: 1 addition & 0 deletions examples/source/use_uninit/use_uninit.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/// ub, order will be switched during verification.
int use_uninit() {
int arr[2];
arr[1] = 1030;
Expand Down
12 changes: 6 additions & 6 deletions scripts/build.sh
Original file line number Diff line number Diff line change
@@ -1,28 +1,28 @@
#!/bin/bash

# Enable stricter error handling
# enable stricter error handling
set -euo pipefail

# Remove existing build directory and compile_commands.json
# remove existing build directory and compile_commands.json
rm -rf build
rm -f compile_commands.json

# Create new build directory and change to it
# create new build directory and change to it
mkdir build && cd build || exit 1

# Run CMake
# run cmake
if ! cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ..; then
echo "[build.sh] cmake configuration failed"
exit 1
fi

# Run make
# run make
if ! make; then
echo "[build.sh] build failed"
exit 1
fi

# Move compile_commands.json to root directory
# move `compile_commands.json` to root directory
if [ -f compile_commands.json ]; then
mv compile_commands.json ..
echo "[build.sh] moved \`compile_commands.json\` to root directory"
Expand Down
10 changes: 4 additions & 6 deletions scripts/run.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
#!/bin/bash

# Enable stricter error handling
# enable stricter error handling
set -euo pipefail

# Define the path to your executable
# Replace 'your_executable' with the actual name of your program
EXECUTABLE="./build/translation_validator"

# Check if the executable exists
# check if the executable exists
if [ ! -f "$EXECUTABLE" ]; then
echo "Error: Executable not found. Did you build the project?"
echo "error: executable not found, did you build the project?"
exit 1
fi

# Run the executable
# run the executable
"$EXECUTABLE"
123 changes: 78 additions & 45 deletions src/Comparer.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,10 @@
#include "llvm/Support/CommandLine.h"
#include "llvm_util/compare.h"

#include "Printer.h"

class Comparer {
public:
struct ComparisonResult {
bool success;
std::string cpp_name;
std::string rust_name;
std::string error_message;
};

Comparer(llvm::Module &cpp_module, llvm::Module &rust_module,
llvm::cl::opt<std::string> &cpp_pattern,
llvm::cl::opt<std::string> &rust_pattern,
Expand All @@ -25,63 +20,101 @@ class Comparer {
rust_module_(&rust_module),
cpp_pattern_(cpp_pattern),
rust_pattern_(rust_pattern),
verifier_(verifier) {}
verifier_(verifier),
printer_(std::cout) {}

// Compare all functions and return results
std::vector<ComparisonResult> compare() {
std::vector<ComparisonResult> results;
/// compare the source function in `cpp_module` with the target function
/// in `rust_module`.
/// note: currently only supports one-to-one comparison.
ComparisonResult compare() {
std::vector<llvm::Function *> cpp_funcs {};
std::vector<llvm::Function *> rust_funcs {};

for (auto &cpp_func : *cpp_module_) {
if (should_skip_function(cpp_func)) continue;

auto result = find_and_compare_matching_function(cpp_func);
if (result.has_value()) {
results.push_back(std::move(*result));
if (should_skip_function(cpp_func, cpp_pattern_)) {
continue;
}
cpp_funcs.push_back(&cpp_func);
}
for (auto &rust_func : *rust_module_) {
if (should_skip_function(rust_func, rust_pattern_)) {
continue;
}
rust_funcs.push_back(&rust_func);
}

return results;
}
if (auto cpp_empty = check_empty(cpp_funcs)) {
return *cpp_empty;
}
if (auto rust_empty = check_empty(rust_funcs)) {
return *rust_empty;
}
if (auto cpp_multiple = check_multiple(cpp_funcs)) {
return *cpp_multiple;
}
if (auto rust_multiple = check_multiple(rust_funcs)) {
return *rust_multiple;
}

private:
bool should_skip_function(const llvm::Function &func) const {
return func.isDeclaration() ||
!func.getName().starts_with(cpp_pattern_);
}
auto cpp_func = cpp_funcs[0];
auto rust_func = rust_funcs[0];

std::optional<ComparisonResult> find_and_compare_matching_function(
llvm::Function &cpp_func) {
for (auto &rust_func : *rust_module_) {
if (rust_func.isDeclaration()) continue;
if (!rust_func.getName().starts_with(rust_pattern_)) continue;
bool success { false };
try {
success = verifier_.compareFunctions(*cpp_func, *rust_func);
} catch (const std::exception &e) {
printer_.print_error("comparer", e.what());
return ComparisonResult {
.success = false,
.error_message = e.what()
};
}

ComparisonResult result;
result.cpp_name = cpp_func.getName().str();
result.rust_name = rust_func.getName().str();
return ComparisonResult {
.success = success,
.cpp_name = cpp_func->getName().str(),
.rust_name = rust_func->getName().str(),
.error_message = success ? "" : "functions are not semantically equivalent"
};
}

try {
result.success =
verifier_.compareFunctions(cpp_func, rust_func);
if (!result.success) {
result.error_message = "Functions are not equivalent";
}
return result;
} catch (const std::exception &e) {
result.success = false;
result.error_message = e.what();
return result;
}
private:
/// check if the function should be skipped
auto should_skip_function(const llvm::Function &func,
const std::string &pattern) -> bool const {
return func.isDeclaration() || !func.getName().starts_with(pattern);
}

auto check_empty(const std::vector<llvm::Function *> &funcs)
-> std::optional<ComparisonResult> {
if (funcs.empty()) {
printer_.print_error("comparer", "no functions found");
return ComparisonResult {
.success = false,
.error_message = "no functions found"
};
}
return std::nullopt;
}

return std::nullopt; // No matching Rust function found
auto check_multiple(const std::vector<llvm::Function *> &funcs)
-> std::optional<ComparisonResult> {
if (funcs.size() > 1) {
printer_.print_error("comparer", "multiple functions found");
return ComparisonResult {
.success = false,
.error_message = "multiple functions found"
};
}
return std::nullopt;
}

private:
llvm::Module *cpp_module_;
llvm::Module *rust_module_;
std::string cpp_pattern_;
std::string rust_pattern_;
llvm_util::Verifier &verifier_;
Printer printer_;
};

#endif // COMPARER_H
16 changes: 16 additions & 0 deletions src/Printer.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,34 @@
#define BOLD_BLUE "\033[1;34m"
#define RESET_COLOR "\033[0m"

struct ComparisonResult {
bool success;
std::string cpp_name;
std::string rust_name;
std::string error_message;
};

class Printer {
public:
explicit Printer(std::ostream &os) : os(os) {}

/// print verification summary
void print_summary(const llvm_util::Verifier &verifier,
const ComparisonResult &result,
const std::string &verifier_output = "") const {
if (!verifier_output.empty()) {
os << verifier_output << std::endl;
}

os << BOLD_BLUE << "========================================\n";
os << "COMPARING:\n"
<< " " << result.cpp_name << BOLD_GREEN << " (source)" << BOLD_BLUE
<< " <-> " << result.rust_name << BOLD_GREEN << " (target)"
<< RESET_COLOR << "\n";
if (!result.error_message.empty()) {
os << BOLD_RED << " error: " << result.error_message << "\n";
}
os << BOLD_BLUE;
os << "SUMMARY:\n"
<< " " << BOLD_GREEN << verifier.num_correct << " correct translations\n"
<< " " << BOLD_RED << verifier.num_unsound << " incorrect translations\n"
Expand Down
Loading

0 comments on commit be273cc

Please sign in to comment.