diff --git a/.codecov.yml b/.codecov.yml index 7695d5ee3..c13b119d1 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -5,7 +5,7 @@ coverage: status: project: default: - target: 64.3% # If auto, it will compare with the last commit. This could be set to an exact number such as 70% or higher. + target: 67.75% # If auto, it will compare with the last commit. This could be set to an exact number such as 70% or higher. threshold: 0.01% # Allow the coverage to drop by 0.01%, and posting a success status. base: auto patch: diff --git a/.github/workflows/github-action.yml b/.github/workflows/github-action.yml index 0ce0b04ad..343017d65 100644 --- a/.github/workflows/github-action.yml +++ b/.github/workflows/github-action.yml @@ -90,11 +90,21 @@ jobs: run: ctest -R double_free -VV - - name: ctest symabs + - name: ctest ae_symabs working-directory: ${{github.workspace}}/Release-build run: ctest -R symabs -VV + - name: ctest ae_assert + working-directory: ${{github.workspace}}/Release-build + run: + ctest -R ae_assert_test -VV + + - name: ctest ae_overflow + working-directory: ${{github.workspace}}/Release-build + run: + ctest -R ae_overflow_test -VV + - name: ctest cfl_tests working-directory: ${{github.workspace}}/Release-build run: @@ -122,8 +132,6 @@ jobs: lcov --remove coverage.info '${{github.workspace}}/llvm-*.obj/*' --output-file coverage.info lcov --remove coverage.info '${{github.workspace}}/svf/include/FastCluster/*' --output-file coverage.info lcov --remove coverage.info '${{github.workspace}}/svf/lib/FastCluster/*' --output-file coverage.info - lcov --remove coverage.info '${{github.workspace}}/svf/include/AbstractExecution/*' --output-file coverage.info - lcov --remove coverage.info '${{github.workspace}}/svf/lib/AbstractExecution/*' --output-file coverage.info - name: upload-coverage if: runner.os == 'Linux' diff --git a/svf-llvm/tools/AE/CMakeLists.txt b/svf-llvm/tools/AE/CMakeLists.txt index 4fa0417da..938e10875 100644 --- a/svf-llvm/tools/AE/CMakeLists.txt +++ 
b/svf-llvm/tools/AE/CMakeLists.txt @@ -1,2 +1,3 @@ add_llvm_executable(ae ae.cpp) target_link_libraries(ae PUBLIC ${llvm_libs} SvfLLVM) + diff --git a/svf-llvm/tools/AE/ae.cpp b/svf-llvm/tools/AE/ae.cpp index e662e0d7b..14baeb429 100644 --- a/svf-llvm/tools/AE/ae.cpp +++ b/svf-llvm/tools/AE/ae.cpp @@ -1,17 +1,43 @@ +//===- ae.cpp -- Abstract Execution -------------------------------------// // -// Created by Jiawei Ren on 11/28/23. +// SVF: Static Value-Flow Analysis // -#include "AbstractExecution/IntervalExeState.h" -#include "AbstractExecution/RelExeState.h" -#include "AbstractExecution/RelationSolver.h" -#include "SVF-LLVM/LLVMUtil.h" -#include "Util/Z3Expr.h" +// Copyright (C) <2013-2017> +// + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+// +//===-----------------------------------------------------------------------===// + +/* + // Abstract Execution + // + // Author: Jiawei Wang, Xiao Cheng, Jiawei Yang, Jiawei Ren, Yulei Sui + */ +#include "SVF-LLVM/SVFIRBuilder.h" +#include "WPA/WPAPass.h" +#include "Util/CommandLine.h" #include "Util/Options.h" -#include + +#include "AE/Svfexe/BufOverflowChecker.h" +#include "AE/Core/RelExeState.h" +#include "AE/Core/RelationSolver.h" using namespace SVF; using namespace SVFUtil; + static Option SYMABS( "symabs", "symbolic abstraction test", @@ -32,7 +58,7 @@ class SymblicAbstractionTest void test_print() { - SVFUtil::outs() << "hello print\n"; + outs() << "hello print\n"; } IntervalESBase RSY_time(IntervalESBase& inv, const Z3Expr& phi, @@ -42,7 +68,7 @@ class SymblicAbstractionTest IntervalESBase resRSY = rs.RSY(inv, phi); auto end_time = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast( - end_time - start_time); + end_time - start_time); outs() << "running time of RSY : " << duration.count() << " microseconds\n"; return resRSY; @@ -54,7 +80,7 @@ class SymblicAbstractionTest IntervalESBase resBilateral = rs.bilateral(inv, phi); auto end_time = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast( - end_time - start_time); + end_time - start_time); outs() << "running time of Bilateral: " << duration.count() << " microseconds\n"; return resBilateral; @@ -66,7 +92,7 @@ class SymblicAbstractionTest IntervalESBase resBS = rs.BS(inv, phi); auto end_time = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast( - end_time - start_time); + end_time - start_time); outs() << "running time of BS : " << duration.count() << " microseconds\n"; return resBS; @@ -74,7 +100,7 @@ class SymblicAbstractionTest void testRelExeState1_1() { - SVFUtil::outs() << sucMsg("\t SUCCESS :") << "test1_1 start\n"; + outs() << sucMsg("\t SUCCESS :") << "test1_1 start\n"; IntervalESBase itv; 
RelExeState relation; // var0 := [0, 1]; @@ -108,7 +134,7 @@ class SymblicAbstractionTest void testRelExeState1_2() { - SVFUtil::outs() << "test1_2 start\n"; + outs() << "test1_2 start\n"; IntervalESBase itv; RelExeState relation; // var0 := [0, 1]; @@ -143,7 +169,7 @@ class SymblicAbstractionTest void testRelExeState2_1() { - SVFUtil::outs() << "test2_1 start\n"; + outs() << "test2_1 start\n"; IntervalESBase itv; RelExeState relation; // var0 := [0, 10]; @@ -177,15 +203,15 @@ class SymblicAbstractionTest } // ground truth IntervalESBase::VarToValMap intendedRes = {{0, IntervalValue(0, 10)}, - {1, IntervalValue(0, 10)}, - {2, IntervalValue(0, 0)} + {1, IntervalValue(0, 10)}, + {2, IntervalValue(0, 0)} }; assert(IntervalESBase::eqVarToValMap(resBS.getVarToVal(), intendedRes) && "inconsistency occurs"); } void testRelExeState2_2() { - SVFUtil::outs() << "test2_2 start\n"; + outs() << "test2_2 start\n"; IntervalESBase itv; RelExeState relation; // var0 := [0, 100]; @@ -220,15 +246,15 @@ class SymblicAbstractionTest } // ground truth IntervalESBase::VarToValMap intendedRes = {{0, IntervalValue(0, 100)}, - {1, IntervalValue(0, 100)}, - {2, IntervalValue(0, 0)} + {1, IntervalValue(0, 100)}, + {2, IntervalValue(0, 0)} }; assert(IntervalESBase::eqVarToValMap(resBS.getVarToVal(), intendedRes) && "inconsistency occurs"); } void testRelExeState2_3() { - SVFUtil::outs() << "test2_3 start\n"; + outs() << "test2_3 start\n"; IntervalESBase itv; RelExeState relation; // var0 := [0, 1000]; @@ -264,15 +290,15 @@ class SymblicAbstractionTest // ground truth // ground truth IntervalESBase::VarToValMap intendedRes = {{0, IntervalValue(0, 1000)}, - {1, IntervalValue(0, 1000)}, - {2, IntervalValue(0, 0)} + {1, IntervalValue(0, 1000)}, + {2, IntervalValue(0, 0)} }; assert(IntervalESBase::eqVarToValMap(resBS.getVarToVal(), intendedRes) && "inconsistency occurs"); } void testRelExeState2_4() { - SVFUtil::outs() << "test2_4 start\n"; + outs() << "test2_4 start\n"; IntervalESBase itv; 
RelExeState relation; // var0 := [0, 10000]; @@ -308,15 +334,15 @@ class SymblicAbstractionTest // ground truth // ground truth IntervalESBase::VarToValMap intendedRes = {{0, IntervalValue(0, 10000)}, - {1, IntervalValue(0, 10000)}, - {2, IntervalValue(0, 0)} + {1, IntervalValue(0, 10000)}, + {2, IntervalValue(0, 0)} }; assert(IntervalESBase::eqVarToValMap(resBS.getVarToVal(), intendedRes) && "inconsistency occurs"); } void testRelExeState2_5() { - SVFUtil::outs() << "test2_5 start\n"; + outs() << "test2_5 start\n"; IntervalESBase itv; RelExeState relation; // var0 := [0, 100000]; @@ -352,15 +378,15 @@ class SymblicAbstractionTest // ground truth // ground truth IntervalESBase::VarToValMap intendedRes = {{0, IntervalValue(0, 100000)}, - {1, IntervalValue(0, 100000)}, - {2, IntervalValue(0, 0)} + {1, IntervalValue(0, 100000)}, + {2, IntervalValue(0, 0)} }; assert(IntervalESBase::eqVarToValMap(resBS.getVarToVal(), intendedRes) && "inconsistency occurs"); } void testRelExeState3_1() { - SVFUtil::outs() << "test3_1 start\n"; + outs() << "test3_1 start\n"; IntervalESBase itv; RelExeState relation; // var0 := [1, 10]; @@ -394,15 +420,15 @@ class SymblicAbstractionTest } // ground truth IntervalESBase::VarToValMap intendedRes = {{0, IntervalValue(1, 10)}, - {1, IntervalValue(1, 10)}, - {2, IntervalValue(1, 1)} + {1, IntervalValue(1, 10)}, + {2, IntervalValue(1, 1)} }; assert(IntervalESBase::eqVarToValMap(resBS.getVarToVal(), intendedRes) && "inconsistency occurs"); } void testRelExeState3_2() { - SVFUtil::outs() << "test3_2 start\n"; + outs() << "test3_2 start\n"; IntervalESBase itv; RelExeState relation; // var0 := [1, 1000]; @@ -436,15 +462,15 @@ class SymblicAbstractionTest } // ground truth IntervalESBase::VarToValMap intendedRes = {{0, IntervalValue(1, 1000)}, - {1, IntervalValue(1, 1000)}, - {2, IntervalValue(1, 1)} + {1, IntervalValue(1, 1000)}, + {2, IntervalValue(1, 1)} }; assert(IntervalESBase::eqVarToValMap(resBS.getVarToVal(), intendedRes) && "inconsistency 
occurs"); } void testRelExeState3_3() { - SVFUtil::outs() << "test3_3 start\n"; + outs() << "test3_3 start\n"; IntervalESBase itv; RelExeState relation; // var0 := [1, 10000]; @@ -478,14 +504,14 @@ class SymblicAbstractionTest } // ground truth IntervalESBase::VarToValMap intendedRes = - Map({{0, IntervalValue(1, 10000)}, - {1, IntervalValue(1, 10000)}, - {2, IntervalValue(1, 1)}}); + Map({{0, IntervalValue(1, 10000)}, + {1, IntervalValue(1, 10000)}, + {2, IntervalValue(1, 1)}}); } void testRelExeState3_4() { - SVFUtil::outs() << "test3_4 start\n"; + outs() << "test3_4 start\n"; IntervalESBase itv; RelExeState relation; // var0 := [1, 100000]; @@ -519,15 +545,15 @@ class SymblicAbstractionTest } // ground truth IntervalESBase::VarToValMap intendedRes = {{0, IntervalValue(1, 100000)}, - {1, IntervalValue(1, 100000)}, - {2, IntervalValue(1, 1)} + {1, IntervalValue(1, 100000)}, + {2, IntervalValue(1, 1)} }; assert(IntervalESBase::eqVarToValMap(resBS.getVarToVal(), intendedRes) && "inconsistency occurs"); } void testRelExeState4_1() { - SVFUtil::outs() << "test4_1 start\n"; + outs() << "test4_1 start\n"; IntervalESBase itv; RelExeState relation; // var0 := [0, 10]; @@ -564,8 +590,8 @@ class SymblicAbstractionTest } // ground truth IntervalESBase::VarToValMap intendedRes = {{0, IntervalValue(0, 10)}, - {1, IntervalValue(0, 10)}, - {2, IntervalValue(IntervalValue::minus_infinity(), IntervalValue::plus_infinity())} + {1, IntervalValue(0, 10)}, + {2, IntervalValue(IntervalValue::minus_infinity(), IntervalValue::plus_infinity())} }; assert(IntervalESBase::eqVarToValMap(resBS.getVarToVal(), intendedRes) && "inconsistency occurs"); } @@ -579,29 +605,64 @@ class SymblicAbstractionTest saTest.testRelExeState2_1(); saTest.testRelExeState2_2(); saTest.testRelExeState2_3(); -// saTest.testRelExeState2_4(); /// 10000 -// saTest.testRelExeState2_5(); /// 100000 + // saTest.testRelExeState2_4(); /// 10000 + // saTest.testRelExeState2_5(); /// 100000 saTest.testRelExeState3_1(); 
saTest.testRelExeState3_2(); -// saTest.testRelExeState3_3(); /// 10000 -// saTest.testRelExeState3_4(); /// 100000 + // saTest.testRelExeState3_3(); /// 10000 + // saTest.testRelExeState3_4(); /// 100000 outs() << "start top\n"; saTest.testRelExeState4_1(); /// top } }; + int main(int argc, char** argv) { + int arg_num = 0; + int extraArgc = 3; + char **arg_value = new char *[argc + extraArgc]; + for (; arg_num < argc; ++arg_num) { + arg_value[arg_num] = argv[arg_num]; + } + // add extra options + int orgArgNum = arg_num; + arg_value[arg_num++] = (char*) "-model-consts=true"; + arg_value[arg_num++] = (char*) "-model-arrays=true"; + arg_value[arg_num++] = (char*) "-pre-field-sensitive=false"; + assert(arg_num == (orgArgNum + extraArgc) && "more extra arguments? Change the value of extraArgc"); + std::vector moduleNameVec; moduleNameVec = OptionBase::parseOptions( - argc, argv, "Source-Sink Bug Detector", "[options] " - ); + arg_num, arg_value, "Static Symbolic Execution", "[options] " + ); + delete[] arg_value; if (SYMABS()) { SymblicAbstractionTest saTest; saTest.testsValidation(); + return 0; } + + SVFModule *svfModule = LLVMModuleSet::getLLVMModuleSet()->buildSVFModule(moduleNameVec); + SVFIRBuilder builder(svfModule); + SVFIR* pag = builder.build(); + AndersenWaveDiff* ander = AndersenWaveDiff::createAndersenWaveDiff(pag); + PTACallGraph* callgraph = ander->getPTACallGraph(); + builder.updateCallGraph(callgraph); + if (Options::BufferOverflowCheck()) { + BufOverflowChecker ae; + ae.initExtAPI(); + ae.runOnModule(pag); + } else { + AE ae; + ae.initExtAPI(); + ae.runOnModule(pag); + } + + LLVMModuleSet::releaseLLVMModuleSet(); + return 0; -} +} \ No newline at end of file diff --git a/svf-llvm/tools/Example/svf-ex.cpp b/svf-llvm/tools/Example/svf-ex.cpp index b2de6f5b3..8d2473e2b 100644 --- a/svf-llvm/tools/Example/svf-ex.cpp +++ b/svf-llvm/tools/Example/svf-ex.cpp @@ -26,14 +26,13 @@ // Author: Yulei Sui, */ -#include "SVF-LLVM/LLVMUtil.h" -#include 
"AbstractExecution/SVFIR2ItvExeState.h" +#include "AE/Svfexe/SVFIR2ItvExeState.h" #include "Graphs/SVFG.h" -#include "WPA/Andersen.h" +#include "SVF-LLVM/LLVMUtil.h" #include "SVF-LLVM/SVFIRBuilder.h" #include "Util/CommandLine.h" #include "Util/Options.h" - +#include "WPA/Andersen.h" using namespace std; using namespace SVF; diff --git a/svf/include/AbstractExecution/AbstractValue.h b/svf/include/AE/Core/AbstractValue.h similarity index 98% rename from svf/include/AbstractExecution/AbstractValue.h rename to svf/include/AE/Core/AbstractValue.h index bf9728fcd..fbee21c4e 100644 --- a/svf/include/AbstractExecution/AbstractValue.h +++ b/svf/include/AE/Core/AbstractValue.h @@ -32,7 +32,6 @@ #include #include -#include "SVFIR/SVFValue.h" namespace SVF { diff --git a/svf/include/AbstractExecution/AddressValue.h b/svf/include/AE/Core/AddressValue.h similarity index 95% rename from svf/include/AbstractExecution/AddressValue.h rename to svf/include/AE/Core/AddressValue.h index 613c751be..6fa6e2924 100644 --- a/svf/include/AbstractExecution/AddressValue.h +++ b/svf/include/AE/Core/AddressValue.h @@ -32,9 +32,11 @@ #define AddressMask 0x7f000000 #define FlippedAddressMask (AddressMask^0xffffffff) +// the address of the black hole, getVirtualMemAddress(2); +#define BlackHoleAddr 0x7f000000 + 2; -#include "AbstractExecution/AbstractValue.h" -#include "SVFIR/SVFIR.h" +#include "AE/Core/AbstractValue.h" +#include "Util/GeneralType.h" namespace SVF { @@ -181,7 +183,7 @@ class AddressValue : public AbstractValue inline bool isTop() const override { - return *this == getVirtualMemAddress(PAG::getPAG()->getBlackHoleObj()->getId()); + return *this == BlackHoleAddr; } inline bool isBottom() const override @@ -191,7 +193,7 @@ class AddressValue : public AbstractValue inline void setTop() { - *this = getVirtualMemAddress(PAG::getPAG()->getBlackHoleObj()->getId()); + *this = BlackHoleAddr; } inline void setBottom() diff --git a/svf/include/AbstractExecution/BoundedZ3Expr.h 
b/svf/include/AE/Core/BoundedZ3Expr.h similarity index 100% rename from svf/include/AbstractExecution/BoundedZ3Expr.h rename to svf/include/AE/Core/BoundedZ3Expr.h diff --git a/svf/include/AbstractExecution/CFBasicBlockGWTO.h b/svf/include/AE/Core/CFBasicBlockGWTO.h similarity index 98% rename from svf/include/AbstractExecution/CFBasicBlockGWTO.h rename to svf/include/AE/Core/CFBasicBlockGWTO.h index 287fe5524..4f864a4c6 100644 --- a/svf/include/AbstractExecution/CFBasicBlockGWTO.h +++ b/svf/include/AE/Core/CFBasicBlockGWTO.h @@ -55,6 +55,8 @@ class CFBasicBlockGWTO : public WTO { } + virtual ~CFBasicBlockGWTO() = default; + inline void forEachSuccessor( const CFBasicBlockNode* node, std::function func) const override diff --git a/svf/include/AbstractExecution/ConsExeState.h b/svf/include/AE/Core/ConsExeState.h similarity index 98% rename from svf/include/AbstractExecution/ConsExeState.h rename to svf/include/AE/Core/ConsExeState.h index 4b45ee660..6b595fe0e 100644 --- a/svf/include/AbstractExecution/ConsExeState.h +++ b/svf/include/AE/Core/ConsExeState.h @@ -26,8 +26,10 @@ #ifndef SVF_CONSEXESTATE_H #define SVF_CONSEXESTATE_H -#include "AbstractExecution/SingleAbsValue.h" -#include "AbstractExecution/ExeState.h" +#include "AE/Core/ExeState.h" +#include "AE/Core/SingleAbsValue.h" + +#define NullptrID 0 namespace SVF { @@ -355,7 +357,7 @@ class ConsExeState final : public ExeState { VarToValMap mp; ConsExeState exeState(mp, SVFUtil::move(mp)); - exeState._varToVal[PAG::getPAG()->getNullPtr()] = -1; + exeState._varToVal[NullptrID] = -1; return SVFUtil::move(exeState); } diff --git a/svf/include/AbstractExecution/ExeState.h b/svf/include/AE/Core/ExeState.h similarity index 98% rename from svf/include/AbstractExecution/ExeState.h rename to svf/include/AE/Core/ExeState.h index 0d2e1b595..accd1b891 100644 --- a/svf/include/AbstractExecution/ExeState.h +++ b/svf/include/AE/Core/ExeState.h @@ -31,9 +31,10 @@ #ifndef Z3_EXAMPLE_EXESTATE_H #define Z3_EXAMPLE_EXESTATE_H 
-#include "AbstractExecution/AddressValue.h" -#include "AbstractExecution/NumericLiteral.h" +#include "AE/Core/NumericLiteral.h" +#include "AE/Core/AddressValue.h" #include "Util/Z3Expr.h" +#include "Util/SVFUtil.h" namespace SVF { diff --git a/svf/include/AbstractExecution/ICFGWTO.h b/svf/include/AE/Core/ICFGWTO.h similarity index 100% rename from svf/include/AbstractExecution/ICFGWTO.h rename to svf/include/AE/Core/ICFGWTO.h diff --git a/svf/include/AbstractExecution/IntervalExeState.h b/svf/include/AE/Core/IntervalExeState.h similarity index 99% rename from svf/include/AbstractExecution/IntervalExeState.h rename to svf/include/AE/Core/IntervalExeState.h index 8848879a6..6b1be7244 100644 --- a/svf/include/AbstractExecution/IntervalExeState.h +++ b/svf/include/AE/Core/IntervalExeState.h @@ -43,8 +43,8 @@ #ifndef Z3_EXAMPLE_INTERVAL_DOMAIN_H #define Z3_EXAMPLE_INTERVAL_DOMAIN_H -#include "AbstractExecution/ExeState.h" -#include "AbstractExecution/IntervalValue.h" +#include "AE/Core/ExeState.h" +#include "AE/Core/IntervalValue.h" #include "Util/Z3Expr.h" #include diff --git a/svf/include/AbstractExecution/IntervalValue.h b/svf/include/AE/Core/IntervalValue.h similarity index 99% rename from svf/include/AbstractExecution/IntervalValue.h rename to svf/include/AE/Core/IntervalValue.h index d115f7a61..169ac725f 100644 --- a/svf/include/AbstractExecution/IntervalValue.h +++ b/svf/include/AE/Core/IntervalValue.h @@ -31,8 +31,8 @@ #ifndef Z3_EXAMPLE_IntervalValue_H #define Z3_EXAMPLE_IntervalValue_H -#include "AbstractExecution/NumericLiteral.h" -#include "AbstractExecution/AbstractValue.h" +#include "AE/Core/AbstractValue.h" +#include "AE/Core/NumericLiteral.h" namespace SVF { diff --git a/svf/include/AbstractExecution/NumericLiteral.h b/svf/include/AE/Core/NumericLiteral.h similarity index 99% rename from svf/include/AbstractExecution/NumericLiteral.h rename to svf/include/AE/Core/NumericLiteral.h index 87ddde7cd..5dcb3bbcd 100644 --- 
a/svf/include/AbstractExecution/NumericLiteral.h +++ b/svf/include/AE/Core/NumericLiteral.h @@ -30,10 +30,8 @@ #ifndef Z3_EXAMPLE_Number_H #define Z3_EXAMPLE_Number_H -#include - -#include "SVFIR/SVFType.h" -#include "AbstractExecution/BoundedZ3Expr.h" +#include "Util/GeneralType.h" +#include "AE/Core/BoundedZ3Expr.h" namespace SVF { diff --git a/svf/include/AbstractExecution/RelExeState.h b/svf/include/AE/Core/RelExeState.h similarity index 99% rename from svf/include/AbstractExecution/RelExeState.h rename to svf/include/AE/Core/RelExeState.h index 108ea0b4f..4809f5eff 100644 --- a/svf/include/AbstractExecution/RelExeState.h +++ b/svf/include/AE/Core/RelExeState.h @@ -30,8 +30,8 @@ #ifndef Z3_EXAMPLE_RELEXESTATE_H #define Z3_EXAMPLE_RELEXESTATE_H +#include "AE/Core/AddressValue.h" #include "Util/Z3Expr.h" -#include "AbstractExecution/AddressValue.h" namespace SVF { diff --git a/svf/include/AbstractExecution/RelationSolver.h b/svf/include/AE/Core/RelationSolver.h similarity index 98% rename from svf/include/AbstractExecution/RelationSolver.h rename to svf/include/AE/Core/RelationSolver.h index 33e6b7ae2..de5974fef 100644 --- a/svf/include/AbstractExecution/RelationSolver.h +++ b/svf/include/AE/Core/RelationSolver.h @@ -30,7 +30,7 @@ #ifndef Z3_EXAMPLE_RELATIONSOLVER_H #define Z3_EXAMPLE_RELATIONSOLVER_H -#include "AbstractExecution/IntervalExeState.h" +#include "AE/Core/IntervalExeState.h" #include "Util/Z3Expr.h" namespace SVF diff --git a/svf/include/AbstractExecution/SingleAbsValue.h b/svf/include/AE/Core/SingleAbsValue.h similarity index 99% rename from svf/include/AbstractExecution/SingleAbsValue.h rename to svf/include/AE/Core/SingleAbsValue.h index b85ed2fd3..d2faadd54 100644 --- a/svf/include/AbstractExecution/SingleAbsValue.h +++ b/svf/include/AE/Core/SingleAbsValue.h @@ -5,7 +5,7 @@ #ifndef SVF_SINGLEABSVALUE_H #define SVF_SINGLEABSVALUE_H -#include "AbstractExecution/BoundedZ3Expr.h" +#include "AE/Core/BoundedZ3Expr.h" namespace SVF { diff --git 
a/svf/include/AbstractExecution/SymState.h b/svf/include/AE/Core/SymState.h similarity index 99% rename from svf/include/AbstractExecution/SymState.h rename to svf/include/AE/Core/SymState.h index 8bef61ee9..cccc41a63 100644 --- a/svf/include/AbstractExecution/SymState.h +++ b/svf/include/AE/Core/SymState.h @@ -27,7 +27,7 @@ #ifndef SVF_SYMSTATE_H #define SVF_SYMSTATE_H -#include "AbstractExecution/ConsExeState.h" +#include "AE/Core/ConsExeState.h" namespace SVF { diff --git a/svf/include/AE/Svfexe/AE.h b/svf/include/AE/Svfexe/AE.h new file mode 100644 index 000000000..c9348c768 --- /dev/null +++ b/svf/include/AE/Svfexe/AE.h @@ -0,0 +1,411 @@ +//===- AE.cpp -- Abstract Execution---------------------------------// +// +// SVF: Static Value-Flow Analysis +// +// Copyright (C) <2013-> +// + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . +// +//===----------------------------------------------------------------------===// + + +// +// Created by Jiawei Wang on 2024/1/10. 
+// +#include +#include +#include "AE/Svfexe/SVFIR2ItvExeState.h" +#include "Util/WorkList.h" +#include "MSSA/SVFGBuilder.h" +#include "AE/Core/CFBasicBlockGWTO.h" +#include "WPA/Andersen.h" +#include "Util/SVFBugReport.h" + +namespace SVF { +class AE; +class AEStat; +class AEAPI; + + +enum class AEKind { + AE, + BufOverflowChecker +}; + +/// AEStat: Statistic for AE +class AEStat : public SVFStat { +public: + void countStateSize(); + AEStat(AE *ae): _ae(ae) { + startTime = getClk(true); + } + ~AEStat() { + } + inline std::string getMemUsage() { + u32_t vmrss, vmsize; + return SVFUtil::getMemoryUsageKB(&vmrss, &vmsize) ? std::to_string(vmsize) + "KB" : "cannot read memory usage"; + } + + void finializeStat(); + void performStat() override; + void reportBug(); + +public: + AE *_ae; + s32_t count{0}; + std::string memory_usage; + std::string memUsage; + std::string bugStr; + + + u32_t& getFunctionTrace() { + if (generalNumMap.count("Function_Trace") == 0) { + generalNumMap["Function_Trace"] = 0; + } + return generalNumMap["Function_Trace"]; + } + u32_t& getBlockTrace() { + if (generalNumMap.count("Block_Trace") == 0) { + generalNumMap["Block_Trace"] = 0; + } + return generalNumMap["Block_Trace"]; + } + u32_t& getICFGNodeTrace() { + if (generalNumMap.count("ICFG_Node_Trace") == 0) { + generalNumMap["ICFG_Node_Trace"] = 0; + } + return generalNumMap["ICFG_Node_Trace"]; + } + +}; + +// AE: Abstract Execution +class AE { + friend class AEStat; + friend class AEAPI; + +public: + typedef SCCDetection CallGraphSCC; + /// Constructor + AE(); + + virtual void initExtAPI(); + + virtual void runOnModule(SVFIR* svfModule); + + + /// Destructor + virtual ~AE(); + + /// Program entry + void analyse(); + + static bool classof(const AE* ae) { + return ae->getKind() == AEKind::AE; + } + + AEKind getKind() const { return _kind; } + +protected: + /// Global ICFGNode is handled at the entry of the program, + virtual void handleGlobalNode(); + + /// mark recursive functions by detecting 
SCC in callgraph + void markRecursiveFuns(); + + /** + * Check if execution state exist by merging states of predecessor blocks + * + * @param block The basic block to analyse + * @return if this block has preceding execution state + */ + bool hasInEdgesES(const CFBasicBlockNode *block); + + /** + * Check if execution state exist at the branch edge + * + * @param intraEdge the edge from CmpStmt to the next Block + * @return if this edge is feasible + */ + bool hasBranchES(const IntraCFGEdge* intraEdge, IntervalExeState& es); + + /** + * handle instructions in svf basic blocks + * + * @param block basic block that has a series of instructions + */ + void handleBlock(const CFBasicBlockNode *block); + + /** + * handle one instruction in svf basic blocks + * + * @param node ICFGNode which has a single instruction + */ + virtual void handleICFGNode(const ICFGNode *node); + + /** + * handle call node in svf basic blocks + * + * @param node ICFGNode which has a single CallICFGNode + */ + virtual void handleCallSite(const ICFGNode* node); + + /** + * handle wto cycle (loop) + * + * @param cycle WTOCycle which has weak topo order of basic blocks and nested cycles + */ + virtual void handleCycle(const CFBasicBlockGWTOCycle *cycle); + + /** + * handle user defined function, ext function is not included. + * + * @param func SVFFunction which has a series of basic blocks + */ + virtual void handleFunc(const SVFFunction *func); + + /** + * handle SVF Statement like CmpStmt, CallStmt, GepStmt, LoadStmt, StoreStmt, etc. + * + * @param stmt SVFStatement which is a value flow of instruction + */ + virtual void handleSVFStatement(const SVFStmt *stmt); + + /** + * Check if this callnode is recursive call and skip it. + * + * @param callnode CallICFGNode which calls a recursive function + */ + virtual void SkipRecursiveCall(const CallICFGNode *callnode); + + /** + * Check if this function is recursive function and skip it. 
+ * + * @param func SVFFunction is a recursive function + */ + virtual void SkipRecursiveFunc(const SVFFunction *func); + + /** + * Check if this cmpStmt and succ are satisfiable to the execution state. + * + * @param cmpStmt CmpStmt is a conditional branch statement + * @param succ the value of cmpStmt (True or False) + * @return if this block has preceding execution state + */ + bool hasCmpBranchES(const CmpStmt* cmpStmt, s64_t succ, IntervalExeState& es); + + /** + * Check if this SwitchInst and succ are satisfiable to the execution state. + * + * @param var var in switch inst + * @param succ the case value of switch inst + * @return if this block has preceding execution state + */ + bool hasSwitchBranchES(const SVFVar* var, s64_t succ, IntervalExeState& es); + + /// protected data members, also used in subclasses + SVFIR* _svfir; + PTACallGraph* _callgraph; + /// Execution State, used to store the Interval Value of every SVF variable + SVFIR2ItvExeState* _svfir2ExeState; + AEAPI* _api{nullptr}; + + ICFG* _icfg; + AEStat* _stat; + AEKind _kind; + + Set _bugLoc; + SVFBugReport _recoder; + std::vector _callSiteStack; + Map _nodeToBugInfo; + +private: + // helper functions in handleCallSite + bool isExtCall(const CallICFGNode* callNode); + void extCallPass(const CallICFGNode* callNode); + bool isRecursiveCall(const CallICFGNode* callNode); + void recursiveCallPass(const CallICFGNode* callNode); + bool isDirectCall(const CallICFGNode* callNode); + void directCallFunPass(const CallICFGNode* callNode); + bool isIndirectCall(const CallICFGNode* callNode); + void indirectCallFunPass(const CallICFGNode* callNode); + + // helper functions in hasInEdgesES + bool isFunEntry(const CFBasicBlockNode* block); + bool isGlobalEntry(const CFBasicBlockNode* block); + + // helper functions in handleCycle + bool widenFixpointPass(const CFBasicBlockNode* cycle_head, IntervalExeState& pre_es); + bool narrowFixpointPass(const CFBasicBlockNode* cycle_head, IntervalExeState& pre_es); + + 
// private data + CFBasicBlockGraph* _CFBlockG; + AndersenWaveDiff *_ander; + Map _preES; + Map _postES; + Map _funcToWTO; + Set _recursiveFuns; + std::string _moduleName; + +}; + +class AEAPI { +public: + + typedef ExeState::Addrs Addrs; + enum ExtAPIType { UNCLASSIFIED, MEMCPY, MEMSET, STRCPY, STRCAT }; + static bool classof(const AEAPI* api) { + return api->getKind() == AEKind::AE; + } + + /** + * Constructor of AEAPI + * Calls initExtFunMap(); inside this constructor the call always runs AEAPI's own version, never a subclass override. + * @param ae Abstract Execution or its subclass + * @param stat AEStat + */ + AEAPI(AE* ae, AEStat* stat): _ae(ae), _stat(stat) + { + initExtFunMap(); + _kind = AEKind::AE; + } + + virtual ~AEAPI() {} + + void setModule(SVFIR* svfModule) { + _svfir = svfModule; + } + + AEKind getKind() const { return _kind; } + + /** + * handle external function call + * + * @param call call node whose callee is external function + */ + virtual void handleExtAPI(const CallICFGNode *call); + + /** + * the map of external function to its API type + * + * In AEAPI, this function is mainly used for abstract execution. + * In subclasses, this function is mainly used to check specific bugs + */ + virtual void initExtFunMap(); + + /** + * get byte size of alloca inst + * + * @param addr Address Stmt like malloc/calloc/ALLOCA/StackAlloc + * @return the byte size e.g. int32_t a[10] -> return 40 + */ + u32_t getAllocaInstByteSize(const AddrStmt *addr); + + /** + * read the content of a string value + * e.g. source code str = "abc", given the value of str, return "abc" + * + * @param rhs SVFValue of string + * @return the string + */ + std::string strRead(const SVFValue* rhs); + + /** + * get length of string + * e.g. source code str = "abc", return 3 + * + * @param strValue SVFValue of string + * @return IntervalValue of string length + */ + IntervalValue getStrlen(const SVF::SVFValue *strValue); + + /** + * get memory allocation size + * e.g arr = new int[10] + * ....
+ * memset(arr, 1, 10* sizeof(int)) + * when we trace the 'arr', we can get the alloc size [40, 40] + * @param value to be traced + * @return IntervalValue of allocation size + */ + IntervalValue traceMemoryAllocationSize(const SVFValue *value); + /** + * execute strcpy in abstract execution + * e.g arr = new char[10] + * str = "abc" + * strcpy(arr, str) + * we can set arr[0]='a', arr[1]='b', arr[2]='c', arr[3]='\0' + * @param call callnode of strcpy like api + */ + virtual void handleStrcpy(const CallICFGNode *call); + /** + * execute strcat in abstract execution + * e.g arr[10] = "abc" + * str = "de" + * strcat(arr, str) + * we can set arr[3]='d', arr[4]='e', arr[5]='\0' + * @param call callnode of strcat like api + */ + virtual void handleStrcat(const CallICFGNode *call); + /** + * execute memcpy in abstract execution + * e.g arr = new char[10]; str = "abcd"; memcpy(arr, str, 5) copies 'a','b','c','d','\0' into arr[0..4] + * @param dst destination of the copy + * @param src source of the copy + * @param len interval of the length to copy + * @param start_idx presumably the index in dst where writing starts (TODO confirm against the implementation) + */ + virtual void handleMemcpy(const SVFValue* dst, const SVFValue* src, IntervalValue len, u32_t start_idx); + /** + * execute memset in abstract execution + * e.g arr = new char[10]; memset(arr, 'c', 2): we can set arr[0]='c', arr[1]='c', arr[2]='\0' + * @param dst buffer to be filled + * @param elem interval of the value written to each element + * @param len interval of the length to fill + */ + virtual void handleMemset(const SVFValue* dst, IntervalValue elem, IntervalValue len); + + /** + * if this NodeID in SVFIR is a pointer, get the pointee type + * e.g arr = (int*) malloc(10*sizeof(int)) + * getPointeeElement(NodeID of arr) -> return int + * @param id NodeID of the variable in SVFIR + * @return the pointee type + */ + const SVFType* getPointeeElement(NodeID id); + + void collectCheckPoint(); + void checkPointAllSet(); + +protected: + // helper functions for traceMemoryAllocationSize and canSafelyAccessMemory + void AccessMemoryViaRetNode(const CallICFGNode *callnode, SVF::FILOWorkList& worklist, Set& visited); + void AccessMemoryViaCopyStmt(const CopyStmt
*copy, SVF::FILOWorkList& worklist, Set& visited); + void AccessMemoryViaLoadStmt(const LoadStmt *load, SVF::FILOWorkList& worklist, Set& visited); + void AccessMemoryViaCallArgs(const SVF::SVFArgument *arg, SVF::FILOWorkList& worklist, Set& visited); + + +protected: + AE* _ae; + AEStat* _stat; + SVFIR* _svfir; + AEKind _kind; + + Map> _func_map; + + Set _checkpoints; + Set _checkpoint_names; +}; +} \ No newline at end of file diff --git a/svf/include/AE/Svfexe/BufOverflowChecker.h b/svf/include/AE/Svfexe/BufOverflowChecker.h new file mode 100644 index 000000000..b839b4632 --- /dev/null +++ b/svf/include/AE/Svfexe/BufOverflowChecker.h @@ -0,0 +1,203 @@ +//===- BufOverflowChecker.cpp -- BufOVerflowChecker Client for Abstract Execution---// +// +// SVF: Static Value-Flow Analysis +// +// Copyright (C) <2013-> +// + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . +// +//===----------------------------------------------------------------------===// + + +// +// Created by Jiawei Wang on 2024/1/12. 
+// + +#include "AE.h" + +namespace SVF { + +struct BufOverflowException: public std::exception { +public: + BufOverflowException(std::string msg, u32_t allocLb, + u32_t allocUb, u32_t accessLb, u32_t accessUb, const SVFValue* allocVal) : + _msg(msg), _allocLb(allocLb), _allocUb(allocUb), + _accessLb(accessLb), _accessUb(accessUb), _allocVar(allocVal) { + } + + u32_t getAllocLb() const { + return _allocLb; + } + + void setAllocLb(u32_t allocLb) { + _allocLb = allocLb; + } + + u32_t getAllocUb() const { + return _allocUb; + } + + void setAllocUb(u32_t allocUb) { + _allocUb = allocUb; + } + + u32_t getAccessLb() const { + return _accessLb; + } + + void setAccessLb(u32_t accessLb) { + _accessLb = accessLb; + } + + u32_t getAccessUb() const { + return _accessUb; + } + + void setAccessUb(u32_t accessUb) { + _accessUb = accessUb; + } + + const SVFValue* getAllocVar() const { + return _allocVar; + } + + const char* what() const noexcept override { + return _msg.c_str(); + } + + +protected: + std::string _msg; + u32_t _allocLb, _allocUb, _accessLb, _accessUb; + const SVFValue* _allocVar; +}; + +class BufOverflowCheckerAPI: public AEAPI { +public: + BufOverflowCheckerAPI() = delete; + BufOverflowCheckerAPI(AE * ae, AEStat * stat): AEAPI(ae, stat) { + initExtFunMap(); + initExtAPIBufOverflowCheckRules(); + _kind = AEKind::BufOverflowChecker; + } + static bool classof(const AEAPI* api) { + return api->getKind() == AEKind::BufOverflowChecker; + } + + /** + * the map of external function to its API type + * + * it initialize the ext apis about buffer overflow checking + */ + virtual void initExtFunMap(); + + /** + * the map of ext apis of buffer overflow checking rules + * + * it initialize the rules of extapis about buffer overflow checking + * e.g. memcpy(dst, src, sz) -> we check allocSize(dst)>=sz and allocSize(src)>=sz + */ + void initExtAPIBufOverflowCheckRules(); + + /** + * handle external function call regarding buffer overflow checking + * e.g. 
memcpy(dst, src, sz) -> we check allocSize(dst)>=sz and allocSize(src)>=sz + * overrides the virtual AEAPI::handleExtAPI (the override keyword is not spelled out here) + * @param call call node whose callee is external function + */ + void handleExtAPI(const CallICFGNode *call) ; + /** + * detect buffer overflow from strcpy like apis + * e.g. strcpy(dst, src), if dst is shorter than src, we will throw buffer overflow + * + * @param call call node whose callee is strcpy-like external function + * @return true if the buffer overflow is detected + */ + bool detectStrcpy(const CallICFGNode *call); + /** + * detect buffer overflow from strcat like apis + * e.g. strcat(dst, src), if dst is shorter than src, we will throw buffer overflow + * + * @param call call node whose callee is strcat-like external function + * @return true if the buffer overflow is detected + */ + bool detectStrcat(const CallICFGNode *call); + + /** + * detect buffer overflow by giving a var and a length + * e.g. int x[10]; x[10] = 1; we call canSafelyAccessMemory(x, 11 * sizeof(int)); + * + * @param value the value of the buffer overflow checkpoint + * @param len the length of the buffer overflow checkpoint + * @param curNode ICFGNode supplied by the caller (presumably the node performing the access; TODO confirm) + * @return NOTE(review): originally documented as "true if the buffer overflow is detected", but the name suggests true means the access is safe; confirm against the implementation + */ + bool canSafelyAccessMemory(const SVFValue *value, const IntervalValue &len, const ICFGNode *curNode); + + + Map _addrToGep; + Map>> _extAPIBufOverflowCheckRules; +}; + +class BufOverflowChecker: public AE { + friend BufOverflowCheckerAPI; + +public: + BufOverflowChecker() : AE() { + _kind = AEKind::BufOverflowChecker; + } + + static bool classof(const AE* ae) { + return ae->getKind() == AEKind::BufOverflowChecker; + } + + void initExtAPI() override { + _api = new BufOverflowCheckerAPI(this, _stat); + } + +private: + /** + * handle SVF statement regarding buffer overflow checking + * + * @param stmt SVF statement + */ + virtual void handleSVFStatement(const SVFStmt *stmt) override; + + /** + * handle ICFGNode regarding buffer overflow checking + * + * @param node ICFGNode + */ + virtual void handleICFGNode(const
SVF::ICFGNode *node) override; + + /** + * check buffer overflow at ICFGNode which is a checkpoint + * + * @param node ICFGNode + * @return true if the buffer overflow is detected + */ + bool detectBufOverflow(const ICFGNode *node); + + /** + * add buffer overflow bug to recoder + * + * @param e the exception that is thrown by BufOverflowChecker + * @param node ICFGNode that causes the exception + */ + void addBugToRecoder(const BufOverflowException& e, const ICFGNode* node); + + +}; +} \ No newline at end of file diff --git a/svf/include/AbstractExecution/SVFIR2ConsExeState.h b/svf/include/AE/Svfexe/SVFIR2ConsExeState.h similarity index 98% rename from svf/include/AbstractExecution/SVFIR2ConsExeState.h rename to svf/include/AE/Svfexe/SVFIR2ConsExeState.h index 18e745c0f..93fb433a1 100644 --- a/svf/include/AbstractExecution/SVFIR2ConsExeState.h +++ b/svf/include/AE/Svfexe/SVFIR2ConsExeState.h @@ -27,7 +27,7 @@ #ifndef SVF_SVFIR2CONSEXESTATE_H #define SVF_SVFIR2CONSEXESTATE_H -#include "AbstractExecution/ConsExeState.h" +#include "AE/Core/ConsExeState.h" #include "SVFIR/SVFIR.h" namespace SVF diff --git a/svf/include/AbstractExecution/SVFIR2ItvExeState.h b/svf/include/AE/Svfexe/SVFIR2ItvExeState.h similarity index 96% rename from svf/include/AbstractExecution/SVFIR2ItvExeState.h rename to svf/include/AE/Svfexe/SVFIR2ItvExeState.h index b40270d54..8fd173303 100644 --- a/svf/include/AbstractExecution/SVFIR2ItvExeState.h +++ b/svf/include/AE/Svfexe/SVFIR2ItvExeState.h @@ -30,11 +30,11 @@ #ifndef Z3_EXAMPLE_SVFIR2ITVEXESTATE_H #define Z3_EXAMPLE_SVFIR2ITVEXESTATE_H +#include "AE/Core/ExeState.h" +#include "AE/Core/IntervalExeState.h" +#include "AE/Core/IntervalValue.h" +#include "AE/Core/RelExeState.h" #include "SVFIR/SVFIR.h" -#include "AbstractExecution/ExeState.h" -#include "AbstractExecution/IntervalExeState.h" -#include "AbstractExecution/IntervalValue.h" -#include "AbstractExecution/RelExeState.h" namespace SVF { diff --git a/svf/include/Graphs/CFBasicBlockG.h 
b/svf/include/Graphs/CFBasicBlockG.h index ee4f237c2..a48593ef5 100644 --- a/svf/include/Graphs/CFBasicBlockG.h +++ b/svf/include/Graphs/CFBasicBlockG.h @@ -130,6 +130,11 @@ class CFBasicBlockNode : public GenericCFBasicBlockNodeTy { } + virtual ~CFBasicBlockNode() { + for (auto edge : OutEdges) + delete edge; + } + friend std::ostream &operator<<(std::ostream &o, const CFBasicBlockNode &node) { o << node.toString(); diff --git a/svf/include/SVFIR/SVFType.h b/svf/include/SVFIR/SVFType.h index d00f342ff..8ed64aced 100644 --- a/svf/include/SVFIR/SVFType.h +++ b/svf/include/SVFIR/SVFType.h @@ -31,100 +31,14 @@ #define INCLUDE_SVFIR_SVFTYPE_H_ #include "Util/SparseBitVector.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "Util/GeneralType.h" + namespace SVF { class SVFType; class SVFPointerType; -typedef std::ostream OutStream; -typedef unsigned u32_t; -typedef signed s32_t; -typedef unsigned long long u64_t; -typedef signed long long s64_t; - -typedef u32_t NodeID; -typedef u32_t EdgeID; -typedef unsigned SymID; -typedef unsigned CallSiteID; -typedef unsigned ThreadID; -typedef s64_t APOffset; - -typedef SparseBitVector<> NodeBS; -typedef unsigned PointsToID; - -/// provide extra hash function for std::pair handling -template struct Hash; - -template struct Hash> -{ - // Pairing function from: http://szudzik.com/ElegantPairing.pdf - static size_t szudzik(size_t a, size_t b) - { - return a > b ? 
b * b + a : a * a + a + b; - } - - size_t operator()(const std::pair& t) const - { - Hash first; - Hash second; - return szudzik(first(t.first), second(t.second)); - } -}; - -template struct Hash -{ - size_t operator()(const T& t) const - { - std::hash h; - return h(t); - } -}; - -template , - typename KeyEqual = std::equal_to, - typename Allocator = std::allocator > -using Set = std::unordered_set; - -template , - typename KeyEqual = std::equal_to, - typename Allocator = std::allocator > > -using Map = std::unordered_map; - -template , - typename Allocator = std::allocator > -using OrderedSet = std::set; - -template , - typename Allocator = std::allocator > > -using OrderedMap = std::map; - -typedef std::pair NodePair; -typedef OrderedSet OrderedNodeSet; -typedef Set NodeSet; -typedef Set NodePairSet; -typedef Map NodePairMap; -typedef std::vector NodeVector; -typedef std::vector EdgeVector; -typedef std::stack NodeStack; -typedef std::list NodeList; -typedef std::deque NodeDeque; -typedef NodeSet EdgeSet; -typedef std::vector CallStrCxt; -typedef unsigned Version; -typedef Set VersionSet; -typedef std::pair VersionedVar; -typedef Set VersionedVarSet; /*! 
* Flattened type information of StructType, ArrayType and SingleValueType diff --git a/svf/include/Util/CFBasicBlockGBuilder.h b/svf/include/Util/CFBasicBlockGBuilder.h index b28204c52..d93c598ae 100644 --- a/svf/include/Util/CFBasicBlockGBuilder.h +++ b/svf/include/Util/CFBasicBlockGBuilder.h @@ -41,6 +41,10 @@ class CFBasicBlockGBuilder public: CFBasicBlockGBuilder() : _CFBasicBlockG() {} + ~CFBasicBlockGBuilder() { + delete _CFBasicBlockG; + } + virtual void build(ICFG* icfg); inline CFBasicBlockGraph* getCFBasicBlockGraph() diff --git a/svf/include/Util/GeneralType.h b/svf/include/Util/GeneralType.h new file mode 100644 index 000000000..ba5a1a81b --- /dev/null +++ b/svf/include/Util/GeneralType.h @@ -0,0 +1,123 @@ +//===- GeneralType.h -- Primitive types used in SVF--------------------------// +// +// SVF: Static Value-Flow Analysis +// +// Copyright (C) <2013-> +// + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+// +//===----------------------------------------------------------------------===// + +/* + * GeneralType.h + * + * Created on: Feb 8, 2024 + * Author: Jiawei Wang + */ + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "Util/SparseBitVector.h" + +namespace SVF +{ +typedef std::ostream OutStream; +typedef unsigned u32_t; +typedef signed s32_t; +typedef unsigned long long u64_t; +typedef signed long long s64_t; + +typedef u32_t NodeID; +typedef u32_t EdgeID; +typedef unsigned SymID; +typedef unsigned CallSiteID; +typedef unsigned ThreadID; +typedef s64_t APOffset; + +typedef SparseBitVector<> NodeBS; +typedef unsigned PointsToID; + +/// provide extra hash function for std::pair handling +template struct Hash; + +template struct Hash> +{ + // Pairing function from: http://szudzik.com/ElegantPairing.pdf + static size_t szudzik(size_t a, size_t b) + { + return a > b ? b * b + a : a * a + a + b; + } + + size_t operator()(const std::pair& t) const + { + Hash first; + Hash second; + return szudzik(first(t.first), second(t.second)); + } +}; + +template struct Hash +{ + size_t operator()(const T& t) const + { + std::hash h; + return h(t); + } +}; + +template , + typename KeyEqual = std::equal_to, + typename Allocator = std::allocator> +using Set = std::unordered_set; + +template , + typename KeyEqual = std::equal_to, + typename Allocator = std::allocator>> +using Map = std::unordered_map; + +template , + typename Allocator = std::allocator> +using OrderedSet = std::set; + +template , + typename Allocator = std::allocator>> +using OrderedMap = std::map; + +typedef std::pair NodePair; +typedef OrderedSet OrderedNodeSet; +typedef Set NodeSet; +typedef Set NodePairSet; +typedef Map NodePairMap; +typedef std::vector NodeVector; +typedef std::vector EdgeVector; +typedef std::stack NodeStack; +typedef std::list NodeList; +typedef std::deque NodeDeque; +typedef NodeSet EdgeSet; +typedef std::vector 
CallStrCxt; +typedef unsigned Version; +typedef Set VersionSet; +typedef std::pair VersionedVar; +typedef Set VersionedVarSet; +} \ No newline at end of file diff --git a/svf/include/Util/Options.h b/svf/include/Util/Options.h index 8bb194ed7..fe1be2d7c 100644 --- a/svf/include/Util/Options.h +++ b/svf/include/Util/Options.h @@ -261,6 +261,17 @@ class Options static const Option LoopAnalysis; static const Option LoopBound; + // Abstract Execution + static const Option WidenDelay; + /// the max time consumptions (seconds). Default: 4 hours 14400s + static const Option Timeout; + /// bug info output file, Default: output.db + static const Option OutputName; + /// open buffer overflow checker, Default: false + static const Option BufferOverflowCheck; + /// if the access index of gepstmt is unknown, skip it, Default: false + static const Option GepUnknownIdx; + static const Option RunUncallFuncs; }; } // namespace SVF diff --git a/svf/lib/AbstractExecution/BoundedZ3Expr.cpp b/svf/lib/AE/Core/BoundedZ3Expr.cpp similarity index 97% rename from svf/lib/AbstractExecution/BoundedZ3Expr.cpp rename to svf/lib/AE/Core/BoundedZ3Expr.cpp index d81a9f852..b1e578ff2 100644 --- a/svf/lib/AbstractExecution/BoundedZ3Expr.cpp +++ b/svf/lib/AE/Core/BoundedZ3Expr.cpp @@ -26,7 +26,7 @@ * Author: Xiao Cheng * */ -#include "AbstractExecution/BoundedZ3Expr.h" +#include "AE/Core/BoundedZ3Expr.h" #include "Util/Options.h" using namespace SVF; diff --git a/svf/lib/AbstractExecution/ConsExeState.cpp b/svf/lib/AE/Core/ConsExeState.cpp similarity index 99% rename from svf/lib/AbstractExecution/ConsExeState.cpp rename to svf/lib/AE/Core/ConsExeState.cpp index 89cbd119b..677a45cf5 100644 --- a/svf/lib/AbstractExecution/ConsExeState.cpp +++ b/svf/lib/AE/Core/ConsExeState.cpp @@ -24,12 +24,9 @@ // Created by jiawei and xiao on 6/1/23. 
// - - -#include "AbstractExecution/ConsExeState.h" -#include +#include "AE/Core/ConsExeState.h" #include "Util/Options.h" - +#include using namespace SVF; using namespace SVFUtil; diff --git a/svf/lib/AbstractExecution/ExeState.cpp b/svf/lib/AE/Core/ExeState.cpp similarity index 97% rename from svf/lib/AbstractExecution/ExeState.cpp rename to svf/lib/AE/Core/ExeState.cpp index f1fe7766d..b74ed1149 100644 --- a/svf/lib/AbstractExecution/ExeState.cpp +++ b/svf/lib/AE/Core/ExeState.cpp @@ -27,8 +27,8 @@ * */ -#include "AbstractExecution/ExeState.h" -#include "AbstractExecution/IntervalExeState.h" +#include "AE/Core/ExeState.h" +#include "AE/Core/IntervalExeState.h" using namespace SVF; diff --git a/svf/lib/AbstractExecution/IntervalExeState.cpp b/svf/lib/AE/Core/IntervalExeState.cpp similarity index 99% rename from svf/lib/AbstractExecution/IntervalExeState.cpp rename to svf/lib/AE/Core/IntervalExeState.cpp index 023e530da..eb86eb54c 100644 --- a/svf/lib/AbstractExecution/IntervalExeState.cpp +++ b/svf/lib/AE/Core/IntervalExeState.cpp @@ -27,7 +27,7 @@ * */ -#include "AbstractExecution/IntervalExeState.h" +#include "AE/Core/IntervalExeState.h" #include "Util/SVFUtil.h" using namespace SVF; diff --git a/svf/lib/AbstractExecution/RelExeState.cpp b/svf/lib/AE/Core/RelExeState.cpp similarity index 98% rename from svf/lib/AbstractExecution/RelExeState.cpp rename to svf/lib/AE/Core/RelExeState.cpp index 7950903eb..a780cc7c4 100644 --- a/svf/lib/AbstractExecution/RelExeState.cpp +++ b/svf/lib/AE/Core/RelExeState.cpp @@ -27,8 +27,9 @@ * */ -#include "AbstractExecution/RelExeState.h" -#include "SVFIR/SVFIR.h" +#include "AE/Core/RelExeState.h" +#include "Util/GeneralType.h" +#include "Util/SVFUtil.h" #include using namespace SVF; diff --git a/svf/lib/AbstractExecution/RelationSolver.cpp b/svf/lib/AE/Core/RelationSolver.cpp similarity index 99% rename from svf/lib/AbstractExecution/RelationSolver.cpp rename to svf/lib/AE/Core/RelationSolver.cpp index f5be246d8..c8ffb49c5 100644 
--- a/svf/lib/AbstractExecution/RelationSolver.cpp +++ b/svf/lib/AE/Core/RelationSolver.cpp @@ -26,7 +26,7 @@ * Author: Jiawei Ren * */ -#include "AbstractExecution/RelationSolver.h" +#include "AE/Core/RelationSolver.h" using namespace SVF; using namespace SVFUtil; diff --git a/svf/lib/AbstractExecution/SVFIR2Relation.cpp b/svf/lib/AE/Core/SVFIR2Relation.cpp similarity index 98% rename from svf/lib/AbstractExecution/SVFIR2Relation.cpp rename to svf/lib/AE/Core/SVFIR2Relation.cpp index f72cb5a9c..d207927d7 100644 --- a/svf/lib/AbstractExecution/SVFIR2Relation.cpp +++ b/svf/lib/AE/Core/SVFIR2Relation.cpp @@ -2,7 +2,7 @@ // Created by Xiao on 2022/8/18. // -#include "AbstractExecution/SVFIR2ItvExeState.h" +#include "AE/Svfexe/SVFIR2ItvExeState.h" using namespace SVF; using namespace SVFUtil; diff --git a/svf/lib/AbstractExecution/SymState.cpp b/svf/lib/AE/Core/SymState.cpp similarity index 96% rename from svf/lib/AbstractExecution/SymState.cpp rename to svf/lib/AE/Core/SymState.cpp index 9a7e65f44..b21ff7b4a 100644 --- a/svf/lib/AbstractExecution/SymState.cpp +++ b/svf/lib/AE/Core/SymState.cpp @@ -25,7 +25,7 @@ // -#include "AbstractExecution/SymState.h" +#include "AE/Core/SymState.h" using namespace SVF; diff --git a/svf/lib/AE/Svfexe/AE.cpp b/svf/lib/AE/Svfexe/AE.cpp new file mode 100644 index 000000000..37b04e5aa --- /dev/null +++ b/svf/lib/AE/Svfexe/AE.cpp @@ -0,0 +1,1487 @@ +//===- AE.cpp -- Abstract Execution---------------------------------// +// +// SVF: Static Value-Flow Analysis +// +// Copyright (C) <2013-> +// + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+ +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . +// +//===----------------------------------------------------------------------===// + + +// +// Created by Jiawei Wang on 2024/1/10. +// +#include "WPA/Andersen.h" +#include "Util/CFBasicBlockGBuilder.h" +#include "SVFIR/SVFIR.h" +#include "AE/Svfexe/AE.h" +#include "Util/Options.h" +#include + +using namespace SVF; +using namespace SVFUtil; +using namespace z3; + + +// according to varieties of cmp insts, +// maybe var X var, var X const, const X var, const X const +// we accept 'var X const' 'var X var' 'const X const' +// if 'const X var', we need to reverse op0 op1 and its predicate 'var X' const' +// X' is reverse predicate of X +// == -> !=, != -> ==, > -> <=, >= -> <, < -> >=, <= -> > + +Map _reverse_predicate = { + {CmpStmt::Predicate::FCMP_OEQ , CmpStmt::Predicate::FCMP_ONE}, // == -> != + {CmpStmt::Predicate::FCMP_UEQ , CmpStmt::Predicate::FCMP_UNE}, // == -> != + {CmpStmt::Predicate::FCMP_OGT , CmpStmt::Predicate::FCMP_OLE}, // > -> <= + {CmpStmt::Predicate::FCMP_OGE , CmpStmt::Predicate::FCMP_OLT}, // >= -> < + {CmpStmt::Predicate::FCMP_OLT , CmpStmt::Predicate::FCMP_OGE}, // < -> >= + {CmpStmt::Predicate::FCMP_OLE , CmpStmt::Predicate::FCMP_OGT}, // <= -> > + {CmpStmt::Predicate::FCMP_ONE , CmpStmt::Predicate::FCMP_OEQ}, // != -> == + {CmpStmt::Predicate::FCMP_UNE , CmpStmt::Predicate::FCMP_UEQ}, // != -> == + {CmpStmt::Predicate::ICMP_EQ , CmpStmt::Predicate::ICMP_NE}, // == -> != + {CmpStmt::Predicate::ICMP_NE , CmpStmt::Predicate::ICMP_EQ}, // != -> == + {CmpStmt::Predicate::ICMP_UGT , CmpStmt::Predicate::ICMP_ULE}, // > -> <= + {CmpStmt::Predicate::ICMP_ULT , 
CmpStmt::Predicate::ICMP_UGE}, // < -> >= + {CmpStmt::Predicate::ICMP_UGE , CmpStmt::Predicate::ICMP_ULT}, {CmpStmt::Predicate::ICMP_ULE , CmpStmt::Predicate::ICMP_UGT}, // >= -> <, <= -> > + {CmpStmt::Predicate::ICMP_SGT , CmpStmt::Predicate::ICMP_SLE}, // > -> <= + {CmpStmt::Predicate::ICMP_SLT , CmpStmt::Predicate::ICMP_SGE}, // < -> >= + {CmpStmt::Predicate::ICMP_SGE , CmpStmt::Predicate::ICMP_SLT}, {CmpStmt::Predicate::ICMP_SLE , CmpStmt::Predicate::ICMP_SGT}, // >= -> <, <= -> > +}; + +// predicate of the equivalent comparison after swapping lhs and rhs (a X b <=> b X' a); these maps are read with operator[], so a predicate without an entry would silently get a value-initialized (zero) result +Map _switch_lhsrhs_predicate = { + {CmpStmt::Predicate::FCMP_OEQ , CmpStmt::Predicate::FCMP_OEQ}, // == -> == + {CmpStmt::Predicate::FCMP_UEQ , CmpStmt::Predicate::FCMP_UEQ}, // == -> == + {CmpStmt::Predicate::FCMP_OGT , CmpStmt::Predicate::FCMP_OLT}, // > -> < + {CmpStmt::Predicate::FCMP_OGE , CmpStmt::Predicate::FCMP_OLE}, // >= -> <= + {CmpStmt::Predicate::FCMP_OLT , CmpStmt::Predicate::FCMP_OGT}, // < -> > + {CmpStmt::Predicate::FCMP_OLE , CmpStmt::Predicate::FCMP_OGE}, // <= -> >= + {CmpStmt::Predicate::FCMP_ONE , CmpStmt::Predicate::FCMP_ONE}, // != -> != + {CmpStmt::Predicate::FCMP_UNE , CmpStmt::Predicate::FCMP_UNE}, // != -> != + {CmpStmt::Predicate::ICMP_EQ , CmpStmt::Predicate::ICMP_EQ}, // == -> == + {CmpStmt::Predicate::ICMP_NE , CmpStmt::Predicate::ICMP_NE}, // != -> != + {CmpStmt::Predicate::ICMP_UGT , CmpStmt::Predicate::ICMP_ULT}, // > -> < + {CmpStmt::Predicate::ICMP_ULT , CmpStmt::Predicate::ICMP_UGT}, // < -> > + {CmpStmt::Predicate::ICMP_UGE , CmpStmt::Predicate::ICMP_ULE}, {CmpStmt::Predicate::ICMP_ULE , CmpStmt::Predicate::ICMP_UGE}, // >= -> <=, <= -> >= + {CmpStmt::Predicate::ICMP_SGT , CmpStmt::Predicate::ICMP_SLT}, // > -> < + {CmpStmt::Predicate::ICMP_SLT , CmpStmt::Predicate::ICMP_SGT}, // < -> > + {CmpStmt::Predicate::ICMP_SGE , CmpStmt::Predicate::ICMP_SLE}, {CmpStmt::Predicate::ICMP_SLE , CmpStmt::Predicate::ICMP_SGE}, // >= -> <=, <= -> >= +}; + +void AE::initExtAPI() +{ + _api = new AEAPI(this, _stat); +} + +void AE::runOnModule(SVF::SVFIR *svfModule) { + // 1.
Start clock + _stat->startClk(); + + _svfir = svfModule; + _ander = AndersenWaveDiff::createAndersenWaveDiff(_svfir); + _api->setModule(_svfir); + // init SVF Execution States + _svfir2ExeState = new SVFIR2ItvExeState(_svfir); + + // init SSE External API Handler + _callgraph = _ander->getPTACallGraph(); + _icfg = _svfir->getICFG(); + CFBasicBlockGBuilder CFBGBuilder; + _icfg->updateCallGraph(_callgraph); + + CFBGBuilder.build(_icfg); + _CFBlockG = CFBGBuilder.getCFBasicBlockGraph(); + /// collect checkpoint + _api->collectCheckPoint(); + + /// if function contains callInst that call itself, it is a recursive function. + markRecursiveFuns(); + for (const SVFFunction* fun: _svfir->getModule()->getFunctionSet()) { + if (_CFBlockG->hasGNode(_icfg->getFunEntryICFGNode(fun)->getId()) ) { + const CFBasicBlockNode *node = _CFBlockG->getGNode(_icfg->getFunEntryICFGNode(fun)->getId()); + auto *wto = new CFBasicBlockGWTO(_CFBlockG, node); + wto->init(); + _funcToWTO[fun] = wto; + } + } + analyse(); + _api->checkPointAllSet(); + // 5. 
Stop clock and report bugs
+    _stat->endClk();
+    _stat->finializeStat();
+    if (Options::PStat())
+    {
+        _stat->performStat();
+    }
+    _stat->reportBug();
+}
+
+/// Constructor: creates the statistics collector bound to this analysis.
+AE::AE() {
+    _stat = new AEStat(this);
+}
+
+/// Destructor: releases the statistics collector, the external-API handler,
+/// the SVFIR-to-state translator and every WTO stored in _funcToWTO.
+AE::~AE() {
+    delete _stat;
+    delete _api;
+    delete _svfir2ExeState;
+    for (const auto& it : _funcToWTO)
+        delete it.second;
+}
+
+/// Record in _recursiveFuns every function whose call-graph node lies on a
+/// cycle according to the call-graph SCC obtained from _ander.
+void AE::markRecursiveFuns() {
+    CallGraphSCC* callGraphScc = _ander->getCallGraphSCC();
+    callGraphScc->find();
+
+    for (auto it = _callgraph->begin(); it != _callgraph->end(); ++it) {
+        if (callGraphScc->isInCycle(it->second->getId()))
+            _recursiveFuns.insert(it->second->getFunction());
+    }
+}
+
+/// Program entry: handles the global ICFG node first and then, if the module
+/// defines it, the function named "main".
+void AE::analyse() {
+    // handle Global ICFGNode of SVFModule
+    handleGlobalNode();
+    if (const SVFFunction* fun = _svfir->getModule()->getSVFFunction("main")) {
+        handleFunc(fun);
+    }
+}
+
+/// Handle the global ICFG node.
+/// Starts from a fresh execution state, translates every statement of the
+/// global node and finally moves the result into the global state.
+/// NOTE: the `<...>` arguments of dyn_cast were missing in the reviewed text and
+/// are restored from the declared result types.
+void AE::handleGlobalNode() {
+    IntervalExeState es;
+    const ICFGNode* node = _icfg->getGlobalICFGNode();
+    _svfir2ExeState->setEs(es);
+    // Global Node, we just need to handle addr, load, store, copy and gep
+    for (const SVFStmt *stmt: node->getSVFStmts()) {
+        if (const AddrStmt *addr = SVFUtil::dyn_cast<AddrStmt>(stmt)) {
+            _svfir2ExeState->translateAddr(addr);
+        } else if (const LoadStmt *load = SVFUtil::dyn_cast<LoadStmt>(stmt)) {
+            _svfir2ExeState->translateLoad(load);
+        } else if (const StoreStmt *store = SVFUtil::dyn_cast<StoreStmt>(stmt)) {
+            _svfir2ExeState->translateStore(store);
+        } else if (const CopyStmt *copy = SVFUtil::dyn_cast<CopyStmt>(stmt)) {
+            _svfir2ExeState->translateCopy(copy);
+        } else if (const GepStmt *gep = SVFUtil::dyn_cast<GepStmt>(stmt)) {
+            _svfir2ExeState->translateGep(gep);
+        } else {
+            assert(false && "implement this part");
+        }
+    }
+    // for stmts in global node, exe state will move to global state to lower memory usage
+    _svfir2ExeState->moveToGlobal();
+}
+
+/// get execution state by merging states of predecessor blocks
+/// Scenario 1: preblock -----(intraEdge)----> block, join the preES of inEdges
+/// Scenario 2: preblock -----(callEdge)----> block
+///
+/// On success the merged state is stored in _preES[block].
+/// @return true if the block is the global entry or at least one in-edge
+///         contributed a (feasible) state; false otherwise.
+bool AE::hasInEdgesES(const CFBasicBlockNode *block) {
+    if (isGlobalEntry(block)) {
+        _preES[block] = IntervalExeState();
+        return true;
+    }
+
+    // common basic block: join the post states of all analysed predecessors
+    IntervalExeState es;
+    u32_t inEdgeNum = 0;
+    for (const auto& edge: block->getInEdges()) {
+        auto postIt = _postES.find(edge->getSrcNode());
+        if (postIt == _postES.end())
+            continue; // predecessor has no post state yet
+
+        const IntraCFGEdge *intraCfgEdge = SVFUtil::dyn_cast<IntraCFGEdge>(edge->getICFGEdge());
+        if (intraCfgEdge && intraCfgEdge->getCondition()) {
+            // conditional edge: refine a copy of the predecessor state with the
+            // branch condition and drop the edge when the branch is infeasible
+            IntervalExeState tmpEs = postIt->second;
+            if (hasBranchES(intraCfgEdge, tmpEs)) {
+                es.joinWith(tmpEs);
+                inEdgeNum++;
+            }
+        } else {
+            es.joinWith(postIt->second);
+            inEdgeNum++;
+        }
+    }
+    if (inEdgeNum == 0)
+        return false;
+
+    _preES[block] = es;
+    return true;
+}
+
+/// @return true if the first ICFG node of the block is a function entry node
+///         and a pre-state is already recorded for the block.
+/// NOTE(review): the isa<> argument was missing in the reviewed text;
+/// FunEntryICFGNode is inferred from the function's purpose - confirm.
+bool AE::isFunEntry(const SVF::CFBasicBlockNode *block) {
+    return SVFUtil::isa<FunEntryICFGNode>(*block->getICFGNodes().begin()) &&
+           _preES.find(block) != _preES.end();
+}
+
+/// @return true if the block has no incoming edge.
+bool AE::isGlobalEntry(const SVF::CFBasicBlockNode *block) {
+    return !block->hasIncomingEdge();
+}
+
+/// Refine the state along a branch guarded by a compare statement.
+/// @param cmpStmt the compare statement producing the branch condition
+/// @param succ    the value the condition takes on this edge (0 means the
+///                comparison is false, so the predicate is reversed)
+/// @param es      in/out state; overwritten with the refined state on success
+/// @return false if the branch is infeasible (the condition value cannot be
+///         `succ`), true otherwise. `es` is left untouched when false is returned.
+bool AE::hasCmpBranchES(const CmpStmt* cmpStmt, s64_t succ, IntervalExeState& es) {
+    // get cmp stmt's op0, op1, and predicate
+    NodeID op0 = cmpStmt->getOpVarID(0);
+    NodeID op1 = cmpStmt->getOpVarID(1);
+    NodeID res_id = cmpStmt->getResID();
+    s32_t predicate = cmpStmt->getPredicate();
+
+    // skip address compare: nothing to refine, the state stays as it is
+    if (es.inVarToAddrsTable(op0) || es.inVarToAddrsTable(op1)) {
+        return true;
+    }
+
+    IntervalExeState new_es = es;
+
+    // Find the load that defines an operand ('%1 = load i32 s'), looking through
+    // at most one copy. Only the first in-edge is inspected; the load may not exist.
+    auto findDefiningLoad = [this](NodeID id) -> const LoadStmt* {
+        SVFVar* var = _svfir->getGNode(id);
+        if (var->getInEdges().empty())
+            return nullptr;
+        SVFStmt* inStmt = *var->getInEdges().begin();
+        if (const LoadStmt* loadStmt = SVFUtil::dyn_cast<LoadStmt>(inStmt))
+            return loadStmt;
+        if (const CopyStmt* copyStmt = SVFUtil::dyn_cast<CopyStmt>(inStmt)) {
+            var = _svfir->getGNode(copyStmt->getRHSVarID());
+            if (!var->getInEdges().empty()) {
+                SVFStmt* inStmt2 = *var->getInEdges().begin();
+                if (const LoadStmt* loadStmt = SVFUtil::dyn_cast<LoadStmt>(inStmt2))
+                    return loadStmt;
+            }
+        }
+        return nullptr;
+    };
+    const LoadStmt *load_op0 = findDefiningLoad(op0);
+    const LoadStmt *load_op1 = findDefiningLoad(op1);
+
+    // for const X const, we may get concrete resVal instantly
+    // for var X const, we may get [0,1] if the intersection of var and const is not empty set
+    IntervalValue resVal = new_es[res_id];
+    resVal.meet_with(IntervalValue(static_cast<int64_t>(succ), succ));
+    // If Var X const generates bottom value, it means this branch path is not feasible.
+    if (resVal.isBottom()) {
+        return false;
+    }
+
+    bool b0 = new_es[op0].is_numeral();
+    bool b1 = new_es[op1].is_numeral();
+
+    // var X var: we cannot preset the branch condition to infer the intervals of var0, var1
+    // const X const: the result is already known
+    if (b0 == b1) {
+        es = new_es;
+        return true;
+    }
+
+    if (b0) {
+        // const X var: bring the variable's interval into the local state, then
+        // swap the operands so that op0 is always the variable side
+        new_es.cpyItvToLocal(op1);
+        std::swap(op0, op1);
+        std::swap(load_op0, load_op1);
+        predicate = _switch_lhsrhs_predicate[predicate];
+    } else {
+        // var X const: bring the variable's interval into the local state
+        new_es.cpyItvToLocal(op0);
+    }
+
+    // if cmp is 'var X const == false', we should reverse predicate 'var X' const == true'
+    // X' is reverse predicate of X
+    if (succ == 0) {
+        predicate = _reverse_predicate[predicate];
+    }
+
+    // memory objects the variable side was loaded from (if any)
+    ExeState::Addrs addrs;
+    if (load_op0 && new_es.inVarToAddrsTable(load_op0->getRHSVarID()))
+        addrs = new_es.getAddrs(load_op0->getRHSVarID());
+
+    IntervalValue &lhs = new_es[op0], &rhs = new_es[op1];
+
+    // Narrow the variable and, since it is a register value, also the memory
+    // objects it was loaded from.
+    auto refine = [&](const IntervalValue& bound) {
+        lhs.meet_with(bound);
+        for (const auto &addr: addrs) {
+            NodeID objId = new_es.getInternalID(addr);
+            if (new_es.inLocToValTable(objId)) {
+                new_es.load(addr).meet_with(bound);
+            }
+        }
+    };
+
+    // change interval range according to the compare predicate
+    switch (predicate) {
+    case CmpStmt::Predicate::ICMP_EQ:
+    case CmpStmt::Predicate::FCMP_OEQ:
+    case CmpStmt::Predicate::FCMP_UEQ: {
+        // Var == Const, so [var.lb, var.ub].meet_with(const)
+        const IntervalValue bound = rhs; // copy: do not alias the state while it is updated
+        refine(bound);
+        break;
+    }
+    case CmpStmt::Predicate::ICMP_NE:
+    case CmpStmt::Predicate::FCMP_ONE:
+    case CmpStmt::Predicate::FCMP_UNE:
+        // Complement set: not representable by a single interval, no refinement
+        break;
+    case CmpStmt::Predicate::ICMP_UGT:
+    case CmpStmt::Predicate::ICMP_SGT:
+    case CmpStmt::Predicate::FCMP_OGT:
+    case CmpStmt::Predicate::FCMP_UGT:
+        // Var > Const, so [var.lb, var.ub].meet_with([Const+1, +INF])
+        refine(IntervalValue(rhs.lb() + 1, IntervalValue::plus_infinity()));
+        break;
+    case CmpStmt::Predicate::ICMP_UGE:
+    case CmpStmt::Predicate::ICMP_SGE:
+    case CmpStmt::Predicate::FCMP_OGE:
+    case CmpStmt::Predicate::FCMP_UGE:
+        // Var >= Const, so [var.lb, var.ub].meet_with([Const, +INF])
+        refine(IntervalValue(rhs.lb(), IntervalValue::plus_infinity()));
+        break;
+    case CmpStmt::Predicate::ICMP_ULT:
+    case CmpStmt::Predicate::ICMP_SLT:
+    case CmpStmt::Predicate::FCMP_OLT:
+    case CmpStmt::Predicate::FCMP_ULT:
+        // Var < Const, so [var.lb, var.ub].meet_with([-INF, const.ub-1])
+        refine(IntervalValue(IntervalValue::minus_infinity(), rhs.ub() - 1));
+        break;
+    case CmpStmt::Predicate::ICMP_ULE:
+    case CmpStmt::Predicate::ICMP_SLE:
+    case CmpStmt::Predicate::FCMP_OLE:
+    case CmpStmt::Predicate::FCMP_ULE:
+        // Var <= Const, so [var.lb, var.ub].meet_with([-INF, const.ub])
+        refine(IntervalValue(IntervalValue::minus_infinity(), rhs.ub()));
+        break;
+    case CmpStmt::Predicate::FCMP_FALSE:
+    case CmpStmt::Predicate::FCMP_TRUE:
+        break;
+    default:
+        assert(false && "implement this part");
+        abort();
+    }
+    es = new_es;
+    return true;
+}
+
+/// Refine the state along a switch edge (or any condition not produced by a
+/// compare statement).
+/// @param var  the condition variable
+/// @param succ the case value taken on this edge
+/// @param es   in/out state; overwritten with the refined state on success
+/// @return false if the condition cannot take the value `succ`, true otherwise.
+bool AE::hasSwitchBranchES(const SVFVar* var, s64_t succ, IntervalExeState& es) {
+    IntervalExeState new_es = es;
+    new_es.cpyItvToLocal(var->getId());
+    IntervalValue& switch_cond = new_es[var->getId()];
+    s64_t value = succ;
+
+    switch_cond.meet_with(IntervalValue(value, value));
+    if (switch_cond.isBottom()) {
+        return false;
+    }
+
+    FIFOWorkList<const SVFStmt*> workList;
+    for (SVFStmt *cmpVarInStmt: var->getInEdges()) {
+        workList.push(cmpVarInStmt);
+    }
+    while (!workList.empty()) {
+        const SVFStmt* stmt = workList.pop();
+        if (SVFUtil::isa<CopyStmt>(stmt)) {
+            IntervalValue& copy_cond = new_es[var->getId()];
+            copy_cond.meet_with(IntervalValue(value, value));
+        }
+        else if (const LoadStmt* load = SVFUtil::dyn_cast<LoadStmt>(stmt)) {
+            // the condition was loaded from memory: narrow the memory objects too
+            if (new_es.inVarToAddrsTable(load->getRHSVarID())) {
+                ExeState::Addrs &addrs = new_es.getAddrs(load->getRHSVarID());
+                for (const auto &addr: addrs) {
+                    NodeID objId = new_es.getInternalID(addr);
+                    if (new_es.inLocToValTable(objId)) {
+                        new_es.load(addr).meet_with(switch_cond);
+                    }
+                }
+            }
+        }
+    }
+    es = new_es;
+    return true;
+}
+
+/// Refine `es` with the condition of a conditional intra-procedural edge.
+/// Dispatches to hasCmpBranchES when the first in-edge of the condition
+/// variable is a compare statement, otherwise to hasSwitchBranchES.
+/// @return false if the edge is infeasible under `es`.
+bool AE::hasBranchES(const IntraCFGEdge* intraEdge, IntervalExeState& es) {
+    const SVFValue *cond = intraEdge->getCondition();
+    NodeID cmpID = _svfir->getValueNode(cond);
+    SVFVar *cmpVar = _svfir->getGNode(cmpID);
+    if (!cmpVar->getInEdges().empty()) {
+        SVFStmt *cmpVarInStmt = *cmpVar->getInEdges().begin();
+        if (const CmpStmt *cmpStmt = SVFUtil::dyn_cast<CmpStmt>(cmpVarInStmt)) {
+            return hasCmpBranchES(cmpStmt, intraEdge->getSuccessorCondValue(), es);
+        }
+    }
+    return hasSwitchBranchES(cmpVar, intraEdge->getSuccessorCondValue(), es);
+}
+
+/// handle instructions in svf basic blocks
+/// Does nothing when no in-edge provides a state (infeasible block); otherwise
+/// interprets every ICFG node of the block and records the result in _postES.
+void AE::handleBlock(const CFBasicBlockNode *block) {
+    _stat->getBlockTrace()++;
+    // Get execution states from in edges
+    if (!hasInEdgesES(block)) {
+        // No ES on the in edges - Infeasible block
+        return;
+    }
+    // Has ES on the in edges - Feasible block
+    _svfir2ExeState->setEs(_preES[block]);
+
+    std::deque<const ICFGNode*> worklist;
+    for (auto it = block->begin(); it != block->end(); ++it) {
+        worklist.push_back(*it);
+    }
+    while (!worklist.empty()) {
+        const ICFGNode* curICFGNode = worklist.front();
+        worklist.pop_front();
+        handleICFGNode(curICFGNode);
+    }
+    _preES.erase(block);
+    _postES[block] = _svfir2ExeState->getEs();
+}
+
+/// Dispatch a call node to the matching pass. The checks are tried in this
+/// order: external call, recursive call, direct call, indirect call.
+void AE::handleCallSite(const ICFGNode* node) {
+    if (const CallICFGNode* callNode = SVFUtil::dyn_cast<CallICFGNode>(node)) {
+        if (isExtCall(callNode)) {
+            extCallPass(callNode);
+        }
+        else if (isRecursiveCall(callNode)) {
+            recursiveCallPass(callNode);
+        }
+        else if (isDirectCall(callNode)) {
+            directCallFunPass(callNode);
+        }
+        else if (isIndirectCall(callNode)) {
+            indirectCallFunPass(callNode);
+        }
+        else {
+            assert(false && "implement this part");
+        }
+    }
+    else {
+        assert (false && "it is not call node");
+    }
+}
+
+/// @return true if the callee of the call site is an external function.
+bool AE::isExtCall(const SVF::CallICFGNode *callNode) {
+    const SVFFunction *callfun = SVFUtil::getCallee(callNode->getCallSite());
+    return SVFUtil::isExtCall(callfun);
+}
+
+/// Handle an external call through the external-API handler; the call site is
+/// on the call-site stack for the duration of the handling.
+void AE::extCallPass(const SVF::CallICFGNode *callNode) {
+    _callSiteStack.push_back(callNode);
+    _api->handleExtAPI(callNode);
+    _callSiteStack.pop_back();
+}
+
+/// @return true if the callee was marked recursive (see _recursiveFuns).
+bool AE::isRecursiveCall(const SVF::CallICFGNode *callNode) {
+    const SVFFunction *callfun = SVFUtil::getCallee(callNode->getCallSite());
+    return _recursiveFuns.find(callfun) != _recursiveFuns.end();
+}
+
+/// Handle a call to a recursive function: the callee is skipped via
+/// SkipRecursiveCall and a non-pointer, non-constant return value is set to top.
+void AE::recursiveCallPass(const SVF::CallICFGNode *callNode) {
+    SkipRecursiveCall(callNode);
+    const RetICFGNode *retNode = callNode->getRetICFGNode();
+    if (!retNode->getSVFStmts().empty()) {
+        if (const RetPE *retPE = SVFUtil::dyn_cast<RetPE>(*retNode->getSVFStmts().begin())) {
+            if (!retPE->getLHSVar()->isPointer() &&
+                    !retPE->getLHSVar()->isConstDataOrAggDataButNotNullPtr()) {
+                _svfir2ExeState->getEs()[retPE->getLHSVarID()] = IntervalValue::top();
+            }
+        }
+    }
+}
+
+/// @return true if a WTO is available for the callee of the call site.
+bool AE::isDirectCall(const SVF::CallICFGNode *callNode) {
+    const SVFFunction *callfun = SVFUtil::getCallee(callNode->getCallSite());
+    return _funcToWTO.find(callfun) != _funcToWTO.end();
+}
+
+/// Inline a direct call: publish the current state as the post state of the
+/// calling block, analyse the callee, then copy the post state recorded for
+/// the calling block to the block of the return node.
+void AE::directCallFunPass(const SVF::CallICFGNode *callNode) {
+    const SVFFunction *callfun = SVFUtil::getCallee(callNode->getCallSite());
+    _callSiteStack.push_back(callNode);
+
+    auto* curBlockNode = _CFBlockG->getCFBasicBlockNode(callNode->getId());
+    _postES[curBlockNode] = _svfir2ExeState->getEs();
+
+    handleFunc(callfun);
+    _callSiteStack.pop_back();
+    // handle Ret node
+    const RetICFGNode *retNode = callNode->getRetICFGNode();
+    // resume ES to callnode
+    _postES[_CFBlockG->getCFBasicBlockNode(retNode->getId())] = _postES[_CFBlockG->getCFBasicBlockNode(callNode->getId())];
+}
+
+/// @return true if the call node is registered as an indirect call site.
+bool AE::isIndirectCall(const SVF::CallICFGNode *callNode) {
+    // bind by reference: copying the whole call-site map per query is wasteful
+    const auto& callsiteMaps = _svfir->getIndirectCallsites();
+    return callsiteMaps.find(callNode) != callsiteMaps.end();
+}
+
+/// Inline an indirect call. The callee is resolved from the first address the
+/// function pointer may hold; the call is skipped when the pointer has no
+/// known address or the address does not denote a function.
+void AE::indirectCallFunPass(const SVF::CallICFGNode *callNode) {
+    const auto& callsiteMaps = _svfir->getIndirectCallsites();
+    NodeID call_id = callsiteMaps.at(callNode);
+    if (!_svfir2ExeState->getEs().inVarToAddrsTable(call_id)) {
+        return;
+    }
+    const auto& addrs = _svfir2ExeState->getAddrs(call_id);
+    if (addrs.empty()) {
+        // nothing to dereference
+        return;
+    }
+    NodeID addr = *addrs.begin();
+    SVFVar *func_var = _svfir->getGNode(_svfir2ExeState->getInternalID(addr));
+    const SVFFunction *callfun = SVFUtil::dyn_cast<SVFFunction>(func_var->getValue());
+    if (callfun) {
+        _callSiteStack.push_back(callNode);
+        auto *curBlockNode = _CFBlockG->getCFBasicBlockNode(callNode->getId());
+
+        _postES[curBlockNode] = _svfir2ExeState->getEs();
+
+        handleFunc(callfun);
+        _callSiteStack.pop_back();
+        // handle Ret node
+        const RetICFGNode *retNode = callNode->getRetICFGNode();
+        _postES[_CFBlockG->getCFBasicBlockNode(retNode->getId())] = _postES[_CFBlockG->getCFBasicBlockNode(callNode->getId())];
+    }
+}
+
+
+
+/// Interpret one ICFG node: translate its statements, then, for a call node,
+/// handle the call site (which may inline the callee).
+void AE::handleICFGNode(const ICFGNode *curICFGNode) {
+    _stat->getICFGNodeTrace()++;
+    // handle SVF Stmt
+    for (const SVFStmt *stmt: curICFGNode->getSVFStmts()) {
+        handleSVFStatement(stmt);
+    }
+    // inlining the callee by calling handleFunc for the callee function
+    if (const CallICFGNode* callnode = SVFUtil::dyn_cast<CallICFGNode>(curICFGNode)) {
+        handleCallSite(callnode);
+    }
+    _stat->countStateSize();
+}
+
+/// handle wto cycle (loop)
+/// Iterates the cycle until a fixpoint is reached. For the first
+/// Options::WidenDelay() iterations the head's post state is only compared
+/// with the previous one; afterwards widening is applied until it stabilises,
+/// followed by narrowing until it stabilises.
+void AE::handleCycle(const CFBasicBlockGWTOCycle *cycle) {
+    // Get execution states from in edges
+    if (!hasInEdgesES(cycle->head())) {
+        // No ES on the in edges - Infeasible block
+        return;
+    }
+    IntervalExeState pre_es = _preES[cycle->head()];
+    // set -widen-delay
+    s32_t widen_delay = Options::WidenDelay();
+    bool increasing = true;
+    for (int i = 0; ; i++) {
+        const CFBasicBlockNode* cycle_head = cycle->head();
+        // handle cycle head
+        handleBlock(cycle_head);
+        if (i < widen_delay) {
+            if (i > 0 && pre_es >= _postES[cycle_head]) {
+                break;
+            }
+            pre_es = _postES[cycle_head];
+        }
+        else {
+            if (increasing) {
+                if (widenFixpointPass(cycle_head, pre_es))
+                    increasing = false;
+            }
+            // narrowing starts in the same iteration in which widening stabilised
+            if (!increasing) {
+                if (narrowFixpointPass(cycle_head, pre_es))
+                    break;
+            }
+        }
+        for (auto it = cycle->begin(); it != cycle->end(); ++it) {
+            const CFBasicBlockGWTOComp* cur = *it;
+            if (const CFBasicBlockGWTONode* vertex = SVFUtil::dyn_cast<CFBasicBlockGWTONode>(cur)) {
+                handleBlock(vertex->node());
+            }
+            else if (const CFBasicBlockGWTOCycle* subCycle = SVFUtil::dyn_cast<CFBasicBlockGWTOCycle>(cur)) {
+                handleCycle(subCycle);
+            } else {
+                assert(false && "unknown WTO type!");
+            }
+        }
+    }
+}
+
+/// One increasing (widening) step at the cycle head.
+/// Updates `pre_es` and _postES[cycle_head] with the widened state.
+/// @return true if the previous state already covers the widened one (fixpoint).
+bool AE::widenFixpointPass(const CFBasicBlockNode* cycle_head, IntervalExeState& pre_es) {
+    // increasing iterations
+    IntervalExeState new_pre_es = pre_es.widening(_postES[cycle_head]);
+    _svfir2ExeState->widenAddrs(new_pre_es, _postES[cycle_head]);
+
+    const bool is_fixpoint = pre_es >= new_pre_es;
+    pre_es = new_pre_es;
+    _postES[cycle_head] = pre_es;
+    return is_fixpoint;
+}
+
+/// One decreasing (narrowing) step at the cycle head.
+/// Updates `pre_es` and _postES[cycle_head] with the narrowed state.
+/// @return true if the narrowed state covers the previous one (fixpoint).
+bool AE::narrowFixpointPass(const SVF::CFBasicBlockNode *cycle_head, SVF::IntervalExeState &pre_es) {
+    // decreasing iterations
+    IntervalExeState new_pre_es = pre_es.narrowing(_postES[cycle_head]);
+    _svfir2ExeState->narrowAddrs(new_pre_es, _postES[cycle_head]);
+
+    const bool is_fixpoint = new_pre_es >= pre_es;
+    pre_es = new_pre_es;
+    _postES[cycle_head] = pre_es;
+    return is_fixpoint;
+}
+
+
+
+/// handle user defined function, ext function is not included.
+void AE::handleFunc(const SVFFunction *func) {
+    _stat->getFunctionTrace()++;
+    // Look the WTO up without inserting: operator[] would create a null entry
+    // for a function that has no WTO and the loop below would dereference it.
+    auto wtoIt = _funcToWTO.find(func);
+    if (wtoIt == _funcToWTO.end() || wtoIt->second == nullptr) {
+        return;
+    }
+    CFBasicBlockGWTO* wto = wtoIt->second;
+    // visit the WTO components in order
+    for (auto it = wto->begin(); it != wto->end(); ++it) {
+        const CFBasicBlockGWTOComp* cur = *it;
+        if (const CFBasicBlockGWTONode* vertex = SVFUtil::dyn_cast<CFBasicBlockGWTONode>(cur)) {
+            handleBlock(vertex->node());
+        }
+        else if (const CFBasicBlockGWTOCycle* cycle = SVFUtil::dyn_cast<CFBasicBlockGWTOCycle>(cur)) {
+            handleCycle(cycle);
+        } else {
+            assert(false && "unknown WTO type!");
+        }
+    }
+}
+
+
+/// Translate a single SVF statement into its effect on the current state.
+/// NOTE(review): the `<...>` arguments of dyn_cast/isa were missing in the
+/// reviewed text. Those of dyn_cast are restored from the declared result
+/// types; the two bare isa<> checks are presumed to be UnaryOPStmt and
+/// BranchStmt - confirm against the upstream file.
+void AE::handleSVFStatement(const SVFStmt *stmt) {
+    if (const AddrStmt *addr = SVFUtil::dyn_cast<AddrStmt>(stmt)) {
+        _svfir2ExeState->translateAddr(addr);
+    } else if (const BinaryOPStmt *binary = SVFUtil::dyn_cast<BinaryOPStmt>(stmt)) {
+        _svfir2ExeState->translateBinary(binary);
+    } else if (const CmpStmt *cmp = SVFUtil::dyn_cast<CmpStmt>(stmt)) {
+        _svfir2ExeState->translateCmp(cmp);
+    } else if (SVFUtil::isa<UnaryOPStmt>(stmt)) {
+        // intentionally not translated
+    } else if (SVFUtil::isa<BranchStmt>(stmt)) {
+        // branch stmt is handled in hasBranchES
+    } else if (const LoadStmt *load = SVFUtil::dyn_cast<LoadStmt>(stmt)) {
+        _svfir2ExeState->translateLoad(load);
+    } else if (const StoreStmt *store = SVFUtil::dyn_cast<StoreStmt>(stmt)) {
+        _svfir2ExeState->translateStore(store);
+    } else if (const CopyStmt *copy = SVFUtil::dyn_cast<CopyStmt>(stmt)) {
+        _svfir2ExeState->translateCopy(copy);
+    } else if (const GepStmt *gep = SVFUtil::dyn_cast<GepStmt>(stmt)) {
+        _svfir2ExeState->translateGep(gep);
+    } else if (const SelectStmt *select = SVFUtil::dyn_cast<SelectStmt>(stmt)) {
+        _svfir2ExeState->translateSelect(select);
+    } else if (const PhiStmt *phi = SVFUtil::dyn_cast<PhiStmt>(stmt)) {
+        _svfir2ExeState->translatePhi(phi);
+    } else if (const CallPE *callPE = SVFUtil::dyn_cast<CallPE>(stmt)) {
+        // To handle Call Edge
+        _svfir2ExeState->translateCall(callPE);
+    } else if (const RetPE *retPE = SVFUtil::dyn_cast<RetPE>(stmt)) {
+        _svfir2ExeState->translateRet(retPE);
+    } else
+        assert(false && "implement this part");
+}
+
+
+/// Skip the body of a recursive callee.
+/// A non-pointer, non-constant return value is set to top. Unless the return
+/// node has more than one out-edge, every store target inside the callee is
+/// then over-approximated by SkipRecursiveFunc.
+void AE::SkipRecursiveCall(const CallICFGNode *callNode) {
+    const SVFFunction *callfun = SVFUtil::getCallee(callNode->getCallSite());
+    const RetICFGNode *retNode = callNode->getRetICFGNode();
+    if (!retNode->getSVFStmts().empty()) {
+        if (const RetPE *retPE = SVFUtil::dyn_cast<RetPE>(*retNode->getSVFStmts().begin())) {
+            if (!retPE->getLHSVar()->isPointer() && !retPE->getLHSVar()->isConstDataOrAggDataButNotNullPtr())
+                _svfir2ExeState->getEs()[retPE->getLHSVarID()] = IntervalValue::top();
+        }
+    }
+    if (retNode->getOutEdges().size() > 1) {
+        return;
+    }
+    SkipRecursiveFunc(callfun);
+}
+
+/// Over-approximate the memory effects of a recursive function: for every
+/// store of a non-pointer, non-constant value found in the reachable basic
+/// blocks, all objects the destination may point to are set to top.
+void AE::SkipRecursiveFunc(const SVFFunction *func) {
+    // handle Recursive Funcs, go through every relevant func/block.
+    IntervalExeState &curES = _svfir2ExeState->getEs();
+    for (const SVFBasicBlock * bb: func->getReachableBBs()) {
+        for (const SVFInstruction* inst: bb->getInstructionList()) {
+            const ICFGNode* node = _icfg->getICFGNode(inst);
+            for (const SVFStmt *stmt: node->getSVFStmts()) {
+                const StoreStmt *store = SVFUtil::dyn_cast<StoreStmt>(stmt);
+                if (!store)
+                    continue;
+                const SVFVar *rhsVar = store->getRHSVar();
+                u32_t lhs = store->getLHSVarID();
+                if (!curES.inVarToAddrsTable(lhs))
+                    continue;
+                if (!rhsVar->isPointer() && !rhsVar->isConstDataOrAggDataButNotNullPtr()) {
+                    const SVFIR2ItvExeState::Addrs &addrs = curES.getAddrs(lhs);
+                    assert(!addrs.empty());
+                    for (const auto &addr: addrs) {
+                        curES.store(addr, IntervalValue::top());
+                    }
+                }
+            }
+        }
+    }
+}
+
+// count the size of memory map
+// The first call records the sizes of the global state's tables and resets the
+// accumulators; every call adds the current state's table sizes to them.
+void AEStat::countStateSize() {
+    if (count == 0) {
+        generalNumMap["Global_ES_Var_AVG_Num"] = IntervalExeState::globalES.getVarToVal().size();
+        generalNumMap["Global_ES_Loc_AVG_Num"] = IntervalExeState::globalES.getLocToVal().size();
+        generalNumMap["Global_ES_Var_Addr_AVG_Num"] = IntervalExeState::globalES.getVarToAddrs().size();
+        generalNumMap["Global_ES_Loc_Addr_AVG_Num"] = IntervalExeState::globalES.getLocToAddrs().size();
+        generalNumMap["ES_Var_AVG_Num"] = 0;
+        generalNumMap["ES_Loc_AVG_Num"] = 0;
+        generalNumMap["ES_Var_Addr_AVG_Num"] = 0;
+        generalNumMap["ES_Loc_Addr_AVG_Num"] = 0;
+    }
+    ++count;
+    generalNumMap["ES_Var_AVG_Num"] += _ae->_svfir2ExeState->getEs().getVarToVal().size();
+    generalNumMap["ES_Loc_AVG_Num"] += _ae->_svfir2ExeState->getEs().getLocToVal().size();
+    generalNumMap["ES_Var_Addr_AVG_Num"] += _ae->_svfir2ExeState->getEs().getVarToAddrs().size();
+    generalNumMap["ES_Loc_Addr_AVG_Num"] += _ae->_svfir2ExeState->getEs().getLocToAddrs().size();
+}
+
+/// Turn the accumulated counters into the final statistics: averages over the
+/// number of countStateSize() calls, ICFG/function/call-site counts, the
+/// number of reported bugs and the elapsed time.
+/// Note that "SVF_STMT_NUM" is assigned `count`, i.e. the number of
+/// countStateSize() calls.
+void AEStat::finializeStat() {
+    memUsage = getMemUsage();
+    if (count > 0) {
+        generalNumMap["ES_Var_AVG_Num"] /= count;
+        generalNumMap["ES_Loc_AVG_Num"] /= count;
+        generalNumMap["ES_Var_Addr_AVG_Num"] /= count;
+        generalNumMap["ES_Loc_Addr_AVG_Num"] /= count;
+    }
+    generalNumMap["SVF_STMT_NUM"] = count;
+    generalNumMap["ICFG_Node_Num"] = _ae->_svfir->getICFG()->nodeNum;
+    u32_t callSiteNum = 0;
+    u32_t extCallSiteNum = 0;
+    Set<const SVFFunction*> funs;
+    for (const auto &it: *_ae->_svfir->getICFG()) {
+        if (it.second->getFun()) {
+            funs.insert(it.second->getFun());
+        }
+        if (const CallICFGNode *callNode = dyn_cast<CallICFGNode>(it.second)) {
+            if (!isExtCall(callNode->getCallSite())) {
+                callSiteNum++;
+            } else {
+                extCallSiteNum++;
+            }
+        }
+    }
+    generalNumMap["Func_Num"] = funs.size();
+    generalNumMap["EXT_CallSite_Num"] = extCallSiteNum;
+    generalNumMap["NonEXT_CallSite_Num"] = callSiteNum;
+    generalNumMap["VarToAddrSize"] = _ae->_svfir2ExeState->getEs().getVarToAddrs().size();
+    generalNumMap["LocToAddrSize"] = _ae->_svfir2ExeState->getEs().getLocToAddrs().size();
+    generalNumMap["Bug_Num"] = _ae->_nodeToBugInfo.size();
+    timeStatMap["Total_Time(sec)"] = (double)(endTime - startTime) / TIMEINTERVAL;
+
+}
+
+/// Print the collected statistics.
+/// The program name shown in the banner is the module's file name without
+/// directories and without everything from its first '.' on.
+void AEStat::performStat() {
+    const std::string fullName(_ae->_moduleName);
+    // strip the directory part: keep what follows the LAST '/'
+    const std::string::size_type slash = fullName.rfind('/');
+    const std::string baseName = (slash == std::string::npos) ? fullName : fullName.substr(slash + 1);
+    // strip the extension; the '.' is searched in the base name itself
+    const std::string moduleName = baseName.substr(0, baseName.find('.'));
+
+    SVFUtil::outs() << "\n************************\n";
+    SVFUtil::outs() << "################ (program : " << moduleName << ")###############\n";
+    SVFUtil::outs().flags(std::ios::left);
+    unsigned field_width = 30;
+    for (NUMStatMap::iterator it = generalNumMap.begin(), eit = generalNumMap.end(); it != eit; ++it) {
+        // left-aligned name padded to field_width, followed by the value
+        SVFUtil::outs() << std::setw(field_width) << it->first << it->second << "\n";
+    }
+    SVFUtil::outs() << "-------------------------------------------------------\n";
+    for (TIMEStatMap::iterator it = timeStatMap.begin(), eit = timeStatMap.end(); it != eit; ++it) {
+        // left-aligned name padded to field_width, followed by the value
+        SVFUtil::outs() << std::setw(field_width) << it->first << it->second << "\n";
+    }
+    SVFUtil::outs() << "Memory usage: " << memUsage << "\n";
+
+    SVFUtil::outs() << "#######################################################" << std::endl;
+    SVFUtil::outs().flush();
+}
+
+/// Report every recorded bug on stderr and, when Options::OutputName() is not
+/// empty, also in that file.
+void AEStat::reportBug() {
+    const std::string header = "######################Full Overflow (" + std::to_string(_ae->_nodeToBugInfo.size()) + " found)######################\n";
+    const std::string separator = "---------------------------------------------\n";
+
+    // The file is only opened when requested; no platform specific null device is needed.
+    std::ofstream f;
+    if (!Options::OutputName().empty()) {
+        f.open(Options::OutputName());
+    }
+
+    std::cerr << header << separator;
+    if (f.is_open()) {
+        f << header << separator;
+    }
+    for (const auto& it: _ae->_nodeToBugInfo) {
+        std::cerr << it.second << separator;
+        if (f.is_open()) {
+            f << it.second << separator;
+        }
+    }
+}
+
+/// Register the handlers for modelled external functions in _func_map and
+/// initialise the set of checkpoint function names.
+void AEAPI::initExtFunMap() {
+    // Each generated handler applies the host's FUNC_NAME to the lower bound of
+    // the first argument's interval and stores the result as the call's value.
+    // It does nothing when the argument has no interval in the current state.
+    // NOTE(review): the handler is registered under #FUNC_NAME; the LLVM_NAME
+    // parameter is unused, so e.g. "llvm.sin.f64" itself is not a key - confirm
+    // this is intended. The argument and result also pass through integer
+    // types (u32_t / s32_t), including for sin/cos/tan/log.
+#define SSE_FUNC_PROCESS(LLVM_NAME ,FUNC_NAME) \
+    auto sse_##FUNC_NAME = [this](const CallSite &cs) { \
+        /* run real ext function */ \
+        IntervalExeState &es = _ae->_svfir2ExeState->getEs(); \
+        u32_t rhs_id = _svfir->getValueNode(cs.getArgument(0)); \
+        if (!es.inVarToValTable(rhs_id)) return; \
+        u32_t rhs = _ae->_svfir2ExeState->getEs()[rhs_id].lb().getNumeral(); \
+        s32_t res = FUNC_NAME(rhs); \
+        u32_t lhsId = _svfir->getValueNode(cs.getInstruction()); \
+        _ae->_svfir2ExeState->getEs()[lhsId] = IntervalValue(res); \
+        return; \
+    }; \
+    _func_map[#FUNC_NAME] = sse_##FUNC_NAME;
+
+    SSE_FUNC_PROCESS(isalnum, isalnum);
+    SSE_FUNC_PROCESS(isalpha, isalpha);
+    SSE_FUNC_PROCESS(isblank, isblank);
+    SSE_FUNC_PROCESS(iscntrl, iscntrl);
+    SSE_FUNC_PROCESS(isdigit, isdigit);
+    SSE_FUNC_PROCESS(isgraph, isgraph);
+    SSE_FUNC_PROCESS(isprint, isprint);
+    SSE_FUNC_PROCESS(ispunct, ispunct);
+    SSE_FUNC_PROCESS(isspace, isspace);
+    SSE_FUNC_PROCESS(isupper, isupper);
+    SSE_FUNC_PROCESS(isxdigit, isxdigit);
+    SSE_FUNC_PROCESS(llvm.sin.f64, sin);
+    SSE_FUNC_PROCESS(llvm.cos.f64, cos);
+    SSE_FUNC_PROCESS(llvm.tan.f64, tan);
+    SSE_FUNC_PROCESS(llvm.log.f64, log);
+    SSE_FUNC_PROCESS(sinh, sinh);
+    SSE_FUNC_PROCESS(cosh, cosh);
+    SSE_FUNC_PROCESS(tanh, tanh);
+
+    // svf_assert(cond): the checkpoint is marked as visited; the assertion holds
+    // when the condition's interval, met with [1,1], equals [1,1].
+    auto sse_svf_assert = [this](const CallSite &cs) {
+        const CallICFGNode* callNode = SVFUtil::dyn_cast<CallICFGNode>(_svfir->getICFG()->getICFGNode(cs.getInstruction()));
+        _checkpoints.erase(callNode);
+        u32_t arg0 = _svfir->getValueNode(cs.getArgument(0));
+        IntervalExeState &es = _ae->_svfir2ExeState->getEs();
+        es[arg0].meet_with(IntervalValue(1, 1));
+        if (es[arg0].equals(IntervalValue(1, 1))) {
+            SVFUtil::outs() << SVFUtil::sucMsg("The assertion is successfully verified!!\n");
+        } else {
+            SVFUtil::errs() <<"svf_assert Fail. " << cs.getInstruction()->toString() << "\n";
+            assert(false);
+        }
+    };
+    _func_map["svf_assert"] = sse_svf_assert;
+
+    // svf_print(value, text): prints the interval of the first argument.
+    // Captures `this` explicitly: the handler is stored in _func_map and is
+    // called after this function has returned, so nothing local may be captured
+    // by reference.
+    auto svf_print = [this](const CallSite &cs) {
+        if (cs.arg_size() < 2) return;
+        IntervalExeState &es = _ae->_svfir2ExeState->getEs();
+        u32_t num_id = _svfir->getValueNode(cs.getArgument(0));
+        std::string text = strRead(cs.getArgument(1));
+        assert(es.inVarToValTable(num_id) && "print() should pass integer");
+        IntervalValue itv = es[num_id];
+        std::cout << "Text: " << text <<", Value: " << cs.getArgument(0)->toString() << ", PrintVal: " << itv.toString() << std::endl;
+    };
+    _func_map["svf_print"] = svf_print;
+
+    // init _checkpoint_names
+    _checkpoint_names.insert("svf_assert");
+}
+
+/// Read the string a value points to from the current state.
+/// Reading stops at the first element that is not a single number, at the
+/// first '\0', or after Options::MaxFieldLimit() elements.
+/// @return the characters read; empty when the value has no known address.
+std::string AEAPI::strRead(const SVFValue* rhs) {
+    // sse read string nodeID->string
+    IntervalExeState &es = _ae->_svfir2ExeState->getEs();
+    std::string str0;
+
+    // Without a known address there is nothing to read. The check does not
+    // depend on the index, so it is done once instead of once per iteration.
+    if (!es.inVarToAddrsTable(_svfir->getValueNode(rhs))) {
+        return str0;
+    }
+    for (u32_t index = 0; index < Options::MaxFieldLimit(); index++) {
+        Addrs expr0 = _ae->_svfir2ExeState->getGepObjAddress(_svfir->getValueNode(rhs), index);
+        IntervalValue val = IntervalValue::bottom();
+        for (const auto &addr: expr0) {
+            val.join_with(es.load(addr));
+        }
+        if (!val.is_numeral()) {
+            break;
+        }
+        if ((char) val.getNumeral() == '\0') {
+            break;
+        }
+        str0.push_back((char) val.getNumeral());
+    }
+    return str0;
+}
+
+/// Handle a call to an external function.
+/// Functions annotated as MEMCPY / MEMSET / STRCPY / STRCAT are dispatched to
+/// the matching memory handler (when several annotations match, the last one
+/// checked wins). Other functions use their registered handler in _func_map,
+/// if any; otherwise the call's value, unless it has a known address, is reset
+/// to a default-constructed interval.
+void AEAPI::handleExtAPI(const CallICFGNode *call) {
+    const SVFFunction *fun = SVFUtil::getCallee(call->getCallSite());
+    assert(fun && "SVFFunction* is nullptr");
+    CallSite cs = SVFUtil::getSVFCallSite(call->getCallSite());
+    ExtAPIType extType = UNCLASSIFIED;
+    // get type of mem api
+    for (const std::string &annotation: fun->getAnnotations()) {
+        if (annotation.find("MEMCPY") != std::string::npos)
+            extType = MEMCPY;
+        if (annotation.find("MEMSET") != std::string::npos)
+            extType = MEMSET;
+        if (annotation.find("STRCPY") != std::string::npos)
+            extType = STRCPY;
+        if (annotation.find("STRCAT") != std::string::npos)
+            extType = STRCAT;
+    }
+    if (extType == UNCLASSIFIED) {
+        auto handlerIt = _func_map.find(fun->getName());
+        if (handlerIt != _func_map.end()) {
+            handlerIt->second(cs);
+        } else {
+            u32_t lhsId = _svfir->getValueNode(cs.getInstruction());
+            if (!_ae->_svfir2ExeState->getEs().inVarToAddrsTable(lhsId)) {
+                _ae->_svfir2ExeState->getEs()[lhsId] = IntervalValue();
+            }
+        }
+    }
+    // 1. memcpy functions like memcpy_chk, strncpy, annotate("MEMCPY"), annotate("BUF_CHECK:Arg0, Arg2"), annotate("BUF_CHECK:Arg1, Arg2")
+    else if (extType == MEMCPY) {
+        IntervalValue len = _ae->_svfir2ExeState->getEs()[_svfir->getValueNode(cs.getArgument(2))];
+        handleMemcpy(cs.getArgument(0), cs.getArgument(1), len, 0);
+    }
+    else if (extType == MEMSET) {
+        // memset dst is arg0, elem is arg1, size is arg2
+        IntervalValue len = _ae->_svfir2ExeState->getEs()[_svfir->getValueNode(cs.getArgument(2))];
+        IntervalValue elem = _ae->_svfir2ExeState->getEs()[_svfir->getValueNode(cs.getArgument(1))];
+        handleMemset(cs.getArgument(0), elem, len);
+    }
+    else if (extType == STRCPY) {
+        handleStrcpy(call);
+    }
+    else if (extType == STRCAT) {
+        handleStrcat(call);
+    }
+}
+
+/// Collect every call node whose callee name is a checkpoint name
+/// (see _checkpoint_names) into _checkpoints.
+void AEAPI::collectCheckPoint() {
+    // traverse every ICFGNode
+    for (auto it = _ae->_svfir->getICFG()->begin(); it != _ae->_svfir->getICFG()->end(); ++it) {
+        const ICFGNode* node = it->second;
+        if (const CallICFGNode *call = SVFUtil::dyn_cast<CallICFGNode>(node)) {
+            if (const SVFFunction *fun = SVFUtil::getCallee(call->getCallSite())) {
+                if (_checkpoint_names.find(fun->getName()) != _checkpoint_names.end()) {
+                    _checkpoints.insert(call);
+                }
+            }
+        }
+    }
+}
+
+/// Report the checkpoints that are still in _checkpoints, i.e. were never
+/// erased by their handler. Prints nothing when the set is empty.
+void AEAPI::checkPointAllSet() {
+    if (_checkpoints.size() == 0) {
+        return;
+    }
+    SVFUtil::errs() << SVFUtil::sucMsg("There exists checkpoints not checked!!\n");
+    for (const CallICFGNode* call: _checkpoints) {
+        SVFUtil::errs() << SVFUtil::sucMsg(call->toString() + "\n");
+    }
+}
+
+
+/// Model strcpy-like calls as a memcpy of the source string's length.
+void AEAPI::handleStrcpy(const CallICFGNode *call) {
+    // strcpy, __strcpy_chk, stpcpy , wcscpy, __wcscpy_chk
+    // get the dst and src
+    CallSite cs = SVFUtil::getSVFCallSite(call->getCallSite());
+    const SVFValue* arg0Val = cs.getArgument(0);
+    const SVFValue* arg1Val = cs.getArgument(1);
+    IntervalValue strLen = getStrlen(arg1Val);
+    // no need to -1, since it has \0 as the last byte
+    handleMemcpy(arg0Val, arg1Val, strLen,strLen.lb().getNumeral());
+}
+
+/// Byte size of the object created by an address statement.
+/// For objects of constant size this is the recorded size. Otherwise it is
+/// the product of the upper bounds of the array-size operands, clamped to
+/// Options::MaxFieldLimit(); an operand without a known interval is assumed to
+/// be Options::MaxFieldLimit() (and recorded as such in the state).
+u32_t AEAPI::getAllocaInstByteSize(const AddrStmt *addr) {
+    if (const ObjVar* objvar = SVFUtil::dyn_cast<ObjVar>(addr->getRHSVar())) {
+        if (objvar->getMemObj()->isConstantByteSize()) {
+            u32_t sz = objvar->getMemObj()->getByteSizeOfObj();
+            return sz;
+        }
+
+        const auto& sizes = addr->getArrSize();
+        // Default element size is set to 1.
+        u32_t elementSize = 1;
+        u64_t res = elementSize;
+        for (const SVFValue* value: sizes) {
+            if (!_ae->_svfir2ExeState->inVarToValTable(_svfir->getValueNode(value))) {
+                _ae->_svfir2ExeState->getEs()[_svfir->getValueNode(value)] = IntervalValue(Options::MaxFieldLimit());
+            }
+            IntervalValue itv = _ae->_svfir2ExeState->getEs()[_svfir->getValueNode(value)];
+            const auto scaled = res * itv.ub().getNumeral();
+            res = scaled > Options::MaxFieldLimit()? Options::MaxFieldLimit(): scaled;
+        }
+        return (u32_t)res;
+    }
+    assert (false && "Addr rhs value is not ObjVar");
+    // never fall off the end of a non-void function when asserts are compiled out
+    abort();
+}
+
+/// Remaining allocation size reachable from `value`.
+/// @return the size of the allocation (or array) the value flows from minus
+///         the byte offsets accumulated on the way; IntervalValue(0) when no
+///         source is found.
+IntervalValue AEAPI::traceMemoryAllocationSize(const SVFValue *value) {
+    /// Usually called by a GepStmt overflow check, or external API (like memcpy) overflow check
+    /// Definitions of Terms:
+    /// source node: malloc or gepStmt(array), sink node: gepStmt or external API (like memcpy)
+    /// it tracks the value flow from sink to source, and accumulates offset
+    /// then compare the accumulated offset and malloc size (or gepStmt array size)
+    SVF::FILOWorkList<const SVFValue*> worklist;
+    Set<const SVFValue*> visited;
+    visited.insert(value);
+    worklist.push(value);
+    IntervalValue total_bytes(0);
+    while (!worklist.empty()) {
+        value = worklist.pop();
+        if (const SVFInstruction* ins = SVFUtil::dyn_cast<SVFInstruction>(value)) {
+            const ICFGNode* node = _svfir->getICFG()->getICFGNode(ins);
+            /// CallNode means Source Node
+            if (const CallICFGNode* callnode = SVFUtil::dyn_cast<CallICFGNode>(node)) {
+                //to handle Ret PE
+                AccessMemoryViaRetNode(callnode, worklist, visited);
+            }
+            for (const SVFStmt *stmt: node->getSVFStmts()) {
+                if (const CopyStmt *copy = SVFUtil::dyn_cast<CopyStmt>(stmt)) {
+                    // Copy Stmt, forward to lhs
+                    AccessMemoryViaCopyStmt(copy, worklist, visited);
+                }
+                else if (const LoadStmt *load = SVFUtil::dyn_cast<LoadStmt>(stmt)) {
+                    // Load Stmt, forward to the Var from last Store Stmt
+                    AccessMemoryViaLoadStmt(load, worklist, visited);
+                }
+                else if (const GepStmt *gep = SVFUtil::dyn_cast<GepStmt>(stmt)) {
+                    // there are 3 type of gepStmt
+                    // 1. ptr get offset
+                    // 2. struct get field
+                    // 3. array get element
+                    // for array gep, there are two kind of overflow checking
+                    // Arr [Struct.C * 10] arr, Struct.C {i32 a, i32 b}
+                    // arr[11].a = **, it is "lhs = gep *arr, 0 (ptr), 11 (arrIdx), 0 (ptr), 0(struct field)"
+                    // 1) in this case arrIdx 11 is overflow.
+                    // Other case,
+                    // Struct.C {i32 a, [i32*10] b, i32 c}, C.b[11] = 1
+                    // it is "lhs = gep *C, 0(ptr), 1(struct field), 0(ptr), 11(arrIdx)"
+                    // 2) in this case arrIdx 11 is larger than its getOffsetVar.Type Array([i32*10])
+
+                    // therefore, if last getOffsetVar.Type is not the Array, just check the overall offset and its
+                    // gep source type size (together with totalOffset along the value flow).
+                    // Alloc Size: TBD, but totalOffset + current Gep offset
+
+                    // otherwise, if last getOffsetVar.Type is the Array, check the last idx and array. (just offset,
+                    // not with totalOffset during check)
+                    // Alloc Size: getOffsetVar.TypeByteSize()
+
+                    // make sure it has OffsetVarAndGepType Pair
+                    if (gep->getOffsetVarAndGepTypePairVec().size() > 0) {
+                        // check if last OffsetVarAndGepType Pair is Array
+                        const SVFType* gepType = gep->getOffsetVarAndGepTypePairVec().back().second;
+                        // if its array
+                        if (gepType->isArrayTy()) {
+                            u32_t rhs_type_bytes = gepType->getByteSize();
+                            // if gepStmt's base var is Array, compares offset with the arraysize
+                            return IntervalValue(rhs_type_bytes);
+                        }
+                        else {
+                            IntervalValue byteOffset;
+                            if (gep->isConstantOffset()) {
+                                byteOffset = IntervalValue(gep->accumulateConstantByteOffset());
+                            }
+                            else {
+                                // assign to the outer variable: declaring a new one here
+                                // would discard the variable offset
+                                byteOffset = _ae->_svfir2ExeState->getByteOffset(gep);
+                            }
+                            // accumulate the gep offset along the value flow
+                            total_bytes = total_bytes + byteOffset;
+                        }
+                    }
+                    if (!visited.count(gep->getRHSVar()->getValue())) {
+                        visited.insert(gep->getRHSVar()->getValue());
+                        worklist.push(gep->getRHSVar()->getValue());
+                    }
+                }
+                else if (const AddrStmt *addr = SVFUtil::dyn_cast<AddrStmt>(stmt)) {
+                    // addrStmt is source node.
+                    u32_t arr_type_size = getAllocaInstByteSize(addr);
+                    return IntervalValue(arr_type_size) - total_bytes;
+                }
+            }
+        }
+        else if (const SVF::SVFGlobalValue* gvalue = SVFUtil::dyn_cast<SVF::SVFGlobalValue>(value)) {
+            u32_t arr_type_size = 0;
+            const SVFType* svftype = gvalue->getType();
+            if (SVFUtil::isa<SVFPointerType>(svftype)) {
+                if(const SVFArrayType* ptrArrType = SVFUtil::dyn_cast<SVFArrayType>(getPointeeElement(_svfir->getValueNode(value))))
+                    arr_type_size = ptrArrType->getByteSize();
+                else
+                    arr_type_size = svftype->getByteSize();
+            } else
+                arr_type_size = svftype->getByteSize();
+            return IntervalValue(arr_type_size) - total_bytes;
+        }
+        else if (const SVF::SVFArgument* arg = SVFUtil::dyn_cast<SVF::SVFArgument>(value)) {
+            // to handle call PE
+            AccessMemoryViaCallArgs(arg, worklist, visited);
+        }
+        else {
+            // maybe SVFConstant
+            return IntervalValue(0);
+        }
+    }
+    return IntervalValue(0);
+}
+
+
+IntervalValue AEAPI::getStrlen(const SVF::SVFValue *strValue) {
+    IntervalExeState &es = _ae->_svfir2ExeState->getEs();
+    IntervalValue dst_size = traceMemoryAllocationSize(strValue);
+    u32_t len = 0;
+    NodeID dstid = _svfir->getValueNode(strValue);
+    u32_t elemSize = 1;
+    if (_ae->_svfir2ExeState->inVarToAddrsTable(dstid)) {
+        for (u32_t index = 0; index < dst_size.lb().getNumeral(); index++) {
+            Addrs expr0 = _ae->_svfir2ExeState->getGepObjAddress(dstid, index);
+            IntervalValue val = IntervalValue::bottom();
+            for (const auto &addr: expr0) {
+                val.join_with(es.load(addr));
+            }
+            if (val.is_numeral() && (char) val.getNumeral() == '\0') {
+                break;
+            }
+            ++len;
+        }
+        if (strValue->getType()->isArrayTy()) {
+            elemSize = SVFUtil::dyn_cast(strValue->getType())->getTypeOfElement()->getByteSize();
+        } else if (strValue->getType()->isPointerTy()) {
+            if (const SVFType* elemType = getPointeeElement(_svfir->getValueNode(strValue))) {
+                elemSize = elemType->getByteSize();
+            }
+            else {
+                elemSize = 1;
+            }
+        } else {
+            assert(false && "we cannot support this type");
+        }
+    }
+    if (len == 0) {
+        return IntervalValue((s64_t)0,
(s64_t)Options::MaxFieldLimit()); + } else { + return IntervalValue(len * elemSize); + } +} + + +void AEAPI::handleStrcat(const SVF::CallICFGNode *call) { + // __strcat_chk, strcat, __wcscat_chk, wcscat, __strncat_chk, strncat, __wcsncat_chk, wcsncat + // to check it is strcat group or strncat group + const SVFFunction *fun = SVFUtil::getCallee(call->getCallSite()); + const std::vector strcatGroup = {"__strcat_chk", "strcat", "__wcscat_chk", "wcscat"}; + const std::vector strncatGroup = {"__strncat_chk", "strncat", "__wcsncat_chk", "wcsncat"}; + if (std::find(strcatGroup.begin(), strcatGroup.end(), fun->getName()) != strcatGroup.end()) { + CallSite cs = SVFUtil::getSVFCallSite(call->getCallSite()); + const SVFValue* arg0Val = cs.getArgument(0); + const SVFValue* arg1Val = cs.getArgument(1); + IntervalValue strLen0 = getStrlen(arg0Val); + IntervalValue strLen1 = getStrlen(arg1Val); + IntervalValue totalLen = strLen0 + strLen1; + handleMemcpy(arg0Val, arg1Val, strLen1, strLen0.lb().getNumeral()); + // do memcpy + } else if (std::find(strncatGroup.begin(), strncatGroup.end(), fun->getName()) != strncatGroup.end()) { + CallSite cs = SVFUtil::getSVFCallSite(call->getCallSite()); + const SVFValue* arg0Val = cs.getArgument(0); + const SVFValue* arg1Val = cs.getArgument(1); + const SVFValue* arg2Val = cs.getArgument(2); + IntervalValue arg2Num = _ae->_svfir2ExeState->getEs()[_svfir->getValueNode(arg2Val)]; + IntervalValue strLen0 = getStrlen(arg0Val); + IntervalValue totalLen = strLen0 + arg2Num; + handleMemcpy(arg0Val, arg1Val, arg2Num, strLen0.lb().getNumeral()); + // do memcpy + } else { + assert(false && "unknown strcat function, please add it to strcatGroup or strncatGroup"); + } +} + +void AEAPI::handleMemcpy(const SVF::SVFValue *dst, const SVF::SVFValue *src, SVF::IntervalValue len, u32_t start_idx) { + IntervalExeState &es = _ae->_svfir2ExeState->getEs(); + u32_t dstId = _svfir->getValueNode(dst); // pts(dstId) = {objid} objbar objtypeinfo->getType(). 
+ u32_t srcId = _svfir->getValueNode(src); + u32_t elemSize = 1; + if (dst->getType()->isArrayTy()) { + elemSize = SVFUtil::dyn_cast(dst->getType())->getTypeOfElement()->getByteSize(); + } + // memcpy(i32*, i32*, 40) + else if (dst->getType()->isPointerTy()) { + if (const SVFType* elemType = getPointeeElement(_svfir->getValueNode(dst))) { + if (elemType->isArrayTy()) + elemSize = SVFUtil::dyn_cast(elemType)->getTypeOfElement()->getByteSize(); + else + elemSize = elemType->getByteSize(); + } + else { + elemSize = 1; + } + } + else { + assert(false && "we cannot support this type"); + } + u32_t size = std::min((u32_t)Options::MaxFieldLimit(), (u32_t) len.lb().getNumeral()); + u32_t range_val = size / elemSize; + if (_ae->_svfir2ExeState->inVarToAddrsTable(srcId) && _ae->_svfir2ExeState->inVarToAddrsTable(dstId)) { + for (u32_t index = 0; index < range_val; index++) { + // dead loop for string and break if there's a \0. If no \0, it will throw err. + Addrs expr_src = _ae->_svfir2ExeState->getGepObjAddress(srcId, index); + Addrs expr_dst = _ae->_svfir2ExeState->getGepObjAddress(dstId, index + start_idx); + for (const auto &dst: expr_dst) { + for (const auto &src: expr_src) { + u32_t objId = ExeState::getInternalID(src); + if (es.inLocToValTable(objId)) { + es.store(dst, es.load(src)); + } else if (es.inLocToAddrsTable(objId)) { + es.storeAddrs(dst, es.loadAddrs(src)); + } + } + } + } + } +} + +const SVFType* AEAPI::getPointeeElement(NodeID id) { + assert(_ae->_svfir2ExeState->inVarToAddrsTable(id) && "id is not in varToAddrsTable"); + if (_ae->_svfir2ExeState->inVarToAddrsTable(id)) { + const Addrs& addrs = _ae->_svfir2ExeState->getAddrs(id); + for (auto addr: addrs) { + NodeID addr_id = _ae->_svfir2ExeState->getInternalID(addr); + if (addr_id == 0) // nullptr has no memobj, skip + continue; + return SVFUtil::dyn_cast(_svfir->getGNode(addr_id))->getMemObj()->getType(); + } + } + return nullptr; +} + +void AEAPI::handleMemset(const SVF::SVFValue *dst, SVF::IntervalValue 
elem, SVF::IntervalValue len) { + IntervalExeState &es = _ae->_svfir2ExeState->getEs(); + u32_t dstId = _svfir->getValueNode(dst); + u32_t size = std::min((u32_t)Options::MaxFieldLimit(), (u32_t) len.lb().getNumeral()); + u32_t elemSize = 1; + if (dst->getType()->isArrayTy()) { + elemSize = SVFUtil::dyn_cast(dst->getType())->getTypeOfElement()->getByteSize(); + } + else if (dst->getType()->isPointerTy()) { + if (const SVFType* elemType = getPointeeElement(_svfir->getValueNode(dst))) { + elemSize = elemType->getByteSize(); + } + else { + elemSize = 1; + } + } + else { + assert(false && "we cannot support this type"); + } + + u32_t range_val = size / elemSize; + for (u32_t index = 0; index < range_val; index++) { + // dead loop for string and break if there's a \0. If no \0, it will throw err. + if (_ae->_svfir2ExeState->inVarToAddrsTable(dstId)) { + Addrs lhs_gep = _ae->_svfir2ExeState->getGepObjAddress(dstId, index); + for (const auto &addr: lhs_gep) { + u32_t objId = ExeState::getInternalID(addr); + if (es.inLocToValTable(objId)) { + IntervalValue tmp = es.load(addr); + tmp.join_with(IntervalValue(elem)); + es.store(addr, tmp); + } else { + es.store(addr, IntervalValue(elem)); + } + } + } else + break; + } +} + + + +void AEAPI::AccessMemoryViaRetNode(const CallICFGNode *callnode, SVF::FILOWorkList& worklist, Set& visited) { + if (callnode->getRetICFGNode()->getSVFStmts().size() > 0) { + const RetPE *ret = SVFUtil::dyn_cast(*callnode->getRetICFGNode()->getSVFStmts().begin()); + SVF::ValVar *ret_gnode = SVFUtil::dyn_cast(_svfir->getGNode(ret->getRHSVar()->getId())); + if (ret_gnode->hasIncomingEdges(SVFStmt::PEDGEK::Phi)) { + const SVFStmt::SVFStmtSetTy &stmt_set = ret_gnode->getIncomingEdges(SVFStmt::PEDGEK::Phi); + for (auto it = stmt_set.begin(); it != stmt_set.end(); ++it) { + const SVFStmt *stmt = *it; + if (const PhiStmt *phi = SVFUtil::dyn_cast(stmt)) { + if (!visited.count(phi->getOpVar(0)->getValue())) { + worklist.push(phi->getOpVar(0)->getValue()); + 
visited.insert(phi->getOpVar(0)->getValue()); + } + } + } + } + } +} + +void AEAPI::AccessMemoryViaCopyStmt(const CopyStmt *copy, SVF::FILOWorkList& worklist, Set& visited) { + if (!visited.count(copy->getRHSVar()->getValue())) { + visited.insert(copy->getRHSVar()->getValue()); + worklist.push(copy->getRHSVar()->getValue()); + } +} + +void AEAPI::AccessMemoryViaLoadStmt(const LoadStmt *load, SVF::FILOWorkList& worklist, Set& visited) { + if (_ae->_svfir2ExeState->inVarToAddrsTable(load->getLHSVarID())) { + const ExeState::Addrs &Addrs = _ae->_svfir2ExeState->getAddrs(load->getLHSVarID()); + for (auto vaddr: Addrs) { + NodeID id = _ae->_svfir2ExeState->getInternalID(vaddr); + if (id == 0) // nullptr has no memobj, skip + continue; + const auto *val = _svfir->getGNode(id); + if (!visited.count(val->getValue())) { + visited.insert(val->getValue()); + worklist.push(val->getValue()); + } + } + } +} + +void AEAPI::AccessMemoryViaCallArgs(const SVF::SVFArgument *arg, + SVF::FILOWorkList &worklist, + Set &visited) { + std::vector callstack = _ae->_callSiteStack; + SVF::ValVar *arg_gnode = SVFUtil::cast(_svfir->getGNode(_svfir->getValueNode(arg))); + if (arg_gnode->hasIncomingEdges(SVFStmt::PEDGEK::Call)) { + while (!callstack.empty()) { + const CallICFGNode *cur_call = callstack.back(); + callstack.pop_back(); + for (const SVFStmt *stmt: cur_call->getSVFStmts()) { + if (const CallPE *callPE = SVFUtil::dyn_cast(stmt)) { + if (callPE->getLHSVarID() == _svfir->getValueNode(arg)) { + if (!SVFUtil::isa(callPE->getRHSVar()) && + !SVFUtil::isa(callPE->getRHSVar())) { + if (!visited.count(callPE->getRHSVar()->getValue())) { + visited.insert(callPE->getRHSVar()->getValue()); + worklist.push(callPE->getRHSVar()->getValue()); + break; + } + } + } + } + } + } + } +} + diff --git a/svf/lib/AE/Svfexe/BufOverflowChecker.cpp b/svf/lib/AE/Svfexe/BufOverflowChecker.cpp new file mode 100644 index 000000000..e501d79f5 --- /dev/null +++ b/svf/lib/AE/Svfexe/BufOverflowChecker.cpp @@ -0,0 +1,716 
@@ +//===- BufOverflowChecker.cpp -- BufOVerflowChecker Client for Abstract Execution---// +// +// SVF: Static Value-Flow Analysis +// +// Copyright (C) <2013-> +// + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . +// +//===----------------------------------------------------------------------===// + + +// +// Created by Jiawei Wang on 2024/1/12. +// + +#include "AE/Svfexe/BufOverflowChecker.h" +#include "SVFIR/SVFType.h" +#include "Util/Options.h" +#include + +namespace SVF { + +std::string IntervalToIntStr(const IntervalValue& inv) { + if (inv.is_infinite()) { + return inv.toString(); + } else { + int64_t lb_val = inv.lb().getNumeral(); + int64_t ub_val = inv.ub().getNumeral(); + + // check lb + s32_t lb_s32 = (lb_val < static_cast(INT_MIN)) ? INT_MIN : + (lb_val > static_cast(INT_MAX)) ? INT_MAX : + static_cast(lb_val); + + // check ub + s32_t ub_s32 = (ub_val < static_cast(INT_MIN)) ? INT_MIN : + (ub_val > static_cast(INT_MAX)) ? 
INT_MAX : + static_cast(ub_val); + + return "[" + std::to_string(lb_s32) + ", " + std::to_string(ub_s32) + "]"; + } + } + +void BufOverflowChecker::handleSVFStatement(const SVFStmt *stmt) +{ + AE::handleSVFStatement(stmt); + // for gep stmt, add the gep stmt to the addrToGep map + if (const GepStmt *gep = SVFUtil::dyn_cast(stmt)) { + for (NodeID addrID: _svfir2ExeState->getAddrs(gep->getLHSVarID())) { + NodeID objId = _svfir2ExeState->getInternalID(addrID); + if (auto* extapi = SVFUtil::dyn_cast(_api)) + extapi->_addrToGep[objId] = gep; + } + } +} + +void BufOverflowCheckerAPI::initExtAPIBufOverflowCheckRules() { + //void llvm_memcpy_p0i8_p0i8_i64(char* dst, char* src, int sz, int flag){} + _extAPIBufOverflowCheckRules["llvm_memcpy_p0i8_p0i8_i64"] = {{0, 2}, {1,2}}; + //void llvm_memcpy_p0_p0_i64(char* dst, char* src, int sz, int flag){} + _extAPIBufOverflowCheckRules["llvm_memcpy_p0_p0_i64"] = {{0, 2}, {1,2}}; + //void llvm_memcpy_p0i8_p0i8_i32(char* dst, char* src, int sz, int flag){} + _extAPIBufOverflowCheckRules["llvm_memcpy_p0i8_p0i8_i32"] = {{0, 2}, {1,2}}; + //void llvm_memcpy(char* dst, char* src, int sz, int flag){} + _extAPIBufOverflowCheckRules["llvm_memcpy"] = {{0, 2}, {1,2}}; + //void llvm_memmove(char* dst, char* src, int sz, int flag){} + _extAPIBufOverflowCheckRules["llvm_memmove"] = {{0, 2}, {1,2}}; + //void llvm_memmove_p0i8_p0i8_i64(char* dst, char* src, int sz, int flag){} + _extAPIBufOverflowCheckRules["llvm_memmove_p0i8_p0i8_i64"] = {{0, 2}, {1,2}}; + //void llvm_memmove_p0_p0_i64(char* dst, char* src, int sz, int flag){} + _extAPIBufOverflowCheckRules["llvm_memmove_p0_p0_i64"] = {{0, 2}, {1,2}}; + //void llvm_memmove_p0i8_p0i8_i32(char* dst, char* src, int sz, int flag){} + _extAPIBufOverflowCheckRules["llvm_memmove_p0i8_p0i8_i32"] = {{0, 2}, {1,2}}; + //void __memcpy_chk(char* dst, char* src, int sz, int flag){} + _extAPIBufOverflowCheckRules["__memcpy_chk"] = {{0, 2}, {1,2}}; + //void *memmove(void *str1, const void *str2, unsigned long n) 
+ _extAPIBufOverflowCheckRules["memmove"] = {{0, 2}, {1,2}}; + //void bcopy(const void *s1, void *s2, unsigned long n){} + _extAPIBufOverflowCheckRules["bcopy"] = {{0, 2}, {1,2}}; + //void *memccpy( void * restrict dest, const void * restrict src, int c, unsigned long count) + _extAPIBufOverflowCheckRules["memccpy"] = {{0, 3}, {1,3}}; + //void __memmove_chk(char* dst, char* src, int sz){} + _extAPIBufOverflowCheckRules["__memmove_chk"] = {{0, 2}, {1,2}}; + //void llvm_memset(char* dst, char elem, int sz, int flag){} + _extAPIBufOverflowCheckRules["llvm_memset"] = {{0, 2}}; + //void llvm_memset_p0i8_i32(char* dst, char elem, int sz, int flag){} + _extAPIBufOverflowCheckRules["llvm_memset_p0i8_i32"] = {{0, 2}}; + //void llvm_memset_p0i8_i64(char* dst, char elem, int sz, int flag){} + _extAPIBufOverflowCheckRules["llvm_memset_p0i8_i64"] = {{0, 2}}; + //void llvm_memset_p0_i64(char* dst, char elem, int sz, int flag){} + _extAPIBufOverflowCheckRules["llvm_memset_p0_i64"] = {{0, 2}}; + //char *__memset_chk(char * dest, int c, unsigned long destlen, int flag) + _extAPIBufOverflowCheckRules["__memset_chk"] = {{0, 2}}; + //char *wmemset(wchar_t * dst, wchar_t elem, int sz, int flag) { + _extAPIBufOverflowCheckRules["wmemset"] = {{0, 2}}; + //char *strncpy(char *dest, const char *src, unsigned long n) + _extAPIBufOverflowCheckRules["strncpy"] = {{0, 2}, {1,2}}; + //unsigned long iconv(void* cd, char **restrict inbuf, unsigned long *restrict inbytesleft, char **restrict outbuf, unsigned long *restrict outbytesleft) + _extAPIBufOverflowCheckRules["iconv"] = {{1, 2}, {3, 4}}; +} + + +bool BufOverflowCheckerAPI::detectStrcpy(const CallICFGNode *call) +{ + CallSite cs = SVFUtil::getSVFCallSite(call->getCallSite()); + const SVFValue* arg0Val = cs.getArgument(0); + const SVFValue* arg1Val = cs.getArgument(1); + IntervalValue strLen = getStrlen(arg1Val); + // no need to -1, since it has \0 as the last byte + return canSafelyAccessMemory(arg0Val, strLen, call); +} + +void 
BufOverflowCheckerAPI::initExtFunMap() { + + auto sse_scanf = [&](const CallSite &cs) { + //scanf("%d", &data); + BufOverflowChecker* ae = SVFUtil::dyn_cast(_ae); + if (cs.arg_size() < 2) return; + IntervalExeState &es = ae->_svfir2ExeState->getEs(); + u32_t dst_id = _svfir->getValueNode(cs.getArgument(1)); + if (!ae->_svfir2ExeState->inVarToAddrsTable(dst_id)) { + BufOverflowException bug("scanf may cause buffer overflow.\n", 0, 0, 0, 0, cs.getArgument(1)); + ae->addBugToRecoder(bug, _svfir->getICFG()->getICFGNode(cs.getInstruction())); + return; + } else { + Addrs Addrs = ae->_svfir2ExeState->getAddrs(dst_id); + for (auto vaddr: Addrs) { + u32_t objId = ae->_svfir2ExeState->getInternalID(vaddr); + IntervalValue range = ae->_svfir2ExeState->getRangeLimitFromType(_svfir->getGNode(objId)->getType()); + es.store(vaddr, range); + } + } + }; + auto sse_fscanf = [&](const CallSite &cs) { + //fscanf(stdin, "%d", &data); + BufOverflowChecker* ae = SVFUtil::dyn_cast(_ae); + if (cs.arg_size() < 3) return; + IntervalExeState &es = ae->_svfir2ExeState->getEs(); + u32_t dst_id = _svfir->getValueNode(cs.getArgument(2)); + if (!ae->_svfir2ExeState->inVarToAddrsTable(dst_id)) { + BufOverflowException bug("scanf may cause buffer overflow.\n", 0, 0, 0, 0, cs.getArgument(2)); + ae->addBugToRecoder(bug, _svfir->getICFG()->getICFGNode(cs.getInstruction())); + return; + } else { + Addrs Addrs = ae->_svfir2ExeState->getAddrs(dst_id); + for (auto vaddr: Addrs) { + u32_t objId = ae->_svfir2ExeState->getInternalID(vaddr); + IntervalValue range = ae->_svfir2ExeState->getRangeLimitFromType(_svfir->getGNode(objId)->getType()); + es.store(vaddr, range); + } + } + }; + + _func_map["__isoc99_fscanf"] = sse_fscanf; + _func_map["__isoc99_scanf"] = sse_scanf; + _func_map["__isoc99_vscanf"] = sse_scanf; + _func_map["fscanf"] = sse_fscanf; + _func_map["scanf"] = sse_scanf; + _func_map["sscanf"] = sse_scanf; + _func_map["__isoc99_sscanf"] = sse_scanf; + _func_map["vscanf"] = sse_scanf; + + auto 
sse_fread = [&](const CallSite &cs) { + BufOverflowChecker* ae = SVFUtil::dyn_cast(_ae); + if (cs.arg_size() < 3) return; + IntervalExeState &es = ae->_svfir2ExeState->getEs(); + u32_t block_count_id = _svfir->getValueNode(cs.getArgument(2)); + u32_t block_size_id = _svfir->getValueNode(cs.getArgument(1)); + IntervalValue block_count = es[block_count_id]; + IntervalValue block_size = es[block_size_id]; + IntervalValue block_byte = block_count * block_size; + canSafelyAccessMemory(cs.getArgument(0), block_byte, _svfir->getICFG()->getICFGNode(cs.getInstruction())); + }; + _func_map["fread"] = sse_fread; + + auto sse_sprintf = [&](const CallSite &cs) { + // printf is difficult to predict since it has no byte size arguments + }; + + auto sse_snprintf = [&](const CallSite &cs) { + BufOverflowChecker* ae = SVFUtil::dyn_cast(_ae); + if (cs.arg_size() < 2) return; + IntervalExeState &es = ae->_svfir2ExeState->getEs(); + u32_t size_id = _svfir->getValueNode(cs.getArgument(1)); + u32_t dst_id = _svfir->getValueNode(cs.getArgument(0)); + // get elem size of arg2 + u32_t elemSize = 1; + if (cs.getArgument(2)->getType()->isArrayTy()) { + elemSize = SVFUtil::dyn_cast(cs.getArgument(2)->getType())->getTypeOfElement()->getByteSize(); + } + else if (cs.getArgument(2)->getType()->isPointerTy()) { + elemSize = getPointeeElement(_svfir->getValueNode(cs.getArgument(2)))->getByteSize(); + } + else { + return; + // assert(false && "we cannot support this type"); + } + IntervalValue size = es[size_id] * IntervalValue(elemSize) - IntervalValue(1); + if (!es.inVarToAddrsTable(dst_id)) { + if (Options::BufferOverflowCheck()) + { + BufOverflowException bug( + "snprintf dst_id or dst is not defined nor initializesd.\n", + 0, 0, 0, 0, cs.getArgument(0)); + ae->addBugToRecoder(bug, _svfir->getICFG()->getICFGNode(cs.getInstruction())); + return; + } + } + canSafelyAccessMemory(cs.getArgument(0), size, _svfir->getICFG()->getICFGNode(cs.getInstruction())); + }; + _func_map["__snprintf_chk"] = 
sse_snprintf; + _func_map["__vsprintf_chk"] = sse_sprintf; + _func_map["__sprintf_chk"] = sse_sprintf; + _func_map["snprintf"] = sse_snprintf; + _func_map["sprintf"] = sse_sprintf; + _func_map["vsprintf"] = sse_sprintf; + _func_map["vsnprintf"] = sse_snprintf; + _func_map["__vsnprintf_chk"] = sse_snprintf; + _func_map["swprintf"] = sse_snprintf; + _func_map["_snwprintf"] = sse_snprintf; + + + auto sse_itoa = [&](const CallSite &cs) { + // itoa(num, ch, 10); + // num: int, ch: char*, 10 is decimal + BufOverflowChecker* ae = SVFUtil::dyn_cast(_ae); + if (cs.arg_size() < 3) return; + IntervalExeState &es = ae->_svfir2ExeState->getEs(); + u32_t num_id = _svfir->getValueNode(cs.getArgument(0)); + + u32_t num = (u32_t) es[num_id].getNumeral(); + std::string snum = std::to_string(num); + canSafelyAccessMemory(cs.getArgument(1), IntervalValue((s32_t)snum.size()), _svfir->getICFG()->getICFGNode(cs.getInstruction())); + }; + _func_map["itoa"] = sse_itoa; + + + auto sse_strlen = [&](const CallSite &cs) { + // check the arg size + BufOverflowChecker* ae = SVFUtil::dyn_cast(_ae); + if (cs.arg_size() < 1) return; + const SVFValue* strValue = cs.getArgument(0); + IntervalExeState &es = ae->_svfir2ExeState->getEs(); + IntervalValue dst_size = getStrlen(strValue); + u32_t elemSize = 1; + if (strValue->getType()->isArrayTy()) { + elemSize = SVFUtil::dyn_cast(strValue->getType())->getTypeOfElement()->getByteSize(); + } + else if (strValue->getType()->isPointerTy()) { + elemSize = getPointeeElement(_svfir->getValueNode(strValue))->getByteSize(); + } + u32_t lhsId = _svfir->getValueNode(cs.getInstruction()); + es[lhsId] = dst_size / IntervalValue(elemSize); + }; + _func_map["strlen"] = sse_strlen; + _func_map["wcslen"] = sse_strlen; + + auto sse_recv = [&](const CallSite &cs) { + // recv(sockfd, buf, len, flags); + BufOverflowChecker* ae = SVFUtil::dyn_cast(_ae); + if (cs.arg_size() < 4) return; + IntervalExeState &es = ae->_svfir2ExeState->getEs(); + u32_t len_id = 
_svfir->getValueNode(cs.getArgument(2)); + IntervalValue len = es[len_id] - IntervalValue(1); + u32_t lhsId = _svfir->getValueNode(cs.getInstruction()); + es[lhsId] = len; + canSafelyAccessMemory(cs.getArgument(1), len, _svfir->getICFG()->getICFGNode(cs.getInstruction()));; + }; + _func_map["recv"] = sse_recv; + _func_map["__recv"] = sse_recv; + auto safe_bufaccess = [&](const CallSite &cs) { + const CallICFGNode* callNode = SVFUtil::dyn_cast(_svfir->getICFG()->getICFGNode(cs.getInstruction())); + _checkpoints.erase(callNode); + //void SAFE_BUFACCESS(void* data, int size); + BufOverflowChecker* ae = SVFUtil::dyn_cast(_ae); + if (cs.arg_size() < 2) return; + IntervalExeState &es = ae->_svfir2ExeState->getEs(); + u32_t size_id = _svfir->getValueNode(cs.getArgument(1)); + IntervalValue val = es[size_id]; + if (val.isBottom()) { + val = IntervalValue(0); + assert(false && "SAFE_BUFACCESS size is bottom"); + } + bool isSafe = canSafelyAccessMemory(cs.getArgument(0), val, _svfir->getICFG()->getICFGNode(cs.getInstruction())); + if (isSafe) { + std::cout << "safe buffer access success\n"; + return; + } + else { + std::string err_msg = "this SAFE_BUFACCESS should be a safe access but detected buffer overflow. 
Pos: "; + err_msg += cs.getInstruction()->getSourceLoc(); + std::cerr << err_msg << std::endl; + assert(false); + } + }; + _func_map["SAFE_BUFACCESS"] = safe_bufaccess; + + auto unsafe_bufaccess = [&](const CallSite &cs) { + const CallICFGNode* callNode = SVFUtil::dyn_cast(_svfir->getICFG()->getICFGNode(cs.getInstruction())); + _checkpoints.erase(callNode); + //void UNSAFE_BUFACCESS(void* data, int size); + BufOverflowChecker* ae = SVFUtil::dyn_cast(_ae); + if (cs.arg_size() < 2) return; + IntervalExeState &es = ae->_svfir2ExeState->getEs(); + u32_t size_id = _svfir->getValueNode(cs.getArgument(1)); + IntervalValue val = es[size_id]; + if (val.isBottom()) { + assert(false && "UNSAFE_BUFACCESS size is bottom"); + } + bool isSafe = canSafelyAccessMemory(cs.getArgument(0), val, _svfir->getICFG()->getICFGNode(cs.getInstruction())); + if (!isSafe) { + std::cout << "detect buffer overflow success\n"; + return; + } else { + // if it is safe, it means it is wrongly labeled, assert false. + std::string err_msg = "this UNSAFE_BUFACCESS should be a buffer overflow but not detected. Pos: "; + err_msg += cs.getInstruction()->getSourceLoc(); + std::cerr << err_msg << std::endl; + assert(false); + } + }; + _func_map["UNSAFE_BUFACCESS"] = unsafe_bufaccess; + + // init _checkpoint_names + _checkpoint_names.insert("SAFE_BUFACCESS"); + _checkpoint_names.insert("UNSAFE_BUFACCESS"); +} + +bool BufOverflowCheckerAPI::detectStrcat(const CallICFGNode *call) +{ + BufOverflowChecker* ae = SVFUtil::dyn_cast(_ae); + const SVFFunction *fun = SVFUtil::getCallee(call->getCallSite()); + // check the arg size + // if it is strcat group, we need to check the length of string, + // e.g. strcat(str1, str2); which checks AllocSize(str1) >= Strlen(str1) + Strlen(str2); + // if it is strncat group, we do not need to check the length of string, + // e.g. 
strncat(str1, str2, n); which checks AllocSize(str1) >= Strlen(str1) + n; + + const std::vector strcatGroup = {"__strcat_chk", "strcat", "__wcscat_chk", "wcscat"}; + const std::vector strncatGroup = {"__strncat_chk", "strncat", "__wcsncat_chk", "wcsncat"}; + if (std::find(strcatGroup.begin(), strcatGroup.end(), fun->getName()) != strcatGroup.end()) { + CallSite cs = SVFUtil::getSVFCallSite(call->getCallSite()); + const SVFValue* arg0Val = cs.getArgument(0); + const SVFValue* arg1Val = cs.getArgument(1); + IntervalValue strLen0 = getStrlen(arg0Val); + IntervalValue strLen1 = getStrlen(arg1Val); + IntervalValue totalLen = strLen0 + strLen1; + return canSafelyAccessMemory(arg0Val, totalLen, call); + } else if (std::find(strncatGroup.begin(), strncatGroup.end(), fun->getName()) != strncatGroup.end()) { + CallSite cs = SVFUtil::getSVFCallSite(call->getCallSite()); + const SVFValue* arg0Val = cs.getArgument(0); + const SVFValue* arg2Val = cs.getArgument(2); + IntervalValue arg2Num = ae->_svfir2ExeState->getEs()[_svfir->getValueNode(arg2Val)]; + IntervalValue strLen0 = getStrlen(arg0Val); + IntervalValue totalLen = strLen0 + arg2Num; + return canSafelyAccessMemory(arg0Val, totalLen, call); + } else { + assert(false && "unknown strcat function, please add it to strcatGroup or strncatGroup"); + } +} + +void BufOverflowCheckerAPI::handleExtAPI(const CallICFGNode *call) { + AEAPI::handleExtAPI(call); + BufOverflowChecker* ae = SVFUtil::dyn_cast(_ae); + const SVFFunction *fun = SVFUtil::getCallee(call->getCallSite()); + assert(fun && "SVFFunction* is nullptr"); + CallSite cs = SVFUtil::getSVFCallSite(call->getCallSite()); + // check the type of mem api, + // MEMCPY: like memcpy, memcpy_chk, llvm.memcpy etc. + // MEMSET: like memset, memset_chk, llvm.memset etc. + // STRCPY: like strcpy, strcpy_chk, wcscpy etc. + // STRCAT: like strcat, strcat_chk, wcscat etc. 
+ // for other ext api like printf, scanf, etc., they have their own handlers + ExtAPIType extType = UNCLASSIFIED; + // get type of mem api + for (const std::string &annotation: fun->getAnnotations()) { + if (annotation.find("MEMCPY") != std::string::npos) + extType = MEMCPY; + if (annotation.find("MEMSET") != std::string::npos) + extType = MEMSET; + if (annotation.find("STRCPY") != std::string::npos) + extType = STRCPY; + if (annotation.find("STRCAT") != std::string::npos) + extType = STRCAT; + } + // 1. memcpy functions like memcpy_chk, strncpy, annotate("MEMCPY"), annotate("BUF_CHECK:Arg0, Arg2"), annotate("BUF_CHECK:Arg1, Arg2") + if (extType == MEMCPY) { + if (_extAPIBufOverflowCheckRules.count(fun->getName()) == 0) { + // if it is not in the rules, we do not check it + SVFUtil::errs() << "Warning: " << fun->getName() << " is not in the rules, please implement it\n"; + return; + } + // call parseMemcpyBufferCheckArgs to parse the BUF_CHECK annotation + std::vector> args = _extAPIBufOverflowCheckRules.at(fun->getName()); + // loop the args and check the offset + for (auto arg: args) { + IntervalValue offset = ae->_svfir2ExeState->getEs()[_svfir->getValueNode(cs.getArgument(arg.second))] - IntervalValue(1); + canSafelyAccessMemory(cs.getArgument(arg.first), offset, call); + } + } + // 2. 
memset functions like memset, memset_chk, annotate("MEMSET"), annotate("BUF_CHECK:Arg0, Arg2") + else if (extType == MEMSET) { + if (_extAPIBufOverflowCheckRules.count(fun->getName()) == 0) { + // if it is not in the rules, we do not check it + SVFUtil::errs() << "Warning: " << fun->getName() << " is not in the rules, please implement it\n"; + return; + } + std::vector> args = _extAPIBufOverflowCheckRules.at(fun->getName()); + // loop the args and check the offset + for (auto arg: args) { + IntervalValue offset = ae->_svfir2ExeState->getEs()[_svfir->getValueNode(cs.getArgument(arg.second))] - IntervalValue(1); + canSafelyAccessMemory(cs.getArgument(arg.first), offset, call); + } + } + else if (extType == STRCPY) { + detectStrcpy(call); + } + else if (extType == STRCAT) { + detectStrcat(call); + } + else { + + } + return; +} + + +void BufOverflowChecker::handleICFGNode(const SVF::ICFGNode *node) { + AE::handleICFGNode(node); + detectBufOverflow(node); +} + +// +bool BufOverflowChecker::detectBufOverflow(const ICFGNode *node) { + + auto *extapi = SVFUtil::dyn_cast(_api); + for (auto* stmt: node->getSVFStmts()) { + if (const GepStmt *gep = SVFUtil::dyn_cast(stmt)) { + const SVFVar* gepRhs = gep->getRHSVar(); + if (const SVFInstruction* inst = SVFUtil::dyn_cast(gepRhs->getValue())) { + const ICFGNode* icfgNode = _svfir->getICFG()->getICFGNode(inst); + for (const SVFStmt* stmt2: icfgNode->getSVFStmts()) { + if (const GepStmt *gep2 = SVFUtil::dyn_cast(stmt2)) { + return extapi->canSafelyAccessMemory(gep2->getLHSVar()->getValue(), IntervalValue(0, 0), node); + } + } + } + } + else if (const LoadStmt* load = SVFUtil::dyn_cast(stmt)) { + if (_svfir2ExeState->inVarToAddrsTable(load->getRHSVarID())) { + ExeState::Addrs Addrs = _svfir2ExeState->getAddrs(load->getRHSVarID()); + for (auto vaddr: Addrs) { + u32_t objId = _svfir2ExeState->getInternalID(vaddr); + if (extapi->_addrToGep.find(objId) != extapi->_addrToGep.end()) { + const GepStmt* gep = extapi->_addrToGep.at(objId); + 
return extapi->canSafelyAccessMemory(gep->getLHSVar()->getValue(), IntervalValue(0, 0), node); + } + } + } + } + else if (const StoreStmt* store = SVFUtil::dyn_cast(stmt)) { + if (_svfir2ExeState->inVarToAddrsTable(store->getLHSVarID())) { + ExeState::Addrs Addrs = _svfir2ExeState->getAddrs(store->getLHSVarID()); + for (auto vaddr: Addrs) { + u32_t objId = _svfir2ExeState->getInternalID(vaddr); + if (extapi->_addrToGep.find(objId) != extapi->_addrToGep.end()) { + const GepStmt* gep = extapi->_addrToGep.at(objId); + return extapi->canSafelyAccessMemory(gep->getLHSVar()->getValue(), IntervalValue(0, 0), node); + } + } + } + } + } + return true; +} + +void BufOverflowChecker::addBugToRecoder(const BufOverflowException& e, const ICFGNode* node) { + const SVFInstruction* inst = nullptr; + if (const CallICFGNode* call = SVFUtil::dyn_cast(node)) { + inst = call->getCallSite(); + } + else { + inst = node->getSVFStmts().back()->getInst(); + } + GenericBug::EventStack eventStack; + SVFBugEvent sourceInstEvent(SVFBugEvent::EventType::SourceInst, inst); + for (const auto &callsite: _callSiteStack) { + SVFBugEvent callSiteEvent(SVFBugEvent::EventType::CallSite, callsite->getCallSite()); + eventStack.push_back(callSiteEvent); + } + eventStack.push_back(sourceInstEvent); + if (eventStack.size() == 0) return; + std::string loc = eventStack.back().getEventLoc(); + if (_bugLoc.find(loc) != _bugLoc.end()) { + return; + } else { + _bugLoc.insert(loc); + } + _recoder.addAbsExecBug(GenericBug::FULLBUFOVERFLOW, eventStack, e.getAllocLb(), e.getAllocUb(), e.getAccessLb(), + e.getAccessUb()); + _nodeToBugInfo[node] = e.what(); +} + + +bool BufOverflowCheckerAPI::canSafelyAccessMemory(const SVFValue *value, const IntervalValue &len, const ICFGNode *curNode) { + BufOverflowChecker* ae = static_cast(this->_ae); + const SVFValue *firstValue = value; + /// Usually called by a GepStmt overflow check, or external API (like memcpy) overflow check + /// Defitions of Terms: + /// source node: malloc 
or gepStmt(array), sink node: gepStmt or external API (like memcpy) + /// e.g. 1) a = malloc(10), a[11] = 10, a[11] is the sink node, a is the source node (malloc) + /// 2) A = struct {int a[10];}, A.a[11] = 10, A.a[11] is the sink, A.a is the source node (gepStmt(array)) + + /// It tracks the value flow from sink to source, accumulating the offset, + /// then compares the accumulated offset with the malloc size (or gepStmt array size) + SVF::FILOWorkList worklist; + Set visited; + visited.insert(value); + Map gep_offsets; + IntervalValue total_bytes = len; + worklist.push(value); + std::vector callstack = ae->_callSiteStack; + while (!worklist.empty()) { + value = worklist.pop(); + if (const SVFInstruction *ins = SVFUtil::dyn_cast(value)) { + const ICFGNode *node = _svfir->getICFG()->getICFGNode(ins); + if (const CallICFGNode *callnode = SVFUtil::dyn_cast(node)) { + AccessMemoryViaRetNode(callnode, worklist, visited); + } + for (const SVFStmt *stmt: node->getSVFStmts()) { + if (const CopyStmt *copy = SVFUtil::dyn_cast(stmt)) { + AccessMemoryViaCopyStmt(copy, worklist, visited); + } else if (const LoadStmt *load = SVFUtil::dyn_cast(stmt)) { + AccessMemoryViaLoadStmt(load, worklist, visited); + } else if (const GepStmt *gep = SVFUtil::dyn_cast(stmt)) { + // there are 3 types of gepStmt + // 1. ptr get offset + // 2. struct get field + // 3. array get element + // for array gep, there are two kinds of overflow checking + // Arr [Struct.C * 10] arr, Struct.C {i32 a, i32 b} + // arr[11].a = **, it is "lhs = gep *arr, 0 (ptr), 11 (arrIdx), 0 (ptr), 0(struct field)" + // 1) in this case arrIdx 11 overflows. 
+ // The other case: + // Struct.C {i32 a, [i32*10] b, i32 c}, C.b[11] = 1 + // it is "lhs = gep *C, 0(ptr), 1(struct field), 0(ptr), 11(arrIdx)" + // 2) in this case arrIdx 11 is larger than its getOffsetVar.Type Array([i32*10]) + + // therefore, if last getOffsetVar.Type is not the Array, just check the overall offset and its + // gep source type size (together with totalOffset along the value flow). + // so if curgepOffset + totalOffset >= gepSrc (overflow) + // else totalOffset += curgepOffset + + // otherwise, if last getOffsetVar.Type is the Array, check the last idx and array. (just offset, + // not with totalOffset during check) + // so if getOffsetVarVal > getOffsetVar.TypeSize (overflow) + // else safe and return. + IntervalValue byteOffset; + if (gep->isConstantOffset()) { + byteOffset = IntervalValue(gep->accumulateConstantByteOffset()); + } else { + byteOffset = ae->_svfir2ExeState->getByteOffset(gep); + } + // record this gep's byte offset per ICFG node (overwritten, not joined); it is printed in the value-flow part of the bug message + gep_offsets[gep->getICFGNode()] = byteOffset; + if (byteOffset.ub().getNumeral() >= Options::MaxFieldLimit() && Options::GepUnknownIdx()) { + return true; + } + + if (gep->getOffsetVarAndGepTypePairVec().size() > 0) { + const SVFVar *gepVal = gep->getOffsetVarAndGepTypePairVec().back().first; + const SVFType *gepType = gep->getOffsetVarAndGepTypePairVec().back().second; + + if (gepType->isArrayTy()) { + const SVFArrayType *gepArrType = SVFUtil::dyn_cast(gepType); + IntervalValue gepArrTotalByte(0); + const SVFValue *idxValue = gepVal->getValue(); + u32_t arrElemSize = gepArrType->getTypeOfElement()->getByteSize(); + if (const SVFConstantInt *op = SVFUtil::dyn_cast(idxValue)) { + u32_t lb = (double) Options::MaxFieldLimit() / arrElemSize >= op->getSExtValue() ? 
+ op->getSExtValue() * arrElemSize : Options::MaxFieldLimit(); + gepArrTotalByte = gepArrTotalByte + IntervalValue(lb, lb); + } else { + u32_t idx = _svfir->getValueNode(idxValue); + IntervalValue idxVal = ae->_svfir2ExeState->getEs()[idx]; + if (idxVal.isBottom()) { + gepArrTotalByte = gepArrTotalByte + IntervalValue(0, 0); + } else { + u32_t ub = (idxVal.ub().getNumeral() < 0) ? 0 : + (double) Options::MaxFieldLimit() / arrElemSize >= + idxVal.ub().getNumeral() ? + arrElemSize * idxVal.ub().getNumeral() : Options::MaxFieldLimit(); + u32_t lb = (idxVal.lb().getNumeral() < 0) ? 0 : + ((double) Options::MaxFieldLimit() / arrElemSize >= + idxVal.lb().getNumeral()) ? + arrElemSize * idxVal.lb().getNumeral() : Options::MaxFieldLimit(); + gepArrTotalByte = gepArrTotalByte + IntervalValue(lb, ub); + } + } + total_bytes = total_bytes + gepArrTotalByte; + if (total_bytes.ub().getNumeral() >= gepArrType->getByteSize()) { + std::string msg = + "Buffer overflow!! Accessing buffer range: " + + IntervalToIntStr(total_bytes) + + "\nAllocated Gep buffer size: " + + std::to_string(gepArrType->getByteSize()) + "\n"; + msg += "Position: " + firstValue->toString() + "\n"; + msg += " The following is the value flow. [[\n"; + for (auto it = gep_offsets.begin(); it != gep_offsets.end(); ++it) { + msg += it->first->toString() + ", Offset: " + IntervalToIntStr(it->second) + + "\n"; + } + msg += "]].\nAlloc Site: " + gep->toString() + "\n"; + + BufOverflowException bug(SVFUtil::errMsg(msg), gepArrType->getByteSize(), + gepArrType->getByteSize(), + total_bytes.lb().getNumeral(), total_bytes.ub().getNumeral(), + firstValue); + ae->addBugToRecoder(bug, curNode); + return false; + } else { + // for gep last index's type is arr, stop here. 
+ return true; + } + } else { + total_bytes = total_bytes + byteOffset; + } + + } + if (!visited.count(gep->getRHSVar()->getValue())) { + visited.insert(gep->getRHSVar()->getValue()); + worklist.push(gep->getRHSVar()->getValue()); + } + } else if (const AddrStmt *addr = SVFUtil::dyn_cast(stmt)) { + // addrStmt is source node. + u32_t arr_type_size = getAllocaInstByteSize(addr); + if (total_bytes.ub().getNumeral() >= arr_type_size || + total_bytes.lb().getNumeral() < 0) { + std::string msg = + "Buffer overflow!! Accessing buffer range: " + IntervalToIntStr(total_bytes) + + "\nAllocated buffer size: " + std::to_string(arr_type_size) + "\n"; + msg += "Position: " + firstValue->toString() + "\n"; + msg += " The following is the value flow. [[\n"; + for (auto it = gep_offsets.begin(); it != gep_offsets.end(); ++it) { + msg += it->first->toString() + ", Offset: " + IntervalToIntStr(it->second) + "\n"; + } + msg += "]].\n Alloc Site: " + addr->toString() + "\n"; + BufOverflowException bug(SVFUtil::wrnMsg(msg), arr_type_size, arr_type_size, + total_bytes.lb().getNumeral(), total_bytes.ub().getNumeral(), + firstValue); + ae->addBugToRecoder(bug, curNode); + return false; + } else { + + return true; + } + } + } + } + else if (const SVF::SVFGlobalValue *gvalue = SVFUtil::dyn_cast(value)) { + u32_t arr_type_size = 0; + const SVFType *svftype = gvalue->getType(); + if (SVFUtil::isa(svftype)) { + if (const SVFArrayType *ptrArrType = SVFUtil::dyn_cast( + getPointeeElement(_svfir->getValueNode(gvalue)))) + arr_type_size = ptrArrType->getByteSize(); + else + arr_type_size = svftype->getByteSize(); + } else + arr_type_size = svftype->getByteSize(); + + if (total_bytes.ub().getNumeral() >= arr_type_size || total_bytes.lb().getNumeral() < 0) { + std::string msg = "Buffer overflow!! 
Accessing buffer range: " + IntervalToIntStr(total_bytes) + + "\nAllocated buffer size: " + std::to_string(arr_type_size) + "\n"; + msg += "Position: " + firstValue->toString() + "\n"; + msg += " The following is the value flow.\n[["; + for (auto it = gep_offsets.begin(); it != gep_offsets.end(); ++it) { + msg += it->first->toString() + ", Offset: " + IntervalToIntStr(it->second) + "\n"; + } + msg += "]]. \nAlloc Site: " + gvalue->toString() + "\n"; + + BufOverflowException bug(SVFUtil::wrnMsg(msg), arr_type_size, arr_type_size, + total_bytes.lb().getNumeral(), total_bytes.ub().getNumeral(), firstValue); + ae->addBugToRecoder(bug, curNode); + return false; + } else { + return true; + } + } + else if (const SVF::SVFArgument *arg = SVFUtil::dyn_cast(value)) { + AccessMemoryViaCallArgs(arg, worklist, visited); + } else { + // maybe SVFConstant + // the source may not be found (e.g. when the analysis starts from a non-main function), + // so the value-flow track is lost; the access is treated as safe + return true; + } + } + // the source may not be found (e.g. when the analysis starts from a non-main function), + // so the value-flow track is lost; the access is treated as safe + return true; +} + +} diff --git a/svf/lib/AbstractExecution/SVFIR2ConsExeState.cpp b/svf/lib/AE/Svfexe/SVFIR2ConsExeState.cpp similarity index 99% rename from svf/lib/AbstractExecution/SVFIR2ConsExeState.cpp rename to svf/lib/AE/Svfexe/SVFIR2ConsExeState.cpp index 4d1a94bd9..4bd02d7be 100644 --- a/svf/lib/AbstractExecution/SVFIR2ConsExeState.cpp +++ b/svf/lib/AE/Svfexe/SVFIR2ConsExeState.cpp @@ -25,7 +25,7 @@ // -#include "AbstractExecution/SVFIR2ConsExeState.h" +#include "AE/Svfexe/SVFIR2ConsExeState.h" #include "Util/Options.h" using namespace SVF; diff --git a/svf/lib/AbstractExecution/SVFIR2ItvExeState.cpp b/svf/lib/AE/Svfexe/SVFIR2ItvExeState.cpp similarity index 99% rename from svf/lib/AbstractExecution/SVFIR2ItvExeState.cpp rename to svf/lib/AE/Svfexe/SVFIR2ItvExeState.cpp index ce14320a3..43e15f838 100644 --- 
a/svf/lib/AbstractExecution/SVFIR2ItvExeState.cpp +++ b/svf/lib/AE/Svfexe/SVFIR2ItvExeState.cpp @@ -27,7 +27,7 @@ * */ -#include "AbstractExecution/SVFIR2ItvExeState.h" +#include "AE/Svfexe/SVFIR2ItvExeState.h" #include "Util/Options.h" using namespace SVF; diff --git a/svf/lib/Util/Options.cpp b/svf/lib/Util/Options.cpp index 686cee7f5..8ebe402a8 100644 --- a/svf/lib/Util/Options.cpp +++ b/svf/lib/Util/Options.cpp @@ -839,4 +839,17 @@ const Option Options::LoopBound( 1 ); +const Option Options::WidenDelay( + "widen-delay", "Loop Widen Delay", 3); +const Option Options::Timeout( + "timeout", "time out (seconds), set -1 (no timeout), default 14400s",14400); +const Option Options::OutputName( + "output","output db file","output.db"); +const Option Options::BufferOverflowCheck( + "overflow","Buffer Overflow Detection",false); +const Option Options::GepUnknownIdx( + "gep-unknown-idx","Skip Gep Unknown Index",false); +const Option Options::RunUncallFuncs( + "run-uncall-fun","Run Uncalled Functions",false); + } // namespace SVF.