From f1ea9b5c7ac5e390fbb22ef015f153f0ef334137 Mon Sep 17 00:00:00 2001 From: Johannes Blaser Date: Tue, 13 Feb 2024 18:49:35 +0100 Subject: [PATCH 1/2] Changed class name derivation; no longer incorrectly based on LLVM type (in IR) --- svf-llvm/include/SVF-LLVM/ObjTypeInference.h | 6 +- svf-llvm/lib/CppUtil.cpp | 10 +- svf-llvm/lib/ObjTypeInference.cpp | 172 ++++++++++--------- 3 files changed, 104 insertions(+), 84 deletions(-) diff --git a/svf-llvm/include/SVF-LLVM/ObjTypeInference.h b/svf-llvm/include/SVF-LLVM/ObjTypeInference.h index 951be7d86..ec5fcc675 100644 --- a/svf-llvm/include/SVF-LLVM/ObjTypeInference.h +++ b/svf-llvm/include/SVF-LLVM/ObjTypeInference.h @@ -48,7 +48,7 @@ class ObjTypeInference typedef Map ValueToType; typedef std::pair ValueBoolPair; typedef Map> ValueToClassNames; - typedef Map> AllocToClsNameSources; + typedef Map> ObjToClsNameSources; private: @@ -57,7 +57,7 @@ class ObjTypeInference ValueToSources _valueToAllocs; // value allocations (stack, static, heap) cache ValueToClassNames _thisPtrClassNames; // thisptr class name cache ValueToSources _valueToAllocOrClsNameSources; // value alloc/clsname sources cache - AllocToClsNameSources _allocToClsNameSources; // alloc clsname sources cache + ObjToClsNameSources _objToClsNameSources; // alloc clsname sources cache public: @@ -122,7 +122,7 @@ class ObjTypeInference Set &bwFindAllocOrClsNameSources(const Value *startValue); /// forward find class name sources starting from an allocation - Set &fwFindClsNameSources(const CallBase *alloc); + Set &fwFindClsNameSources(const Value *startValue); }; } #endif //SVF_OBJTYPEINFERENCE_H diff --git a/svf-llvm/lib/CppUtil.cpp b/svf-llvm/lib/CppUtil.cpp index c6816e0e0..14bae02e8 100644 --- a/svf-llvm/lib/CppUtil.cpp +++ b/svf-llvm/lib/CppUtil.cpp @@ -28,7 +28,9 @@ */ #include "SVF-LLVM/CppUtil.h" +#include "SVF-LLVM/BasicTypes.h" #include "SVF-LLVM/LLVMUtil.h" +#include "Util/Casting.h" #include "Util/SVFUtil.h" #include "SVF-LLVM/LLVMModule.h" #include "SVF-LLVM/ObjTypeInference.h" @@ -640,9 +642,9 @@ Set cppUtil::extractClsNamesFromFunc(const Function *foo) { assert(foo->hasName() && "foo does not have a name? possible indirect call"); const std::string &name = foo->getName().str(); - if (isConstructor(foo)) + if (isConstructor(foo) || isDestructor(foo)) { - // c++ constructor + // c++ constructor or destructor DemangledName demangledName = cppUtil::demangle(name); return {demangledName.className}; } @@ -797,6 +799,10 @@ bool cppUtil::isClsNameSource(const Value *val) if(!foo) return false; return isConstructor(foo) || isDestructor(foo) || isTemplateFunc(foo) || isDynCast(foo); } + else if (const auto *func = SVFUtil::dyn_cast(val)) + { + return isConstructor(func) || isDestructor(func) || isTemplateFunc(func); + } return false; } diff --git a/svf-llvm/lib/ObjTypeInference.cpp b/svf-llvm/lib/ObjTypeInference.cpp index a716b2516..6d3e950aa 100644 --- a/svf-llvm/lib/ObjTypeInference.cpp +++ b/svf-llvm/lib/ObjTypeInference.cpp @@ -28,9 +28,11 @@ */ #include "SVF-LLVM/ObjTypeInference.h" +#include "SVF-LLVM/BasicTypes.h" #include "SVF-LLVM/LLVMModule.h" #include "SVF-LLVM/LLVMUtil.h" #include "SVF-LLVM/CppUtil.h" +#include "Util/Casting.h" #define TYPE_DEBUG 0 /* Turn this on if you're debugging type inference */ #define ERR_MSG(msg) \ @@ -590,9 +592,13 @@ u32_t ObjTypeInference::objTyToNumFields(const Type *objTy) /*! - * get or infer the class names of thisptr + * get or infer the class names of thisptr; starting from :param:`thisPtr`, will walk backwards to find + * all potential sources for the class name. Valid sources include global or stack variables, heap allocations, + * or C++ dynamic casts/constructors/destructors. + * If the source site is a global/stack/heap variable, find the corresponding constructor/destructor to + * extract the class' name from (since the type of the variable is not reliable but the demangled name is) * @param thisPtr - * @return + * @return a set of all possible type names that :param:`thisPtr` could point to */ Set &ObjTypeInference::inferThisPtrClsName(const Value *thisPtr) { @@ -600,59 +606,63 @@ Set &ObjTypeInference::inferThisPtrClsName(const Value *thisPtr) if (it != _thisPtrClassNames.end()) return it->second; Set names; - auto insertClassNames = [&names](Set &classNames) + + // Lambda for checking a function is a valid name source & extracting a class name from it + auto addNamesFromFunc = [&names](const Function *func) -> void { - names.insert(classNames.begin(), classNames.end()); + ABORT_IFNOT(isClsNameSource(func), "Func is invalid class name source: " + dumpValueAndDbgInfo(func)); + for (auto name : extractClsNamesFromFunc(func)) names.insert(name); }; - // backward find heap allocations or class name sources - Set &vals = bwFindAllocOrClsNameSources(thisPtr); - for (const auto &val: vals) + // Lambda for getting callee & extracting class name for calls to constructors/destructors/template funcs + auto addNamesFromCall = [&names, &addNamesFromFunc](const CallBase *call) -> void { + ABORT_IFNOT(isClsNameSource(call), "Call is invalid class name source: " + dumpValueAndDbgInfo(call)); + + const auto *func = call->getCalledFunction(); + if (isDynCast(func)) names.insert(extractClsNameFromDynCast(call)); + else addNamesFromFunc(func); + }; + + // Walk backwards to find all valid source sites for the pointer (e.g. stack/global/heap variables) + for (const auto &val: bwFindAllocOrClsNameSources(thisPtr)) + { + // A source site is either a constructor/destructor/template function from which the class name can be + // extracted; a call to a C++ constructor/destructor/template function from which the class name can be + // extracted; or an allocation site of an object (i.e. a stack/global/heap variable), from which a + // forward walk can be performed to find calls to C++ constructor/destructor/template functions from + // which the class' name can then be extracted; skip starting pointer if (val == thisPtr) continue; if (const auto *func = SVFUtil::dyn_cast(val)) { - // extract class name from function name - Set classNames = extractClsNamesFromFunc(func); - insertClassNames(classNames); + // Constructor/destructor/template func; extract name from func directly + addNamesFromFunc(func); } - else if (SVFUtil::isa(val)) + else if (isClsNameSource(val)) { - // extract class name from instructions - const Type *type = infersiteToType(val); - const std::string &className = typeToClsName(type); - if (!className.empty()) - { - Set tgt{className}; - insertClassNames(tgt); - } + // Call to constructor/destructor/template func; get callee; extract name from callee + ABORT_IFNOT(SVFUtil::isa(val), "Call source site is not a callbase: " + dumpValueAndDbgInfo(val)); + addNamesFromCall(SVFUtil::cast(val)); } - else if (const auto *callBase = SVFUtil::dyn_cast(val)) + else if (isAlloc(val)) { - if (const Function *callFunc = callBase->getCalledFunction()) + // Stack/global/heap allocation site; walk forward; find constructor/destructor/template calls + ABORT_IFNOT((SVFUtil::isa(val)), + "Alloc site source is not a stack/heap/global variable: " + dumpValueAndDbgInfo(val)); + for (const auto *src : fwFindClsNameSources(val)) { - Set classNames = extractClsNamesFromFunc(callFunc); - insertClassNames(classNames); - if (isDynCast(callFunc)) - { - // dynamic cast - Set tgt{extractClsNameFromDynCast(callBase)}; - insertClassNames(tgt); - } - else if (isNewAlloc(callFunc)) - { - // for heap allocation, we forward find class name sources - Set& srcs = fwFindClsNameSources(callBase); - for (const auto &src: srcs) - { - classNames = extractClsNamesFromFunc(src); - insertClassNames(classNames); - } - } + if (const auto *func = SVFUtil::dyn_cast(src)) addNamesFromFunc(func); + else if (const auto *call = SVFUtil::dyn_cast(src)) addNamesFromCall(SVFUtil::cast(src)); + else ABORT_MSG("Source site from forward walk is invalid: " + dumpValueAndDbgInfo(src)); } } + else + { + ERR_MSG("Unsupported source type found:" + dumpValueAndDbgInfo(val)); + } } + return _thisPtrClassNames[thisPtr] = names; } @@ -711,48 +721,43 @@ Set &ObjTypeInference::bwFindAllocOrClsNameSources(const Value *s workList.push({curValue, true}); } - // current inst reside in cpp self-inference function + // If current value is an instruction inside a constructor/destructor/template, use it as a source if (const auto *inst = SVFUtil::dyn_cast(curValue)) { - if (const Function *foo = inst->getFunction()) + if (const auto *parent = inst->getFunction()) { - if (isConstructor(foo) || isDestructor(foo) || isTemplateFunc(foo) || isDynCast(foo)) - { - insertSource(foo); - if (canUpdate) - { - _valueToAllocOrClsNameSources[curValue] = sources; - } - continue; - } + if (isClsNameSource(parent)) insertSource(parent); } } + + // If the current value is an object (global, heap, stack, etc) or name source (constructor/destructor, + // a C++ dynamic cast, or a template function), use it as a source if (isAlloc(curValue) || isClsNameSource(curValue)) { insertSource(curValue); } - else if (const auto *getElementPtrInst = SVFUtil::dyn_cast(curValue)) + + // Explore the current value further depending on the type of the value; use cached values if possible + if (const auto *getElementPtrInst = SVFUtil::dyn_cast(curValue)) { - insertSource(getElementPtrInst); insertSourcesOrPushWorklist(getElementPtrInst->getPointerOperand()); } else if (const auto *bitCastInst = SVFUtil::dyn_cast(curValue)) { - Value *prevVal = bitCastInst->getOperand(0); - insertSourcesOrPushWorklist(prevVal); + insertSourcesOrPushWorklist(bitCastInst->getOperand(0)); } else if (const auto *phiNode = SVFUtil::dyn_cast(curValue)) { - for (u32_t i = 0; i < phiNode->getNumOperands(); ++i) + for (const auto *op : phiNode->operand_values()) { - insertSourcesOrPushWorklist(phiNode->getOperand(i)); + insertSourcesOrPushWorklist(op); } } else if (const auto *loadInst = SVFUtil::dyn_cast(curValue)) { - for (const auto &use: loadInst->getPointerOperand()->uses()) + for (const auto *user : loadInst->getPointerOperand()->users()) { - if (const auto *storeInst = SVFUtil::dyn_cast(use.getUser())) + if (const auto *storeInst = SVFUtil::dyn_cast(user)) { if (storeInst->getPointerOperand() == loadInst->getPointerOperand()) { @@ -763,9 +768,9 @@ Set &ObjTypeInference::bwFindAllocOrClsNameSources(const Value *s } else if (const auto *argument = SVFUtil::dyn_cast(curValue)) { - for (const auto &use: argument->getParent()->uses()) + for (const auto *user: argument->getParent()->users()) { - if (const auto *callBase = SVFUtil::dyn_cast(use.getUser())) + if (const auto *callBase = SVFUtil::dyn_cast(user)) { // skip function as parameter // e.g., call void @foo(%struct.ssl_ctx_st* %9, i32 (i8*, i32, i32, i8*)* @passwd_callback) @@ -778,7 +783,7 @@ Set &ObjTypeInference::bwFindAllocOrClsNameSources(const Value *s else if (const auto *callBase = SVFUtil::dyn_cast(curValue)) { ABORT_IFNOT(!callBase->doesNotReturn(), "callbase does not return:" + dumpValueAndDbgInfo(callBase)); - if (Function *callee = callBase->getCalledFunction()) + if (const auto *callee = callBase->getCalledFunction()) { if (!callee->isDeclaration()) { @@ -790,47 +795,56 @@ Set &ObjTypeInference::bwFindAllocOrClsNameSources(const Value *s } } } + + // If updating is allowed; store the gathered sources as sources for the current value in the cache if (canUpdate) { _valueToAllocOrClsNameSources[curValue] = sources; } } + return _valueToAllocOrClsNameSources[startValue]; } -Set &ObjTypeInference::fwFindClsNameSources(const CallBase *alloc) +Set &ObjTypeInference::fwFindClsNameSources(const Value *startValue) { + assert(startValue && "startValue was null?"); + // consult cache - auto tIt = _allocToClsNameSources.find(alloc); - if (tIt != _allocToClsNameSources.end()) + auto tIt = _objToClsNameSources.find(startValue); + if (tIt != _objToClsNameSources.end()) { return tIt->second; } - Set clsSources; - // for heap allocation, we forward find class name sources - auto inferViaCppCall = [&clsSources](const CallBase *callBase) + Set sources; + + // Lambda for adding a callee to the sources iff it is a constructor/destructor/template/dyncast + auto inferViaCppCall = [&sources](const CallBase *caller) { - if (!callBase->getCalledFunction()) return; - const Function *constructFoo = callBase->getCalledFunction(); - clsSources.insert(constructFoo); + if (!caller) return; + if (isClsNameSource(caller)) sources.insert(caller); }; - for (const auto &use: alloc->uses()) + + // Find all calls of starting val (or through cast); add as potential source iff applicable + for (const auto *user : startValue->users()) { - if (const auto *cppCall = SVFUtil::dyn_cast(use.getUser())) + if (const auto *caller = SVFUtil::dyn_cast(user)) { - inferViaCppCall(cppCall); + inferViaCppCall(caller); } - else if (const auto *bitCastInst = SVFUtil::dyn_cast(use.getUser())) + else if (const auto *bitcast = SVFUtil::dyn_cast(user)) { - for (const auto &use2: bitCastInst->uses()) + for (const auto *cast_user : bitcast->users()) { - if (const auto *cppCall2 = SVFUtil::dyn_cast(use2.getUser())) + if (const auto *caller = SVFUtil::dyn_cast(cast_user)) { - inferViaCppCall(cppCall2); + inferViaCppCall(caller); } } } } - return _allocToClsNameSources[alloc] = SVFUtil::move(clsSources); -} \ No newline at end of file + + // Store sources in cache for starting value & return the found sources + return _objToClsNameSources[startValue] = SVFUtil::move(sources); +} From 78cb220dcb50ad298f19ba7954a87688ebc584fd Mon Sep 17 00:00:00 2001 From: Johannes Blaser Date: Tue, 13 Feb 2024 20:06:25 +0100 Subject: [PATCH 2/2] Small fix to unused parameter --- svf-llvm/lib/ObjTypeInference.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/svf-llvm/lib/ObjTypeInference.cpp b/svf-llvm/lib/ObjTypeInference.cpp index 6d3e950aa..2150526c9 100644 --- a/svf-llvm/lib/ObjTypeInference.cpp +++ b/svf-llvm/lib/ObjTypeInference.cpp @@ -653,7 +653,7 @@ Set &ObjTypeInference::inferThisPtrClsName(const Value *thisPtr) for (const auto *src : fwFindClsNameSources(val)) { if (const auto *func = SVFUtil::dyn_cast(src)) addNamesFromFunc(func); - else if (const auto *call = SVFUtil::dyn_cast(src)) addNamesFromCall(SVFUtil::cast(src)); + else if (const auto *call = SVFUtil::dyn_cast(src)) addNamesFromCall(call); else ABORT_MSG("Source site from forward walk is invalid: " + dumpValueAndDbgInfo(src)); } }