Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Repeating life cycles #168

Closed
160 changes: 119 additions & 41 deletions source/LifecycleMethod.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,78 @@

namespace marianatrench {

// Registers `node` in the graph. A node that is not yet present is inserted
// with an empty successor list; an existing node is left untouched.
void LifeCycleMethodGraph::addNode(const LifecycleMethodCall& node) {
  adj_list_.try_emplace(node);
}

// Two graphs are equal when their entry points are equal and their adjacency
// lists hold the same nodes, each with the same successors in the same order.
bool LifeCycleMethodGraph::operator==(const LifeCycleMethodGraph& other) const {
  // `std::unordered_map::operator==` checks the sizes and then compares every
  // key/value pair with `==`; `std::vector::operator==` is order-sensitive.
  return entry_point_ == other.entry_point_ && adj_list_ == other.adj_list_;
}

// Appends `to` to the successor list of `from`. `from` is inserted into the
// graph first if it is not already present; `to` is not registered as a node.
void LifeCycleMethodGraph::addEdge(
    const LifecycleMethodCall& from,
    const LifecycleMethodCall& to) {
  auto& successors = adj_list_[from];
  successors.push_back(to);
}

// Returns the successors recorded for `node`. `std::unordered_map::at` throws
// `std::out_of_range` when `node` is not in the graph.
const std::vector<LifecycleMethodCall>& LifeCycleMethodGraph::getNeighbours(
    const LifecycleMethodCall& node) const {
  const auto& successors = adj_list_.at(node);
  return successors;
}

LifeCycleMethodGraph LifeCycleMethodGraph::from_json(const Json::Value& value) {
LifeCycleMethodGraph graph;
for (const auto& node_name : value.getMemberNames()) {
const auto& node = value[node_name];
JsonValidation::validate_object(node, "node");
const auto& instructions = node["instructions"];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think those are not necessary anymore?


if(node_name == "entry") {
LifecycleMethodCall entry_point = LifecycleMethodCall::from_json(instructions[0]);
graph.entry_point_ = entry_point;
}

for (const auto& instruction :JsonValidation::null_or_array(node, "instructions")) {
LifecycleMethodCall call = LifecycleMethodCall::from_json(instruction);
graph.addNode(call);
for (const auto& successor : JsonValidation::null_or_array(node, "successors")) {
LifecycleMethodCall successor_call = LifecycleMethodCall::from_json(successor);
graph.addEdge(call, successor_call);
}
}
}
return graph;
}

LifecycleMethodCall LifecycleMethodCall::from_json(const Json::Value& value) {
auto method_name = JsonValidation::string(value, "method_name");
auto return_type = JsonValidation::string(value, "return_type");
Expand Down Expand Up @@ -142,14 +214,20 @@ bool LifecycleMethodCall::operator==(const LifecycleMethodCall& other) const {
}

LifecycleMethod LifecycleMethod::from_json(const Json::Value& value) {
auto base_class_name = JsonValidation::string(value, "base_class_name");
auto method_name = JsonValidation::string(value, "method_name");
std::vector<LifecycleMethodCall> callees;
for (const auto& callee : JsonValidation::nonempty_array(value, "callees")) {
callees.emplace_back(LifecycleMethodCall::from_json(callee));
std::string base_class_name = JsonValidation::string(value, "base_class_name");
std::string method_name = JsonValidation::string(value, "method_name");
if (JsonValidation::has_field(value, "callees")) {
std::vector<LifecycleMethodCall> callees;
for (const auto& callee : JsonValidation::null_or_array(value, "callees")) {
callees.push_back(LifecycleMethodCall::from_json(callee));
}
return LifecycleMethod(base_class_name, method_name, callees);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpick: std::move

Suggested change
return LifecycleMethod(base_class_name, method_name, callees);
return LifecycleMethod(base_class_name, method_name, std::move(callees));

} else if (JsonValidation::has_field(value, "control_flow_graph")) {
JsonValidation::validate_object(value, "control_flow_graph");
LifeCycleMethodGraph graph = LifeCycleMethodGraph::from_json(JsonValidation::object(value, "control_flow_graph"));
return LifecycleMethod(base_class_name, method_name, graph);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also here

Suggested change
return LifecycleMethod(base_class_name, method_name, graph);
return LifecycleMethod(base_class_name, method_name, std::move(graph));

}

return LifecycleMethod(base_class_name, method_name, callees);
throw std::invalid_argument("Invalid JSON format for LifecycleMethod");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's be a bit more descriptive, otherwise this will be confusing to the user.
We should also use JsonValidationError.

Suggested change
throw std::invalid_argument("Invalid JSON format for LifecycleMethod");
throw JsonValidationError(
value,
/* field */ std::nullopt,
"key `callees` or `control_flow_graph`");

}

bool LifecycleMethod::validate(
Expand Down Expand Up @@ -178,9 +256,9 @@ bool LifecycleMethod::validate(
return false;
}

for (const auto& callee : callees_) {
callee.validate(base_class, class_hierarchies);
}
// for (const auto& callee : callees_) {
// callee.validate(base_class, class_hierarchies);
// }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This still needs to be addressed. And same thing below.
You can do something like:

if (const auto* callees = std::get_if<std::vector<LifecycleMethodCall>>(&callees_)) {
  // handle *callees
} else {
  const auto& graph = std::get<LifeCycleMethodGraph>(callees_);
  // handle graph
}


return true;
}
Expand All @@ -202,16 +280,16 @@ void LifecycleMethod::create_methods(
// in the DexMethod's code. The register location will be used to create the
// invoke operation for methods that take a given DexType* as its argument.
TypeIndexMap type_index_map;
for (const auto& callee : callees_) {
const auto* type_list = callee.get_argument_types();
if (type_list == nullptr) {
ERROR(1, "Callee `{}` has invalid argument types.", callee.to_string());
continue;
}
for (auto* type : *type_list) {
type_index_map.emplace(type, type_index_map.size() + 1);
}
}
// for (const auto& callee : callees_) {
// const auto* type_list = callee.get_argument_types();
// if (type_list == nullptr) {
// ERROR(1, "Callee `{}` has invalid argument types.", callee.to_string());
// continue;
// }
// for (auto* type : *type_list) {
// type_index_map.emplace(type, type_index_map.size() + 1);
// }
// }

auto* base_class_type = DexType::get_type(base_class_name_);
// Base class should exist. See validate().
Expand Down Expand Up @@ -319,27 +397,27 @@ const DexMethod* MT_NULLABLE LifecycleMethod::create_dex_method(
mt_assert(dex_klass != nullptr);

int callee_count = 0;
for (const auto& callee : callees_) {
auto* dex_method = callee.get_dex_method(dex_klass);
if (!dex_method) {
// Dex method does not apply for current APK.
// See `LifecycleMethod::validate()`.
continue;
}

++callee_count;

std::vector<Location> invoke_with_registers{this_location};
auto* type_list = callee.get_argument_types();
// This should have been verified at the start of `create_methods`
mt_assert(type_list != nullptr);
for (auto* type : *type_list) {
auto argument_register = method.get_local(type_index_map.at(type));
invoke_with_registers.push_back(argument_register);
}
main_block->invoke(
IROpcode::OPCODE_INVOKE_VIRTUAL, dex_method, invoke_with_registers);
}
// for (const auto& callee : callees_) {
// auto* dex_method = callee.get_dex_method(dex_klass);
// if (!dex_method) {
// // Dex method does not apply for current APK.
// // See `LifecycleMethod::validate()`.
// continue;
// }

// ++callee_count;

// std::vector<Location> invoke_with_registers{this_location};
// auto* type_list = callee.get_argument_types();
// // This should have been verified at the start of `create_methods`
// mt_assert(type_list != nullptr);
// for (auto* type : *type_list) {
// auto argument_register = method.get_local(type_index_map.at(type));
// invoke_with_registers.push_back(argument_register);
// }
// main_block->invoke(
// IROpcode::OPCODE_INVOKE_VIRTUAL, dex_method, invoke_with_registers);
// }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Obviously, we will need to keep this for now, otherwise this will break existing lifecycle configurations.


if (callee_count < 2) {
// The point of life-cycle methods is to find flows where tainted member
Expand Down
31 changes: 29 additions & 2 deletions source/LifecycleMethod.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,33 @@ class LifecycleMethodCall {
std::optional<std::string> defined_in_derived_class_;
};

class LifeCycleMethodGraph {
public:
LifeCycleMethodGraph() : entry_point_("", "", {}, std::nullopt) {}
void addNode(const LifecycleMethodCall& node);
void addEdge(const LifecycleMethodCall& from, const LifecycleMethodCall& to);
const std::vector<LifecycleMethodCall>& getNeighbours(const LifecycleMethodCall& node) const;
bool operator==(const LifeCycleMethodGraph& other) const;

INCLUDE_DEFAULT_COPY_CONSTRUCTORS_AND_ASSIGNMENTS(LifeCycleMethodGraph)

static LifeCycleMethodGraph from_json(const Json::Value& value);

private:
struct NodeHasher {
std::size_t operator()(const LifecycleMethodCall& node) const {
return std::hash<std::string>{}(node.to_string());
}
};

std::unordered_map<LifecycleMethodCall, std::vector<LifecycleMethodCall>, NodeHasher> adj_list_;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This representation doesn't seem quite right. Why is the key a MethodCall itself? I thought we said it should be the name used as the key in the json. It should just be a string.
The parsing logic is quite weird, since we seem to create one node for each call, but a node is a std::vector. Basically, you are never using vectors of more than 1 element.

I think this should be redesigned to be something like:
std::unordered_map<std::string, LifecycleGraphNode> nodes_
And LifecycleGraphNode should have a std::vector<LifecycleMethodCall> as well as a std::vector<std::string> successors.


// Define the entry point of the graph
LifecycleMethodCall entry_point_;


};

/**
* A life-cycle method represents a collection of artificial DexMethods that
* simulate the life-cycle of a class.
Expand Down Expand Up @@ -126,7 +153,7 @@ class LifecycleMethod {
explicit LifecycleMethod(
std::string base_class_name,
std::string method_name,
std::vector<LifecycleMethodCall> callees)
std::variant<std::vector<LifecycleMethodCall>,LifeCycleMethodGraph> callees)
: base_class_name_(std::move(base_class_name)),
method_name_(std::move(method_name)),
callees_(std::move(callees)) {}
Expand Down Expand Up @@ -174,7 +201,7 @@ class LifecycleMethod {

std::string base_class_name_;
std::string method_name_;
std::vector<LifecycleMethodCall> callees_;
std::variant<std::vector<LifecycleMethodCall>,LifeCycleMethodGraph> callees_;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe rename this to body_, since this represents the method body (i.e., its content).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

^ rename this body_ as said before.

ConcurrentMap<const DexType*, const Method*> class_to_lifecycle_method_;
};

Expand Down
Loading