Skip to content

Commit

Permalink
fix: fix dynamic callgraph generator
Browse files Browse the repository at this point in the history
The execution keeps track of the executed functions instead of relying on the stack trace id.
It also supports callbacks: every function that is passed as a parameter and then executed is considered a callback.
  • Loading branch information
tdurieux committed Sep 30, 2024
1 parent 3b8d19e commit 62b7ae4
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 68 deletions.
45 changes: 25 additions & 20 deletions deps/v8/src/interpreter/bytecode-generator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1398,13 +1398,13 @@ void BytecodeGenerator::GenerateBytecode(uintptr_t stack_limit) {
// the function information
std::vector<std::string> funcs;
// the function information
std::map<std::string, int> funcMap;
std::map<std::string, uint32_t> func2id;
// the incremental ID of the functions
int funcID = 0;
uint32_t global_func_id = 0;
// the depth of the stack to collect
int stackDepth = 100;
// should node functions be traced
bool traceNode = false;
bool trace_all = false;

bool isInit = false;
void CleanupAtExit() {
Expand Down Expand Up @@ -1468,7 +1468,7 @@ void BytecodeGenerator::GenerateBytecodeBody() {
stackDepth = std::stoi(traceDepthEnv);
}
isInit = true;
traceNode = std::getenv("TRACE_ALL") ? true : false;
trace_all = std::getenv("TRACE_ALL") ? true : false;
atexit(CleanupAtExit);
}
// Build the arguments object if it is used.
Expand Down Expand Up @@ -1534,15 +1534,20 @@ void BytecodeGenerator::GenerateBytecodeBody() {
.ToCString(DISALLOW_NULLS, ROBUST_STRING_TRAVERSAL)
.get()
: "<unknown>";
bool toTrace =
traceNode ? true : path.find("node:") != 0 && !script_.is_null();
if (toTrace && literal->function_literal_id() != 0) {
bool to_trace = true;
if (!trace_all) {
to_trace = (path.find("node:") != 0 || path.find("node:timers") == 0) &&
path.find("/ts-node/") == -1 &&
path.find("/typescript/") == -1 && path.find("/npm/") == -1
&& path.find("/source-map-support/") == -1;
}
if (to_trace) {
int start_position = literal->start_position();
int end_position = literal->end_position();
std::string functionKey = std::to_string(literal->function_literal_id()) +
std::string function_key = std::to_string(literal->function_literal_id()) +
"\t" + std::to_string(start_position) + "\t" +
std::to_string(end_position) + "\t" + path;
Script::PositionInfo info = Script::PositionInfo();
Script::PositionInfo pos = Script::PositionInfo();
if (!script_.is_null()) {
int position = literal->function_token_position();
if (position == kNoSourcePosition) {
Expand All @@ -1552,17 +1557,17 @@ void BytecodeGenerator::GenerateBytecodeBody() {
position = literal->start_position();
}
if (position != kNoSourcePosition) {
Script::GetPositionInfo(script_, position, &info, Script::NO_OFFSET);
Script::GetPositionInfo(script_, position, &pos, Script::NO_OFFSET);
}
}
int func_id = -1;
if (funcMap.find(functionKey) == funcMap.end()) {
func_id = funcID++;
funcMap[functionKey] = func_id;
create_and_add_func_name(func_id, literal, info, isConstructor,
functionKey.c_str());
uint32_t func_id = std::numeric_limits<uint32_t>::max();
if (func2id.find(function_key) == func2id.end()) {
func_id = global_func_id++;
func2id[function_key] = func_id;
create_and_add_func_name(func_id, literal, pos, isConstructor,
function_key.c_str());
} else {
func_id = funcMap[functionKey];
func_id = func2id[function_key];
}
RegisterList args = register_allocator()->NewRegisterList(2);
builder()
Expand Down Expand Up @@ -1590,15 +1595,15 @@ void BytecodeGenerator::BuildReturn(int source_position) {
std::string path = String::cast(script_->name())
.ToCString(DISALLOW_NULLS, ROBUST_STRING_TRAVERSAL)
.get();
bool toTrace = traceNode ? true : path.find("node:") != 0;
bool toTrace = trace_all ? true : path.find("node:") != 0;
if (toTrace && stackDepth < 2) {
auto literal = info()->literal();
int start_position = literal->start_position();
int end_position = literal->end_position();
std::string functionKey = std::to_string(literal->function_literal_id()) +
std::string function_key = std::to_string(literal->function_literal_id()) +
"\t" + std::to_string(start_position) + "\t" +
std::to_string(end_position) + "\t" + path;
int func_id = funcMap[functionKey];
int func_id = func2id[function_key];
RegisterAllocationScope register_scope(this);
// Runtime returns {result} value, preserving accumulator.
RegisterList result = register_allocator()->NewRegisterList(2);
Expand Down
92 changes: 71 additions & 21 deletions deps/v8/src/runtime/runtime-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1250,8 +1250,31 @@ constexpr size_t BUFFER_SIZE = 1024 * 1024; // 1 MB buffer
std::array<char, BUFFER_SIZE> buffer;
size_t bufferPos = 0;

std::map<StackFrameId, int> frameMap;
std::unordered_set<std::string> printedTraces;
// One caller -> callee edge of the dynamic call graph, expressed with the
// numeric function ids. Both ids start out as UINT32_MAX, the value the
// tracing code checks for to detect an id that has not been resolved.
struct CallerId {
  uint32_t callee = UINT32_MAX;
  uint32_t caller = UINT32_MAX;

  // Two edges are equal only when both endpoints match.
  bool operator==(const CallerId& rhs) const {
    if (callee != rhs.callee) {
      return false;
    }
    return caller == rhs.caller;
  }

  // Folds both ids into a single hash value so the edge can be used as a key
  // in hashed containers.
  std::size_t Hash() const {
    const std::size_t combined = base::hash_combine(callee, caller);
    return combined;
  }
};

// Custom hash function for CallerId
struct CallerIdHash {
std::size_t operator()(const CallerId& id) const {
return id.Hash();
}
};

// Use the new structure in the unordered_set
std::unordered_set<CallerId, CallerIdHash> known_calls;
std::map<internal::Address, uint32_t> address2func;
std::map<internal::Address, uint32_t> cb2func;

void FlushBuffer() {
if (bufferPos > 0) {
Expand Down Expand Up @@ -1309,40 +1332,67 @@ RUNTIME_FUNCTION(Runtime_TraceEnter) {
std::atexit(SaveAtExit);
}

uint32_t funcId = NumberToUint32(args[1]);
CallerId call_id;
call_id.callee = NumberToUint32(args[1]);
if (call_id.callee == std::numeric_limits<uint32_t>::max()) {
return ReadOnlyRoots(isolate).undefined_value();
}

if (collectStackDepth > 1) {
std::string stackTrace;
stackTrace.reserve(40);
std::vector<internal::Address> cb;

JavaScriptStackFrameIterator it(isolate);
int level = 0;
while (!it.done()) {
if (level > 0) {
stackTrace += ',';
auto frame = it.frame();
if (frame->is_java_script()) {
auto func = frame->function();
auto func_address = func.shared().address();
if (level == 0) {
address2func[func_address] = call_id.callee;
const int length = frame->ComputeParametersCount();
for (int i = 0; i < length; ++i) {
auto param = frame->GetParameter(i);
if (param.IsJSFunction()) {
cb.push_back(JSFunction::cast(param).shared().address());
}
}
// the cb is called, add edge between the cb and the caller
if (cb2func.find(func_address) != cb2func.end()) {
AppendIntToBuffer(cb2func[func_address]);
AppendCharToBuffer('\t');
AppendIntToBuffer(call_id.callee);
AppendCharToBuffer('\n');
}
} else if (level >= 1) {
if (address2func.find(func_address) != address2func.end()) {
call_id.caller = address2func[func_address];
break;
}
}
}
StackFrameId frameId = it.frame()->id();
if (frameMap.find(frameId) != frameMap.end() ||
level > collectStackDepth) {
stackTrace += std::to_string(frameMap[frameId]);
break;
}
int new_frame_id = frameMap.size();
frameMap[frameId] = new_frame_id;
stackTrace += std::to_string(new_frame_id);
++level;
it.Advance();
}
stackTrace += '\t';
stackTrace += std::to_string(funcId);
if (printedTraces.insert(stackTrace).second) {
if (call_id.caller == UINT32_MAX) {
return ReadOnlyRoots(isolate).undefined_value();
}
// JavaScriptFrame::PrintTop(isolate, stdout, false, true);
if (known_calls.insert(call_id).second) {
// This is a new stack trace, so we print it
AppendToBuffer(stackTrace.c_str(), stackTrace.length());
AppendIntToBuffer(call_id.caller);
AppendCharToBuffer('\t');
AppendIntToBuffer(call_id.callee);
AppendCharToBuffer('\n');
}
if (cb.size() > 0) {
for (auto& c : cb) {
cb2func[c] = call_id.caller;
}
}
} else {
AppendCharToBuffer('I');
AppendIntToBuffer(funcId);
AppendIntToBuffer(call_id.callee);
AppendCharToBuffer('\n');
}

Expand Down
46 changes: 19 additions & 27 deletions nodeCG2endor/nodeCG2endor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,19 @@ def func2name(data, known_funcs):
if data['position']['start'] == 0:
name = ''
else:
name = f".anonymous_function_{data['position']['line']}_{data['position']['column']}"
else:
name += f"_{data['position']['line']}_{data['position']['column']}"
name = f"anonymous_function_{data['position']['line']}_{data['position']['column']}"
# else:
# name += f"_{data['position']['line']}_{data['position']['column']}"
name += "()"
name = name.replace("Object.<anonymous>.","").replace(".","/")
data['func_name'] = name
data['func_prefix'] = prefix
name = prefix + name
if data['position']['file'].startswith('node:'):
return f"node://node/[node:{data['position']['file'].replace('node:',
'')}]/{name}()"
'')}]/{name}"
pack, version, pack_f_path = get_package_npm_version(data['position']['file'])
return f"javascript://{pack}${version}/[{pack}:{version}:{pack_f_path.replace(".js", "")}]/{name}()"
return f"javascript://{pack}${version}/[{pack}:{version}:{pack_f_path.replace(".js", "")}]/{name}"

package_cache = {}
def get_package_npm_version(p):
Expand Down Expand Up @@ -75,48 +77,36 @@ def readFuncInfoFile(p='func.tsv'):

def readCGFile(global_id_map, p='cg.tsv', function_calls={}):
print(f"Process file: {p}")
stack_2_func = {}
stack_parents = {}
call_in_stack = []
with open(p, 'r') as f:
for line in f:
line = line.rstrip()
if len(line) == 0:
continue
if line[0] in ['I', 'O']:
func_id = global_id_map[int(line[1:])]
called_func = global_id_map[int(line[1:])]
if line[0] == 'I':
if len(call_in_stack) > 0:
if call_in_stack[-1] not in function_calls:
function_calls[call_in_stack[-1]] = set()
function_calls[call_in_stack[-1]].add(func_id)
call_in_stack.append(func_id)
function_calls[call_in_stack[-1]].add(called_func)
call_in_stack.append(called_func)
elif line[0] == 'O':
index = len(call_in_stack) - 1
while call_in_stack[index] != func_id:
while call_in_stack[index] != called_func:
call_in_stack.pop()
index -= 1
call_in_stack.pop()
else:
split_line = line.split('\t')
if len(split_line) != 2:
continue
(stack, func_id) = split_line
func_id = global_id_map[int(func_id)]
stack_ids = stack.split(',')
stack_id = int(stack_ids[0])
stack_parent = int(stack_ids[-1])
if stack_id in stack_parents:
stack_parent = stack_parents[stack_id]
else:
stack_parents[stack_id] = stack_parent
stack_2_func[stack_id] = func_id
if stack_parent is stack_id:
continue
if stack_parent in stack_2_func:
if stack_2_func[stack_parent] not in function_calls:
function_calls[stack_2_func[stack_parent]] = set()
function_calls[stack_2_func[stack_parent]].add(func_id)
(caller_funct, called_func) = split_line
called_func = global_id_map[int(called_func)]
caller_funct = global_id_map[int(caller_funct)]
if caller_funct not in function_calls:
function_calls[caller_funct] = set()
function_calls[caller_funct].add(called_func)
return function_calls

def merge_func_infos(func_infos):
Expand All @@ -142,6 +132,8 @@ def merge_func_infos(func_infos):
for file in os.listdir(project_root):
if file.endswith('.tsv') and file.startswith('func_'):
proc = file.split('_')[1].replace('.tsv', '')
if not os.path.exists(os.path.join(project_root, f"cg_{proc}.tsv")):
continue
func_infos[proc] = (readFuncInfoFile(os.path.join(project_root, file)))
func_info, map_id = merge_func_infos(func_infos)

Expand Down

0 comments on commit 62b7ae4

Please sign in to comment.