Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prevent GCC using FP registers for pointer storage on arm64 Linux #193

Merged
merged 2 commits into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions binding.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"bindings/profilers/heap.cc",
"bindings/profilers/wall.cc",
"bindings/per-isolate-data.cc",
"bindings/profile-translator.cc",
"bindings/thread-cpu-clock.cc",
"bindings/translate-heap-profile.cc",
"bindings/translate-time-profile.cc",
Expand Down
23 changes: 23 additions & 0 deletions bindings/general-regs-only.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Copyright 2024 Datadog, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#if defined(__linux__) && defined(__aarch64__)
#define GENERAL_REGS_ONLY __attribute__((target("general-regs-only")))
#else
#define GENERAL_REGS_ONLY
#endif
22 changes: 22 additions & 0 deletions bindings/profile-translator.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Copyright 2025 Datadog, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "profile-translator.hh"
#include <v8.h>

v8::Local<v8::Number> dd::ProfileTranslator::NewNumber(int64_t x) {
return v8::Number::New(isolate, static_cast<double>(x));
}
5 changes: 1 addition & 4 deletions bindings/profile-translator.hh
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,7 @@ class ProfileTranslator {
return v8::Boolean::New(isolate, x);
}

template <typename T>
v8::Local<v8::Number> NewNumber(T x) {
return v8::Number::New(isolate, x);
}
v8::Local<v8::Number> NewNumber(int64_t x);

v8::Local<v8::Array> NewArray(int length) {
return length == 0 ? emptyArray : v8::Array::New(isolate, length);
Expand Down
3 changes: 2 additions & 1 deletion bindings/profilers/heap.hh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#pragma once

#include <nan.h>
#include "general-regs-only.hh"

namespace dd {

Expand All @@ -34,7 +35,7 @@ class HeapProfiler {
// getAllocationProfile(): AllocationProfileNode
static NAN_METHOD(GetAllocationProfile);

static NAN_METHOD(MonitorOutOfMemory);
static NAN_METHOD(MonitorOutOfMemory) GENERAL_REGS_ONLY;

static NAN_MODULE_INIT(Init);
};
Expand Down
38 changes: 23 additions & 15 deletions bindings/profilers/wall.cc
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,8 @@ void SignalHandler::HandleProfilerSignal(int sig,
auto time_from = Now();
old_handler(sig, info, context);
auto time_to = Now();
double async_id = node::AsyncHooksGetExecutionAsyncId(isolate);
int64_t async_id =
static_cast<int64_t>(node::AsyncHooksGetExecutionAsyncId(isolate));
prof->PushContext(time_from, time_to, cpu_time, async_id);
}
#else
Expand Down Expand Up @@ -369,10 +370,17 @@ static int64_t GetV8ToEpochOffset() {
return V8toEpochOffset;
}

ContextsByNode WallProfiler::GetContextsByNode(CpuProfile* profile,
ContextBuffer& contexts,
int64_t startCpuTime) {
ContextsByNode contextsByNode;
Local<Number> NewNumberFromInt64(Isolate* isolate, int64_t value) {
return Number::New(isolate, static_cast<double>(value));
}

std::shared_ptr<ContextsByNode> CreateContextsByNode() {
return std::make_shared<ContextsByNode>();
}

std::shared_ptr<ContextsByNode> WallProfiler::GetContextsByNode(
CpuProfile* profile, ContextBuffer& contexts, int64_t startCpuTime) {
auto contextsByNode = CreateContextsByNode();

auto sampleCount = profile->GetSamplesCount();
if (contexts.empty() || sampleCount == 0) {
Expand Down Expand Up @@ -432,11 +440,11 @@ ContextsByNode WallProfiler::GetContextsByNode(CpuProfile* profile,
break;
} else {
// This sample context is the closest to this sample.
auto it = contextsByNode.find(sample);
auto it = contextsByNode->find(sample);
Local<Array> array;
if (it == contextsByNode.end()) {
if (it == contextsByNode->end()) {
array = Array::New(isolate);
contextsByNode[sample] = {array, 1};
(*contextsByNode)[sample] = {array, 1};
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this clean enough, or is there a better idiom to call the [] operator on the target of a pointer? I can also replace it with

contextsByNode->emplace(sample, (struct NodeInfo){array, 1});

} else {
array = it->second.contexts;
++it->second.hitcount;
Expand All @@ -459,17 +467,17 @@ ContextsByNode WallProfiler::GetContextsByNode(CpuProfile* profile,
// sample
if (collectCpuTime_ && !isIdleOrProgram(sample)) {
timedContext
->Set(
v8Context,
cpuTimeKey,
Number::New(isolate, sampleContext.cpu_time - lastCpuTime))
->Set(v8Context,
cpuTimeKey,
NewNumberFromInt64(isolate,
sampleContext.cpu_time - lastCpuTime))
.Check();
lastCpuTime = sampleContext.cpu_time;
}
timedContext
->Set(v8Context,
asyncIdKey,
Number::New(isolate, sampleContext.async_id))
NewNumberFromInt64(isolate, sampleContext.async_id))
.Check();
array->Set(v8Context, array->Length(), timedContext).Check();
}
Expand Down Expand Up @@ -881,7 +889,7 @@ Result WallProfiler::StopImpl(bool restart, v8::Local<v8::Value>& profile) {

profile = TranslateTimeProfile(v8_profile,
includeLines_,
&contextsByNode,
contextsByNode,
collectCpuTime_,
nonJSThreadsCpuTime);

Expand Down Expand Up @@ -1018,7 +1026,7 @@ NAN_METHOD(WallProfiler::Dispose) {
void WallProfiler::PushContext(int64_t time_from,
int64_t time_to,
int64_t cpu_time,
double async_id) {
int64_t async_id) {
// Be careful this is called in a signal handler context therefore all
// operations must be async signal safe (in particular no allocations).
// Our ring buffer avoids allocations.
Expand Down
19 changes: 11 additions & 8 deletions bindings/profilers/wall.hh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#pragma once

#include "contexts.hh"
#include "general-regs-only.hh"
#include "thread-cpu-clock.hh"

#include <nan.h>
Expand Down Expand Up @@ -82,7 +83,7 @@ class WallProfiler : public Nan::ObjectWrap {
int64_t time_from;
int64_t time_to;
int64_t cpu_time;
double async_id;
int64_t async_id;
};

using ContextBuffer = std::vector<SampleContext>;
Expand All @@ -95,9 +96,10 @@ class WallProfiler : public Nan::ObjectWrap {
// to work around https://bugs.chromium.org/p/v8/issues/detail?id=11051.
v8::CpuProfiler* CreateV8CpuProfiler();

ContextsByNode GetContextsByNode(v8::CpuProfile* profile,
ContextBuffer& contexts,
int64_t startCpuTime);
std::shared_ptr<ContextsByNode> GetContextsByNode(v8::CpuProfile* profile,
ContextBuffer& contexts,
int64_t startCpuTime)
GENERAL_REGS_ONLY;

bool waitForSignal(uint64_t targetCallCount = 0);

Expand All @@ -122,10 +124,11 @@ class WallProfiler : public Nan::ObjectWrap {
void PushContext(int64_t time_from,
int64_t time_to,
int64_t cpu_time,
double async_id);
int64_t async_id);
Result StartImpl();
std::string StartInternal();
Result StopImpl(bool restart, v8::Local<v8::Value>& profile);
Result StopImpl(bool restart,
v8::Local<v8::Value>& profile) GENERAL_REGS_ONLY;

CollectionMode collectionMode() {
auto res = collectionMode_.load(std::memory_order_relaxed);
Expand All @@ -146,12 +149,12 @@ class WallProfiler : public Nan::ObjectWrap {
return threadCpuStopWatch_.GetAndReset();
}

static NAN_METHOD(New);
static NAN_METHOD(New) GENERAL_REGS_ONLY;
static NAN_METHOD(Start);
static NAN_METHOD(Stop);
static NAN_METHOD(V8ProfilerStuckEventLoopDetected);
static NAN_METHOD(Dispose);
static NAN_MODULE_INIT(Init);
static NAN_MODULE_INIT(Init) GENERAL_REGS_ONLY;
static NAN_GETTER(GetContext);
static NAN_SETTER(SetContext);
static NAN_GETTER(SharedArrayGetter);
Expand Down
3 changes: 2 additions & 1 deletion bindings/translate-heap-profile.hh
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@
#pragma once

#include <v8-profiler.h>
#include "general-regs-only.hh"

namespace dd {

v8::Local<v8::Value> TranslateAllocationProfile(
v8::AllocationProfile::Node* node);
v8::AllocationProfile::Node* node) GENERAL_REGS_ONLY;

} // namespace dd
18 changes: 10 additions & 8 deletions bindings/translate-time-profile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@
*/

#include "translate-time-profile.hh"
#include "general-regs-only.hh"
#include "profile-translator.hh"

namespace dd {

namespace {
class TimeProfileTranslator : ProfileTranslator {
private:
ContextsByNode* contextsByNode;
std::shared_ptr<ContextsByNode> contextsByNode;
v8::Local<v8::Array> emptyArray = NewArray(0);
v8::Local<v8::Integer> zero = NewInteger(0);

Expand Down Expand Up @@ -80,7 +81,7 @@ class TimeProfileTranslator : ProfileTranslator {
}

v8::Local<v8::Array> GetLineNumberTimeProfileChildren(
const v8::CpuProfileNode* node) {
const v8::CpuProfileNode* node) GENERAL_REGS_ONLY {
unsigned int index = 0;
v8::Local<v8::Array> children;
int32_t count = node->GetChildrenCount();
Expand Down Expand Up @@ -200,7 +201,7 @@ class TimeProfileTranslator : ProfileTranslator {
}

public:
explicit TimeProfileTranslator(ContextsByNode* nls = nullptr)
explicit TimeProfileTranslator(std::shared_ptr<ContextsByNode> nls = nullptr)
: contextsByNode(nls) {}

v8::Local<v8::Value> TranslateTimeProfile(const v8::CpuProfile* profile,
Expand Down Expand Up @@ -230,11 +231,12 @@ class TimeProfileTranslator : ProfileTranslator {
};
} // namespace

v8::Local<v8::Value> TranslateTimeProfile(const v8::CpuProfile* profile,
bool includeLineInfo,
ContextsByNode* contextsByNode,
bool hasCpuTime,
int64_t nonJSThreadsCpuTime) {
v8::Local<v8::Value> TranslateTimeProfile(
const v8::CpuProfile* profile,
bool includeLineInfo,
std::shared_ptr<ContextsByNode> contextsByNode,
bool hasCpuTime,
int64_t nonJSThreadsCpuTime) {
return TimeProfileTranslator(contextsByNode)
.TranslateTimeProfile(
profile, includeLineInfo, hasCpuTime, nonJSThreadsCpuTime);
Expand Down
2 changes: 1 addition & 1 deletion bindings/translate-time-profile.hh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ namespace dd {
v8::Local<v8::Value> TranslateTimeProfile(
const v8::CpuProfile* profile,
bool includeLineInfo,
ContextsByNode* contextsByNode = nullptr,
std::shared_ptr<ContextsByNode> contextsByNode = nullptr,

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: you could have kept a raw pointer for contextsByNode in translate-time-profile.cc/hh

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, but figured why not be consistent. If I already have a safe shared pointer around I might as well use it.

bool hasCpuTime = false,
int64_t nonJSThreadsCpuTime = 0);

Expand Down
Loading