Skip to content

[6.36][ntuple] properly handle unknown columns in CreateModel() #19574

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tree/ntuple/inc/ROOT/RField.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ public:
/// The type given to RFieldBase::Create was unknown
kUnknownType,
/// The field could not be created because its descriptor had an unknown structural role
kUnknownStructure
kUnknownStructure,
};

private:
Expand Down
3 changes: 2 additions & 1 deletion tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -704,9 +704,10 @@ public:
/// If set to true, projected fields will be reconstructed as such. This will prevent the model to be used
/// with an RNTupleReader, but it is useful, e.g., to accurately merge data.
bool fReconstructProjections = false;
/// By default, creating a model will fail if any of the reconstructed fields contains an unknown column type
/// or an unknown field structural role.
/// If this option is enabled, the model will be created and all fields containing unknown data (directly
/// or indirectly) will be skipped instead.
/// Normally creating a model will fail if any of the reconstructed fields contains an unknown column type.
bool fForwardCompatible = false;
/// If true, the model will be created without a default entry (bare model).
bool fCreateBare = false;
Expand Down
48 changes: 46 additions & 2 deletions tree/ntuple/src/RNTupleDescriptor.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ ROOT::RFieldDescriptor::CreateField(const RNTupleDescriptor &ntplDesc, const ROO
}
}

// Untyped records and collections
if (GetTypeName().empty()) {
switch (GetStructure()) {
case ROOT::ENTupleStructure::kRecord: {
Expand Down Expand Up @@ -138,7 +139,7 @@ ROOT::RFieldDescriptor::CreateField(const RNTupleDescriptor &ntplDesc, const ROO
}

return field;
} catch (RException &ex) {
} catch (const RException &ex) {
if (options.GetReturnInvalidOnError())
return std::make_unique<ROOT::RInvalidField>(GetFieldName(), GetTypeName(), ex.GetError().GetReport(),
ROOT::RInvalidField::RCategory::kGeneric);
Expand Down Expand Up @@ -639,6 +640,31 @@ ROOT::RResult<void> ROOT::RNTupleDescriptor::DropClusterGroupDetails(ROOT::Descr

std::unique_ptr<ROOT::RNTupleModel> ROOT::RNTupleDescriptor::CreateModel(const RCreateModelOptions &options) const
{
// Collect all top-level fields that have invalid columns (recursively): by default if we find any we throw an
// exception; if we are in ForwardCompatible mode, we proceed but skip of all those top-level fields.
std::unordered_set<ROOT::DescriptorId_t> invalidFields;
for (const auto &colDesc : GetColumnIterable()) {
if (colDesc.GetType() == ROOT::ENTupleColumnType::kUnknown) {
auto fieldId = colDesc.GetFieldId();
while (1) {
const auto &field = GetFieldDescriptor(fieldId);
if (field.GetParentId() == GetFieldZeroId())
break;
fieldId = field.GetParentId();
}
invalidFields.insert(fieldId);

// No need to look for all invalid fields if we're gonna error out anyway
if (!options.GetForwardCompatible())
break;
}
}

if (!options.GetForwardCompatible() && !invalidFields.empty())
throw ROOT::RException(R__FAIL(
"cannot create Model: descriptor contains unknown column types. Use 'SetForwardCompatible(true)' on the "
"RCreateModelOptions to create a partial model containing only the fields made up by known columns."));

auto fieldZero = std::make_unique<ROOT::RFieldZero>();
fieldZero->SetOnDiskId(GetFieldZeroId());
auto model = options.GetCreateBare() ? RNTupleModel::CreateBare(std::move(fieldZero))
Expand All @@ -647,9 +673,27 @@ std::unique_ptr<ROOT::RNTupleModel> ROOT::RNTupleDescriptor::CreateModel(const R
createFieldOpts.SetReturnInvalidOnError(options.GetForwardCompatible());
createFieldOpts.SetEmulateUnknownTypes(options.GetEmulateUnknownTypes());
for (const auto &topDesc : GetTopLevelFields()) {
if (invalidFields.count(topDesc.GetId()) > 0) {
// Field contains invalid columns: skip it
continue;
}

auto field = topDesc.CreateField(*this, createFieldOpts);
if (field->GetTraits() & ROOT::RFieldBase::kTraitInvalidField)

// If we got an InvalidField here, figure out if it's a hard error or if the field must simply be skipped.
// The only case where it's not a hard error is if the field has an unknown structure, as that case is
// covered by the ForwardCompatible flag (note that if the flag is off we would not get here
// in the first place, so we don't need to check for that flag again).
if (field->GetTraits() & ROOT::RFieldBase::kTraitInvalidField) {
const auto &invalid = static_cast<const RInvalidField &>(*field);
const auto cat = invalid.GetCategory();
bool mustThrow = cat != RInvalidField::RCategory::kUnknownStructure;
if (mustThrow)
throw invalid.GetError();

// Not a hard error: skip the field and go on.
continue;
}

if (options.GetReconstructProjections() && topDesc.IsProjectedField()) {
model->AddProjectedField(std::move(field), [this](const std::string &targetName) -> std::string {
Expand Down
52 changes: 36 additions & 16 deletions tree/ntuple/test/ntuple_compat.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ class ROOT::RField<ROOT::Internal::RTestFutureColumn> final : public RSimpleFiel
}

public:
static std::string TypeName() { return "FutureColumn"; }
static std::string TypeName() { return "ROOT::Internal::RTestFutureColumn"; }
explicit RField(std::string_view name) : RSimpleField(name, TypeName()) {}
RField(RField &&other) = default;
RField &operator=(RField &&other) = default;
Expand Down Expand Up @@ -196,18 +196,24 @@ TEST(RNTupleCompat, FutureColumnType)
const auto &cdesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[0]);
EXPECT_EQ(cdesc.GetType(), ROOT::ENTupleColumnType::kUnknown);

{
// Creating a model not in fwd-compatible mode should fail
EXPECT_THROW(desc.CreateModel(), ROOT::RException);
try {
desc.CreateModel();
FAIL() << "Creating a model not in fwd-compatible mode should fail";
} catch (const ROOT::RException &ex) {
EXPECT_THAT(ex.what(), testing::HasSubstr("unknown column types"));
}

{
auto modelOpts = RNTupleDescriptor::RCreateModelOptions();
modelOpts.SetForwardCompatible(true);
auto model = desc.CreateModel(modelOpts);

// The future column should not show up in the model
EXPECT_THROW(model->GetConstField("futureColumn"), ROOT::RException);
try {
model->GetConstField("futureColumn");
FAIL() << "the future column should not show up in the model";
} catch (const ROOT::RException &ex) {
EXPECT_THAT(ex.what(), testing::HasSubstr("invalid field"));
}

const auto &floatFld = model->GetConstField("float");
EXPECT_EQ(floatFld.GetTypeName(), "float");
Expand Down Expand Up @@ -244,22 +250,28 @@ TEST(RNTupleCompat, FutureColumnType_Nested)
const auto &desc = reader->GetDescriptor();
const auto futureId = desc.FindFieldId("future");
const auto &fdesc = desc.GetFieldDescriptor(desc.FindFieldId("vec._0", futureId));
GTEST_ASSERT_EQ(fdesc.GetLogicalColumnIds().size(), 1);
ASSERT_EQ(fdesc.GetLogicalColumnIds().size(), 1);
const auto &cdesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[0]);
EXPECT_EQ(cdesc.GetType(), ROOT::ENTupleColumnType::kUnknown);

{
// Creating a model not in fwd-compatible mode should fail
EXPECT_THROW(desc.CreateModel(), ROOT::RException);
try {
desc.CreateModel();
FAIL() << "Creating a model not in fwd-compatible mode should fail";
} catch (const ROOT::RException &ex) {
EXPECT_THAT(ex.what(), testing::HasSubstr("unknown column types"));
}

{
auto modelOpts = RNTupleDescriptor::RCreateModelOptions();
modelOpts.SetForwardCompatible(true);
auto model = desc.CreateModel(modelOpts);

// The future column should not show up in the model
EXPECT_THROW(model->GetConstField("future"), ROOT::RException);
try {
model->GetConstField("futureColumn");
FAIL() << "the future column should not show up in the model";
} catch (const ROOT::RException &ex) {
EXPECT_THAT(ex.what(), testing::HasSubstr("invalid field"));
}

const auto &floatFld = model->GetConstField("float");
EXPECT_EQ(floatFld.GetTypeName(), "float");
Expand Down Expand Up @@ -308,8 +320,12 @@ TEST(RNTupleCompat, FutureFieldStructuralRole)
const auto &fdesc = desc.GetFieldDescriptor(desc.FindFieldId("future"));
EXPECT_EQ(fdesc.GetLogicalColumnIds().size(), 0);

// Attempting to create a model with default options should fail
EXPECT_THROW(desc.CreateModel(), ROOT::RException);
try {
desc.CreateModel();
FAIL() << "Attempting to create a model with default options should fail";
} catch (const ROOT::RException &err) {
EXPECT_THAT(err.what(), testing::HasSubstr("unexpected on-disk field structure"));
}

auto modelOpts = RNTupleDescriptor::RCreateModelOptions();
modelOpts.SetForwardCompatible(true);
Expand Down Expand Up @@ -345,8 +361,12 @@ TEST(RNTupleCompat, FutureFieldStructuralRole_Nested)
const auto &fdesc = desc.GetFieldDescriptor(desc.FindFieldId("record"));
EXPECT_EQ(fdesc.GetLogicalColumnIds().size(), 0);

// Attempting to create a model with default options should fail
EXPECT_THROW(desc.CreateModel(), ROOT::RException);
try {
desc.CreateModel();
FAIL() << "attempting to create a model with default options should fail";
} catch (const ROOT::RException &ex) {
EXPECT_THAT(ex.what(), testing::HasSubstr("unexpected on-disk field structure"));
}

auto modelOpts = RNTupleDescriptor::RCreateModelOptions();
modelOpts.SetForwardCompatible(true);
Expand Down
Loading