Skip to content

Commit

Permalink
pdb2cif work
Browse files Browse the repository at this point in the history
  • Loading branch information
mhekkel committed Jan 29, 2024
1 parent e597503 commit 12ee4a7
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 81 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ Testing/
include/cif++/exports.hpp
docs/api
docs/conf.py
build_ci/
build_ci/
data/components.cif
117 changes: 48 additions & 69 deletions src/pdb/pdb2cif.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1123,9 +1123,6 @@ void PDBFileParser::PreParseInput(std::istream &is)
if (lookahead.back() == '\r')
lookahead.pop_back();

// if (cif::starts_with(lookahead, "HEADER") == false)
// throw std::runtime_error("This does not look like a PDB file, should start with a HEADER line");

auto contNr = [&lookahead](int offset, int len) -> int
{
std::string cs = lookahead.substr(offset, len);
Expand Down Expand Up @@ -1558,52 +1555,54 @@ void PDBFileParser::ParseTitle()
// 11 - 80 Specification compound Description of the molecular components.
// list

std::string value{ mRec->vS(11) };
if (value.find(':') == std::string::npos)
{
// special case for dumb, stripped files
auto &comp = GetOrCreateCompound(1);
comp.mInfo["MOLECULE"] = value;
}
else
if (mRec->is("COMPND"))
{
SpecificationListParser p(value);

for (;;)
std::string value{ mRec->vS(11) };
if (value.find(':') == std::string::npos)
{
std::string key, val;
std::tie(key, val) = p.GetNextSpecification();

if (key.empty())
break;
// special case for dumb, stripped files
auto &comp = GetOrCreateCompound(1);
comp.mInfo["MOLECULE"] = value;
}
else
{
SpecificationListParser p(value);

if (not iequals(key, "MOL_ID") and mCompounds.empty())
for (;;)
{
if (cif::VERBOSE > 0)
std::cerr << "Ignoring invalid COMPND record\n";
break;
}
std::string key, val;
std::tie(key, val) = p.GetNextSpecification();

if (key == "MOL_ID")
{
auto &comp = GetOrCreateCompound(stoi(val));
comp.mTitle = title;
}
else if (key == "CHAIN")
{
for (auto c : cif::split<std::string>(val, ","))
if (key.empty())
break;

if (not iequals(key, "MOL_ID") and mCompounds.empty())
{
cif::trim(c);
mCompounds.back().mChains.insert(c[0]);
if (cif::VERBOSE > 0)
std::cerr << "Ignoring invalid COMPND record\n";
break;
}

if (key == "MOL_ID")
{
auto &comp = GetOrCreateCompound(stoi(val));
comp.mTitle = title;
}
else if (key == "CHAIN")
{
for (auto c : cif::split<std::string>(val, ","))
{
cif::trim(c);
mCompounds.back().mChains.insert(c[0]);
}
}
else
mCompounds.back().mInfo[key] = val;
}
else
mCompounds.back().mInfo[key] = val;
}
}

if (mRec->is("COMPND"))
GetNextRecord();
}

// SOURCE
Match("SOURCE", false);
Expand Down Expand Up @@ -1740,7 +1739,7 @@ void PDBFileParser::ParseTitle()
int n = 1;
cat = getCategory("audit_author");

value = { mRec->vS(11) };
std::string value = { mRec->vS(11) };
for (auto author : cif::split<std::string>(value, ",", true))
{
// clang-format off
Expand Down Expand Up @@ -4556,7 +4555,7 @@ void PDBFileParser::ConstructEntities()
std::string formula;
std::string type;
std::string nstd = ".";
std::string formulaWeight;
std::optional<float> formulaWeight;

if (compound != nullptr)
{
Expand All @@ -4567,7 +4566,7 @@ void PDBFileParser::ConstructEntities()
nstd = "y";

formula = compound->formula();
formulaWeight = std::to_string(compound->formula_weight());
formulaWeight = compound->formula_weight();
}

if (name.empty())
Expand All @@ -4594,7 +4593,7 @@ void PDBFileParser::ConstructEntities()
{ "id", cc },
{ "name", name },
{ "formula", formula },
{ "formula_weight", formulaWeight },
{ "formula_weight", formulaWeight, 3 },
{ "mon_nstd_flag", nstd },
{ "type", type }
});
Expand Down Expand Up @@ -4709,7 +4708,7 @@ void PDBFileParser::ConstructEntities()
}

if (formula_weight > 0)
entity["formula_weight"] = formula_weight;
entity.assign({ { "formula_weight", formula_weight, 3 } });
}
}

Expand Down Expand Up @@ -5578,31 +5577,6 @@ void PDBFileParser::ParseCrystallographic()

GetNextRecord();
}
else
{
// clang-format off

// no cryst1, make a simple one, like this:
// CRYST1 1.000 1.000 1.000 90.00 90.00 90.00 P 1 1
getCategory("cell")->emplace({
{ "entry_id", mStructureID }, // 1 - 6 Record name "CRYST1"
{ "length_a", 1 }, // 7 - 15 Real(9.3) a a (Angstroms).
{ "length_b", 1 }, // 16 - 24 Real(9.3) b b (Angstroms).
{ "length_c", 1 }, // 25 - 33 Real(9.3) c c (Angstroms).
{ "angle_alpha", 90 }, // 34 - 40 Real(7.2) alpha alpha (degrees).
{ "angle_beta", 90 }, // 41 - 47 Real(7.2) beta beta (degrees).
{ "angle_gamma", 90 }, // 48 - 54 Real(7.2) gamma gamma (degrees).
/* goes into symmetry */ // 56 - 66 LString sGroup Space group.
{ "Z_PDB", 1 } // 67 - 70 Integer z Z value.
});

getCategory("symmetry")->emplace({
{ "entry_id", mStructureID },
{ "space_group_name_H-M", "P 1" },
{ "Int_Tables_number", 1 }
});
// clang-format on
}
}

void PDBFileParser::ParseCoordinateTransformation()
Expand Down Expand Up @@ -6463,7 +6437,12 @@ file read(std::istream &is)
// and so the very first character in a valid PDB file
// is 'H'. It is as simple as that.

if (ch == 'h' or ch == 'H')
// Well, not quite, Unfortunately... People insisted that
// having only ATOM records also makes up a valid PDB file...
// Since mmCIF files cannot validly start with a letter character
// the test has changed into the following:

if (std::isalpha(ch))
read_pdb_file(is, result);
else
{
Expand Down
19 changes: 13 additions & 6 deletions src/pdb/reconstruct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -491,12 +491,13 @@ void checkAtomAnisotropRecords(datablock &db)
auto &atom_site = db["atom_site"];
auto &atom_site_anisotrop = db["atom_site_anisotrop"];

auto m_validator = db.get_validator();
if (not m_validator)
return;
// auto m_validator = db.get_validator();
// if (not m_validator)
// return;

std::vector<row_handle> to_be_deleted;

bool warnReplaceTypeSymbol = true;
for (auto row : atom_site_anisotrop)
{
auto parents = atom_site_anisotrop.get_parents(row, atom_site);
Expand All @@ -512,6 +513,12 @@ void checkAtomAnisotropRecords(datablock &db)

if (row["type_symbol"].empty())
row["type_symbol"] = parent["type_symbol"].text();
else if (row["type_symbol"].text() != parent["type_symbol"].text())
{
if (cif::VERBOSE and std::exchange(warnReplaceTypeSymbol, false))
std::clog << "Replacing type_symbol in atom_site_anisotrop record(s)\n";
row["type_symbol"] != parent["type_symbol"].text();
}

if (row["pdbx_auth_alt_id"].empty())
row["pdbx_auth_alt_id"] = parent["pdbx_auth_alt_id"].text();
Expand Down Expand Up @@ -1019,9 +1026,6 @@ bool reconstruct_pdbx(file &file, std::string_view dictionary)
// Now see if atom records make sense at all
checkAtomRecords(db);

if (db.get("atom_site_anisotrop"))
checkAtomAnisotropRecords(db);

std::vector<std::string> invalidCategories;

// clean up each category
Expand Down Expand Up @@ -1244,6 +1248,9 @@ bool reconstruct_pdbx(file &file, std::string_view dictionary)

file.load_dictionary(dictionary);

if (db.get("atom_site_anisotrop"))
checkAtomAnisotropRecords(db);

// Now create any missing categories
// Next make sure we have struct_asym records
if (db.get("struct_asym") == nullptr)
Expand Down
22 changes: 17 additions & 5 deletions test/reconstruction-test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,24 @@ TEST_CASE("reconstruct")
{
std::cout << i->path() << '\n';

cif::file f(i->path());
if (i->path().extension() == ".pdb")
{
cif::file f = cif::pdb::read(i->path());

std::error_code ec;
CHECK_FALSE(cif::pdb::is_valid_pdbx_file(f, ec));
CHECK(ec != std::errc{});
std::error_code ec;

CHECK(cif::pdb::reconstruct_pdbx(f));
if (not cif::pdb::is_valid_pdbx_file(f, ec))
CHECK(cif::pdb::reconstruct_pdbx(f));
}
else
{
cif::file f(i->path());

std::error_code ec;
CHECK_FALSE(cif::pdb::is_valid_pdbx_file(f, ec));
CHECK(ec != std::errc{});

CHECK(cif::pdb::reconstruct_pdbx(f));
}
}
}

0 comments on commit 12ee4a7

Please sign in to comment.