Skip to content

Commit

Permalink
Merge pull request #131 from atillack/simple_analysis
Browse files Browse the repository at this point in the history
Distance-based pose analysis
  • Loading branch information
atillack authored Apr 22, 2021
2 parents 00ebaf3 + c262395 commit 98d875d
Show file tree
Hide file tree
Showing 9 changed files with 860 additions and 168 deletions.
147 changes: 84 additions & 63 deletions host/inc/getparameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

#define LS_METHOD_STRING_LEN 8


typedef struct
{
double AD4_coeff_vdW;
Expand All @@ -63,69 +64,77 @@ constexpr AD4_free_energy_coeffs unbound_models[3] = {

// Struct which contains the docking parameters (partly parameters for fpga)
typedef struct _Dockpars
{ // default values
int devnum = -1;
int devices_requested = 1; // this is AD-GPU ...
uint32_t seed[3] = {(uint32_t)time(NULL),(uint32_t)processid(),0};
unsigned long num_of_energy_evals = 2500000;
unsigned long num_of_generations = 42000;
bool nev_provided = false;
bool use_heuristics = true; // Flag if we want to use Diogo's heuristics
unsigned long heuristics_max = 12000000; // Maximum number of evaluations under the heuristics (12M max evaluates to 80% of 3M evals calculated by heuristics -> 2.4M)
float abs_max_dmov; // depends on grid spacing
float abs_max_dang = 90; // +/- 90°
float mutation_rate = 2; // 2%
float crossover_rate = 80; // 80%
float tournament_rate = 60; // 60%
float lsearch_rate = 100; // 100%
float smooth = 0.5f;
int nr_deriv_atypes = 0; // this is to support: -derivtype C1,C2,C3=C
deriv_atype* deriv_atypes = NULL; // or even: -derivtype C1,C2,C3=C/S4=S/H5=HD
int nr_mod_atype_pairs = 0; // this is to support: -modpair C1:S4,1.60,1.200,13,7
pair_mod* mod_atype_pairs = NULL; // or even: -modpair C1:S4,1.60,1.200,13,7/C1:C3,1.20 0.025
char ls_method[LS_METHOD_STRING_LEN] = "ad"; // "sw": Solis-Wets,
// "sd": Steepest-Descent
// "fire": FIRE, https://www.math.uni-bielefeld.de/~gaehler/papers/fire.pdf
// "ad": ADADELTA, https://arxiv.org/abs/1212.5701
// "adam": ADAM (currently only on Cuda)
int initial_sw_generations = 0;
float rho_lower_bound = 0.01; // 0.01;
float base_dmov_mul_sqrt3; // depends on grid spacing
float base_dang_mul_sqrt3 = 75.0*sqrt(3.0); // 75°
unsigned long cons_limit = 4;
unsigned long max_num_of_iters = 300;
unsigned long pop_size = 150;
char* load_xml = NULL;
bool xml2dlg = false;
unsigned int xml_files = 0;
bool dlg2stdout = false;
int gen_pdbs = 0;
char* dpffile = NULL;
char* fldfile = NULL;
char* ligandfile = NULL;
char* flexresfile = NULL;
char* xrayligandfile = NULL; // by default will be ligand file name
char* resname = NULL; // by default will be ligand file basename
bool given_xrayligandfile = false; // That is, not given (explicitly by the user)
bool autostop = true;
unsigned int as_frequency = 5;
float stopstd = 0.15;
bool cgmaps = false; // default is false (use a single map for every CGx or Gx atom type)
unsigned long num_of_runs = 20;
unsigned int list_nr = 0;
bool reflig_en_required = false;
int unbound_model = 0; // bound same as unbound, the coefficients
AD4_free_energy_coeffs coeffs = unbound_models[0]; // are also set in get_filenames_and_ADcoeffs()
float elec_min_distance = 0.01;
bool handle_symmetry = true;
bool gen_finalpop = false;
bool gen_best = false;
float qasp = 0.01097f;
float rmsd_tolerance = 2.0; // 2 Angstroem
float adam_beta1 = 0.9f;
float adam_beta2 = 0.999f;
float adam_epsilon = 1.0e-8f;
bool output_xml = true; // xml output file will be generated
{ // default values
int devnum = -1;
int devices_requested = 1; // this is AD-GPU ...
uint32_t seed[3] = {(uint32_t)time(NULL),(uint32_t)processid(),0};
unsigned long num_of_energy_evals = 2500000;
unsigned long num_of_generations = 42000;
bool nev_provided = false;
bool use_heuristics = true; // Flag if we want to use Diogo's heuristics
unsigned long heuristics_max = 12000000; // Maximum number of evaluations under the heuristics (12M max evaluates to 80% of 3M evals calculated by heuristics -> 2.4M)
float abs_max_dmov; // depends on grid spacing
float abs_max_dang = 90; // +/- 90°
float mutation_rate = 2; // 2%
float crossover_rate = 80; // 80%
float tournament_rate = 60; // 60%
float lsearch_rate = 100; // 100%
float smooth = 0.5f;
int nr_deriv_atypes = 0; // this is to support: -derivtype C1,C2,C3=C
deriv_atype* deriv_atypes = NULL; // or even: -derivtype C1,C2,C3=C/S4=S/H5=HD
int nr_mod_atype_pairs = 0; // this is to support: -modpair C1:S4,1.60,1.200,13,7
pair_mod* mod_atype_pairs = NULL; // or even: -modpair C1:S4,1.60,1.200,13,7/C1:C3,1.20 0.025
char ls_method[LS_METHOD_STRING_LEN] = "ad"; // "sw": Solis-Wets,
// "sd": Steepest-Descent
// "fire": FIRE, https://www.math.uni-bielefeld.de/~gaehler/papers/fire.pdf
// "ad": ADADELTA, https://arxiv.org/abs/1212.5701
// "adam": ADAM (currently only on Cuda)
int initial_sw_generations = 0;
float rho_lower_bound = 0.01; // 0.01;
float base_dmov_mul_sqrt3; // depends on grid spacing
float base_dang_mul_sqrt3 = 75.0*sqrt(3.0); // 75°
unsigned long cons_limit = 4;
unsigned long max_num_of_iters = 300;
unsigned long pop_size = 150;
char* load_xml = NULL;
bool xml2dlg = false;
bool contact_analysis = false; // by default no distance-based contact analysis is performed
std::vector<ReceptorAtom> receptor_atoms;
unsigned int nr_receptor_atoms = 0;
unsigned int* receptor_map = NULL;
unsigned int* receptor_map_list = NULL;
float R_cutoff = 2.1;
float H_cutoff = 3.7;
float V_cutoff = 4.0;
unsigned int xml_files = 0;
bool dlg2stdout = false;
int gen_pdbs = 0;
char* dpffile = NULL;
char* fldfile = NULL;
char* ligandfile = NULL;
char* flexresfile = NULL;
char* xrayligandfile = NULL; // by default will be ligand file name
char* resname = NULL; // by default will be ligand file basename
bool given_xrayligandfile = false; // That is, not given (explicitly by the user)
bool autostop = true;
unsigned int as_frequency = 5;
float stopstd = 0.15;
bool cgmaps = false; // default is false (use a single map for every CGx or Gx atom type)
unsigned long num_of_runs = 20;
unsigned int list_nr = 0;
bool reflig_en_required = false;
int unbound_model = 0; // bound same as unbound, the coefficients
AD4_free_energy_coeffs coeffs = unbound_models[0]; // are also set in get_filenames_and_ADcoeffs()
float elec_min_distance = 0.01;
bool handle_symmetry = true;
bool gen_finalpop = false;
bool gen_best = false;
float qasp = 0.01097f;
float rmsd_tolerance = 2.0; // 2 Angstroem
float adam_beta1 = 0.9f;
float adam_beta2 = 0.999f;
float adam_epsilon = 1.0e-8f;
bool output_xml = true; // xml output file will be generated
} Dockpars;

inline bool add_deriv_atype(
Expand Down Expand Up @@ -186,6 +195,18 @@ int get_commandpars(
const bool late_call = true
);

// Declaration only; the definition is not part of this header.
// Returns a vector of ReceptorAtom records by value.
//   receptor_name: C string naming the receptor to read
//                  (NOTE(review): presumably a receptor file name/path - confirm against the definition)
std::vector<ReceptorAtom> read_receptor_atoms(
const char* receptor_name
);

// Declaration only; the definition is not part of this header.
// Returns a vector of ReceptorAtom records by value.
//   receptor_name: C string naming the receptor to read
//   mygrid:        grid information (non-const pointer, so the callee is allowed to modify it)
//   in_reach_map:  reference to a caller-owned pointer; the callee can reassign it
//                  (NOTE(review): presumably allocated here and handed back to the caller - confirm who frees it)
//   atom_map_list: reference to a caller-owned pointer; the callee can reassign it
//                  (NOTE(review): same ownership question as in_reach_map)
//   cutoff:        distance cutoff, defaults to 4.2
//                  (NOTE(review): units presumably Angstroem, as for rmsd_tolerance in Dockpars - confirm)
std::vector<ReceptorAtom> read_receptor(
const char* receptor_name,
Gridinfo* mygrid,
unsigned int* &in_reach_map,
unsigned int* &atom_map_list,
double cutoff = 4.2
);

void read_xml_filenames(
char* xml_filename,
char* &dpf_filename,
Expand Down
78 changes: 67 additions & 11 deletions host/inc/processligand.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,15 @@ typedef struct
int num_of_rotbonds;
// true_ligand_rotbonds: Number of rotatable bonds in the ligand only.
int true_ligand_rotbonds;
// atom_names: Each row (first index) contains the name of one ligand atom
char atom_names [MAX_NUM_OF_ATOMS][5];
// atom_types: Each row (first index) contain an atom type (as two characters),
// the row index is equal to the atom type code.
char atom_types [MAX_NUM_OF_ATOMS][4]; // there can be at most as many types (base+derived) as there are atoms
// base_atom_types: Each row (first index) contain an atom base type (for derived types it'll be different from atom_types),
// the row index is equal to the atom type code.
char base_atom_types [MAX_NUM_OF_ATOMS][4];
char base_atom_names [MAX_NUM_OF_ATOMS][4];
// atom_map_to_fgrids: Maps each moving atom to a (pre-loaded) map id
int atom_map_to_fgrids [MAX_NUM_OF_ATOMS];
// atom_idxyzq: Each row describes one atom of the ligand.
Expand Down Expand Up @@ -132,8 +135,39 @@ typedef struct
// it is necessary.
double rotbonds_moving_vectors [MAX_NUM_OF_ROTBONDS][3];
double rotbonds_unit_vectors [MAX_NUM_OF_ROTBONDS][3];
// acceptor, donor: indicate whether a given atom is a hydrogen acceptor or donor
bool acceptor [MAX_NUM_OF_ATOMS];
bool donor [MAX_NUM_OF_ATOMS];
bool reactive [MAX_NUM_OF_ATOMS]; // atoms with 1,4,7 numbered atom types
} Liganddata;

// Structure to store the relevant data of a single receptor atom.
// The example values in the field comments below are taken from this sample ATOM record:
// ATOM 1 N SER A 1 -2.367 4.481 -16.909 1.00 1.00 0.185 N
typedef struct
{
unsigned int id; // atom id, e.g. 1
char name[5]; // atom name, e.g. "N" (5-byte buffer; assumes NUL-terminated - TODO confirm)
char res_name[4]; // residue name, e.g. "SER" (4-byte buffer)
char chain_id[2]; // chain id, e.g. "A" (2-byte buffer)
unsigned int res_id; // residue id, e.g. 1
float x,y,z; // atom coordinates, e.g. -2.367, 4.481, -16.909
char atom_type[4]; // atom type, e.g. "N" (4-byte buffer)
bool acceptor; // NOTE(review): presumably true if the atom is a hydrogen acceptor (cf. Liganddata::acceptor) - confirm
bool donor; // NOTE(review): presumably true if the atom is a hydrogen donor (cf. Liganddata::donor) - confirm
} ReceptorAtom;

// One contact record between a ligand atom and a receptor/flexible-residue atom.
// analyze_ligand_receptor() returns a vector of these, and calc_intraE_f() accepts
// a pointer to such a vector.
// NOTE(review): all const char* members are raw pointers that this struct neither
// copies nor frees; the strings they point to must outlive the record - confirm the owner.
typedef struct
{
unsigned int type; // 0 .. reactive, 1 .. hydrogen bond, 2 .. vdW
unsigned int lig_id; // ligand atom id
const char* lig_name; // ligand atom name
unsigned int rec_id; // receptor/flex res atom id
const char* rec_name; // receptor/flex res atom name
const char* residue; // residue name
unsigned int res_id; // residue id
const char* chain; // chain id
} AnalysisData;

int init_liganddata(
const char*,
const char*,
Expand Down Expand Up @@ -199,7 +233,12 @@ double calc_rmsd(const Liganddata*, const Liganddata*, const bool);

double calc_ddd_Mehler_Solmajer(double);

int is_H_bond(const char*, const char*);
// Declaration only. Takes an atom type string and returns a bool.
// NOTE(review): judging by the name, true means the atom type is a hydrogen-bond acceptor - confirm against the definition.
bool is_H_acceptor(const char* atype);

// Declaration only. Takes two atom type strings and returns a bool, so callers
// can use the result directly as a condition (no comparison against 0 needed).
// NOTE(review): judging by the name, true means the two atom types can form a hydrogen bond - confirm against the definition.
bool is_H_bond(
const char* atype1,
const char* atype2
);

void print_ref_lig_energies_f(
Liganddata,
Expand Down Expand Up @@ -249,6 +288,18 @@ void change_conform(
int debug
);

// Declaration only; the definition is not part of this header.
// Returns a vector of AnalysisData contact records by value. All pointer
// arguments are pointers-to-const, i.e. read-only inputs.
//   mygrid:              grid information
//   myligand:            ligand data
//   receptor_atoms:      receptor atom records
//                        (NOTE(review): presumably the array behind Dockpars::receptor_atoms - confirm)
//   receptor_map:        NOTE(review): presumably the map filled via read_receptor()'s in_reach_map - confirm
//   receptor_map_list:   NOTE(review): presumably the list filled via read_receptor()'s atom_map_list - confirm
//   outofgrid_tolerance: tolerance for atoms outside the grid (exact meaning not visible here)
//   debug:               debug flag/level
//   H_cutoff:            distance cutoff, presumably for hydrogen-bond contacts (AnalysisData type 1) - confirm
//   V_cutoff:            distance cutoff, presumably for vdW contacts (AnalysisData type 2) - confirm
std::vector<AnalysisData> analyze_ligand_receptor(
const Gridinfo* mygrid,
const Liganddata* myligand,
const ReceptorAtom* receptor_atoms,
const unsigned int* receptor_map,
const unsigned int* receptor_map_list,
float outofgrid_tolerance,
int debug,
float H_cutoff,
float V_cutoff
);

float calc_interE_f(
const Gridinfo* mygrid,
const Liganddata* myligand,
Expand Down Expand Up @@ -303,16 +354,21 @@ struct IntraTables{
};

float calc_intraE_f(
const Liganddata* myligand,
float dcutoff,
float smooth,
bool ignore_desolv,
const float elec_min_distance,
IntraTables& tables,
int debug,
float& interflexE,
int nr_mod_atype_pairs,
pair_mod* mod_atype_pairs
const Liganddata* myligand,
float dcutoff,
float smooth,
bool ignore_desolv,
const float elec_min_distance,
IntraTables& tables,
int debug,
float& interflexE,
int nr_mod_atype_pairs,
pair_mod* mod_atype_pairs,
std::vector<AnalysisData> *analysis = NULL,
const ReceptorAtom* flexres_atoms = NULL,
float R_cutoff = 2.1,
float H_cutoff = 3.7,
float V_cutoff = 4.2
);

int map_to_all_maps(
Expand Down
29 changes: 15 additions & 14 deletions host/inc/processresult.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,20 +39,21 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

typedef struct
{
float* genotype; // a pointer here is sufficient and saves lots of memory copies
Liganddata reslig_realcoord;
float interE;
float interflexE;
float interE_elec;
float intraE;
float intraflexE;
float peratom_vdw [MAX_NUM_OF_ATOMS];
float peratom_elec [MAX_NUM_OF_ATOMS];
float rmsd_from_ref;
float rmsd_from_cluscent;
int clus_id;
int clus_subrank;
int run_number;
float* genotype; // a pointer here is sufficient and saves lots of memory copies
Liganddata reslig_realcoord;
float interE;
float interflexE;
float interE_elec;
float intraE;
float intraflexE;
float peratom_vdw [MAX_NUM_OF_ATOMS];
float peratom_elec [MAX_NUM_OF_ATOMS];
float rmsd_from_ref;
float rmsd_from_cluscent;
int clus_id;
int clus_subrank;
int run_number;
std::vector<AnalysisData> analysis;
} Ligandresult;


Expand Down
2 changes: 1 addition & 1 deletion host/src/calcenergy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ int prepare_const_fields_for_gpu(
floatpoi = reqm_AB + i*myligand_reference->num_of_atypes + j;
*floatpoi = (float) myligand_reference->reqm_AB[i][j];

if ((is_H_bond(myligand_reference->base_atom_types[i], myligand_reference->base_atom_types[j]) != 0) &&
if (is_H_bond(myligand_reference->base_atom_types[i], myligand_reference->base_atom_types[j]) &&
(!is_mod_pair(myligand_reference->atom_types[i], myligand_reference->atom_types[j], mypars->nr_mod_atype_pairs, mypars->mod_atype_pairs)))
{
floatpoi = VWpars_AC + i*myligand_reference->num_of_atypes + j;
Expand Down
Loading

0 comments on commit 98d875d

Please sign in to comment.