-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcalc_descriptors.py
72 lines (62 loc) · 1.84 KB
/
calc_descriptors.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import multiprocessing as multi
import os
import sys
from datetime import datetime
from multiprocessing import Pool

from rdkit import Chem, rdBase
from rdkit.Chem import AllChem
from rdkit.ML.Descriptors import MoleculeDescriptors
# Turn off RDKit's 'rdApp.warning' log channel for this module.
rdBase.DisableLog('rdApp.warning')
def calc_desclist(smi):
    """Compute eight RDKit descriptors for one SMILES and return a TSV fragment.

    The descriptors, in output order, are MolWt, TPSA, NumAromaticRings,
    NumHAcceptors, NumHDonors, NumRotatableBonds, MolLogP and RingCount.

    Args:
        smi: SMILES string of the compound.

    Returns:
        The descriptor values converted with ``str`` and joined by tabs.
        A ``None`` value becomes an empty field; zero values are written
        as ``0`` / ``0.0``. If the SMILES cannot be parsed or the
        calculation raises, a message is written to stderr and a row of
        empty fields with the same number of columns is returned, so the
        caller's output table stays rectangular.
    """
    # MW, tPSA, AROM, HBA, HBD, ROTB, LogP, RING
    desclist = ['MolWt', 'TPSA', 'NumAromaticRings', 'NumHAcceptors',
                'NumHDonors', 'NumRotatableBonds', 'MolLogP', 'RingCount']
    calculator = MoleculeDescriptors.MolecularDescriptorCalculator(desclist)

    # Fallback row: one empty field per descriptor.
    empty_row = "\t".join([""] * len(desclist))

    try:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # RDKit signals an unparsable SMILES by returning None rather
            # than raising, so it has to be checked explicitly.
            sys.stderr.write("ERROR : could not parse SMILES: " + str(smi) + "\n")
            return empty_row
        descs = calculator.CalcDescriptors(mol)
    except Exception as err:
        # Best effort: report the failure and keep the batch going.
        sys.stderr.write("ERROR : " + str(err) + "\n")
        return empty_row

    # Only None means "missing"; a plain truthiness test would also blank
    # out legitimate zeros such as 0 aromatic rings or 0 H-bond donors.
    return "\t".join("" if value is None else str(value) for value in descs)
def run_calc_multi(inpfile, num_cpus=multi.cpu_count()):
    """Calculate descriptors for every SMILES in a file using a process pool.

    The first line of ``inpfile`` is treated as a header and skipped. Each
    remaining non-blank line (right-stripped) is passed as a SMILES string
    to ``calc_desclist``. Results are written to ``<prefix>_calc.tsv``,
    where ``<prefix>`` is the input path with everything from the first dot
    of the *file name* removed; the directory part is kept unchanged.

    Args:
        inpfile: Path to the input text file, one compound per line after
            a header line.
        num_cpus: Number of worker processes. The default is
            ``multi.cpu_count()`` evaluated when the function is defined.
    """
    with open(inpfile, 'r') as inF:
        print("Calculation started")
        next(inF, None)  # skip the header line (no-op on an empty file)
        stripped = (line.rstrip() for line in inF)
        # Blank lines carry no compound; drop them instead of emitting rows
        # with an empty SMILES column.
        smilist = [smi for smi in stripped if smi]
    print("Compounds processing :", len(smilist))

    # Multiprocessing. The context manager tears the pool down even when
    # map() raises, so worker processes are not left behind.
    with Pool(num_cpus) as pool:
        result = pool.map(calc_desclist, smilist)

    # Output preparation. Only the file name is split on "."; splitting the
    # whole path would truncate paths such as "./data/x.smi" to "".
    directory, filename = os.path.split(inpfile)
    stem = filename.split(".")[0] or filename
    pref = os.path.join(directory, stem)

    # Tab-separated header, matching the tab-separated data rows.
    header = "\t".join(("SMILES", "MW", "TPSA", "AROM", "HBA",
                        "HBD", "ROTB", "LOGP", "RING"))
    with open(pref + '_calc.tsv', 'w') as out:
        out.write(header + "\n")
        out.writelines(smi + "\t" + line + "\n"
                       for smi, line in zip(smilist, result))
if __name__ == "__main__":
    # Benchmark driver: run the calculation on the file given as the first
    # command-line argument with 4, 2 and 1 worker processes and print the
    # wall-clock time of each run.
    #
    # Indexing sys.argv[1] directly raises IndexError when no argument is
    # supplied, which would make the "Please specify" branch unreachable.
    inF = sys.argv[1] if len(sys.argv) > 1 else ""
    print(multi.cpu_count())
    if inF:
        for numcpu in [4, 2, 1]:
            stime = datetime.now()
            run_calc_multi(inF, numcpu)
            etime = datetime.now()
            delta = etime - stime
            print("Num of CPUs: {}, calculation time: {}".format(numcpu, delta))
    else:
        print("Please specify the input file.")