Skip to content

Commit b006328

Browse files
Merge pull request #17 from volkamerlab/cadd_exercise_update
cleaning cadd exercise notebook
2 parents 75d0f1e + 1d2f85d commit b006328

File tree

1 file changed

+13
-30
lines changed

1 file changed

+13
-30
lines changed

week2_session1_cadd_exercise.ipynb

+13-30
Original file line numberDiff line numberDiff line change
@@ -184,10 +184,9 @@
184184
"outputs": [],
185185
"source": [
186186
"# Read activity data for EGFR into a pandas dataframe named df\n",
187+
"egfr_chembl25_link = 'https://github.com/volkamerlab/ai_in_medicine/raw/master/data/egfr_chembl25.csv'\n",
187188
"#################### <-- insert code below\n",
188-
"df = pd.read_csv('https://github.com/volkamerlab/ai_in_medicine/raw/update-2021.02/data/egfr_chembl25.csv')\n",
189-
"display(df.head())\n",
190-
"df.info()\n",
189+
"\n",
191190
"#################### <-- insert code above"
192191
]
193192
},
@@ -257,8 +256,7 @@
257256
"source": [
258257
"# Write the SMILES string for acetylsalicylic acid (aspirin)\n",
259258
"####################\n",
260-
"aspirin = Chem.MolFromSmiles('C1=C-C(C(O)=O)=C(OC(=O)C)-C=C1')\n",
261-
"aspirin\n",
259+
"\n",
262260
"####################"
263261
]
264262
},
@@ -379,9 +377,7 @@
379377
"source": [
380378
"# Mark every molecule with an IC50 < 500 nM as active (1.0) and all others as inactive (0.0) in a new column 'active'\n",
381379
"####################\n",
382-
"df['active'] = np.zeros(len(df))\n",
383-
"df.loc[df[df['IC50[nM]'] < 500].index, 'active'] = 1.0\n",
384-
"display(df.head())\n",
380+
"\n",
385381
"####################"
386382
]
387383
},
@@ -421,7 +417,7 @@
421417
"x, y = df['maccs'].to_list(), df['active'].to_list()\n",
422418
"# Split the features and labels into training and test sets\n",
423419
"####################\n",
424-
"x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)\n",
420+
"\n",
425421
"####################"
426422
]
427423
},
@@ -442,8 +438,7 @@
442438
"source": [
443439
"# train model\n",
444440
"####################\n",
445-
"svc = SVC(probability=True)\n",
446-
"svc.fit(x_train, y_train)\n",
441+
"\n",
447442
"####################"
448443
]
449444
},
@@ -465,7 +460,7 @@
465460
"source": [
466461
"# predict the activity of the test set\n",
467462
"####################\n",
468-
"y_test_pred = svc.predict(x_test)\n",
463+
"\n",
469464
"####################"
470465
]
471466
},
@@ -477,8 +472,7 @@
477472
"source": [
478473
"# calculate AUC\n",
479474
"####################\n",
480-
"svc_roc_auc = roc_auc_score(y_test, y_test_pred)\n",
481-
"print('AUC:', svc_roc_auc)\n",
475+
"\n",
482476
"####################"
483477
]
484478
},
@@ -490,16 +484,7 @@
490484
"source": [
491485
"# plot the ROC curve\n",
492486
"####################\n",
493-
"fpr, tpr, thresholds = roc_curve(y_test, svc.predict_proba(x_test)[:,1])\n",
494-
"plt.plot(fpr, tpr, label=f'SVC - AUC={round(svc_roc_auc,2)}')\n",
495-
"plt.xlim([-0.05, 1.05])\n",
496-
"plt.ylim([-0.05, 1.05])\n",
497-
"plt.plot([0, 1], [0, 1], linestyle='--', label='Random', lw=2, color=\"black\") # Random curve\n",
498-
"plt.xlabel('False positive rate', size=12)\n",
499-
"plt.ylabel('True positive rate', size=12)\n",
500-
"plt.tick_params(labelsize=12)\n",
501-
"plt.legend(fontsize=12)\n",
502-
"plt.show()\n",
487+
"\n",
503488
"####################"
504489
]
505490
},
@@ -519,10 +504,9 @@
519504
"outputs": [],
520505
"source": [
521506
"# Load data and assign MACCS keys\n",
507+
"egfr_candidates_link = 'https://github.com/volkamerlab/ai_in_medicine/raw/master/data/egfr_candidates.csv'\n",
522508
"####################\n",
523-
"df2 = pd.read_csv('https://github.com/volkamerlab/ai_in_medicine/raw/update-2021.02/data/egfr_candidates.csv')\n",
524-
"add_mols_and_maccs(df2)\n",
525-
"display(df2)\n",
509+
"\n",
526510
"####################"
527511
]
528512
},
@@ -534,8 +518,7 @@
534518
"source": [
535519
"# predict the activity\n",
536520
"####################\n",
537-
"y_pred_svc = svc.predict(df2['maccs'].tolist())\n",
538-
"display(y_pred_svc)\n",
521+
"\n",
539522
"####################"
540523
]
541524
},
@@ -563,7 +546,7 @@
563546
"name": "python",
564547
"nbconvert_exporter": "python",
565548
"pygments_lexer": "ipython3",
566-
"version": "3.8.3"
549+
"version": "3.9.1"
567550
}
568551
},
569552
"nbformat": 4,

0 commit comments

Comments
 (0)