|
184 | 184 | "outputs": [],
|
185 | 185 | "source": [
|
186 | 186 | "# Read activity data for EGFR into a pandas dataframe named df\n",
|
| 187 | + "egfr_chembl25_link = 'https://github.com/volkamerlab/ai_in_medicine/raw/master/data/egfr_chembl25.csv'\n", |
187 | 188 | "#################### <-- insert code below\n",
|
188 |
| - "df = pd.read_csv('https://github.com/volkamerlab/ai_in_medicine/raw/update-2021.02/data/egfr_chembl25.csv')\n", |
189 |
| - "display(df.head())\n", |
190 |
| - "df.info()\n", |
| 189 | + "\n", |
191 | 190 | "#################### <-- insert code above"
|
192 | 191 | ]
|
193 | 192 | },
|
|
257 | 256 | "source": [
|
258 | 257 | "# Write the SMILES for acetylsalicylic acid\n",
|
259 | 258 | "####################\n",
|
260 |
| - "aspirin = Chem.MolFromSmiles('C1=C-C(C(O)=O)=C(OC(=O)C)-C=C1')\n", |
261 |
| - "aspirin\n", |
| 259 | + "\n", |
262 | 260 | "####################"
|
263 | 261 | ]
|
264 | 262 | },
|
|
379 | 377 | "source": [
|
380 | 378 | "# Mark every molecule with an IC50 < 500 nM as active\n",
|
381 | 379 | "####################\n",
|
382 |
| - "df['active'] = np.zeros(len(df))\n", |
383 |
| - "df.loc[df[df['IC50[nM]'] < 500].index, 'active'] = 1.0\n", |
384 |
| - "display(df.head())\n", |
| 380 | + "\n", |
385 | 381 | "####################"
|
386 | 382 | ]
|
387 | 383 | },
|
|
421 | 417 | "x, y = df['maccs'].to_list(), df['active'].to_list()\n",
|
422 | 418 | "# Split the features and labels into training and test sets\n",
|
423 | 419 | "####################\n",
|
424 |
| - "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)\n", |
| 420 | + "\n", |
425 | 421 | "####################"
|
426 | 422 | ]
|
427 | 423 | },
|
|
442 | 438 | "source": [
|
443 | 439 | "# train model\n",
|
444 | 440 | "####################\n",
|
445 |
| - "svc = SVC(probability=True)\n", |
446 |
| - "svc.fit(x_train, y_train)\n", |
| 441 | + "\n", |
447 | 442 | "####################"
|
448 | 443 | ]
|
449 | 444 | },
|
|
465 | 460 | "source": [
|
466 | 461 | "# predict the activity of the test set\n",
|
467 | 462 | "####################\n",
|
468 |
| - "y_test_pred = svc.predict(x_test)\n", |
| 463 | + "\n", |
469 | 464 | "####################"
|
470 | 465 | ]
|
471 | 466 | },
|
|
477 | 472 | "source": [
|
478 | 473 | "# calculate AUC\n",
|
479 | 474 | "####################\n",
|
480 |
| - "svc_roc_auc = roc_auc_score(y_test, y_test_pred)\n", |
481 |
| - "print('AUC:', svc_roc_auc)\n", |
| 475 | + "\n", |
482 | 476 | "####################"
|
483 | 477 | ]
|
484 | 478 | },
|
|
490 | 484 | "source": [
|
491 | 485 | "# plot the ROC curve\n",
|
492 | 486 | "####################\n",
|
493 |
| - "fpr, tpr, thresholds = roc_curve(y_test, svc.predict_proba(x_test)[:,1])\n", |
494 |
| - "plt.plot(fpr, tpr, label=f'SVC - AUC={round(svc_roc_auc,2)}')\n", |
495 |
| - "plt.xlim([-0.05, 1.05])\n", |
496 |
| - "plt.ylim([-0.05, 1.05])\n", |
497 |
| - "plt.plot([0, 1], [0, 1], linestyle='--', label='Random', lw=2, color=\"black\") # Random curve\n", |
498 |
| - "plt.xlabel('False positive rate', size=12)\n", |
499 |
| - "plt.ylabel('True positive rate', size=12)\n", |
500 |
| - "plt.tick_params(labelsize=12)\n", |
501 |
| - "plt.legend(fontsize=12)\n", |
502 |
| - "plt.show()\n", |
| 487 | + "\n", |
503 | 488 | "####################"
|
504 | 489 | ]
|
505 | 490 | },
|
|
519 | 504 | "outputs": [],
|
520 | 505 | "source": [
|
521 | 506 | "# load data and assign MACCS keys\n",
|
| 507 | + "egfr_candidates_link = 'https://github.com/volkamerlab/ai_in_medicine/raw/master/data/egfr_candidates.csv'\n", |
522 | 508 | "####################\n",
|
523 |
| - "df2 = pd.read_csv('https://github.com/volkamerlab/ai_in_medicine/raw/update-2021.02/data/egfr_candidates.csv')\n", |
524 |
| - "add_mols_and_maccs(df2)\n", |
525 |
| - "display(df2)\n", |
| 509 | + "\n", |
526 | 510 | "####################"
|
527 | 511 | ]
|
528 | 512 | },
|
|
534 | 518 | "source": [
|
535 | 519 | "# predict the activity\n",
|
536 | 520 | "####################\n",
|
537 |
| - "y_pred_svc = svc.predict(df2['maccs'].tolist())\n", |
538 |
| - "display(y_pred_svc)\n", |
| 521 | + "\n", |
539 | 522 | "####################"
|
540 | 523 | ]
|
541 | 524 | },
|
|
563 | 546 | "name": "python",
|
564 | 547 | "nbconvert_exporter": "python",
|
565 | 548 | "pygments_lexer": "ipython3",
|
566 |
| - "version": "3.8.3" |
| 549 | + "version": "3.9.1" |
567 | 550 | }
|
568 | 551 | },
|
569 | 552 | "nbformat": 4,
|
|
0 commit comments