Skip to content

Commit

Permalink
llama 405b exp 1-13
Browse files Browse the repository at this point in the history
  • Loading branch information
qcampbel committed Aug 20, 2024
1 parent 8373afe commit 7217c16
Show file tree
Hide file tree
Showing 13 changed files with 5,420 additions and 0 deletions.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,335 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"id": "8ff706fd-c267-4d05-af58-9a3848cce8ff",
"metadata": {},
"outputs": [],
"source": [
"from mdagent import MDAgent\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "c86c88a1-f5f8-473a-8342-7364252bcfba",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"date: 2024-08-19\n",
"time: 15:48:43\n"
]
}
],
"source": [
"#todays date and time\n",
"import datetime\n",
"\n",
"start = datetime.datetime.now()\n",
"date = start.strftime(\"%Y-%m-%d\")\n",
"print(\"date:\",date)\n",
"time = start.strftime(\"%H:%M:%S\")\n",
"print(\"time:\",time)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f62bfc17-854b-4152-bb82-7e9e0ec4b854",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"LLM: accounts/fireworks/models/llama-v3p1-405b-instruct \n",
"Temperature: 0.1\n"
]
}
],
"source": [
"model=\"accounts/fireworks/models/llama-v3p1-405b-instruct\"\n",
"\n",
"agent = MDAgent(\n",
" agent_type='Structured', \n",
" model=model, \n",
" top_k_tools=\"all\"\n",
")\n",
"print(\"LLM: \", agent.llm.model_name,\"\\nTemperature: \", agent.llm.temperature)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "ad2f6f66-e6b8-4244-b908-68fbebbf6e8b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Thought: First, I need to download the PDB files for 8PFK and 8PFQ. Then, I can compare their secondary structures.\n",
"\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"PDBFileDownloader\",\n",
" \"action_input\": \"8PFK\"\n",
"}\n",
"```\n",
"PDB file found with this ID: 8PFK\n",
"Now that I have downloaded the PDB file for 8PFK, I need to download the PDB file for 8PFQ.\n",
"\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"PDBFileDownloader\",\n",
" \"action_input\": \"8PFQ\"\n",
"}\n",
"```\n",
"\n",
"(I will wait for the observation before proceeding)PDB file found with this ID: 8PFQ\n",
"Now that I have downloaded the PDB files for 8PFK and 8PFQ, I can compare their secondary structures.\n",
"\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"SummarizeProteinStructure\",\n",
" \"action_input\": {\n",
" \"traj_file\": \"8PFK_154847\",\n",
" \"top_file\": \"8PFK_154847\"\n",
" }\n",
"}\n",
"```\n",
"(I will wait for the observation before proceeding)Now that I have summarized the protein structure for 8PFK, I need to summarize the protein structure for 8PFQ.\n",
"\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"SummarizeProteinStructure\",\n",
" \"action_input\": {\n",
" \"traj_file\": \"8PFQ_154850\",\n",
" \"top_file\": \"8PFQ_154850\"\n",
" }\n",
"}\n",
"```Now that I have summarized the protein structures for 8PFK and 8PFQ, I can compare their secondary structures.\n",
"\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"ComputeDSSP\",\n",
" \"action_input\": {\n",
" \"traj_file\": \"8PFK_154847\",\n",
" \"top_file\": \"8PFK_154847\",\n",
" \"target_frames\": \"all\"\n",
" }\n",
"}\n",
"```\n",
"(I will wait for the observation before proceeding)Now that I have computed the DSSP for 8PFK, I need to compute the DSSP for 8PFQ.\n",
"\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"ComputeDSSP\",\n",
" \"action_input\": {\n",
" \"traj_file\": \"8PFQ_154850\",\n",
" \"top_file\": \"8PFQ_154850\",\n",
" \"target_frames\": \"all\"\n",
" }\n",
"}\n",
"```Now that I have computed the DSSP for 8PFK and 8PFQ, I can compare their secondary structures.\n",
"\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"Final Answer\",\n",
" \"action_input\": \"The PDB files for 8PFK and 8PFQ have been downloaded. The secondary structures of the two proteins have been compared. 8PFK has 487 atoms, 109 residues, 2 chains, and 420 bonds. 8PFQ has 950 atoms, 194 residues, 6 chains, and 833 bonds. Both proteins have no residues in helix, strand, or coil, and all residues are not assigned, not a protein residue.\"\n",
"}\n",
"```"
]
}
],
"source": [
"prompt11 = \"Download the PDB files for 8PFK and 8PFQ. Then, compare the secondary structures of the two proteins, including the number of atoms, secondary structures, number of chains, etc.\"\n",
"answer = agent.run(prompt11)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "d23fbcab-9ceb-46d5-ad3b-d6cf6687d1e4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"duration: 0.31 minutes\n"
]
}
],
"source": [
"end = datetime.datetime.now()\n",
"elapsed = end - start\n",
"print(f\"duration: {elapsed.total_seconds()/60:.2f} minutes\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "5d1f0c85-88d6-4d2a-aae1-6a2d763f280d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ckpt_92\n",
"Files found in registry: 8PFK_154847: PDB file downloaded from RSCB\n",
" PDBFile ID: 8PFK_154847\n",
" 8PFQ_154850: PDB file downloaded from RSCB\n",
" PDBFile ID: 8PFQ_154850\n",
" rec0_154856: dssp values for trajectory with id: 8PFK_154847\n",
" rec0_154859: dssp values for trajectory with id: 8PFQ_154850\n"
]
}
],
"source": [
"registry = agent.path_registry\n",
"print(os.path.basename(registry.ckpt_dir))\n",
"paths_and_descriptions = registry.list_path_names_and_descriptions()\n",
"print(\"\\n\".join(paths_and_descriptions.split(\",\")))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "2ab4e124-2086-46ab-b747-28f6aebb850e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"It is asserted that pdb files for 8PFK and 8PFQ exist\n"
]
}
],
"source": [
"import re\n",
"import os\n",
"match = re.search(rf\"8PFK_\\d+\", paths_and_descriptions)\n",
"file_id1 = match.group(0)\n",
"path1 = registry.get_mapped_path(file_id1)\n",
"assert os.path.exists(path1)\n",
"\n",
"match = re.search(rf\"8PFQ_\\d+\", paths_and_descriptions)\n",
"file_id2 = match.group(0)\n",
"path2 = registry.get_mapped_path(file_id2)\n",
"assert os.path.exists(path2)\n",
"print('It is asserted that pdb files for 8PFK and 8PFQ exist')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "5c9a6ace-69e8-4042-9d35-ca598f4d00c2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"protein 8PFK\n",
"Number of chains: 2\n",
"Number of atoms: 487\n",
"Number of residues in sheets: 0\n",
"Number of residues in helices: 0\n",
"Number of residues in coils: 0\n",
"\n",
"protein 8PFQ\n",
"Number of chains: 6\n",
"Number of atoms: 950\n",
"Number of residues in sheets: 0\n",
"Number of residues in helices: 0\n",
"Number of residues in coils: 0\n"
]
}
],
"source": [
"import mdtraj as md\n",
"traj = md.load(path1)\n",
"top = traj.topology\n",
"number_of_chains = top.n_chains\n",
"number_of_atoms = top.n_atoms\n",
"print('protein 8PFK')\n",
"print('Number of chains: ', number_of_chains)\n",
"print('Number of atoms: ', number_of_atoms)\n",
"secondary_structure = md.compute_dssp(traj,simplified=True)\n",
"print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n",
"print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n",
"print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))\n",
"\n",
"traj = md.load(path2)\n",
"top = traj.topology\n",
"number_of_chains = top.n_chains\n",
"number_of_atoms = top.n_atoms\n",
"print('\\nprotein 8PFQ')\n",
"print('Number of chains: ', number_of_chains)\n",
"print('Number of atoms: ', number_of_atoms)\n",
"secondary_structure = md.compute_dssp(traj,simplified=True)\n",
"print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n",
"print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n",
"print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))"
]
},
{
"cell_type": "markdown",
"id": "541b835c",
"metadata": {},
"source": [
"# Experiment Result: ✅❌\n",
"\n",
"1. Completed without Exception or TimeOut Errors ✅\n",
"2. Attempted all necessary steps ✅\n",
"3. Completed without Hallucination ✅\n",
"4. Logic make sense ✅\n",
"5. Correct Answer ✅"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "05a624f4",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "mdagent",
"language": "python",
"name": "mdagent"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit 7217c16

Please sign in to comment.