# Reference: https://www.mdanalysis.org/pmda/api/hbond_analysis.html
import pickle
import numpy as np
np.set_printoptions(linewidth=100)
import pandas as pd

import matplotlib.pyplot as plt

import MDAnalysis as mda
from MDAnalysis.tests.datafiles import waterPSF, waterDCD
from pmda.hbond_analysis import HydrogenBondAnalysis

# Enable inline plot display when running under IPython/Jupyter.
# The bare ``%matplotlib inline`` magic is IPython-only syntax and prevents
# the file from being parsed as plain Python, so the magic is requested
# through the IPython API and skipped when no interactive shell exists.
try:
    _ipython_shell = get_ipython()  # noqa: F821 - injected by IPython only
except NameError:
    _ipython_shell = None
if _ipython_shell is not None:
    _ipython_shell.run_line_magic("matplotlib", "inline")
# Build the universe from the water PSF/DCD test files imported above.
u = mda.Universe(waterPSF, waterDCD)

# Selection strings and geometric cutoffs handed to the analysis.
hbond_settings = dict(
    donors_sel=None,
    hydrogens_sel="name H1 H2",
    acceptors_sel="name OH2",
    d_a_cutoff=3.0,
    d_h_a_angle_cutoff=150,
    update_selections=False,
)
hbonds = HydrogenBondAnalysis(universe=u, **hbond_settings)

# No explicit frame window is requested: start, stop and step are all None.
hbonds.run(start=None, stop=None, step=None)

# Shape of the hydrogen-bond table.
# NOTE(review): the original walkthrough states there are 27 hydrogen bonds
# in total; that figure cannot be confirmed from this file alone.
hbond_rows = hbonds.hbonds
print(hbond_rows.shape)

# Show the first row, then keep it for the index lookup below.
first_hbond = hbond_rows[0]
print(first_hbond)

# The first four entries of a row are read as integer indices:
# frame, donor atom, hydrogen atom, acceptor atom.
leading_indices = first_hbond[:4].astype(int)
frame = leading_indices[0]
donor_ix, hydrogen_ix, acceptor_ix = leading_indices[1:]

# Move the trajectory to that frame and pick out the three atoms involved.
u.trajectory[frame]
atoms = u.atoms[[donor_ix, hydrogen_ix, acceptor_ix]]

# Plot the per-frame hydrogen-bond count returned by count_by_time() against
# the time values stored on the analysis object.
plt.plot(hbonds.times, hbonds.count_by_time(), lw=2)

# Title spelling corrected ("hydrogon" -> "hydrogen"); it is shown to the user.
plt.title("Number of hydrogen bonds over time", weight="bold")
plt.xlabel("Time (ps)")
plt.ylabel(r"$N_{HB}$")

plt.show()

# Bare expression: its value is only displayed when this is the last
# statement of a notebook cell.
hbonds.count_by_type()

for donor, acceptor, count in hbonds.count_by_type():
    # The donor label is split on ":" into a residue name and an atom type,
    # which are then used to count the matching atoms in the universe.
    donor_resname, donor_type = donor.split(":")
    donor_selection = f"resname {donor_resname} and type {donor_type}"
    n_donors = u.select_atoms(donor_selection).n_atoms

    # Average number of hbonds per donor molecule per frame; the factor of
    # two is there because each hydrogen bond involves two water molecules.
    donor_frames = hbonds.n_frames * n_donors
    mean_count = 2 * int(count) / donor_frames
    print(f"{donor} to {acceptor}: {mean_count:.2f}")
hbonds.count_by_type()

# Bare expression: its value is only displayed when this is the last
# statement of a notebook cell.
hbonds.count_by_ids()

# Take the first row of the per-atom-id counts.
# NOTE(review): treating row 0 as the "most common" bond presumes the rows
# come back sorted by descending count - confirm against the library docs.
counts = hbonds.count_by_ids()
most_common = counts[0]

# Entries 0, 1 and 2 of the row are used as donor, hydrogen and acceptor
# atom indices respectively.
top_donor = u.atoms[most_common[0]]
print(f"Most common donor: {top_donor}")
top_hydrogen = u.atoms[most_common[1]]
print(f"Most common hydrogen: {top_hydrogen}")
top_acceptor = u.atoms[most_common[2]]
print(f"Most common acceptor: {top_acceptor}")

# Histogram of hydrogen bonds along z, keyed on the donor atom's z position.

# This file reads the hydrogen-bond table both as ``hbonds.hbonds`` and as
# ``hbonds.results.hbonds``; accept whichever attribute the object provides.
hbond_table = getattr(hbonds, "results", hbonds).hbonds

# bins in z for the histogram; centres are the midpoints of adjacent edges,
# so they stay correct if the bin width is ever changed
bin_edges = np.linspace(-25, 25, 51)
bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])

# results array (this is faster and more memory efficient than appending to a list)
counts = np.zeros(bin_centers.size)

# Column 0 holds the frame index and column 1 the donor atom index.
hbond_frames = hbond_table[:, 0].astype(int)
donor_indices = hbond_table[:, 1].astype(int)

# Seek the trajectory once per frame instead of once per hydrogen bond and
# histogram all donors of that frame in a single call.
for frame in np.unique(hbond_frames):
    u.trajectory[frame]
    frame_donors = u.atoms[donor_indices[hbond_frames == frame]]

    zpos = frame_donors.positions[:, 2]
    hist, _ = np.histogram(zpos, bins=bin_edges)
    counts += hist * 2  # multiply by two as each hydrogen bond involves two water molecules

# average over the analysed frames
counts /= hbonds.n_frames

plt.plot(bin_centers, counts, lw=2)

# Title spelling corrected ("funcion" -> "function"); it is shown to the user.
plt.title(r"Number of hydrogen bonds as a function of height in $z$", weight="bold")
plt.xlabel(r"$z\ \rm (\AA)$")
plt.ylabel(r"$N_{HB}$")

plt.show()

# Normalise the histogram by the mean of the product of the first two
# entries of ``ts.dimensions`` over the analysed frames (named the x-y area
# here). ``np.prod`` replaces ``np.product``, a deprecated alias that was
# removed in NumPy 2.0.
mean_xy_area = np.mean(
    [np.prod(ts.dimensions[:2]) for ts in u.trajectory[hbonds.frames]]
)
counts /= mean_xy_area

# Round-trip the analysis object through a pickle file.
with open("hbonds.pkl", "wb") as f:
    pickle.dump(hbonds, f)

# NOTE(review): unpickling can execute arbitrary code; only load pickle
# files that this script (or another trusted source) produced.
with open("hbonds.pkl", "rb") as f:
    hbonds = pickle.load(f)

# This file reads the hydrogen-bond table both as ``hbonds.hbonds`` and as
# ``hbonds.results.hbonds``; accept whichever attribute the object provides.
hbond_table = getattr(hbonds, "results", hbonds).hbonds

# Also store the raw table as a NumPy binary file.
np.save("hbonds.npy", hbond_table)

# Export the hydrogen-bond table to CSV with per-atom annotations.

# Column positions of the distance and angle values in the table. They were
# referenced but never defined in this file (NameError). The four integer
# columns before DISTANCE match the four column names given below.
DISTANCE = 4
ANGLE = 5

# This file reads the hydrogen-bond table both as ``hbonds.hbonds`` and as
# ``hbonds.results.hbonds``; accept whichever attribute the object provides.
hbond_table = getattr(hbonds, "results", hbonds).hbonds

# Frame and atom indices are stored as floats in the table; cast to int.
df = pd.DataFrame(
    hbond_table[:, :DISTANCE].astype(int),
    columns=[
        "Frame",
        "Donor_ix",
        "Hydrogen_ix",
        "Acceptor_ix",
    ],
)

df["Distances"] = hbond_table[:, DISTANCE]
df["Angles"] = hbond_table[:, ANGLE]

# Build each atom group once and reuse it for all three annotations.
donor_atoms = u.atoms[df["Donor_ix"].to_numpy()]
acceptor_atoms = u.atoms[df["Acceptor_ix"].to_numpy()]

df["Donor resname"] = donor_atoms.resnames
df["Acceptor resname"] = acceptor_atoms.resnames
df["Donor resid"] = donor_atoms.resids
df["Acceptor resid"] = acceptor_atoms.resids
df["Donor name"] = donor_atoms.names
df["Acceptor name"] = acceptor_atoms.names

df.to_csv("hbonds.csv", index=False)