-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathassign_anc.py
executable file
·68 lines (50 loc) · 1.4 KB
/
assign_anc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env python3
import pandas as pd
import optparse
# Script originally developed by Yumi Sims (yy5@sanger.ac.uk)
# Column names applied to the header-less full table, in file order.
FULLTABLE_COLNAMES = [
    "buscoID",
    "Status",
    "Sequence",
    "Gene Start",
    "Gene End",
    "Strand",
    "Score",
    "Length",
    "OrthoDB url",
    "Description",
]

# Columns written to the output file, in output order.
OUTPUT_COLUMNS = [
    "Sequence",
    "Gene Start",
    "Gene End",
    "assigned_chr",
    "Score",
    "Strand",
    "OrthoDB url",
]


def build_parser():
    """Return the option parser for the three file-path options."""
    parser = optparse.OptionParser(version="%prog 1.0")
    parser.add_option(
        "-l",
        "--locationfile",
        dest="locationfile",
        default="default.locationfile",
        help="tab-separated table with a header row containing 'buscoID' "
        "and 'assigned_chr' columns [default: %default]",
    )
    parser.add_option(
        "-f",
        "--fulltable",
        dest="fulltable",
        default="default.fulltable",
        help="tab-separated full table without a header row "
        "[default: %default]",
    )
    parser.add_option(
        "-c",
        "--csvfile",
        dest="csvfile",
        default="default.csvfile",
        help="output path for the tab-separated result [default: %default]",
    )
    return parser


def assign_anc(locationfile, fulltable, csvfile):
    """Join the location table to the full table and write the located rows.

    Args:
        locationfile: Path to a tab-separated file with a header row that
            provides at least the ``buscoID`` and ``assigned_chr`` columns.
            Text after ``#`` is treated as a comment.
        fulltable: Path to a tab-separated file without a header row whose
            fields are, in order, ``FULLTABLE_COLNAMES``. Text after ``#`` is
            treated as a comment. Rows may carry fewer fields than that; the
            missing ones are read as NaN.
        csvfile: Path of the tab-separated output file. It has no header and
            no index, and its columns are ``OUTPUT_COLUMNS``.

    Rows that share a ``buscoID`` in both inputs are kept (inner join). Rows
    without a ``Sequence`` value are dropped, ``Gene Start`` / ``Gene End``
    are written as integers, and any other missing value is written as the
    literal ``NA``.

    Raises:
        ValueError: If a kept row has a missing or non-numeric ``Gene Start``
            or ``Gene End``.
    """
    location = pd.read_csv(locationfile, sep="\t", comment="#")
    # Passing names= fixes the table width at ten columns. Without it pandas
    # infers the width from the first data row, so a short first row makes
    # every later, longer row fail to parse.
    full_table = pd.read_csv(
        fulltable,
        sep="\t",
        header=None,
        names=FULLTABLE_COLNAMES,
        comment="#",
    )

    merged = location.merge(full_table, on="buscoID")

    # Keep only rows that have a sequence, restricted to the output columns.
    located = merged.loc[merged["Sequence"].notna(), OUTPUT_COLUMNS]
    located = located.reset_index(drop=True)

    # Coordinates are parsed as floats whenever any row lacked them; cast the
    # surviving rows back so they are written without a trailing ".0".
    located = located.astype({"Gene Start": "int", "Gene End": "int"})

    located.to_csv(csvfile, index=False, header=False, sep="\t", na_rep="NA")


def main(argv=None):
    """Parse command-line options and run ``assign_anc``.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    options, _remainder = build_parser().parse_args(argv)
    assign_anc(options.locationfile, options.fulltable, options.csvfile)


if __name__ == "__main__":
    main()