-
Notifications
You must be signed in to change notification settings - Fork 2
/
visualizeHumann.py
87 lines (76 loc) · 4.21 KB
/
visualizeHumann.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from .visualizeFunction import VisualizeFunction
import os
import pandas as pd
import re
from Bayegy.ampliconLibs.libShell import lefse
# import pdb
class VisualizeHumann(VisualizeFunction):
""""""
def __init__(self, *args, regroup=False, prefix=False, id_with_name=False, custom_to=False, **kwargs):
super(VisualizeHumann, self).__init__(*args, **kwargs, prefix=prefix)
self.regroup = regroup
self.custom_to = custom_to
self.set_attr(save_colon='T' if id_with_name else 'F')
if regroup or custom_to:
ob = custom_to.upper() if custom_to else regroup.replace('uniref90_', '').upper()
self.set_attr(prefix=prefix or ob + '_',
pre_abundance_table="{}All.{}.abundance.tsv".format(
self.out_dir, ob)
)
self.system("{humann2_home}/humann2_regroup_table --input {abundance_table} {par} --output {pre_abundance_table}",
par="-c {}/map_{}_uniref90.txt.gz".format(self.humann2_utility_mapping, custom_to) if custom_to else '--groups ' + regroup)
else:
self.set_attr(pre_abundance_table=self.abundance_table)
self.system(
"{humann2_home}/humann2_split_stratified_table -i {pre_abundance_table} -o {out_dir}")
self.set_path(force=False, abundance_table="{}{}_unstratified.tsv".format(
self.out_dir, self.get_file_name(self.pre_abundance_table)))
df = pd.read_csv(self.abundance_table, sep='\t')
nrow, ncol = df.shape
if nrow < 2:
print("Warning: features number is less than 2")
self.valid = False
if id_with_name:
df['Description'] = df.iloc[:, 0]
df = df.loc[(i not in ['UNMAPPED', 'UNINTEGRATED',
'UniRef90_unknown', 'UNGROUPED'] for i in df.iloc[:, 0]), :]
if id_with_name:
df.iloc[:, 0] = [re.search('^[^:]*', i).group()
for i in df.iloc[:, 0]]
# pdb.set_trace()
df.to_csv(self.abundance_table, sep='\t', index=False)
def get_file_name(self, path):
return os.path.splitext(os.path.basename(path))[0]
def __visualize_with_group__(self, *args, **kwargs):
if not self.valid:
print("Warning: visualize function was passed!")
return
super(VisualizeHumann, self).__visualize_with_group__(*args, **kwargs)
categories = [g.strip() for g in re.split(',', self.categories)]
for g in categories:
print(g)
bar_out = "{}4-SignificanceAnalysis/LEfSe/SignificantFeatures/".format(
self.out_dir)
self.set_path(force=True, bar_out=bar_out)
humann2_ft_lefse_lda = '{}4-SignificanceAnalysis/LEfSe/{}{}_lefse_LDA2.LDA.txt'.format(
self.out_dir, self.prefix, g)
self.set_path(
force=False, humann2_ft_lefse_lda=humann2_ft_lefse_lda)
df = pd.read_csv(humann2_ft_lefse_lda, sep='\t',
header=None, index_col=0)[2]
df = df[df.notna()]
bar_table = "{}table_for_stratification_bar.txt".format(bar_out)
self.set_attr(bar_table=bar_table)
self.system("{R_path} {bayegy_home}/write_data_for_lefse.R -i {pre_abundance_table} -m {mapping_file} -c {category} -o {bar_table} -u f -j F -e {isenzyme} -n {save_colon}",
category=g, isenzyme=("T" if (self.custom_to == 'level4ec' or self.regroup == 'uniref90_level4ec') else "F"))
features = [re.search(r'^[^:\|]*', f).group().strip()
for f in pd.read_csv(self.bar_table, sep='\t', index_col=0).index if not f.find('|') == -1]
features = list(set(features))
with lefse():
# pdb.set_trace()
for f in df.index:
print(f)
# f = f.replace('_', '-')
if f in features:
self.system("{base_dir}/humann2_barplot --input {bar_table} -s metadata --focal-feature {f} --focal-metadatum {g} --last-metadatum {g} --output {bar_out}/{prefix}{f}_stratification_bar.pdf",
f=f, g=g)