Skip to content

Commit

Permalink
filter ontology in barplot
Browse files Browse the repository at this point in the history
  • Loading branch information
caleb-easterly committed Apr 1, 2019
1 parent f7399a0 commit 53f705d
Show file tree
Hide file tree
Showing 10 changed files with 203 additions and 24 deletions.
6 changes: 3 additions & 3 deletions metaquantome/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,9 @@ def parse_args_cli():
' heatmap and PCA plot, so the colors can be tweaked to match. ')
bar.add_argument('--target_rank',
help="(Tax bar and FT dist). Taxonomic rank to restrict to in the plot. ")
bar.add_argument('--target_onto', choices=["mf", "bp", "cc"],
help="(Function and FT dist bar only) " +
"Ontology to restrict to, for function distribution.")
bar.add_argument("--whichway", choices=["f_dist", "t_dist"],
help="(FT dist only) " +
"Which distribution - functional distribution for a taxon (f_dist) or " +
Expand All @@ -254,9 +257,6 @@ def parse_args_cli():
bar.add_argument("--id",
help="(FT dist bar only) " +
"Taxonomic or functional term id - either a NCBI taxID or a GO term id (GO:XXXXXXX)")
bar.add_argument('--target_onto', choices=["mf", "bp", "cc"],
help="(FT dist bar only) " +
"Ontology to restrict to, for function distribution.")

volc = parser_viz.add_argument_group('Volcano Plot')
volc.add_argument('--fc_name',
Expand Down
86 changes: 86 additions & 0 deletions metaquantome/data/test/eggnog_out.tab

Large diffs are not rendered by default.

21 changes: 21 additions & 0 deletions metaquantome/data/test/eggnog_viz_file.tab
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
id name namespace NS_mean WS_mean int737NS int737WS int852NS int852WS int867NS int867WS int737NS_n_peptide int737WS_n_peptide int852NS_n_peptide int852WS_n_peptide int867NS_n_peptide int867WS_n_peptide int737NS_n_samp_children int737WS_n_samp_children int852NS_n_samp_children int852WS_n_samp_children int867NS_n_samp_children int867WS_n_samp_children
GO:0008152 metabolic process biological_process 9707895600.63897 34.4676527340745 32.7280807192344 33.639915914834 33.8500846328958 35.3680642871384 32.6033492340827 33.6566592988174 1837 1837 1837 1837 1837 1837 3 3 3 3 3 3
GO:0009058 biosynthetic process biological_process 3977879569.3907 33.2068587142223 31.5038426691202 32.3344633958136 32.5457060492145 34.154051958603 31.2863832807592 32.2790138218569 576 576 576 576 576 576 NA NA NA NA NA NA
GO:0006807 nitrogen compound metabolic process biological_process 3333526855.75693 33.089118314848 31.3274682605914 32.2651614156765 32.2393712217206 33.9854328532233 31.0581331702586 32.2877493545852 763 763 763 763 763 763 NA NA NA NA NA NA
GO:0006412 translation biological_process 2440026495.81359 32.4930473371394 30.8105936229541 31.6128211401251 31.846608199857 33.4455891939318 30.5527566639519 31.5536159837728 336 336 336 336 336 336 NA NA NA NA NA NA
GO:0006950 response to stress biological_process 1052880100.76931 31.3981405492947 29.6095404092094 30.439676321524 30.5689283794754 32.393517103388 29.4759257423614 30.3742688501823 124 124 124 124 124 124 NA NA NA NA NA NA
GO:0042221 response to chemical biological_process 965521061.018174 30.8296096443541 29.4808523042029 29.9278611878217 30.4629201331805 31.8156195431379 29.3138771113971 29.7828763814873 85 85 85 85 85 85 NA NA NA NA NA NA
GO:0044710 metabolic process biological_process 928705245.346089 30.6072989115392 29.3036916765872 30.0343848802313 30.4615672319471 31.2433837819293 29.2644431642638 30.237026652397 225 225 225 225 225 225 3 3 3 3 3 3
GO:0071840 cellular component organization or biogenesis biological_process 833223817.553338 31.5724843198215 29.5524724554893 31.0024286599174 30.1336470253456 32.3466932720115 28.99316994804 30.8738545987975 357 357 357 357 357 357 NA NA NA NA NA NA
GO:0005975 carbohydrate metabolic process biological_process 379644473.58704 30.2823742633555 27.4581777552516 28.9388737660123 29.2889137336267 31.4249235572303 28.1538245092415 28.9540435785897 76 76 76 76 76 76 NA NA NA NA NA NA
GO:0006091 generation of precursor metabolites and energy biological_process 373375658.545852 27.5844108414961 26.7198540418534 27.2161871812721 29.5462583955409 27.603990048666 27.749246320597 27.8613248072876 30 30 30 30 30 30 NA NA NA NA NA NA
GO:0006629 lipid metabolic process biological_process 346873604.466979 28.4302522117954 27.5680638918355 28.0217901314741 29.1700984804642 28.9561582409451 27.8238760468995 28.120421411102 37 37 37 37 37 37 NA NA NA NA NA NA
GO:0065003 protein-containing complex assembly biological_process 223362430.986751 29.4067502060857 27.7185066355636 29.0195327862999 28.1835372517938 30.0898188984621 27.1053181932927 28.7398209542951 83 83 83 83 83 83 NA NA NA NA NA NA
GO:0044711 biosynthetic process biological_process 220756695.513963 28.7739551105881 27.1093723370704 28.4003539740075 28.4104810597675 29.3934280879723 27.2597605688579 28.2424502003084 53 53 53 53 53 53 NA NA NA NA NA NA
GO:0044723 carbohydrate metabolic process biological_process 182807836.125096 28.93667511454 26.448399628704 27.9008315674069 28.2396393635986 29.9843751125869 27.0599307314212 27.7686749761679 34 34 34 34 34 34 NA NA NA NA NA NA
GO:0019222 regulation of metabolic process biological_process 173056856.95311 28.9389001060965 27.0494629499546 28.2721701007768 27.836284601413 29.6501969737214 27.0673584710354 28.4903755600971 69 69 69 69 69 69 NA NA NA NA NA NA
GO:0006810 transport biological_process 140537615.614438 28.7939728267842 27.1234873806423 27.9615606778428 27.4144285386699 29.5408914880366 26.524314216561 28.3984348617235 88 88 88 88 88 88 NA NA NA NA NA NA
GO:0016032 viral process biological_process 111058995.875521 30.4491645013834 26.3289762021563 29.2487087831025 27.1434558706649 31.5367058114516 26.5844879797392 29.263908857592 48 48 48 48 48 48 NA NA NA NA NA NA
GO:0016070 RNA metabolic process biological_process 91888884.7293597 28.1369040778828 26.1677866053689 27.693875726079 26.8519757597805 28.5989127651248 26.2381122887607 27.9638332212402 81 81 81 81 81 81 NA NA NA NA NA NA
GO:0006461 protein-containing complex assembly biological_process 88525220.719722 27.5427325704944 26.2738037562703 27.2967939297801 27.0298059946766 27.9798815285066 25.4990572646483 27.2258576018008 40 40 40 40 40 40 NA NA NA NA NA NA
GO:0006457 protein folding biological_process 75079182.3365918 29.268835134676 25.7050718748645 28.2845189662675 26.7348727386628 30.2745828042056 25.8105457595204 28.2294977490008 32 32 32 32 32 32 NA NA NA NA NA NA
Binary file added metaquantome/data/test/test_eggnog_viz.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions metaquantome/data/test/viz_f_filt.tab
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id name namespace NS_mean WS_mean int737NS int737WS int852NS int852WS int867NS int867WS int737NS_n_peptide int737WS_n_peptide int852NS_n_peptide int852WS_n_peptide int867NS_n_peptide int867WS_n_peptide int737NS_n_samp_children int737WS_n_samp_children int852NS_n_samp_children int852WS_n_samp_children int867NS_n_samp_children int867WS_n_samp_children
GO:0003824 catalytic activity molecular_function 1385407584.13192 31.4539840896763 29.8549354693473 30.8881079072179 31.0695559374115 32.1062472477587 29.7951982154145 31.044367271616 460 460 460 460 460 460 6 6 6 6 6 6
GO:0003735 structural constituent of ribosome molecular_function 1326838449.29867 31.821379782899 29.9285877846662 30.9700260743199 30.9954165750759 32.7495445116799 29.607449636552 30.9400897387871 223 223 223 223 223 223 NA NA NA NA NA NA
2 changes: 1 addition & 1 deletion metaquantome/modules/run_viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def run_viz(plottype, img, infile, strip=None,
r_script_path = os.path.join(BASE_DIR, 'modules', 'viz.R')
cmd = ['Rscript', '--vanilla', r_script_path, plottype, img, infile]
if plottype == "bar":
cmd += [mode, meancol, nterms, width, height, target_rank, barcol, tabfile]
cmd += [mode, meancol, nterms, width, height, target_rank, target_onto, barcol, tabfile]
elif plottype == "volcano":
cmd += [str(textannot), fc_name, flip_fc, gosplit, width, height, tabfile]
elif plottype == "heatmap":
Expand Down
51 changes: 34 additions & 17 deletions metaquantome/modules/viz.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,31 @@ check_ranks <- function(df, target_rank) {
}
}

# Lookup from the short ontology codes ("bp", "mf", "cc") to the long
# names that are compared against the "namespace" column.
short_onto_to_long <- c(
  "bp" = "biological_process",
  "mf" = "molecular_function",
  "cc" = "cellular_component"
)

# Restrict a data frame of functional terms to a single ontology.
#
# Args:
#   df: data frame with a "namespace" column (long ontology names) and a
#     "name" column.
#   target_onto: short ontology code; must be one of
#     names(short_onto_to_long), i.e. "bp", "mf" or "cc".
#
# Returns:
#   The rows of df whose namespace is the long name for target_onto,
#   excluding rows whose name is itself one of the long ontology names.
#
# Stops with an error if target_onto is NULL or unrecognized, or if no rows
# remain after filtering.
filter_to_desired_onto <- function(df, target_onto) {
  if (is.null(target_onto)) {
    stop('must provide target ontology for function or function-taxonomy mode',
         call. = FALSE)
  }
  # An unknown code would look up as NA; comparing against NA and indexing
  # with the result yields all-NA rows instead of an empty data frame, so
  # the emptiness check below would never fire. Reject it explicitly.
  if (length(target_onto) != 1 || !(target_onto %in% names(short_onto_to_long))) {
    stop(paste0("Unknown target ontology (", paste(target_onto, collapse = ", "),
                "); should be one of ",
                paste(names(short_onto_to_long), collapse = ", ")),
         call. = FALSE)
  }
  long_onto <- short_onto_to_long[[target_onto]]
  # %in% treats a missing namespace as "no match"; `==` would return NA for
  # it, and NA row indices insert all-NA rows into the result.
  in_onto <- df[, "namespace"] %in% long_onto
  # also, remove any bp, cc, or mf rows
  is_onto_row <- df[, "name"] %in% short_onto_to_long
  df <- df[in_onto & !is_onto_row, , drop = FALSE]
  if (nrow(df) == 0) {
    stop(paste0("No terms in the dataframe come from the desired ontology (", target_onto, ")"),
         call. = FALSE)
  }
  return(df)
}

####### ==================== #######
# BARPLOT #
####### ==================== #######
mq_barplot <- function(df, img, mode, meancol,
nterms, width, height, target_rank, int_barcol,
tabfile){
nterms, width, height, target_rank, target_onto,
int_barcol, tabfile){
if (!(meancol %in% names(df))){
stop('Mean column name not found in dataframe. Check spelling and try again.',
call. = FALSE)
Expand All @@ -114,8 +133,12 @@ mq_barplot <- function(df, img, mode, meancol,
# filter
df <- df[df[, "rank"] == target_rank, ]
}
if (mode == "f") {
df <- filter_to_desired_onto(df, target_onto)
}
# exponentiate
df[, meancol] <- 2^df[, meancol]

# reorder, for taking the top N terms
reord <- df[order(df[, meancol], decreasing=TRUE), ]
# take top N terms or number of rows, whichever is less
Expand All @@ -132,7 +155,7 @@ mq_barplot <- function(df, img, mode, meancol,

ggplot(sub_reord) +
geom_bar(aes_(x = reorder(sub_reord[, barnamecol], -sub_reord[, meancol]),
y = as.name(meancol)), stat = "identity", fill = barcol, col = "black") +
y = as.name(meancol)), stat = "identity", fill = barcol, col = "black", position = "dodge") +
theme_bw() +
labs(x = xlab, y = "Total Peptide Intensity") +
theme(axis.text.x = element_text(angle = X_AXIS_ROT, hjust = 1))
Expand Down Expand Up @@ -166,14 +189,18 @@ barplot_cli <- function(args){
width <- as.numeric(args[7])
height <- as.numeric(args[8])
target_rank <- args[9]
barcol <- as.numeric(args[10])
tabfile <- args[11]
if (target_rank == "None") target_rank <- NULL
target_onto <- args[10]
if (target_onto == "None") target_onto <- NULL
barcol <- as.numeric(args[11])
tabfile <- args[12]
if (tabfile == "None") tabfile <- NULL
plt <- mq_barplot(df, img=img, mode=mode,
meancol=meancol,
nterms=nterms,
height=height, width=width,
target_rank=target_rank, int_barcol=barcol,
target_rank=target_rank, target_onto = target_onto,
int_barcol=barcol,
tabfile=tabfile)
}

Expand Down Expand Up @@ -475,10 +502,6 @@ volcano_cli <- function(args){
####### ==================== #######
# FT DIST #
####### ==================== #######
short_onto_to_long <- c("bp" = "biological_process",
"mf"="molecular_function",
"cc"="cellular_component")

mq_ft_dist <- function(df, img, whichway, name, id, meancol,
nterms, width, height, target_rank, target_onto,
int_barcol, tabfile){
Expand Down Expand Up @@ -515,13 +538,7 @@ mq_ft_dist <- function(df, img, whichway, name, id, meancol,
check_ranks(df, target_rank)
df <- df[df[, "rank"] == target_rank, ]
} else if (whichway == "f_dist"){
long_onto <- short_onto_to_long[target_onto]
df <- df[df[, "namespace"] == long_onto, ]
# also, remove any bp, cc, or mf rows
df <- df[!(df[, "name"] %in% short_onto_to_long), ]
if (nrow(df) == 0) {
stop(paste0("No terms in the dataframe come from the desired ontology (", target_onto, ")"))
}
df <- filter_to_desired_onto(df, target_onto)
} else {
stop("Wrong whichway - should be t_dist or f_dist.")
}
Expand Down
16 changes: 16 additions & 0 deletions tests/travis/testCLI.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,22 @@ def testVizTabfile(self):
self.assertEqual(b'3', nline.stdout.strip().split()[0])
os.remove(tabfile)

def testFuncBar(self):
    """Run the ``viz`` CLI for a function-mode bar plot restricted to 'bp'.

    The test passes when the command exits with status 0.
    """
    infile = testfile('eggnog_out.tab')
    imgfile = testfile('test_eggnog_viz.png')
    samps = testfile('rudney_samples.tab')
    tabfile = testfile("eggnog_viz_file.tab")
    # Pass an argument list and skip the shell: a space-joined command
    # string breaks as soon as any path contains whitespace or shell
    # metacharacters.
    cmd = [
        'python3', 'metaquantome/cli.py', 'viz', '-m', 'f',
        '--plottype', 'bar',
        '--infile', infile,
        '--img', imgfile,
        '--samps', samps,
        '--nterms', '20',
        '--meancol', 'NS_mean',
        '--target_onto', 'bp',
        '--tabfile', tabfile,
    ]
    test_status = subprocess.call(cmd)
    self.assertEqual(test_status, 0)

def testHeatmapViz(self):
infile = testfile('ec_ttest_tested.tab')
imgfile = testfile('cli_heatmap_viz.png')
Expand Down
7 changes: 4 additions & 3 deletions tests/travis/testExpand.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ def testSingleInt(self):
self.assertEqual(go_df.loc["GO:0008152"]['int'], np.log2(100))

def testMultipleInt(self):
func=testfile('multiple_func.tab')
int=testfile('multiple_int.tab')
func = testfile('multiple_func.tab')
int = testfile('multiple_int.tab')
go_df = expand.expand('f', sinfo='{"s1": ["int1", "int2", "int3"]}', int_file=int, pep_colname_int='peptide',
pep_colname_func='peptide', pep_colname_tax='peptide', data_dir=TEST_DIR, func_file=func,
func_colname='go', ontology='go')
Expand All @@ -84,10 +84,11 @@ def testNopep(self):
def testSlimDown(self):
func=testfile('func_eggnog.tab')
int=testfile('int_eggnog.tab')
outfile=testfile('eggnog_out.tab')
sinfo='{"NS": ["int737NS", "int852NS", "int867NS"], "WS": ["int737WS", "int852WS", "int867WS"]}'
go_df = expand.expand('f', sinfo=sinfo, int_file=int, pep_colname_int='peptide', pep_colname_func='peptide',
pep_colname_tax='peptide', data_dir=TEST_DIR, func_file=func, func_colname='go', ontology='go',
slim_down=True)
slim_down=True, outfile=outfile)
# test that all go terms are in slim
# load slim
returned_gos = set(go_df['id'])
Expand Down
35 changes: 35 additions & 0 deletions tests/travis/testRunViz.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import unittest
import os
import pandas as pd

from metaquantome.util.testutils import testfile, TTEST_SINFO
from metaquantome.modules.run_viz import run_viz
Expand All @@ -21,6 +22,33 @@ def testBasicTaxBar(self):
nterms='2', meancol='samp1_mean',
target_rank="genus", barcol="6")

def testFuncBar(self):
    """Check that a function-mode bar plot keeps only the requested ontology.

    For each short ontology code, run the bar plot with ``target_onto`` set
    to it, read the tabfile back, and check that its ``namespace`` column
    holds exactly the matching long ontology name.
    """
    infile = testfile('eggnog_out.tab')
    tabfile = testfile('viz_f_filt.tab')
    # Order matters: every run rewrites the same tabfile, so the last
    # ontology listed here ('mf') is the one left on disk.
    expected_namespace = {
        'bp': 'biological_process',
        'cc': 'cellular_component',
        'mf': 'molecular_function',
    }
    for onto, long_name in expected_namespace.items():
        with self.subTest(target_onto=onto):
            run_viz('bar', self.img, infile, mode='f',
                    nterms='2', meancol='NS_mean',
                    target_onto=onto, barcol='1', tabfile=tabfile)
            df = pd.read_csv(tabfile, sep='\t')
            # Compare plain lists: assertEqual on a numpy array raises
            # "truth value of an array is ambiguous" (an error, not a
            # failure) when more than one namespace is present.
            namespaces = list(df.namespace.unique())
            self.assertEqual(namespaces, [long_name])

def testVolcano(self):
infile = testfile('cli_mult_test_out.tab')
run_viz('volcano', self.img, infile,
Expand Down Expand Up @@ -70,6 +98,13 @@ def testFtDist(self):
target_rank="genus",
nterms="all")

# f dist
run_viz('ft_dist', self.img, infile,
meancol="s1_mean",
whichway='f_dist',
target_onto="bp",
nterms="all", id=209)

# test tabfile
tabfile = testfile("tmp")
run_viz('ft_dist', self.img, infile,
Expand Down

0 comments on commit 53f705d

Please sign in to comment.