Skip to content
This repository has been archived by the owner on Apr 19, 2023. It is now read-only.

Scrublet: ValueError: columns overlap but no suffix specified: Index(['nGene', 'nUMI'], dtype='object') [BUG] #353

Open
cbravo93 opened this issue Aug 9, 2021 · 1 comment
Labels
bug Something isn't working

Comments

@cbravo93
Copy link

cbravo93 commented Aug 9, 2021

Describe the bug

Error executing process > 'single_sample_scrublet:SCRUBLET__DOUBLET_REMOVAL:SC__SCRUBLET__DOUBLET_DETECTION_REPORT (1)'                                                                                                     
                                                                                                                                                                                                                            
Caused by:                                                                                                                                                                                                                  
  Process `single_sample_scrublet:SCRUBLET__DOUBLET_REMOVAL:SC__SCRUBLET__DOUBLET_DETECTION_REPORT (1)` terminated with an error exit status (1)                                                                            
                                                                                                                                                                                                                            
Command executed:                                                                                                                                                                                                           
                                                                                                                                                                                                                            
  papermill sc_doublet_detection_report.ipynb             --report-mode             snare_mouse_cortex_rna.SC_Scrublet_doublet_detection_report.ipynb                       -p SCRUBLET_OBJECT_FILE snare_mouse_
cortex_rna.SC__SCRUBLET__DOUBLET_DETECTION.ScrubletObject.pklz             -p H5AD_WITH_SCRUBLET_INFO snare_mouse_cortex_rna.SCRUBLET.SC__ANNOTATE_BY_CELL_METADATA.h5ad             -p H5AD_WITH_DIM_RED snare_mouse_cortex
_rna.SC__SCANPY__PARAM_EXPLORE_MARKER_GENES.leiden_0.6.h5ad                     -p WORKFLOW_MANIFEST '{"nextflowVersion":"!20.04.1","defaultBranch":"master","version":"0.24.0","homePage":"https://github.com/vib-singlecel
l-nf/vsn-pipelines","gitmodules":null,"description":"A repository of pipelines for single-cell data in Nextflow DSL2","name":"vib-singlecell-nf/vsn-pipelines","mainScript":"main.nf","author":null}'                   -p W
ORKFLOW_PARAMETERS '{"global":{"project_name":"snare_mouse_cortex","outdir":"out","species":"mouse","genome":{"assembly":"mm10"},"seed":240},"misc":{"test":{"enabled":false},"manifestAsJSON":"{\"nextflowVersion\":\"!20.0
4.1\",\"defaultBranch\":\"master\",\"version\":\"0.24.0\",\"homePage\":\"https://github.com/vib-singlecell-nf/vsn-pipelines\",\"gitmodules\":null,\"description\":\"A repository of pipelines for single-cell data in Nextfl
ow DSL2\",\"name\":\"vib-singlecell-nf/vsn-pipelines\",\"mainScript\":\"main.nf\",\"author\":null}"},"utils":{"container":"vibsinglecellnf/utils:0.3.0","publish":{"compressionLevel":6,"annotateWithBatchVariableName":fals
e}},"sc":{"file_converter":{"off":"h5ad","tagCellWithSampleId":true,"useFilteredMatrix":true,"makeVarIndexUnique":false},"scanpy":{"container":"vibsinglecellnf/scanpy:0.5.2","report":{"annotations_to_plot":[]},"feature_s
election":{"report_ipynb":"/src/scanpy/bin/reports/sc_select_variable_genes_report.ipynb","method":"mean_disp_plot","minMean":0.0125,"maxMean":3,"minDisp":0.5,"off":"h5ad"},"feature_scaling":{"method":"zscore_scale","max
SD":10,"off":"h5ad"},"neighborhood_graph":{"off":"h5ad"},"dim_reduction":{"report_ipynb":"/src/scanpy/bin/reports/sc_dim_reduction_report.ipynb","pca":{"method":"pca","off":"h5ad"},"umap":{"method":"umap","off":"h5ad"},"
tsne":{"method":"tsne","off":"h5ad"}},"clustering":{"preflight_checks":true,"report_ipynb":"/src/scanpy/bin/reports/sc_clustering_report.ipynb","method":"leiden","resolutions":[0.3,0.6,0.9,1.2],"off":"h5ad"},"marker_gene
s":{"method":"wilcoxon","ngenes":0,"groupby":"leiden","off":"h5ad"},"filter":{"report_ipynb":"/src/scanpy/bin/reports/sc_filter_qc_report.ipynb","cellFilterStrategy":"fixedthresholds","cellFilterMinNCounts":800,"cellFilt
erMaxPercentMito":0.05,"geneFilterMinNCells":3,"off":"h5ad","outdir":"out"},"data_transformation":{"method":"log1p","off":"h5ad"},"normalization":{"method":"cpx","countsPerCellAfter":10000,"off":"h5ad"}},"scope":{"genome":"mm10","tree":{"level_1":"snare_mouse_cortex","level_2":"rna-vsn","level_3":""}},"scrublet":{"container":"vibsinglecellnf/scrublet:0.1.4","doublet_detection":{"report_ipynb":"/src/scrublet/bin/reports/sc_doublet_detection_report.ipynb","useVariableFeatures":"False","technology":"10x","off":"h5ad"},"cell_annotate":{"off":"h5ad","method":"obo","indexColumnName":"index"},"cell_filter":{"off":"h5ad","method":"internal","filters":[{"id":"NO_DOUBLETS","sampleColumnName":"sample_id","filterColumnName":"scrublet__predicted_doublets","valuesToKeepFromFilterColumn":["False"]}]}}},"data":{"loom":{"file_paths":"/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/SNARE_mouse/data/rna/snare_mouse_cortex_rna.loom","suffix":".loom"}},"pcacv":{"container":"vibsinglecellnf/pcacv:0.2.0","find_optimal_npcs":{"accessor":"@assays$RNA@scale.data"}}}'

Command exit status:
  1

Command output:
  (empty)

Command error:
    File "/opt/venv/lib/python3.7/site-packages/papermill/execute.py", line 222, in raise_for_execution_errors 
      raise error
  papermill.exceptions.PapermillExecutionError: 
  ---------------------------------------------------------------------------
  Exception encountered at "In [8]":
  ---------------------------------------------------------------------------
  ValueError                                Traceback (most recent call last)
  <ipython-input-8-7dd5d14d7bd7> in <module>
        3 )
        4 adata_dr.obs = adata_dr.obs.join(
  ----> 5     other=adata_sl_obs
        6 )
  
  /opt/venv/lib/python3.7/site-packages/pandas/core/frame.py in join(self, other, on, how, lsuffix, rsuffix, sort)
     7244         # For SparseDataFrame's benefit
     7245         return self._join_compat(
  -> 7246             other, on=on, how=how, lsuffix=lsuffix, rsuffix=rsuffix, sort=sort
     7247         )
     7248 
  
  /opt/venv/lib/python3.7/site-packages/pandas/core/frame.py in _join_compat(self, other, on, how, lsuffix, rsuffix, sort)
     7267                 right_index=True,
     7268                 suffixes=(lsuffix, rsuffix),
  -> 7269                 sort=sort,
     7270             )
     7271         else:
  
  /opt/venv/lib/python3.7/site-packages/pandas/core/reshape/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
       81         validate=validate,
       82     )
  ---> 83     return op.get_result()
       84 
       85 
  
  /opt/venv/lib/python3.7/site-packages/pandas/core/reshape/merge.py in get_result(self)
      646 
      647         llabels, rlabels = _items_overlap_with_suffix(
  --> 648             ldata.items, lsuf, rdata.items, rsuf
      649         )
      650 
  
  /opt/venv/lib/python3.7/site-packages/pandas/core/reshape/merge.py in _items_overlap_with_suffix(left, lsuffix, right, rsuffix)
     2009         raise ValueError(
     2010             "columns overlap but no suffix specified: "
  -> 2011             "{rename}".format(rename=to_rename)
     2012         )
     2013 
  
  ValueError: columns overlap but no suffix specified: Index(['nGene', 'nUMI'], dtype='object')

Work dir:
  /lustre1/project/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/SNARE_mouse/output/rna/vsn/scrublet/work/1c/e18edb995c50de753d1a874642c070

Tip: you can try to figure out what's wrong by changing to the process work dir and showing the script file named `.command.sh`

To Reproduce
This is the first time I have seen this error. Possibly relevant: it is also the first time I have run Scrublet with a loom file as input.

Additional context
Add any other context about the problem here.

@cbravo93 cbravo93 added the bug Something isn't working label Aug 9, 2021
@cbravo93
Copy link
Author

cbravo93 commented Aug 9, 2021

UPDATE: Removing nGene and nUMI from the loom metadata avoids the error.

# Workaround: strip the `nGene` and `nUMI` column attributes from the loom
# file so they no longer overlap when the Scrublet report joins the cell
# metadata tables.
library(SCopeLoomR)

loom_path <- "/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/analysis/SNARE_mouse/data/rna/snare_mouse_cortex_rna.loom"

# Opened with mode "r+" because the calls below modify the file in place.
loom <- open_loom(loom_path, mode = "r+")

# Inspect the current cell annotations before changing anything.
get_cell_annotation(loom)

# Drop the two column attributes named in the "columns overlap" error.
remove_col_attr(loom, "nGene")
remove_col_attr(loom, "nUMI")

# Remove the `metrics` entry from the global metadata and write it back.
# NOTE(review): presumably `metrics` still references nGene/nUMI -- confirm.
gmd <- get_global_meta_data(loom = loom)
gmd[["metrics"]] <- NULL
update_global_meta_data(loom = loom, meta.data.json = rjson::toJSON(x = gmd))

# Release the file handle once the edits are done; the original snippet left
# the loom file open.
close_loom(loom)

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

1 participant