Check early if open file limit is too low, resolves #384
davidemms committed May 7, 2020
1 parent 7ed43e7 commit 94c1148
Showing 5 changed files with 150 additions and 67 deletions.
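The check this commit introduces needs roughly one file handle per ordered species pair (plus a margin of 50 extra files) before orthogroup inference begins. As a rough illustration of that arithmetic only — the commit itself verifies the limit by opening real test files on disk rather than querying it, and the helper below is this note's own invention — the requirement can be compared against the process's descriptor limit with Python's standard resource module (Unix only):

# Illustrative sketch only: OrthoFinder's actual check (see __main__.py below) opens
# n_species^2 + 50 real test files rather than querying the limit directly.
import resource

def soft_limit_is_sufficient(n_species, n_extra=50):
    """True if the soft RLIMIT_NOFILE covers the ~n^2 pairwise files plus a safety margin."""
    n_required = n_species * n_species + n_extra
    soft, _hard = resource.getrlimit(resource.RLIMIT_NOFILE)   # (soft, hard) limits, Unix only
    return soft >= n_required

# Example: 32 species -> 32*32 + 50 = 1074 handles, above the common 1024 default soft limit.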
62 changes: 54 additions & 8 deletions scripts_of/__main__.py
@@ -1604,8 +1604,16 @@ def ProcessesNewFasta(fastaDir, speciesInfoObj_prev = None, speciesToUse_prev_na
    speciesInfoObj.speciesToUse = speciesInfoObj.speciesToUse + newSpeciesIDs
    speciesInfoObj.nSpAll = max(speciesInfoObj.speciesToUse) + 1 # will be one of the new species
    return speciesInfoObj

def CheckOptions(options):

def DeleteDirectoryTree(d):
    if os.path.exists(d):
        try:
            shutil.rmtree(d)
        except OSError:
            time.sleep(1)
            shutil.rmtree(d, True)

def CheckOptions(options, speciesToUse):
    """Check any optional arguments are valid once we know what species are in the analysis
    - user supplied species tree
    """
@@ -1619,7 +1627,45 @@ def CheckOptions(options):
    if options.qStopAfterAlignments and (not options.qMSATrees):
        print("ERROR: Must use '-M msa' option to generate sequence files and infer multiple sequence alignments for orthogroups")
        util.Fail()


    # check can open enough files
    n_extra = 50
    q_do_orthologs = not any((options.qStopAfterPrepare, options.qStopAfterGroups, options.qStopAfterSeqs, options.qStopAfterAlignments, options.qStopAfterTrees))
    if q_do_orthologs and not options.qStartFromTrees:
        n_sp = len(speciesToUse)
        wd = files.FileHandler.GetWorkingDirectory_Write()
        wd_files_test = wd + "Files_test/"
        fh = []
        try:
            if not os.path.exists(wd_files_test):
                os.mkdir(wd_files_test)
            for i_sp in range(n_sp):
                di = wd_files_test + "Sp%d/" % i_sp
                if not os.path.exists(di):
                    os.mkdir(di)
                for j_sp in range(n_sp):
                    fnij = di + "Sp%d.txt" % j_sp
                    fh.append(open(fnij, 'w'))
            # create a few extra files to be safe
            for i_extra in range(n_extra):
                fh.append(open(wd_files_test + "Extra%d.txt" % i_extra, 'w'))
            # close the files again and delete
            for fhh in fh:
                fhh.close()
            DeleteDirectoryTree(wd_files_test)
        except IOError as e:
            if str(e).startswith("[Errno 24] Too many open files"):
                util.number_open_files_exception_advice(len(speciesToUse), False)
                for fhh in fh:
                    fhh.close()
                DeleteDirectoryTree(wd_files_test)
                util.Fail()
            else:
                for fhh in fh:
                    fhh.close()
                DeleteDirectoryTree(wd_files_test)
                print("ERROR: Attempted to open required files for OrthoFinder run but an unexpected error occurred. \n\nStacktrace:")
                raise
    return options

def main(args=None):
@@ -1646,7 +1692,7 @@ def main(args=None):
        # 3.
        speciesInfoObj = ProcessesNewFasta(fastaDir, speciesInfoObj, speciesToUse_names)
        files.FileHandler.LogSpecies()
        options = CheckOptions(options)
        options = CheckOptions(options, speciesInfoObj.speciesToUse)
        # 4.
        seqsInfo = util.GetSeqsInfo(files.FileHandler.GetWorkingDirectory1_Read(), speciesInfoObj.speciesToUse, speciesInfoObj.nSpAll)
        # 5.
@@ -1667,7 +1713,7 @@
        speciesInfoObj = None
        speciesInfoObj = ProcessesNewFasta(fastaDir)
        files.FileHandler.LogSpecies()
        options = CheckOptions(options)
        options = CheckOptions(options, speciesInfoObj.speciesToUse)
        # 4
        seqsInfo = util.GetSeqsInfo(files.FileHandler.GetWorkingDirectory1_Read(), speciesInfoObj.speciesToUse, speciesInfoObj.nSpAll)
        # 5.
@@ -1688,7 +1734,7 @@
        speciesInfoObj, _ = ProcessPreviousFiles(files.FileHandler.GetWorkingDirectory1_Read(), options.qDoubleBlast)
        files.FileHandler.LogSpecies()
        print("Using previously calculated BLAST results in %s" % (files.FileHandler.GetWorkingDirectory1_Read()[0]))
        options = CheckOptions(options)
        options = CheckOptions(options, speciesInfoObj.speciesToUse)
        # 4.
        seqsInfo = util.GetSeqsInfo(files.FileHandler.GetWorkingDirectory1_Read(), speciesInfoObj.speciesToUse, speciesInfoObj.nSpAll)
        # 5.
@@ -1703,13 +1749,13 @@
        # 0.
        speciesInfoObj, _ = ProcessPreviousFiles(continuationDir, options.qDoubleBlast)
        files.FileHandler.LogSpecies()
        options = CheckOptions(options)
        options = CheckOptions(options, speciesInfoObj.speciesToUse)
        # 9
        GetOrthologues(speciesInfoObj, options, prog_caller)
    elif options.qStartFromTrees:
        speciesInfoObj, _ = ProcessPreviousFiles(files.FileHandler.GetWorkingDirectory1_Read(), options.qDoubleBlast)
        files.FileHandler.LogSpecies()
        options = CheckOptions(options)
        options = CheckOptions(options, speciesInfoObj.speciesToUse)
        GetOrthologues_FromTrees(options)
    else:
        raise NotImplementedError
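The new check in __main__.py above recognises the failure by matching the message string "[Errno 24] Too many open files". Purely for comparison, and not something the commit does, the same condition can be detected by testing the exception's errno attribute against errno.EMFILE, which does not depend on the message wording; the probe path below is an arbitrary choice for the sketch:

import errno

handles = []
try:
    for _ in range(100000):
        # each open() consumes a descriptor, even when the path is the same file
        handles.append(open("/tmp/of_fd_probe.txt", "w"))
except OSError as e:                 # IOError is an alias of OSError on Python 3
    if e.errno == errno.EMFILE:      # EMFILE == 24: per-process open-file limit reached
        print("Too many open files: raise the limit (e.g. 'ulimit -n') and rerun")
    else:
        raise
finally:
    for fh in handles:
        fh.close()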
2 changes: 1 addition & 1 deletion scripts_of/files.py
@@ -795,7 +795,7 @@ def InitialiseFileHandler(options, fastaDir=None, continuationDir=None, resultsD
    Implementation
    1. Working out if an old directory structure is being used
    2. Construct and apporpriate PreviousFilesLocator if necessary - this locates all required files
    2. Construct an appropriate PreviousFilesLocator if necessary - this locates all required files
    3. Pass this to FileHandler - this creates the directory structure required for this run
    4. if error: print and exit
    5. Return FileHandler
2 changes: 1 addition & 1 deletion scripts_of/parallel_task_manager.py
@@ -355,7 +355,7 @@ def Fail():
    sys.stderr.flush()
    ptm = ParallelTaskManager_singleton()
    ptm.Stop()
    print("ERROR: An error occurred, please review error messages for more information.")
    print("ERROR: An error occurred, ***please review the error messages*** they may contain useful information about the problem.")
    sys.exit(1)


119 changes: 63 additions & 56 deletions scripts_of/trees2ologs_of.py
@@ -784,62 +784,69 @@ def __exit__(self, type, value, traceback):
def DoOrthologuesForOrthoFinder(ogSet, species_tree_rooted_labelled, GeneToSpecies, all_stride_dup_genes, qNoRecon, hog_writer):
    """
    """
    # Create directory structure
    speciesDict = ogSet.SpeciesDict()
    SequenceDict = ogSet.SequenceDict()
    # Write directory and file structure
    qInitialisedSuspectGenesDirs = False
    speciesIDs = ogSet.speciesToUse
    nspecies = len(speciesIDs)
    dResultsOrthologues = files.FileHandler.GetOrthologuesDirectory()
    for index1 in xrange(nspecies):
        d = dResultsOrthologues + "Orthologues_" + speciesDict[str(speciesIDs[index1])] + "/"
        if not os.path.exists(d): os.mkdir(d)
        for index2 in xrange(nspecies):
            if index2 == index1: continue
            with open(d + '%s__v__%s.tsv' % (speciesDict[str(speciesIDs[index1])], speciesDict[str(speciesIDs[index2])]), csv_write_mode) as outfile:
                writer1 = csv.writer(outfile, delimiter="\t")
                writer1.writerow(("Orthogroup", speciesDict[str(speciesIDs[index1])], speciesDict[str(speciesIDs[index2])]))
    neighbours = GetSpeciesNeighbours(species_tree_rooted_labelled)
    # Infer orthologues and write them to file
    nOgs = len(ogSet.OGs())
    nOrthologues_SpPair = util.nOrtho_sp(nspecies)
    species = list(speciesDict.keys())
    reconTreesRenamedDir = files.FileHandler.GetOGsReconTreeDir(True)
    spec_seq_dict = ogSet.Spec_SeqDict()
    sp_to_index = {str(sp):i for i, sp in enumerate(ogSet.speciesToUse)}
    with open(files.FileHandler.GetDuplicationsFN(), csv_write_mode) as outfile, OrthologsFiles(dResultsOrthologues, speciesDict, ogSet.speciesToUse, nspecies, sp_to_index) as (ortholog_file_writers, suspect_genes_file_writers):
        dupWriter = csv.writer(outfile, delimiter="\t")
        dupWriter.writerow(["Orthogroup", "Species Tree Node", "Gene Tree Node", "Support", "Type", "Genes 1", "Genes 2"])
        for iog in range(nOgs):
            rooted_tree_ids, qHaveSupport = CheckAndRootTree(files.FileHandler.GetOGsTreeFN(iog), species_tree_rooted_labelled, GeneToSpecies) # this can be parallelised easily
            if rooted_tree_ids is None: continue
            # Write rooted tree with accessions
            util.RenameTreeTaxa(rooted_tree_ids, files.FileHandler.GetOGsTreeFN(iog, True), spec_seq_dict, qSupport=qHaveSupport, qFixNegatives=True, qViaCopy=True)
            orthologues, recon_tree, suspect_genes = GetOrthologues_from_tree(iog, rooted_tree_ids, species_tree_rooted_labelled, GeneToSpecies, neighbours, dupsWriter=dupWriter, seqIDs=spec_seq_dict, spIDs=ogSet.SpeciesDict(), all_stride_dup_genes=all_stride_dup_genes, qNoRecon=qNoRecon)
            GetHOGs_from_tree(iog, recon_tree, hog_writer)
            qContainsSuspectGenes = len(suspect_genes) > 0
            if (not qInitialisedSuspectGenesDirs) and qContainsSuspectGenes:
                qInitialisedSuspectGenesDirs = True
                dSuspectGenes = files.FileHandler.GetSuspectGenesDir()
                dSuspectOrthologues = files.FileHandler.GetPutativeXenelogsDir()
                for index1 in xrange(nspecies):
                    with open(dSuspectOrthologues + '%s.tsv' % speciesDict[str(speciesIDs[index1])], csv_write_mode) as outfile:
                        writer1 = csv.writer(outfile, delimiter="\t")
                        writer1.writerow(("Orthogroup", speciesDict[str(speciesIDs[index1])], "Other"))
            for index0 in xrange(nspecies):
                strsp0 = species[index0]
                strsp0_ = strsp0+"_"
                these_genes = [g for g in suspect_genes if g.startswith(strsp0_)]
                if len(these_genes) > 0:
                    with open(dSuspectGenes + speciesDict[strsp0] + ".txt", 'a') as outfile:
                        outfile.write("\n".join([SequenceDict[g] for g in these_genes]) + "\n")
            allOrthologues = [(iog, orthologues)]
            # don't relabel nodes, they've already been done
            util.RenameTreeTaxa(recon_tree, reconTreesRenamedDir + "OG%07d_tree.txt" % iog, spec_seq_dict, qSupport=False, qFixNegatives=True)
            if iog >= 0 and divmod(iog, 10 if nOgs <= 200 else 100 if nOgs <= 2000 else 1000)[1] == 0:
                util.PrintTime("Done %d of %d" % (iog, nOgs))
            nOrthologues_SpPair += AppendOrthologuesToFiles(allOrthologues, speciesDict, ogSet.speciesToUse, SequenceDict, dResultsOrthologues, ortholog_file_writers, suspect_genes_file_writers, qContainsSuspectGenes)
    try:
        # Create directory structure
        speciesDict = ogSet.SpeciesDict()
        SequenceDict = ogSet.SequenceDict()
        # Write directory and file structure
        qInitialisedSuspectGenesDirs = False
        speciesIDs = ogSet.speciesToUse
        nspecies = len(speciesIDs)
        dResultsOrthologues = files.FileHandler.GetOrthologuesDirectory()
        for index1 in xrange(nspecies):
            d = dResultsOrthologues + "Orthologues_" + speciesDict[str(speciesIDs[index1])] + "/"
            if not os.path.exists(d): os.mkdir(d)
            for index2 in xrange(nspecies):
                if index2 == index1: continue
                with open(d + '%s__v__%s.tsv' % (speciesDict[str(speciesIDs[index1])], speciesDict[str(speciesIDs[index2])]), csv_write_mode) as outfile:
                    writer1 = csv.writer(outfile, delimiter="\t")
                    writer1.writerow(("Orthogroup", speciesDict[str(speciesIDs[index1])], speciesDict[str(speciesIDs[index2])]))
        neighbours = GetSpeciesNeighbours(species_tree_rooted_labelled)
        # Infer orthologues and write them to file
        nOgs = len(ogSet.OGs())
        nOrthologues_SpPair = util.nOrtho_sp(nspecies)
        species = list(speciesDict.keys())
        reconTreesRenamedDir = files.FileHandler.GetOGsReconTreeDir(True)
        spec_seq_dict = ogSet.Spec_SeqDict()
        sp_to_index = {str(sp):i for i, sp in enumerate(ogSet.speciesToUse)}
        with open(files.FileHandler.GetDuplicationsFN(), csv_write_mode) as outfile, OrthologsFiles(dResultsOrthologues, speciesDict, ogSet.speciesToUse, nspecies, sp_to_index) as (ortholog_file_writers, suspect_genes_file_writers):
            dupWriter = csv.writer(outfile, delimiter="\t")
            dupWriter.writerow(["Orthogroup", "Species Tree Node", "Gene Tree Node", "Support", "Type", "Genes 1", "Genes 2"])
            for iog in range(nOgs):
                rooted_tree_ids, qHaveSupport = CheckAndRootTree(files.FileHandler.GetOGsTreeFN(iog), species_tree_rooted_labelled, GeneToSpecies) # this can be parallelised easily
                if rooted_tree_ids is None: continue
                # Write rooted tree with accessions
                util.RenameTreeTaxa(rooted_tree_ids, files.FileHandler.GetOGsTreeFN(iog, True), spec_seq_dict, qSupport=qHaveSupport, qFixNegatives=True, qViaCopy=True)
                orthologues, recon_tree, suspect_genes = GetOrthologues_from_tree(iog, rooted_tree_ids, species_tree_rooted_labelled, GeneToSpecies, neighbours, dupsWriter=dupWriter, seqIDs=spec_seq_dict, spIDs=ogSet.SpeciesDict(), all_stride_dup_genes=all_stride_dup_genes, qNoRecon=qNoRecon)
                GetHOGs_from_tree(iog, recon_tree, hog_writer)
                qContainsSuspectGenes = len(suspect_genes) > 0
                if (not qInitialisedSuspectGenesDirs) and qContainsSuspectGenes:
                    qInitialisedSuspectGenesDirs = True
                    dSuspectGenes = files.FileHandler.GetSuspectGenesDir()
                    dSuspectOrthologues = files.FileHandler.GetPutativeXenelogsDir()
                    for index1 in xrange(nspecies):
                        with open(dSuspectOrthologues + '%s.tsv' % speciesDict[str(speciesIDs[index1])], csv_write_mode) as outfile:
                            writer1 = csv.writer(outfile, delimiter="\t")
                            writer1.writerow(("Orthogroup", speciesDict[str(speciesIDs[index1])], "Other"))
                for index0 in xrange(nspecies):
                    strsp0 = species[index0]
                    strsp0_ = strsp0+"_"
                    these_genes = [g for g in suspect_genes if g.startswith(strsp0_)]
                    if len(these_genes) > 0:
                        with open(dSuspectGenes + speciesDict[strsp0] + ".txt", 'a') as outfile:
                            outfile.write("\n".join([SequenceDict[g] for g in these_genes]) + "\n")
                allOrthologues = [(iog, orthologues)]
                # don't relabel nodes, they've already been done
                util.RenameTreeTaxa(recon_tree, reconTreesRenamedDir + "OG%07d_tree.txt" % iog, spec_seq_dict, qSupport=False, qFixNegatives=True)
                if iog >= 0 and divmod(iog, 10 if nOgs <= 200 else 100 if nOgs <= 2000 else 1000)[1] == 0:
                    util.PrintTime("Done %d of %d" % (iog, nOgs))
                nOrthologues_SpPair += AppendOrthologuesToFiles(allOrthologues, speciesDict, ogSet.speciesToUse, SequenceDict, dResultsOrthologues, ortholog_file_writers, suspect_genes_file_writers, qContainsSuspectGenes)
    except IOError as e:
        if str(e).startswith("[Errno 24] Too many open files"):
            util.number_open_files_exception_advice(len(ogSet.speciesToUse), True)
            util.Fail()
        else:
            raise
    return nOrthologues_SpPair


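The try/except added above guards the ortholog-writing stage, where a TSV writer is held open for each ordered species pair, so the number of simultaneously open files again grows quadratically with the number of species. A back-of-the-envelope sketch of that growth — the helper name and the small allowance for shared files such as the duplications table are illustrative assumptions, not OrthoFinder code:

def approx_open_writers(n_species, n_shared=2):
    """Rough count of simultaneously open ortholog writers: one per ordered
    species pair, plus a small allowance for shared files (e.g. the duplications table)."""
    return n_species * (n_species - 1) + n_shared

for n in (10, 20, 40):
    print(n, "species ->", approx_open_writers(n), "writers")
# 10 species -> 92 writers
# 20 species -> 382 writers
# 40 species -> 1562 writers (already past a 1024 soft limit)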
32 changes: 31 additions & 1 deletion scripts_of/util.py
@@ -378,7 +378,37 @@ def FlowText(text, n=60):
            lines += text
            text = ""
    return lines


def number_open_files_exception_advice(n_species, q_at_trees):
    """
    Prints advice for user on "IOError: [Errno 24] Too many open files" exception
    Args:
        n_species - the number of species in the analysis
        q_at_trees - has this error occurred at the orthologs from trees stage
    """
    # parallel_task_manager.RunCommand("ulimit -Hn")
    n_req = n_species*n_species + 100
    msg="\nERROR: The system limits on the number of files a process can open is too low. For %d species \
OrthoFinder needs to be able to open at least r=%d files. Please increase the limit and restart OrthoFinder\n\
1. Check the hard and soft limits on the number of open files for your system:\n\
$ ulimit -Hn\n\
$ ulimit -Sn\n\
2. If hard limit, h > r already, then you just need to increase the soft limit:\n\
$ ulimit -n %d\n\
3. Alternatively, if h < r then you need to edit the file '/etc/security/limits.conf', this requires root privileges. \
To increase the limit to %d for user called 'emms' add the lines:\n\
emms hard nofile %d\n\
emms soft nofile %d\n" % (n_species, n_req, n_req, n_req, n_req, n_req)
    msg +=" (edit these lines to match your username)\n\
4. Check the limit has now been updated (if you changed the hard limit you'll need to open a new session and confirm it's updated):\n\
$ ulimit -Sn"

    if q_at_trees:
        msg_part_2 = "5. Once the limit is updated restart OrthoFinder 'from trees' using the '-ft' command"
    else:
        msg_part_2 = "5. Once the limit is updated restart OrthoFinder with the original command"
    msg_part_3 = "\nFor full details see: https://github.com/davidemms/OrthoFinder/issues/384"
    print(msg + "\n" + msg_part_2 + "\n" + msg_part_3 + "\n")
"""
-------------------------------------------------------------------------------
"""
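The advice printed above raises the limit from the shell with ulimit or via /etc/security/limits.conf. As a complementary note, and not something this commit does, a process can also lift its own soft limit up to the existing hard limit from within Python using the standard resource module (Unix only):

import resource

def raise_soft_nofile_limit(n_required):
    """Raise the soft RLIMIT_NOFILE towards n_required, capped at the hard limit (Unix only)."""
    soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
    if soft < n_required:
        resource.setrlimit(resource.RLIMIT_NOFILE, (min(n_required, hard), hard))
    return resource.getrlimit(resource.RLIMIT_NOFILE)

# If the hard limit itself is below the requirement, editing /etc/security/limits.conf
# (as in the message above) or asking a system administrator is still needed.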
