
Commit

Tidied the verbose log, added summaries for skipped search files to debug log
ewels committed Apr 5, 2022
1 parent ca298b1 commit 52227bc
Showing 7 changed files with 46 additions and 27 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -10,6 +10,7 @@
 - Fixed bug raised when removing logging file handlers between calls that arose when configuring the root logger with dictConfig ([#1643](https://github.com/ewels/MultiQC/issues/1643))
 - Added new config option `custom_table_header_config` to override any config for any table header
 - Fixed edge-case bug in custom content where a `description` that doesn't terminate in `.` gave duplicate section descriptions.
+- Tidied the verbose log to remove some very noisy statements and add summaries for skipped files in the search
 
 ### New Modules
 
5 changes: 2 additions & 3 deletions multiqc/modules/base_module.py
@@ -177,9 +177,8 @@ def find_log_files(self, sp_key, filecontents=True, filehandles=False):
                             f["f"] = fh.read()
                             yield f
                 except (IOError, OSError, ValueError, UnicodeDecodeError) as e:
-                    if config.report_readerrors:
-                        logger.debug("Couldn't open filehandle when returning file: {}\n{}".format(f["fn"], e))
-                        f["f"] = None
+                    logger.debug("Couldn't open filehandle when returning file: {}\n{}".format(f["fn"], e))
+                    f["f"] = None
             else:
                 yield f
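For context, downstream module code consumes this generator roughly as sketched below (the search-pattern key and the parsing body are hypothetical); each yielded dict carries the file name `fn`, its directory `root` and, when `filecontents=True`, the file contents under `f`, which is set to `None` if the file could not be read.

def parse_logs(self):
    # Minimal sketch of a module iterating over matched files
    for f in self.find_log_files("mymodule/report"):
        if f["f"] is None:
            continue  # file matched but could not be read (see the except branch above)
        for line in f["f"].splitlines():
            pass  # parse each line of the report here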
2 changes: 1 addition & 1 deletion multiqc/modules/fastqc/fastqc.py
@@ -62,7 +62,7 @@ def __init__(self):
                 fqc_zip = zipfile.ZipFile(os.path.join(f["root"], f["fn"]))
             except Exception as e:
                 log.warning("Couldn't read '{}' - Bad zip file".format(f["fn"]))
-                log.debug("Bad zip file error:\n{}".format(e))
+                log.debug("Bad zip file error: {}".format(e))
                 continue
             # FastQC zip files should have just one directory inside, containing report
             d_name = fqc_zip.namelist()[0]
1 change: 0 additions & 1 deletion multiqc/multiqc.py
@@ -467,7 +467,6 @@ def run(
 
     plugin_hooks.mqc_trigger("execution_start")
 
-    logger.debug("Command : {}".format(" ".join(sys.argv)))
     logger.debug("Working dir : {}".format(os.getcwd()))
     if make_pdf:
         logger.info("--pdf specified. Using non-interactive HTML template.")
24 changes: 17 additions & 7 deletions multiqc/utils/config.py
@@ -132,8 +132,6 @@ def mqc_load_config(yaml_config):
         except yaml.scanner.ScannerError as e:
             logger.error("Error parsing config YAML: {}".format(e))
             sys.exit(1)
-    else:
-        logger.debug("No MultiQC config found: {}".format(yaml_config))
 
 
 def mqc_cl_config(cl_config):
@@ -157,19 +155,23 @@ def mqc_cl_config(cl_config):
 def mqc_add_config(conf, conf_path=None):
     """Add to the global config with given MultiQC config dict"""
     global custom_css_files, fn_clean_exts, fn_clean_trim
+    log_new_config = {}
+    log_filename_patterns = []
+    log_filename_clean_extensions = []
+    log_filename_clean_trimmings = []
     for c, v in conf.items():
         if c == "sp":
             # Merge filename patterns instead of replacing
             sp.update(v)
-            logger.debug("Added to filename patterns: {}".format(v))
+            log_filename_patterns.append(v)
         elif c == "extra_fn_clean_exts":
             # Prepend to filename cleaning patterns instead of replacing
             fn_clean_exts[0:0] = v
-            logger.debug("Added to filename clean extensions: {}".format(v))
+            log_filename_clean_extensions.append(v)
         elif c == "extra_fn_clean_trim":
             # Prepend to filename cleaning patterns instead of replacing
             fn_clean_trim[0:0] = v
-            logger.debug("Added to filename clean trimmings: {}".format(v))
+            log_filename_clean_trimmings.append(v)
         elif c in ["custom_logo"] and v:
             # Resolve file paths - absolute or cwd, or relative to config file
             fpath = v
@@ -180,7 +182,7 @@
             else:
                 logger.error("Config '{}' path not found, skipping ({})".format(c, fpath))
                 continue
-            logger.debug("New config '{}': {}".format(c, fpath))
+            log_new_config[c] = fpath
             update({c: fpath})
         elif c == "custom_css_files":
             for fpath in v:
@@ -196,8 +198,16 @@
                     custom_css_files = []
                 custom_css_files.append(fpath)
         else:
-            logger.debug("New config '{}': {}".format(c, v))
+            log_new_config[c] = v
             update({c: v})
+    if len(log_new_config) > 0:
+        logger.debug(f"New config: {log_new_config}")
+    if len(log_filename_patterns) > 0:
+        logger.debug(f"Added to filename patterns: {log_filename_patterns}")
+    if len(log_filename_clean_extensions) > 0:
+        logger.debug(f"Added to filename clean extensions: {log_filename_clean_extensions}")
+    if len(log_filename_clean_trimmings) > 0:
+        logger.debug(f"Added to filename clean trimmings: {log_filename_clean_trimmings}")
 
 
 #### Function to load file containing a list of alternative sample-name swaps
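The change above collects config updates during the loop and emits a single debug line per category afterwards, instead of one line per key. A minimal standalone sketch of that accumulate-then-summarise pattern (the function and variable names are illustrative, not MultiQC API):

import logging

logger = logging.getLogger(__name__)

def apply_settings(conf, target):
    # Apply every setting, but log one summary line rather than one line per key
    new_config = {}
    for key, value in conf.items():
        target[key] = value
        new_config[key] = value
    if len(new_config) > 0:
        logger.debug(f"New config: {new_config}")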
1 change: 1 addition & 0 deletions multiqc/utils/config_defaults.yaml
@@ -98,6 +98,7 @@ ignore_symlinks: false
 ignore_images: true
 fn_ignore_dirs:
   - "multiqc_data"
+  - ".git"
   - "icarus_viewers" # quast
   - "runs_per_reference" # quast
   - "not_aligned" # quast
39 changes: 24 additions & 15 deletions multiqc/utils/report.py
@@ -87,7 +87,10 @@ def init():
         "skipped_not_a_file": 0,
         "skipped_ignore_pattern": 0,
         "skipped_filesize_limit": 0,
+        "skipped_module_specific_max_filesize": 0,
         "skipped_no_match": 0,
+        "skipped_directory_fn_ignore_dirs": 0,
+        "skipped_file_contents_search_errors": 0,
     }
 
     global searchfiles
@@ -181,7 +184,6 @@ def add_file(fn, root):
     # Check that we don't want to ignore this file
     i_matches = [n for n in config.fn_ignore_files if fnmatch.fnmatch(fn, n)]
     if len(i_matches) > 0:
-        logger.debug("Ignoring file as matched an ignore pattern: {}".format(fn))
         file_search_stats["skipped_ignore_pattern"] += 1
         return False
 
@@ -250,26 +252,24 @@ def add_file(fn, root):
                 removed_dirs = [
                     os.path.join(root, d) for d in set(orig_dirnames).symmetric_difference(set(dirnames))
                 ]
-                logger.debug("Ignoring directory as matched fn_ignore_dirs: {}".format(", ".join(removed_dirs)))
+                file_search_stats["skipped_directory_fn_ignore_dirs"] += len(removed_dirs)
             orig_dirnames = dirnames[:]
             for n in config.fn_ignore_paths:
                 dirnames[:] = [d for d in dirnames if not fnmatch.fnmatch(os.path.join(root, d), n.rstrip(os.sep))]
             if len(orig_dirnames) != len(dirnames):
                 removed_dirs = [
                     os.path.join(root, d) for d in set(orig_dirnames).symmetric_difference(set(dirnames))
                 ]
-                logger.debug(
-                    "Ignoring directory as matched fn_ignore_paths: {}".format(", ".join(removed_dirs))
-                )
+                file_search_stats["skipped_directory_fn_ignore_dirs"] += len(removed_dirs)
 
             # Skip *this* directory if matches ignore params
             d_matches = [n for n in config.fn_ignore_dirs if fnmatch.fnmatch(bname, n.rstrip(os.sep))]
             if len(d_matches) > 0:
-                logger.debug("Ignoring directory as matched fn_ignore_dirs: {}".format(bname))
+                file_search_stats["skipped_directory_fn_ignore_dirs"] += 1
                 continue
             p_matches = [n for n in config.fn_ignore_paths if fnmatch.fnmatch(root, n.rstrip(os.sep))]
             if len(p_matches) > 0:
-                logger.debug("Ignoring directory as matched fn_ignore_paths: {}".format(root))
+                file_search_stats["skipped_directory_fn_ignore_dirs"] += 1
                 continue
 
             # Sanity check - make sure that we're not just running in the installation directory
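The in-place `dirnames[:] = [...]` assignments above are what actually prune the walk: `os.walk` honours modifications to its `dirnames` list, so matched directories (now counted rather than logged one by one) are never descended into. A compact sketch of that idiom, using a couple of the ignore globs from `config_defaults.yaml`:

import fnmatch
import os

fn_ignore_dirs = [".git", "multiqc_data"]  # subset of the defaults, for illustration

for root, dirnames, filenames in os.walk("."):
    # Prune ignored directories in place so os.walk never descends into them
    dirnames[:] = [d for d in dirnames if not any(fnmatch.fnmatch(d, p) for p in fn_ignore_dirs)]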
@@ -313,12 +313,20 @@ def add_file(fn, root):
 
     runtimes["total_sp"] = time.time() - total_sp_starttime
 
+    # Debug log summary about what we skipped
+    summaries = []
+    for key in sorted(file_search_stats, key=file_search_stats.get, reverse=True):
+        if "skipped_" in key and file_search_stats[key] > 0:
+            summaries.append(f"{key}: {file_search_stats[key]}")
+    logger.debug(f"Summary of files that were skipped by the search: [{'] // ['.join(summaries)}]")
+
 
 def search_file(pattern, f, module_key):
     """
     Function to searach a single file for a single search pattern.
     """
 
+    global file_search_stats
     fn_matched = False
     contents_matched = False
 
@@ -333,9 +341,7 @@ def search_file(pattern, f, module_key):
     # Search pattern specific filesize limit
     if pattern.get("max_filesize") is not None and "filesize" in f:
         if f["filesize"] > pattern.get("max_filesize"):
-            logger.debug(
-                "File ignored by {} because it exceeded search pattern filesize limit: {}".format(module_key, f["fn"])
-            )
+            file_search_stats["skipped_module_specific_max_filesize"] += 1
             return False
 
     # Search by file name (glob)
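For reference, the check above compares a discovered file's size against an optional per-pattern limit; rather than logging one debug line per oversized file, each hit is now counted once under `skipped_module_specific_max_filesize`. A toy illustration of the inputs involved (all values, and the `fn` key, are hypothetical here):

pattern = {"fn": "*_screen.txt", "max_filesize": 1_000_000}  # hypothetical search pattern
f = {"fn": "sample1_screen.txt", "root": "results", "filesize": 5_000_000}  # hypothetical file record

skip = (
    pattern.get("max_filesize") is not None
    and "filesize" in f
    and f["filesize"] > pattern["max_filesize"]
)  # True here, so this file would be tallied and then skipped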
@@ -357,9 +363,10 @@
         if pattern.get("contents_re") is not None:
             repattern = re.compile(pattern["contents_re"])
         try:
-            with io.open(os.path.join(f["root"], f["fn"]), "r", encoding="utf-8") as f:
+            file_path = os.path.join(f["root"], f["fn"])
+            with io.open(file_path, "r", encoding="utf-8") as fh:
                 l = 1
-                for line in f:
+                for line in fh:
                     # Search by file contents (string)
                     if pattern.get("contents") is not None:
                         if pattern["contents"] in line:
@@ -378,10 +385,12 @@
                     if pattern.get("num_lines") and l >= pattern.get("num_lines"):
                         break
                     l += 1
-        except (IOError, OSError, ValueError, UnicodeDecodeError):
+        # Can't open file - usually because it's a binary file and we're reading as utf-8
+        except (IOError, OSError, ValueError, UnicodeDecodeError) as e:
             if config.report_readerrors:
-                logger.debug("Couldn't read file when looking for output: {}".format(f["fn"]))
-                return False
+                logger.debug(f"Couldn't read file when looking for output: {file_path}, {e}")
+            file_search_stats["skipped_file_contents_search_errors"] += 1
+            return False
 
     return fn_matched and contents_matched
 
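Taken together, the search loop now tallies every skip in `file_search_stats` and the reasons are reported once, in a single debug line, instead of per file. A small sketch of how that summary line is built, using made-up counts:

file_search_stats = {
    "skipped_no_match": 412,  # hypothetical counts
    "skipped_ignore_pattern": 7,
    "skipped_directory_fn_ignore_dirs": 3,
}

summaries = []
for key in sorted(file_search_stats, key=file_search_stats.get, reverse=True):
    if "skipped_" in key and file_search_stats[key] > 0:
        summaries.append(f"{key}: {file_search_stats[key]}")
print(f"Summary of files that were skipped by the search: [{'] // ['.join(summaries)}]")
# Summary of files that were skipped by the search: [skipped_no_match: 412] // [skipped_ignore_pattern: 7] // [skipped_directory_fn_ignore_dirs: 3]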
