diff --git a/CHANGELOG.md b/CHANGELOG.md index 06c3918158..300058d240 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ - Fixed bug raised when removing logging file handlers between calls that arose when configuring the root logger with dictConfig ([#1643](https://github.com/ewels/MultiQC/issues/1643)) - Added new config option `custom_table_header_config` to override any config for any table header - Fixed edge-case bug in custom content where a `description` that doesn't terminate in `.` gave duplicate section descriptions. +- Tidied the verbose log to remove some very noisy statements and add summaries for skipped files in the search ### New Modules diff --git a/multiqc/modules/base_module.py b/multiqc/modules/base_module.py index e0ea7b5fad..bf3dd8abb7 100755 --- a/multiqc/modules/base_module.py +++ b/multiqc/modules/base_module.py @@ -177,9 +177,8 @@ def find_log_files(self, sp_key, filecontents=True, filehandles=False): f["f"] = fh.read() yield f except (IOError, OSError, ValueError, UnicodeDecodeError) as e: - if config.report_readerrors: - logger.debug("Couldn't open filehandle when returning file: {}\n{}".format(f["fn"], e)) - f["f"] = None + logger.debug("Couldn't open filehandle when returning file: {}\n{}".format(f["fn"], e)) + f["f"] = None else: yield f diff --git a/multiqc/modules/fastqc/fastqc.py b/multiqc/modules/fastqc/fastqc.py index adf88f627f..09073ec938 100755 --- a/multiqc/modules/fastqc/fastqc.py +++ b/multiqc/modules/fastqc/fastqc.py @@ -62,7 +62,7 @@ def __init__(self): fqc_zip = zipfile.ZipFile(os.path.join(f["root"], f["fn"])) except Exception as e: log.warning("Couldn't read '{}' - Bad zip file".format(f["fn"])) - log.debug("Bad zip file error:\n{}".format(e)) + log.debug("Bad zip file error: {}".format(e)) continue # FastQC zip files should have just one directory inside, containing report d_name = fqc_zip.namelist()[0] diff --git a/multiqc/multiqc.py b/multiqc/multiqc.py index 2ae86df5ff..5f75b221da 100644 --- a/multiqc/multiqc.py +++ b/multiqc/multiqc.py @@ -467,7 +467,6 @@ def run( plugin_hooks.mqc_trigger("execution_start") - logger.debug("Command : {}".format(" ".join(sys.argv))) logger.debug("Working dir : {}".format(os.getcwd())) if make_pdf: logger.info("--pdf specified. Using non-interactive HTML template.") diff --git a/multiqc/utils/config.py b/multiqc/utils/config.py index 3d3e302826..cf2332056f 100755 --- a/multiqc/utils/config.py +++ b/multiqc/utils/config.py @@ -132,8 +132,6 @@ def mqc_load_config(yaml_config): except yaml.scanner.ScannerError as e: logger.error("Error parsing config YAML: {}".format(e)) sys.exit(1) - else: - logger.debug("No MultiQC config found: {}".format(yaml_config)) def mqc_cl_config(cl_config): @@ -157,19 +155,23 @@ def mqc_cl_config(cl_config): def mqc_add_config(conf, conf_path=None): """Add to the global config with given MultiQC config dict""" global custom_css_files, fn_clean_exts, fn_clean_trim + log_new_config = {} + log_filename_patterns = [] + log_filename_clean_extensions = [] + log_filename_clean_trimmings = [] for c, v in conf.items(): if c == "sp": # Merge filename patterns instead of replacing sp.update(v) - logger.debug("Added to filename patterns: {}".format(v)) + log_filename_patterns.append(v) elif c == "extra_fn_clean_exts": # Prepend to filename cleaning patterns instead of replacing fn_clean_exts[0:0] = v - logger.debug("Added to filename clean extensions: {}".format(v)) + log_filename_clean_extensions.append(v) elif c == "extra_fn_clean_trim": # Prepend to filename cleaning patterns instead of replacing fn_clean_trim[0:0] = v - logger.debug("Added to filename clean trimmings: {}".format(v)) + log_filename_clean_trimmings.append(v) elif c in ["custom_logo"] and v: # Resolve file paths - absolute or cwd, or relative to config file fpath = v @@ -180,7 +182,7 @@ def mqc_add_config(conf, conf_path=None): else: logger.error("Config '{}' path not found, skipping ({})".format(c, fpath)) continue - logger.debug("New config '{}': {}".format(c, fpath)) + log_new_config[c] = fpath update({c: fpath}) elif c == "custom_css_files": for fpath in v: @@ -196,8 +198,16 @@ def mqc_add_config(conf, conf_path=None): custom_css_files = [] custom_css_files.append(fpath) else: - logger.debug("New config '{}': {}".format(c, v)) + log_new_config[c] = v update({c: v}) + if len(log_new_config) > 0: + logger.debug(f"New config: {log_new_config}") + if len(log_filename_patterns) > 0: + logger.debug(f"Added to filename patterns: {log_filename_patterns}") + if len(log_filename_clean_extensions) > 0: + logger.debug(f"Added to filename clean extensions: {log_filename_clean_extensions}") + if len(log_filename_clean_trimmings) > 0: + logger.debug(f"Added to filename clean trimmings: {log_filename_clean_trimmings}") #### Function to load file containing a list of alternative sample-name swaps diff --git a/multiqc/utils/config_defaults.yaml b/multiqc/utils/config_defaults.yaml index 4a0fbff1d4..bf99e58951 100644 --- a/multiqc/utils/config_defaults.yaml +++ b/multiqc/utils/config_defaults.yaml @@ -98,6 +98,7 @@ ignore_symlinks: false ignore_images: true fn_ignore_dirs: - "multiqc_data" + - ".git" - "icarus_viewers" # quast - "runs_per_reference" # quast - "not_aligned" # quast diff --git a/multiqc/utils/report.py b/multiqc/utils/report.py index fc4bfa870e..59de52d56e 100644 --- a/multiqc/utils/report.py +++ b/multiqc/utils/report.py @@ -87,7 +87,10 @@ def init(): "skipped_not_a_file": 0, "skipped_ignore_pattern": 0, "skipped_filesize_limit": 0, + "skipped_module_specific_max_filesize": 0, "skipped_no_match": 0, + "skipped_directory_fn_ignore_dirs": 0, + "skipped_file_contents_search_errors": 0, } global searchfiles @@ -181,7 +184,6 @@ def add_file(fn, root): # Check that we don't want to ignore this file i_matches = [n for n in config.fn_ignore_files if fnmatch.fnmatch(fn, n)] if len(i_matches) > 0: - logger.debug("Ignoring file as matched an ignore pattern: {}".format(fn)) file_search_stats["skipped_ignore_pattern"] += 1 return False @@ -250,7 +252,7 @@ def add_file(fn, root): removed_dirs = [ os.path.join(root, d) for d in set(orig_dirnames).symmetric_difference(set(dirnames)) ] - logger.debug("Ignoring directory as matched fn_ignore_dirs: {}".format(", ".join(removed_dirs))) + file_search_stats["skipped_directory_fn_ignore_dirs"] += len(removed_dirs) orig_dirnames = dirnames[:] for n in config.fn_ignore_paths: dirnames[:] = [d for d in dirnames if not fnmatch.fnmatch(os.path.join(root, d), n.rstrip(os.sep))] @@ -258,18 +260,16 @@ def add_file(fn, root): removed_dirs = [ os.path.join(root, d) for d in set(orig_dirnames).symmetric_difference(set(dirnames)) ] - logger.debug( - "Ignoring directory as matched fn_ignore_paths: {}".format(", ".join(removed_dirs)) - ) + file_search_stats["skipped_directory_fn_ignore_dirs"] += len(removed_dirs) # Skip *this* directory if matches ignore params d_matches = [n for n in config.fn_ignore_dirs if fnmatch.fnmatch(bname, n.rstrip(os.sep))] if len(d_matches) > 0: - logger.debug("Ignoring directory as matched fn_ignore_dirs: {}".format(bname)) + file_search_stats["skipped_directory_fn_ignore_dirs"] += 1 continue p_matches = [n for n in config.fn_ignore_paths if fnmatch.fnmatch(root, n.rstrip(os.sep))] if len(p_matches) > 0: - logger.debug("Ignoring directory as matched fn_ignore_paths: {}".format(root)) + file_search_stats["skipped_directory_fn_ignore_dirs"] += 1 continue # Sanity check - make sure that we're not just running in the installation directory @@ -313,12 +313,20 @@ def add_file(fn, root): runtimes["total_sp"] = time.time() - total_sp_starttime + # Debug log summary about what we skipped + summaries = [] + for key in sorted(file_search_stats, key=file_search_stats.get, reverse=True): + if "skipped_" in key and file_search_stats[key] > 0: + summaries.append(f"{key}: {file_search_stats[key]}") + logger.debug(f"Summary of files that were skipped by the search: [{'] // ['.join(summaries)}]") + def search_file(pattern, f, module_key): """ Function to searach a single file for a single search pattern. """ + global file_search_stats fn_matched = False contents_matched = False @@ -333,9 +341,7 @@ def search_file(pattern, f, module_key): # Search pattern specific filesize limit if pattern.get("max_filesize") is not None and "filesize" in f: if f["filesize"] > pattern.get("max_filesize"): - logger.debug( - "File ignored by {} because it exceeded search pattern filesize limit: {}".format(module_key, f["fn"]) - ) + file_search_stats["skipped_module_specific_max_filesize"] += 1 return False # Search by file name (glob) @@ -357,9 +363,10 @@ def search_file(pattern, f, module_key): if pattern.get("contents_re") is not None: repattern = re.compile(pattern["contents_re"]) try: - with io.open(os.path.join(f["root"], f["fn"]), "r", encoding="utf-8") as f: + file_path = os.path.join(f["root"], f["fn"]) + with io.open(file_path, "r", encoding="utf-8") as fh: l = 1 - for line in f: + for line in fh: # Search by file contents (string) if pattern.get("contents") is not None: if pattern["contents"] in line: @@ -378,10 +385,12 @@ def search_file(pattern, f, module_key): if pattern.get("num_lines") and l >= pattern.get("num_lines"): break l += 1 - except (IOError, OSError, ValueError, UnicodeDecodeError): + # Can't open file - usually because it's a binary file and we're reading as utf-8 + except (IOError, OSError, ValueError, UnicodeDecodeError) as e: if config.report_readerrors: - logger.debug("Couldn't read file when looking for output: {}".format(f["fn"])) - return False + logger.debug(f"Couldn't read file when looking for output: {file_path}, {e}") + file_search_stats["skipped_file_contents_search_errors"] += 1 + return False return fn_matched and contents_matched