diff --git a/.editorconfig b/.editorconfig
index 63eff3a4..8719a7f9 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -30,3 +30,7 @@ indent_style = unset
 # ignore python
 [*.{py}]
 indent_style = unset
+
+# ignore perl
+[*.{pl,pm}]
+indent_size = unset
diff --git a/bin/FAlite_a93cba2.pm b/bin/FAlite_a93cba2.pm
new file mode 100644
index 00000000..bebd8ed5
--- /dev/null
+++ b/bin/FAlite_a93cba2.pm
@@ -0,0 +1,128 @@
+package FAlite_a93cba2;
+use strict;
+sub new {
+	# Wrap an open filehandle: skip leading blank lines and remember the first header line.
+	my ($class, $fh) = @_;
+	# the old test `ref $fh !~ /GLOB/` parsed as ref($fh !~ /GLOB/), so it could never fire
+	require Scalar::Util; # reftype also accepts blessed handles (e.g. IO::File objects)
+	die ref($fh), "\n", "FAlite_a93cba2 ERROR: expect a GLOB reference\n" if (Scalar::Util::reftype($fh) || '') ne 'GLOB';
+	my $this = bless {FH => $fh}, ref($class) || $class;
+	my $firstline; # read into a lexical so the caller's $_ is not clobbered
+	while (defined($firstline = <$fh>)) {last if $firstline =~ /\S/} # not supposed to have blanks, but...
+	if (not defined $firstline) {warn "FAlite_a93cba2: Empty\n"; return $this}
+	if ($firstline !~ /^>/) {warn "FAlite_a93cba2: Not FASTA formatted\n"; return $this}
+	chomp($this->{LASTLINE} = $firstline);
+	return $this;
+}
+sub nextEntry {
+	# Return the next record as a FAlite_a93cba2::Entry, or 0 when no record is left.
+	# Note: a trailing header with no lines after it reads zero lines and so yields 0.
+	my ($this) = @_;
+	return 0 if not defined $this->{LASTLINE};
+	my ($fh, $def) = ($this->{FH}, $this->{LASTLINE});
+	my @seq;
+	my $lines_read = 0;
+	# read into a lexical so the caller's global $_ is not clobbered
+	while (defined(my $line = <$fh>)) {
+		$lines_read++;
+		if ($line =~ /^>/) {
+			chomp($this->{LASTLINE} = $line); # header of the record the next call will return
+			last;
+		}
+		push @seq, $line;
+	}
+	return 0 if $lines_read == 0; # nothing left after the stored header
+	chomp @seq;
+	return FAlite_a93cba2::Entry::new($def, \@seq);
+}
+
+package FAlite_a93cba2::Entry;
+# One FASTA record. 'bool' is overloaded as well so that truth tests such as
+# while(my $entry = ...) do not have to build the whole "def\nseq\n" string via '""'.
+use overload '""' => 'all', 'bool' => sub {1};
+sub new { # plain function (no class argument): header line, array ref of sequence lines
+	my ($def, $seqarry) = @_;
+	my $this = bless {DEF => $def, SEQ => join("", @$seqarry)};
+	$this->{SEQ} =~ s/\s//g; # just in case more spaces
+	return $this;
+}
+sub def {shift->{DEF}} # header line as given to new
+sub seq {shift->{SEQ}} # sequence with all whitespace removed
+sub all {my $e = shift; return $e->{DEF}."\n".$e->{SEQ}."\n"} # FASTA text for this record
+
+1;
+
+__END__
+
+=head1 NAME
+
+FAlite_a93cba2;
+
+=head1 SYNOPSIS
+
+ use FAlite_a93cba2;
+ my $fasta = new FAlite_a93cba2(\*STDIN);
+ while(my $entry = $fasta->nextEntry) {
+     $entry->def;
+     $entry->seq;
+ }
+
+=head1 DESCRIPTION
+
+FAlite_a93cba2 is a package for parsing FASTA files and databases. The FASTA format is
+widely used in bioinformatics. It consists of a definition line followed by
+sequence with an arbitrary number of lines and line lengths.
+
+A FASTA file looks like this:
+
+ >identifier descriptive text
+ GAATTC
+
+A FASTA database looks like this:
+
+ >identifier1 some text describing this entry
+ GAATTC
+ ACTAGT
+ >identifier2 some text describing this entry
+ AAACCT
+ GCTAAT
+
+=head2 Object
+
+FAlite_a93cba2 has two kinds of objects, the file and the entry.
+
+ my $fasta_file = new FAlite_a93cba2(\*STDIN); # or any other filehandle
+ $entry = $fasta_file->nextEntry; # single fasta file
+ while(my $entry = $fasta_file->nextEntry) {
+     # canonical form of use for fasta database
+ }
+
+The entry has two attributes (def and seq).
+
+ $entry->def; # access the def line
+ $entry->seq; # access the sequence
+ "$entry";    # overload to fasta file ($entry->def . "\n" . $entry->seq)
+
+=head1 AUTHOR
+
+Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf)
+
+=head1 ACKNOWLEDGEMENTS
+
+This software was developed at the Genome Sequencing Center at Washington
+University, St. Louis, MO.
+
+=head1 COPYRIGHT
+
+Copyright (C) 1999 Ian Korf. All Rights Reserved.
+
+=head1 DISCLAIMER
+
+This software is provided "as is" without warranty of any kind.
+
+=cut
+
+
+
+
+
diff --git a/bin/assemblathon_stats_a93cba2.pl b/bin/assemblathon_stats_a93cba2.pl
new file mode 100755
index 00000000..d10ba565
--- /dev/null
+++ b/bin/assemblathon_stats_a93cba2.pl
@@ -0,0 +1,528 @@
+#!/usr/bin/perl
+#
+# assemblathon_stats.pl
+#
+# A script to calculate a basic set of metrics from a genome assembly
+#
+# Author: Keith Bradnam, Genome Center, UC Davis
+# This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License.
+# This software is provided AS IS, without warranty of any kind.
+
+use strict;
+use warnings;
+use FAlite_a93cba2;
+use Getopt::Long;
+use List::Util qw(sum max min);
+
+###############################################
+#
+#  C o m m a n d   l i n e   o p t i o n s
+#
+###############################################
+
+my $limit;       # limit processing of data to first $limit sequences (for quick testing)
+my $graph;       # produce some output ready for Excel or R
+my $csv;         # produce CSV output file of results
+my $n_limit;     # how many N characters should be used to split scaffolds into contigs
+my $genome_size; # estimated or known genome size (will be used for some stats)
+
+my $usage = "Usage: assemblathon_stats.pl <assembly_scaffolds_file>
+options:
+	-limit <int> limit analysis to first <int> sequences (useful for testing)
+	-csv         produce a CSV output file of all results
+	-graph       produce a CSV output file of NG(X) values (NG1 through to NG99), suitable for graphing
+	-n <int>     specify how many consecutive N characters should be used to split scaffolds into contigs
+	-genome_size <int> estimated or known genome size
+";
+
+# GetOptions returns false after warning about a bad option: stop with the usage text instead of running on
+GetOptions ("limit=i"       => \$limit,
+			"csv"           => \$csv,
+			"graph"         => \$graph,
+			"n=i"           => \$n_limit,
+			"genome_size=i" => \$genome_size) or die "$usage";
+
+# set defaults
+$limit = 1000000000 if (!$limit);
+$n_limit = 25       if (!$n_limit);
+
+# check we have a suitable input file
+die "$usage" unless (@ARGV == 1);
+my ($file) = @ARGV;
+
+###############################################
+#
+#  S o m e   G l o b a l   v a r i a b l e s
+#
+###############################################
+
+# counters filled in by process_FASTA
+my ($scaffolded_contigs,   $scaffolded_contig_length)   = (0, 0);	# contigs split out of scaffolds, and the summed length of those scaffolds
+my ($unscaffolded_contigs, $unscaffolded_contig_length) = (0, 0);	# sequences without a run of $n_limit Ns, and their summed length
+
+my $w = 60;		# formatting width for output
+my %data;		# $data{scaffold} and $data{contig}, each holding {seqs} and {lengths} arrays
+my @results;	# values, in the order they were stored (for use with -csv option)
+my @headers;	# descriptions matching @results
+
+
+
+# first pass through the file: collect every sequence and its length
+process_FASTA($file);
+
+print "\n---------------- Information for assembly \'$file\' ----------------\n\n";
+
+# echo the genome size in Mbp when one was supplied
+if (defined $genome_size) {
+	printf "%${w}s %10s\n", "Assumed genome size (Mbp)", sprintf("%.2f", $genome_size / 1000000);
+}
+
+# scaffold statistics first, then the scaffolded-vs-unscaffolded summary,
+# and finally the contig statistics
+foreach my $level ('scaffold', 'intermediate', 'contig') {
+	sequence_statistics($level);
+}
+
+# produce CSV output if required
+if ($csv) {
+	write_csv($file);
+}
+
+exit(0);
+
+
+
+##########################################
+#
+#
+#    S  U  B  R  O  U  T  I  N  E  S
+#
+#
+##########################################
+
+
+##########################################
+#    M A I N  loop through FASTA file
+##########################################
+
+sub process_FASTA{
+	# Read the records of FASTA file $seqs (gzip-compressed when the name ends in .gz) and fill
+	# %data plus the scaffolded/unscaffolded counters; reading stops after $limit records.
+	my ($seqs) = @_;
+	my $input;
+
+	# if dealing with gzip file, treat differently
+	if($seqs =~ m/\.gz$/){
+		# list form of the pipe open runs gunzip directly, without a shell, so spaces or
+		# shell metacharacters in the file name cannot break or alter the command
+		open($input, "-|", "gunzip", "-c", $seqs) or die "Can't open a pipe to $seqs\n";
+	} else{
+		open($input, "<", "$seqs") or die "Can't open $seqs\n";
+	}
+
+	my $fasta = FAlite_a93cba2->new(\*$input);
+
+	# want to keep track of various contig + scaffold counts
+	my $seq_count = 0;
+
+	while(my $entry = $fasta->nextEntry){
+		my $seq = uc($entry->seq);
+		my $length = length($seq);
+		$seq_count++;
+
+		# everything gets pushed to scaffolds array
+		push(@{$data{scaffold}{seqs}},$seq);
+		push(@{$data{scaffold}{lengths}},$length);
+
+		# a sequence with at least $n_limit consecutive Ns is a scaffold and is split into its
+		# contigs; any other sequence is a contig itself and is stored unchanged
+		if ($seq =~ m/N{$n_limit}/){
+			# add length to $scaffolded_contig_length
+			$scaffolded_contig_length += $length;
+			# loop through all contigs that comprise the scaffold
+			foreach my $contig (split(/N{$n_limit,}/, $seq)){
+				# split returns an empty first field when the scaffold starts with Ns
+				my $contig_length = length($contig) or next;
+				$scaffolded_contigs++;
+				push(@{$data{contig}{seqs}},$contig);
+				push(@{$data{contig}{lengths}},$contig_length);
+			}
+		} else {
+			# must be here if the scaffold is actually just a contig (or is a scaffold with < $n_limit Ns)
+			$unscaffolded_contigs++;
+			$unscaffolded_contig_length += $length;
+			push(@{$data{contig}{seqs}},$seq);
+			push(@{$data{contig}{lengths}},$length);
+		}
+		# for testing, just use a few sequences
+		last if ($seq_count >= $limit);
+	}
+	close($input);
+}
+
+
+##########################################
+#    Calculate basic assembly metrics
+##########################################
+
+sub sequence_statistics{
+	# Print the statistics for one level of the assembly and, when -csv is set, record each value
+	# through store_results. $type is 'scaffold', 'contig' or 'intermediate'; 'intermediate' prints
+	# only the scaffolded/unscaffolded summary and the break statistics, then returns.
+	my ($type) = @_;
+	# stop with a clear message when nothing was read: every statistic below needs at least one sequence
+	foreach my $needed ($type eq 'intermediate' ? ('scaffold', 'contig') : ($type)){
+		die "No $needed sequences found in '$file'\n" unless @{$data{$needed}{lengths} || []};
+	}
+
+	print "\n";
+
+	# need descriptions of each result
+	my $desc;
+
+	# there are just a couple of intermediate level statistics to print
+	if($type eq 'intermediate'){
+		my $total_size = sum(@{$data{scaffold}{lengths}});
+
+		# now calculate percentage of assembly that is accounted for by scaffolded contigs
+		my $percent = sprintf("%.1f",($scaffolded_contig_length / $total_size) * 100);
+		$desc = "Percentage of assembly in scaffolded contigs";
+		printf "%${w}s %10s\n", $desc, "$percent%";
+		store_results($desc, $percent) if ($csv);
+
+		# now calculate percentage of assembly that is accounted for by unscaffolded contigs
+		$percent = sprintf("%.1f",($unscaffolded_contig_length / $total_size) * 100);
+		$desc = "Percentage of assembly in unscaffolded contigs";
+		printf "%${w}s %10s\n", $desc, "$percent%";
+		store_results($desc, $percent) if ($csv);
+
+		# statistics that describe N regions that join contigs in scaffolds
+
+		# get number of breaks
+		my $contig_count = scalar(@{$data{contig}{lengths}});
+		my $scaffold_count = scalar(@{$data{scaffold}{lengths}});
+		my $average_contigs_per_scaffold = sprintf("%.1f",$contig_count / $scaffold_count);
+		$desc = "Average number of contigs per scaffold";
+		printf "%${w}s %10s\n", $desc, $average_contigs_per_scaffold;
+		store_results($desc, $average_contigs_per_scaffold) if ($csv);
+
+		# now calculate average length of break between contigs
+		# just find all runs of Ns in scaffolds (>= $n_limit) and calculate average length
+		my @contig_breaks;
+		foreach my $scaffold (@{$data{scaffold}{seqs}}){
+			while($scaffold =~ m/(N{$n_limit,})/g){
+				push(@contig_breaks, length($1));
+			}
+		}
+		# set break size to zero if there are no Ns in scaffolds
+		my $average_break_length;
+
+		if(@contig_breaks == 0){
+			$average_break_length = 0;
+		} else{
+			$average_break_length = sum(@contig_breaks) / scalar(@contig_breaks);
+		}
+		if($n_limit == 1) {
+			$desc = "Mean length of breaks (>=${n_limit}N) between contigs in scaffold";
+		} else {
+			$desc = "Mean length of breaks (>=${n_limit}Ns) between contigs in scaffold";
+		}
+		if(length($n_limit)>=5) {
+			printf "%${w}s %9d\n", $desc, $average_break_length;
+		} else {
+			printf "%${w}s %10d\n", $desc, $average_break_length;
+		}
+		store_results($desc, $average_break_length) if ($csv);
+		return();
+	}
+
+	# number of sequences
+	my $count = scalar(@{$data{$type}{lengths}});
+	$desc = "Number of ${type}s";
+	printf "%${w}s %10d\n", $desc, $count;
+	store_results($desc, $count) if ($csv);
+
+	# more contig details (only for contigs)
+	if ($type eq 'contig'){
+		$desc = "Number of contigs in scaffolds";
+		printf "%${w}s %10d\n",$desc, $scaffolded_contigs;
+		store_results($desc, $scaffolded_contigs) if ($csv);
+
+		$desc = "Number of contigs not in scaffolds";
+		printf "%${w}s %10d\n", $desc,$unscaffolded_contigs;
+		store_results($desc, $unscaffolded_contigs) if ($csv);
+	}
+
+	# total size of sequences
+	my $total_size = sum(@{$data{$type}{lengths}});
+	$desc = "Total size of ${type}s";
+	printf "%${w}s %10d\n", $desc, $total_size;
+	store_results($desc, $total_size) if ($csv);
+
+	# For scaffold data only, can calculate the percentage of known genome size
+	# and also the amount of useful sequence
+	if ($type eq 'scaffold' && defined($genome_size)){
+		my $percent = sprintf("%.1f",($total_size / $genome_size) * 100);
+		$desc = "Total scaffold length as percentage of assumed genome size";
+		printf "%${w}s %10s\n", $desc, "$percent%";
+		store_results($desc, $percent) if ($csv);
+
+		# Also want to find total fraction of genome (based on estimated size) that is
+		# in 'useful scaffolds', those at least the average size of a vertebrate gene
+		# (taken to be 25 kbp)
+		my $useful_length = 25000;
+		my $sum_useful = 0;
+		foreach my $length (@{$data{$type}{lengths}}){
+			($sum_useful += $length) if ($length >= $useful_length);
+		}
+		# report how much useful sequence there was
+		$desc = "Useful amount of $type sequences (>= 25K nt)";
+		printf "%${w}s %10d\n", $desc, $sum_useful;
+		store_results($desc, $sum_useful) if ($csv);
+
+		my $percent_useful = sprintf("%.1f",($sum_useful / $genome_size) * 100);
+		$desc = "% of estimated genome that is useful";
+		printf "%${w}s %10s\n", $desc, "$percent_useful%";
+		store_results($desc, $percent_useful) if ($csv);
+
+	}
+
+	# longest and shortest sequences
+	my $max = max(@{$data{$type}{lengths}});
+	$desc = "Longest $type";
+	printf "%${w}s %10d\n", $desc, $max;
+	store_results($desc, $max) if ($csv);
+
+	my $min = min(@{$data{$type}{lengths}});
+	$desc = "Shortest $type";
+	printf "%${w}s %10d\n", $desc, $min;
+	store_results($desc, $min) if ($csv);
+
+	# find number of sequences above certain sizes
+	my %sizes_to_shorthand = (1000     => '1K',
+							  10000    => '10K',
+							  100000   => '100K',
+							  1000000  => '1M',
+							  10000000 => '10M');
+
+	foreach my $size (qw(1000 10000 100000 1000000 10000000)){
+		my $matches = grep { $_ > $size } @{$data{$type}{lengths}};
+		my $percent = sprintf("%.1f", ($matches / $count) * 100);
+
+		$desc = "Number of ${type}s > $sizes_to_shorthand{$size} nt";
+		printf "%${w}s %10d %5s%%\n", $desc, $matches, $percent;
+		store_results($desc, $matches)  if ($csv);
+
+		$desc = "Percentage of ${type}s > $sizes_to_shorthand{$size} nt";
+		store_results($desc, $percent)  if ($csv);
+	}
+
+
+	# mean sequence size
+	my $mean = sprintf("%.0f",$total_size / $count);
+	$desc = "Mean $type size";
+	printf "%${w}s %10d\n", $desc, $mean;
+	store_results($desc, $mean) if ($csv);
+
+	# median sequence size (for an even count this is the upper of the two middle values)
+	my $median = (sort{$a <=> $b} @{$data{$type}{lengths}})[$count/2];
+	$desc = "Median $type size";
+	printf "%${w}s %10d\n", $desc, $median;
+	store_results($desc, $median) if ($csv);
+
+
+
+	##################################################################################
+	#
+	# N50 values
+	#
+	# Includes N(x) values, NG(x) (using assumed genome size)
+	# and L(x) values (number of sequences larger than or equal to N50 sequence size)
+	##################################################################################
+
+	# keep track of cumulative assembly size (starting from the longest seq)
+	my $running_total = 0;
+
+	# want to store all N50-style values from N1..N100. First target size to pass is N1
+	my $n_index = 1;
+	my @n_values;
+	my $n50_length = 0;
+
+	my $i = 0;
+
+	# start with longest lengths scaffold/contig
+	foreach my $length (reverse sort{$a <=> $b} @{$data{$type}{lengths}}){
+		$i++;
+		$running_total += $length;
+
+		# check the current sequence and all sequences shorter than current one
+		# to see if they exceed the current NX value
+		while($running_total > int (($n_index / 100) * $total_size)){
+			if ($n_index == 50){
+				$n50_length = $length;
+				$desc = "N50 $type length";
+				printf "%${w}s %10d\n", $desc, $length;
+				store_results($desc, $length) if ($csv);
+
+				# L50 = number of scaffolds/contigs that are longer than or equal to the N50 size
+				$desc = "L50 $type count";
+				printf "%${w}s %10d\n","L50 $type count", $i;
+				store_results($desc, $i) if ($csv);
+			}
+			$n_values[$n_index] = $length;
+			$n_index++;
+		}
+	}
+
+	my @ng_values;
+
+	# do we have an estimated/known genome size to work with?
+	if(defined($genome_size)){
+		my $ng_index = 1;
+		my $ng50_length = 0;
+
+		$running_total = 0;
+		$i = 0;
+
+		foreach my $length (reverse sort{$a <=> $b} @{$data{$type}{lengths}}){
+			$i++;
+			$running_total += $length;
+
+			# now do the same for NG values, using assumed genome size
+			while($running_total > int (($ng_index / 100) * $genome_size)){
+				if ($ng_index == 50){
+					$ng50_length = $length;
+					$desc = "NG50 $type length";
+					printf "%${w}s %10d\n", $desc, $length;
+					store_results($desc, $length) if ($csv);
+
+					$desc = "LG50 $type count";
+					printf "%${w}s %10d\n", $desc, $i;
+					store_results($desc, $i) if ($csv);
+				}
+				$ng_values[$ng_index] = $length;
+				$ng_index++;
+			}
+		}
+
+		# calculate N50/NG50 difference
+		my $n50_diff = abs($ng50_length - $n50_length);
+		$desc = "N50 $type - NG50 $type length difference";
+		printf "%${w}s %10d\n", $desc, $n50_diff;
+		store_results($desc, $n50_diff) if ($csv);
+
+	}
+	# add final value to @n_values and @ng_values which will just be the shortest sequence
+#	$n_values[100] = $min;
+#	$ng_values[100] = $min;
+
+
+	# base frequencies
+	my %bases;
+
+	my $seq = join('',@{$data{$type}{seqs}});
+	my $length = length($seq);
+
+	# count mononucleotide frequencies
+	$bases{A} = ($seq =~ tr/A/A/);
+	$bases{C} = ($seq =~ tr/C/C/);
+	$bases{G} = ($seq =~ tr/G/G/);
+	$bases{T} = ($seq =~ tr/T/T/);
+	$bases{N} = ($seq =~ tr/N/N/);
+
+	my $base_count = 0;
+	foreach my $base (qw(A C G T N)){
+		my $percent = sprintf("%.2f", ($bases{$base} / $length) * 100);
+		$desc = "$type %$base";
+		printf "%${w}s %10s\n", $desc, $percent;
+		store_results($desc, $percent) if ($csv);
+		$base_count += $bases{$base};
+	}
+
+	# calculate remainder ('other') in case there are other characters present
+	my $other = $length - $base_count;
+	my $percent = sprintf("%.2f", ($other / $length) * 100);
+	$desc = "$type %non-ACGTN";
+	printf "%${w}s %10s\n",$desc, $percent;
+	store_results($desc, $percent) if ($csv);
+
+	$desc = "Number of $type non-ACGTN nt";
+	printf "%${w}s %10d\n",$desc, $other;
+	store_results($desc, $other) if ($csv);
+
+
+	# anything to dump for graphing?
+	if($graph){
+
+		# create new output file name
+		my $file_name = $file;
+		$file_name =~ s/\.gz$//;
+		$file_name =~ s/\.(fa|fasta)$//;
+		$file_name .= ".${type}.NG50.csv";
+
+		open(my $out, ">", "$file_name") or die "Can't create $file_name\n";
+		print $out join (',',"Assembly",1..99), "\n";
+
+		# make some guesses of what might constitute the unique assembly ID
+		my $assembly_ID = $file;
+		($assembly_ID) = $file =~ m/^([A-Z]\d{1,2})_/ if ($file =~ m/^[A-Z]\d{1,2}_/);
+		# (C|E) is grouped in the test as well: the old test /..._\d+C|E_/ also matched any name
+		# containing "E_", after which the failed capture left $assembly_ID undefined
+		($assembly_ID) = $file =~ m/^((bird|snake|fish)_\d+(C|E))_/ if ($file =~ m/^(bird|snake|fish)_\d+(C|E)_/);
+
+		# CSV file, with filename in first column
+		print $out "$assembly_ID";
+
+		for (my $i = 1; $i < 100; $i++){
+			# higher NG values might not be present if assembly is poor
+			if (defined $ng_values[$i]){
+				print $out ",$ng_values[$i]";
+			} else{
+				print $out ",0";
+			}
+		}
+		print $out "\n";
+		close($out);
+	}
+}
+
+# simple routine to add results to a pair of arrays that will be used for printing results later on
+# if -csv option is used
+sub store_results{
+	# append one description/value pair; write_csv prints both arrays in stored order
+	my ($label, $value) = @_;
+	push @headers, $label;
+	push @results, $value;
+}
+
+sub write_csv{
+	# Write the stored descriptions and values as a two-line CSV named after $file (.gz and .fa/.fasta removed, .csv added).
+	my ($file) = @_;
+
+	# create new output file name
+	my $output = $file;
+	$output =~ s/\.gz$//;
+	$output =~ s/\.(fa|fasta)$//;
+	$output .= ".csv";
+
+	# make some guesses of what might constitute the unique assembly ID
+	my $assembly_ID = $file;
+	($assembly_ID) = $file =~ m/^([A-Z]\d{1,2})_/ if ($file =~ m/^[A-Z]\d{1,2}_/);
+	# (C|E) is grouped in the test too: the old /..._\d+C|E_/ matched any name containing "E_" and left the ID undefined
+	($assembly_ID) = $file =~ m/^((bird|snake|fish)_\d+(C|E))_/ if ($file =~ m/^(bird|snake|fish)_\d+(C|E)_/);
+
+	open(my $out, ">", $output) or die "Can't create $output\n";
+
+	print $out "Assembly,";
+	foreach my $header (@headers){
+		print $out "$header,";
+	}
+	print $out "\n";
+
+	print $out "$assembly_ID,";
+	foreach my $result (@results){
+		print $out "$result,";
+	}
+	print $out "\n";
+	close($out) or die "Can't write $output\n"; # buffered write errors only surface at close
+}
diff --git a/modules/local/assemblathon_stats.nf b/modules/local/assemblathon_stats.nf
new file mode 100644
index 00000000..3285e08c
--- /dev/null
+++ b/modules/local/assemblathon_stats.nf
@@ -0,0 +1,55 @@
+process ASSEMBLATHON_STATS {
+    tag "${asm_tag}"
+    label "process_single"
+    // plain Ubuntu image; the script locates FAlite_a93cba2.pm through PATH and links it next to the input
+    container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ?
+        'https://depot.galaxyproject.org/singularity/ubuntu:20.04':
+        'quay.io/nf-core/ubuntu:20.04' }"
+    // asm_tag names the output file; n_limit is passed to the Perl script as -n
+    input:
+    tuple val(asm_tag), path(fasta_file)
+    val n_limit
+    // the CSV written by the Perl script is renamed to <asm_tag>_stats.csv
+    output:
+    path "${asm_tag}_stats.csv"     , emit: stats
+    path 'versions.yml'             , emit: versions
+    // only names ending in .csv are picked up below, so an input whose name merely contains "csv" is not moved
+    script:
+    def VERSION = "github/PlantandFoodResearch/assemblathon2-analysis/a93cba2"
+    """
+    paths_to_check=\$(printf "%s\\n" \$(echo \$PATH | tr ':' ' ') \\
+        | xargs -I {} find {} -maxdepth 0 -print 2>/dev/null \\
+        | grep -v '^\$' \\
+        | grep -v '/sbin' \\
+        | xargs
+    )
+
+    falite_path="\$(find \$paths_to_check -name FAlite_a93cba2.pm)"
+
+    ln -s "\$falite_path" FAlite_a93cba2.pm
+
+    PERL5LIB=./ assemblathon_stats_a93cba2.pl \\
+        -n $n_limit \\
+        -csv \\
+        "${fasta_file}"
+
+    csv_file_name=\$(ls | grep '\\.csv\$')
+    mv \$csv_file_name "${asm_tag}_stats.csv"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        assemblathon_stats: $VERSION
+    END_VERSIONS
+    """
+    // stub: placeholder stats file plus versions.yml, without running the script
+    stub:
+    def VERSION = "github/PlantandFoodResearch/assemblathon2-analysis/a93cba2"
+    """
+    touch "${asm_tag}_stats.csv"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        assemblathon_stats: $VERSION
+    END_VERSIONS
+    """
+}
diff --git a/workflows/assemblyqc.nf b/workflows/assemblyqc.nf
index 0df1b2b0..34f71156 100644
--- a/workflows/assemblyqc.nf
+++ b/workflows/assemblyqc.nf
@@ -31,6 +31,7 @@ include { GT_STAT                           } from '../modules/pfr/gt/stat/main'
 include { GFF3_VALIDATE                     } from '../subworkflows/pfr/gff3_validate/main'
 include { NCBI_FCS_ADAPTOR                  } from '../modules/local/ncbi_fcs_adaptor'
 include { NCBI_FCS_GX                       } from '../subworkflows/local/ncbi_fcs_gx'
+include { ASSEMBLATHON_STATS                } from '../modules/local/assemblathon_stats'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -243,6 +244,15 @@ workflow ASSEMBLYQC {
                                                 [ tag, fa ]
                                             }
 
+    // MODULE: ASSEMBLATHON_STATS
+    ASSEMBLATHON_STATS(
+        ch_clean_assembly,
+        params.assemblathon_stats_n_limit
+    )
+
+    ch_assemblathon_stats                   = ASSEMBLATHON_STATS.out.stats
+    ch_versions                             = ch_versions.mix(ASSEMBLATHON_STATS.out.versions.first())
+
     // MODULE: CUSTOM_DUMPSOFTWAREVERSIONS
     CUSTOM_DUMPSOFTWAREVERSIONS (
         ch_versions.unique().collectFile(name: 'collated_versions.yml')