Skip to content

Commit

Permalink
Merge pull request #923 from shawnlaffan/calc_abc_any
Browse files Browse the repository at this point in the history
This PR includes several optimisations.  Chief among them are:

  * Implement _calc_abc_any for cases where a dependent method only needs the label hash keys or element lists
  * calc_abc grabs and modifies results from calc_abc2 or calc_abc3 if already run, thus saving some processing
  * One of calc_abc2 or calc_abc3 (when present) is always run before calc_abc
  * Add a hierarchical _calc_abc variant for cases such as cluster node calcs, where the label hashes can be generated from the child node results instead of processing the full list of terminals
  * some other general index optimisations such as result sharing when nbr set 2 is empty
  • Loading branch information
shawnlaffan authored Feb 26, 2024
2 parents 3c590de + a046fe1 commit 8216111
Show file tree
Hide file tree
Showing 11 changed files with 315 additions and 93 deletions.
25 changes: 22 additions & 3 deletions lib/Biodiverse/Indices.pm
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use warnings;
#use Devel::Symdump;
#use Data::Dumper;
use Scalar::Util qw /blessed weaken/;
use List::MoreUtils qw /uniq/;
use List::MoreUtils qw /uniq first_index/;
use List::Util qw /sum any/;
use English ( -no_match_vars );
use Ref::Util qw { :all };
Expand Down Expand Up @@ -1021,6 +1021,25 @@ sub aggregate_calc_lists_by_type {
foreach my $type (@types) {
my $array = $aggregated{$type};
my @u_array = uniq @$array;
if ($type eq 'pre_calc'
and scalar @u_array
) {
# Move the first /calc_abc[23]/ entry to the front so that
# calc_abc and _calc_abc_any can grab its results.
# Otherwise, ensure calc_abc is at the front
# for _calc_abc_any.
state $re = qr{^calc_abc[23]};
my $iter23 = first_index {$_ =~ $re} @u_array;
if ($iter23 > 0) {
unshift @u_array, splice @u_array, $iter23, 1;
}
else {
my $iter1 = first_index {$_ eq 'calc_abc'} @u_array;
if ($iter1 > 0) {
unshift @u_array, splice @u_array, $iter1, 1;
}
}
}
$aggregated{$type} = \@u_array;
}

Expand Down Expand Up @@ -1533,7 +1552,7 @@ sub run_dependencies {
my %results;
my %as_results_from;
# make sure this is new each iteration
$self->set_cached_value ($cache_name_local_results => \%as_results_from);
$self->set_param ($cache_name_local_results => \%as_results_from);

foreach my $calc (@$calc_list) {
my $calc_results;
Expand Down Expand Up @@ -1572,7 +1591,7 @@ sub run_dependencies {
}

# We refresh each call above, but this ensures last one is cleaned up.
$self->delete_cached_value($cache_name_local_results);
$self->delete_param ($cache_name_local_results);

if ( $type eq 'pre_calc_global' ) {
$self->set_param( AS_RESULTS_FROM_GLOBAL => \%as_results_from_global );
Expand Down
11 changes: 6 additions & 5 deletions lib/Biodiverse/Indices/Endemism.pm
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ sub calc_endemism_central_hier_part {
# If we have no nbrs in set 2 then we are the same as the "whole" variant.
# So just grab its values if it has already been calculated.
if (!keys %{$args{label_hash2}}) {
my $cache_hash = $self->get_cached_value('AS_RESULTS_FROM_LOCAL');
my $cache_hash = $self->get_param('AS_RESULTS_FROM_LOCAL');
my $cached = $cache_hash->{calc_endemism_whole_hier_part};
if ($cached) {
my %remapped;
Expand Down Expand Up @@ -335,7 +335,7 @@ sub calc_endemism_whole_hier_part {
# If we have no nbrs in set 2 then we are the same as the "central" variant.
# So just grab its values if it has already been calculated.
if (!keys %{$args{label_hash2}}) {
my $cache_hash = $self->get_cached_value('AS_RESULTS_FROM_LOCAL');
my $cache_hash = $self->get_param('AS_RESULTS_FROM_LOCAL');
my $cached = $cache_hash->{calc_endemism_central_hier_part};
if ($cached) {
# say STDERR join ' ', sort keys %$cached;
Expand Down Expand Up @@ -586,7 +586,7 @@ sub _calc_endemism_central {
# If we have no nbrs in set 2 then we are the same as the "whole" variant.
# So just grab its values if it has already been calculated.
if (!keys %{$args{label_hash2}}) {
my $cache_hash = $self->get_cached_value('AS_RESULTS_FROM_LOCAL');
my $cache_hash = $self->get_param('AS_RESULTS_FROM_LOCAL');
my $cached = $cache_hash->{_calc_endemism_whole};
return wantarray ? %$cached : $cached
if $cached;
Expand Down Expand Up @@ -736,7 +736,7 @@ sub _calc_endemism_whole {
# If we have no nbrs in set 2 then we are the same as the "central" variant.
# So just grab its values if it has already been calculated.
if (!keys %{$args{label_hash2}}) {
my $cache_hash = $self->get_cached_value('AS_RESULTS_FROM_LOCAL');
my $cache_hash = $self->get_param('AS_RESULTS_FROM_LOCAL');
my $cached = $cache_hash->{_calc_endemism_central};
return wantarray ? %$cached : $cached
if $cached;
Expand All @@ -761,7 +761,8 @@ sub _calc_endemism {
? $args{label_hash1}
: $args{label_hash_all};

# allows us to use this for any other basedata get_* function
# Allows us to use this for any other basedata get_* function.
# calc_rarity is an example of this.
my $function = $args{function} || 'get_range';
my $range_hash = $args{label_range_hash} || {};

Expand Down
Loading

0 comments on commit 8216111

Please sign in to comment.