Skip to content

Commit

Permalink
WIP: enable larger Rmax (up to half the box size) (#277)
Browse files Browse the repository at this point in the history
* Attempt to enable Rmax comparable to half boxsize by removing (unnecessary?) duplicate cell checks

* Comment out unused var

* Add test implementing Manodeep's example

* Restore duplicate cell pair check, but only count a pair as a duplicate if the wrap value is identical.

* Add pragmas for CI (will revisit)

* Update GNU assembler bug detection (#278)

* Update GNU assembler bug detection

* Cosmetic enhancement to suppress spurious warnings during GAS bug test

* Fix test error code

* Add another test of large Rmax, comparing against brute-force

* Add const qualifiers

* Apply @manodeep's fix for large Rmax test against brute-force, and require nmesh>=2 to avoid duplicate cell pairs

* Make boxsize non-trivial in test. Remove extra print statement.

* Add comments on array broadcasting to test

* Changed variable name for clarity

Co-authored-by: Manodeep Sinha <manodeep@gmail.com>
  • Loading branch information
lgarrison and manodeep authored Oct 7, 2022
1 parent c3e260d commit 23453ae
Show file tree
Hide file tree
Showing 7 changed files with 133 additions and 12 deletions.
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ New features
Enhancements
------------
- Allow user to specify periodicity and box size per dimension [#276]
- Allow larger Rmax (up to half the boxsize) [#277]

Changes
-------
Expand All @@ -28,6 +29,7 @@ Fixes
-----
- Add additional check to tell if it's safe to redirect stdout/err [#270]
- Check and fix ``z`` vs ``cz`` in ``DDrppi_mocks`` and ``DDsmu_mocks`` only if comoving distance flag is not set [#275]
- Update GNU assembler bug detection [#278]


2.4.0 (2021-09-30)
Expand Down
98 changes: 98 additions & 0 deletions Corrfunc/tests/test_theory.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,104 @@ def test_narrow_extent(N, isa='fastest', nthreads=maxthreads()):
assert np.all(results['npairs'] == [0, 0])


@pytest.mark.parametrize('autocorr', [0, 1], ids=['cross', 'auto'])
@pytest.mark.parametrize('binref', [1, 2, 3])
@pytest.mark.parametrize('min_sep_opt', [False, True])
@pytest.mark.parametrize('maxcells', [1, 2, 3])
def test_duplicate_cellpairs(autocorr, binref, min_sep_opt, maxcells,
isa='fastest', nthreads=maxthreads()):
'''A test to implement Manodeep's example from
https://github.com/manodeep/Corrfunc/pull/277#issuecomment-1190921894
where a particle pair straddles the wrap.
Also tests the large Rmax case, where Rmax is almost as large as L/2.
'''
from Corrfunc.theory import DD
boxsize = 432.
r_bins = np.array([0.01, 0.4]) * boxsize
pos = np.array([[0.02, 0.98],
[0., 0.],
[0., 0.0],
]) * boxsize

results = DD(autocorr, nthreads, r_bins, pos[0], pos[1], pos[2],
X2=pos[0], Y2=pos[1], Z2=pos[2],
boxsize=boxsize, periodic=True, isa=isa,
verbose=True, max_cells_per_dim=maxcells,
xbin_refine_factor=binref, ybin_refine_factor=binref,
zbin_refine_factor=binref, enable_min_sep_opt=min_sep_opt,
)

assert np.all(results['npairs'] == [2])

r_bins = np.array([0.2, 0.3, 0.49]) * boxsize
pos = np.array([[0., 0.],
[0., 0.],
[0., 0.48],
]) * boxsize

results = DD(autocorr, nthreads, r_bins, pos[0], pos[1], pos[2],
X2=pos[0], Y2=pos[1], Z2=pos[2],
boxsize=boxsize, periodic=True, isa=isa,
verbose=True, max_cells_per_dim=maxcells,
xbin_refine_factor=binref, ybin_refine_factor=binref,
zbin_refine_factor=binref, enable_min_sep_opt=min_sep_opt,
)

assert np.all(results['npairs'] == [0, 2])


@pytest.mark.parametrize('autocorr', [0, 1], ids=['cross', 'auto'])
@pytest.mark.parametrize('binref', [1, 2, 3])
@pytest.mark.parametrize('min_sep_opt', [False, True])
@pytest.mark.parametrize('maxcells', [1, 2, 3])
def test_rmax_against_brute(autocorr, binref, min_sep_opt, maxcells,
isa='fastest', nthreads=maxthreads()):
'''Generate two small point clouds near particles near (0,0,0)
and (L/2,L/2,L/2) and compare against the brute-force answer.
Use close to the max allowable Rmax, 0.49*Lbox.
'''
np.random.seed(1234)
npts = 100
eps = 0.2 # as a fraction of boxsize
boxsize = 123.
bins = np.linspace(0.01, 0.49*boxsize, 20)

# two clouds of width eps*boxsize
pos = np.random.uniform(low=-eps, high=eps,
size=(3, npts))*boxsize
pos[:, npts//2:] += boxsize/2. # second cloud is in center of box
pos %= boxsize

# Compute the pairwise distance between particles with broadcasting.
# Broadcasting (3,1,npts) against (3,npts,1) yields (3,npts,npts),
# which is the array of all npts^2 (x,y,z) differences.
pdiff = np.abs(pos[:, np.newaxis] - pos[:, :, np.newaxis])
pdiff[pdiff >= boxsize/2] -= boxsize

# Compute dist^2 = x^2 + y^2 + z^2, flattening because we don't
# care which dist came from which particle pair
sqr_pdiff = (pdiff**2).sum(axis=0).reshape(-1)
brutecounts, _ = np.histogram(sqr_pdiff, bins=bins**2)

# spot-check that we have non-zero counts
assert np.any(brutecounts > 0)

from Corrfunc.theory import DD

results = DD(autocorr, nthreads, bins, pos[0], pos[1], pos[2],
X2=pos[0], Y2=pos[1], Z2=pos[2],
boxsize=boxsize, periodic=True, isa=isa,
verbose=True, max_cells_per_dim=maxcells,
xbin_refine_factor=binref, ybin_refine_factor=binref,
zbin_refine_factor=binref, enable_min_sep_opt=min_sep_opt,
)

assert np.all(results['npairs'] == brutecounts)



@pytest.mark.parametrize('isa,nthreads', generate_isa_and_nthreads_combos())
def test_DDrppi(gals_Mr19, isa, nthreads):
from Corrfunc.theory import DDrppi
Expand Down
8 changes: 3 additions & 5 deletions common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -347,12 +347,10 @@ ifeq ($(DO_CHECKS), 1)
CLINK += -lm
endif #not icc

# The GNU Assembler (GAS) has an AVX-512 bug in versions 2.30 to 2.31.1
# So we turn off AVX-512 if the compiler reports it is using one of these versions.
# This works for gcc and icc.
# clang typically uses its own assembler, but if it is using the system assembler, this will also detect that.
# The GNU Assembler (GAS) has an AVX-512 bug in some versions (originally 2.30 to 2.31.1)
# So we compile a test program and check the assembly for the correct output.
# See: https://github.com/manodeep/Corrfunc/issues/193
GAS_BUG_DISABLE_AVX512 := $(shell $(CC) $(CFLAGS) -xc -Wa,-v -c /dev/null -o /dev/null 2>&1 | \grep -Ecm1 'GNU assembler version (2\.30|2\.31|2\.31\.1)')
GAS_BUG_DISABLE_AVX512 := $(shell echo 'vmovaps 64(,%rax), %zmm0' | $(CC) $(CFLAGS) -xassembler -c - -oa.out 2>/dev/null && objdump -dw a.out | \grep -q 'vmovaps 0x40(,%rax,1),%zmm0'; RET=$$?; rm -f a.out; echo $$RET)

ifeq ($(GAS_BUG_DISABLE_AVX512),1)
# Did the compiler support AVX-512 in the first place? Otherwise no need to disable it!
Expand Down
4 changes: 2 additions & 2 deletions utils/gridlink_impl.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ struct cell_pair_DOUBLE * generate_cell_pairs_DOUBLE(struct cellarray_DOUBLE *la
return NULL;
}
const int64_t num_self_pairs = totncells;
const int64_t num_nonself_pairs = totncells * max_ngb_cells / (1 + autocorr);
const int64_t num_nonself_pairs = totncells * max_ngb_cells;

const int64_t max_num_cell_pairs = num_self_pairs + num_nonself_pairs;
int64_t num_cell_pairs = 0;
Expand Down Expand Up @@ -529,7 +529,7 @@ struct cell_pair_DOUBLE * generate_cell_pairs_DOUBLE(struct cellarray_DOUBLE *la
//Check if the ngb-cell has already been added - can happen under periodic boundary
//conditions, with small value of nmesh_x/y/z (ie large Rmax relative to BoxSize)
if(check_for_duplicate_ngb_cells) {
CHECK_AND_CONTINUE_FOR_DUPLICATE_NGB_CELLS_DOUBLE(icell, icell2, num_cell_pairs, num_ngb_this_cell, all_cell_pairs);
CHECK_AND_CONTINUE_FOR_DUPLICATE_NGB_CELLS_DOUBLE(icell, icell2, off_xwrap, off_ywrap, off_zwrap, num_cell_pairs, num_ngb_this_cell, all_cell_pairs);
}

struct cellarray_DOUBLE *second = &(lattice2[icell2]);
Expand Down
11 changes: 10 additions & 1 deletion utils/gridlink_mocks_impl.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -1585,7 +1585,12 @@ struct cell_pair_DOUBLE * generate_cell_pairs_mocks_theta_ra_dec_DOUBLE(cellarra
continue;
}

CHECK_AND_CONTINUE_FOR_DUPLICATE_NGB_CELLS_DOUBLE(icell, icell2, num_cell_pairs, num_ngb_this_cell, all_cell_pairs);
// No periodicity with mocks; wrap value is always zero.
const DOUBLE xwrap = ZERO;
const DOUBLE ywrap = ZERO;
const DOUBLE zwrap = ZERO;

CHECK_AND_CONTINUE_FOR_DUPLICATE_NGB_CELLS_DOUBLE(icell, icell2, xwrap, ywrap, zwrap, num_cell_pairs, num_ngb_this_cell, all_cell_pairs);

XRETURN(num_cell_pairs < max_num_cell_pairs, NULL,
"Error: Assigning this existing cell-pair would require accessing invalid memory.\n"
Expand Down Expand Up @@ -1626,6 +1631,10 @@ struct cell_pair_DOUBLE * generate_cell_pairs_mocks_theta_ra_dec_DOUBLE(cellarra
this_cell_pair->closest_y1 = closest_y1;
this_cell_pair->closest_z1 = closest_z1;

this_cell_pair->xwrap = xwrap;
this_cell_pair->ywrap = ywrap;
this_cell_pair->zwrap = zwrap;

this_cell_pair->same_cell = (autocorr == 1 && icell2 == icell) ? 1:0;

num_cell_pairs++;
Expand Down
12 changes: 10 additions & 2 deletions utils/gridlink_utils.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,24 @@ int get_binsize_DOUBLE(const DOUBLE xdiff, const DOUBLE xwrap, const DOUBLE rmax
{
int nmesh=(int)(refine_factor*xdiff/rmax);
nmesh = nmesh < 1 ? 1:nmesh;
if( xwrap > 0 && ((xdiff + rmax) >= xwrap) ) {

if(xwrap > 0 && rmax >= xwrap/2){
fprintf(stderr,"%s> ERROR: rmax=%f must be less than half of periodic boxize=%f to avoid double-counting particles\n",
__FILE__,rmax,xwrap);
return EXIT_FAILURE;
}

/*if( xwrap > 0 && ((xdiff + rmax) >= xwrap) ) {
if (nmesh<(2*refine_factor+1)) {
fprintf(stderr,"%s> ERROR: nlattice = %d is so small that with periodic wrapping the same cells will be counted twice ....exiting\n",__FILE__,nmesh) ;
fprintf(stderr,"%s> Please reduce Rmax = %"REAL_FORMAT" to be a smaller fraction of the particle distribution region = %"REAL_FORMAT"\n",
__FILE__,rmax, xdiff);
return EXIT_FAILURE;
}
}
}*/

if (nmesh>max_ncells) nmesh=max_ncells;
if (nmesh<2) nmesh=2; // to avoid forming duplicate cell pairs
*xbinsize = xdiff/nmesh;
*nlattice = nmesh;

Expand Down
10 changes: 8 additions & 2 deletions utils/gridlink_utils.h.src
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ extern "C" {
DOUBLE *sqr_sep_min, DOUBLE *sqr_sep_max);


#define CHECK_AND_CONTINUE_FOR_DUPLICATE_NGB_CELLS_DOUBLE(icell, icell2, num_cell_pairs, num_ngb_this_cell, all_cell_pairs) { \
#define CHECK_AND_CONTINUE_FOR_DUPLICATE_NGB_CELLS_DOUBLE(icell, icell2, off_xwrap, off_ywrap, off_zwrap, num_cell_pairs, num_ngb_this_cell, all_cell_pairs) { \
int duplicate_flag = 0; \
XRETURN(num_cell_pairs - num_ngb_this_cell >= 0, NULL, \
"Error: While working on detecting (potential) duplicate cell-pairs on primary cell = %"PRId64"\n" \
Expand All @@ -57,10 +57,16 @@ extern "C" {
"For cell-pair # %"PRId64", the primary cellindex (within cell-pair) = %"PRId64" should be *exactly* " \
"equal to current primary cellindex = %"PRId64". Num_cell_pairs = %"PRId64" num_ngb_this_cell = %"PRId64"\n", \
icell, num_cell_pairs - jj, this_cell_pair->cellindex1, icell, num_cell_pairs, num_ngb_this_cell); \
if (this_cell_pair->cellindex2 == icell2) { \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") \
if (this_cell_pair->cellindex2 == icell2 && \
this_cell_pair->xwrap == off_xwrap && \
this_cell_pair->ywrap == off_ywrap && \
this_cell_pair->zwrap == off_zwrap) { \
duplicate_flag = 1; \
break; \
} \
_Pragma("GCC diagnostic pop") \
} \
if(duplicate_flag == 1) continue; \
}
Expand Down

0 comments on commit 23453ae

Please sign in to comment.