Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow VectorUtilProvider tests to be executed although hardware may not fully support vectorization or if C2 is not enabled #12376

Merged
merged 1 commit into from
Jun 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions lucene/core/src/java/org/apache/lucene/util/VectorUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
/** Utilities for computations with numeric arrays */
public final class VectorUtil {

// visible for testing
static final VectorUtilProvider PROVIDER = VectorUtilProvider.lookup();
private static final VectorUtilProvider PROVIDER = VectorUtilProvider.lookup(false);

private VectorUtil() {}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ interface VectorUtilProvider {
/** The minimal version of Java that has the bugfix for JDK-8301190. */
static final Version VERSION_JDK8301190_FIXED = Version.parse("20.0.2");

static VectorUtilProvider lookup() {
static VectorUtilProvider lookup(boolean testMode) {
final int runtimeVersion = Runtime.version().feature();
if (runtimeVersion >= 20 && runtimeVersion <= 21) {
// is locale sane (only buggy in Java 20)
Expand All @@ -71,7 +71,7 @@ static VectorUtilProvider lookup() {
"Java vector incubator module is not readable. For optimal vector performance, pass '--add-modules jdk.incubator.vector' to enable Vector API.");
return new VectorUtilDefaultProvider();
}
if (isClientVM()) {
if (!testMode && isClientVM()) {
LOG.warning("C2 compiler is disabled; Java vector incubator API can't be enabled");
return new VectorUtilDefaultProvider();
}
Expand All @@ -80,9 +80,10 @@ static VectorUtilProvider lookup() {
// have private access through the lookup:
final var lookup = MethodHandles.lookup();
final var cls = lookup.findClass("org.apache.lucene.util.VectorUtilPanamaProvider");
final var constr = lookup.findConstructor(cls, MethodType.methodType(void.class));
final var constr =
lookup.findConstructor(cls, MethodType.methodType(void.class, boolean.class));
try {
return (VectorUtilProvider) constr.invoke();
return (VectorUtilProvider) constr.invoke(testMode);
} catch (UnsupportedOperationException uoe) {
// not supported because preferred vector size too small or similar
LOG.warning("Java vector incubator API was not enabled. " + uoe.getMessage());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@ final class VectorUtilPanamaProvider implements VectorUtilProvider {
* <p>it could be that it has only AVX1 and integer vectors are fast. it could also be that it has
* no AVX and integer vectors are extremely slow. don't use integer vectors to avoid landmines.
*/
private static final boolean IS_AMD64_WITHOUT_AVX2 =
Constants.OS_ARCH.equals("amd64") && INT_SPECIES_PREF_BIT_SIZE < 256;
private final boolean hasFastIntegerVectors;

static {
if (INT_SPECIES_PREF_BIT_SIZE >= 256) {
Expand All @@ -67,8 +66,8 @@ private static <T> T doPrivileged(PrivilegedAction<T> action) {
return AccessController.doPrivileged(action);
}

VectorUtilPanamaProvider() {
if (INT_SPECIES_PREF_BIT_SIZE < 128) {
VectorUtilPanamaProvider(boolean testMode) {
if (!testMode && INT_SPECIES_PREF_BIT_SIZE < 128) {
throw new UnsupportedOperationException(
"Vector bit size is less than 128: " + INT_SPECIES_PREF_BIT_SIZE);
}
Expand All @@ -83,9 +82,16 @@ private static <T> T doPrivileged(PrivilegedAction<T> action) {
"We hit initialization failure described in JDK-8309727: " + se);
}

// check if the system is x86 and less than 256-bit vectors:
var isAMD64withoutAVX2 = Constants.OS_ARCH.equals("amd64") && INT_SPECIES_PREF_BIT_SIZE < 256;
this.hasFastIntegerVectors = testMode || false == isAMD64withoutAVX2;

var log = Logger.getLogger(getClass().getName());
log.info(
"Java vector incubator API enabled; uses preferredBitSize=" + INT_SPECIES_PREF_BIT_SIZE);
"Java vector incubator API enabled"
+ (testMode ? " (test mode)" : "")
+ "; uses preferredBitSize="
+ INT_SPECIES_PREF_BIT_SIZE);
}

@Override
Expand Down Expand Up @@ -295,7 +301,7 @@ public int dotProduct(byte[] a, byte[] b) {
int res = 0;
// only vectorize if we'll at least enter the loop a single time, and we have at least 128-bit
// vectors (256-bit on intel to dodge performance landmines)
if (a.length >= 16 && IS_AMD64_WITHOUT_AVX2 == false) {
if (a.length >= 16 && hasFastIntegerVectors) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a bit of a pity that this check is now with a final rather than static final, but that's the price we pay for test-ability!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thats ok, because in production we only have exactly one instance of this class, so it does not matter, it is optimized away.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually I like this more, because it is no negative test.

// compute vectorized dot product consistent with VPDPBUSD instruction
if (INT_SPECIES_PREF_BIT_SIZE >= 256) {
// optimized 256/512 bit implementation, processes 8/16 bytes at a time
Expand Down Expand Up @@ -352,7 +358,7 @@ public float cosine(byte[] a, byte[] b) {
int norm2 = 0;
// only vectorize if we'll at least enter the loop a single time, and we have at least 128-bit
// vectors (256-bit on intel to dodge performance landmines)
if (a.length >= 16 && IS_AMD64_WITHOUT_AVX2 == false) {
if (a.length >= 16 && hasFastIntegerVectors) {
if (INT_SPECIES_PREF_BIT_SIZE >= 256) {
// optimized 256/512 bit implementation, processes 8/16 bytes at a time
int upperBound = PREF_BYTE_SPECIES.loopBound(a.length);
Expand Down Expand Up @@ -442,7 +448,7 @@ public int squareDistance(byte[] a, byte[] b) {
int res = 0;
// only vectorize if we'll at least enter the loop a single time, and we have at least 128-bit
// vectors (256-bit on intel to dodge performance landmines)
if (a.length >= 16 && IS_AMD64_WITHOUT_AVX2 == false) {
if (a.length >= 16 && hasFastIntegerVectors) {
if (INT_SPECIES_PREF_BIT_SIZE >= 256) {
// optimized 256/512 bit implementation, processes 8/16 bytes at a time
int upperBound = PREF_BYTE_SPECIES.loopBound(a.length);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public class TestVectorUtilProviders extends LuceneTestCase {

private static final double DELTA = 1e-3;
private static final VectorUtilProvider LUCENE_PROVIDER = new VectorUtilDefaultProvider();
private static final VectorUtilProvider JDK_PROVIDER = VectorUtil.PROVIDER;
private static final VectorUtilProvider JDK_PROVIDER = VectorUtilProvider.lookup(true);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is great - that we can ensure that we're always testing the Panama implementation. I guess the test can sometime be slow?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test only compares the default lucene provider with a few random vectors and compares results. So slowness does not matter here!

Of course here we only test the Panama Implementation if it is Java 20/21 and it is not buggy (turkish locale bug).


private static final int[] VECTOR_SIZES = {
1, 4, 6, 8, 13, 16, 25, 32, 64, 100, 128, 207, 256, 300, 512, 702, 1024
Expand Down