Skip to content

Commit

Permalink
Modify half-domain LUT1D GPU shader to improve zero handling (#1981)
Browse files Browse the repository at this point in the history
Signed-off-by: Doug Walker <doug.walker@autodesk.com>
  • Loading branch information
doug-walker authored Sep 5, 2024
1 parent d2c9617 commit 9864d75
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 7 deletions.
12 changes: 6 additions & 6 deletions src/OpenColorIO/ops/lut1d/Lut1DOpGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ void GetLut1DGPUShaderProgram(GpuShaderCreatorRcPtr & shaderCreator,
{
static const float NEG_MIN_EXP = 15.0f;
static const float EXP_SCALE = 1024.0f;
static const float HALF_DENRM_MAX = 6.09755515e-05f; // e.g. 2^-14 - 2^-24
static const float INV_DENRM_STEP = 16777216.0f; // 1 / 2^-24

ss.newLine() << "float dep;";
ss.newLine() << "float abs_f = abs(f);";
Expand All @@ -258,15 +258,15 @@ void GetLut1DGPUShaderProgram(GpuShaderCreatorRcPtr & shaderCreator,
ss.newLine() << "else";
ss.newLine() << "{";
ss.indent();
// Extract bits from denormalized values
ss.newLine() << "dep = abs_f * 1023.0 / " << HALF_DENRM_MAX << ";";
// Extract bits from denormalized values.
ss.newLine() << "dep = abs_f * " << INV_DENRM_STEP << ";";
ss.dedent();
ss.newLine() << "}";

// Adjust position for negative values
ss.newLine() << "dep += step(f, 0.0) * 32768.0;";
// Adjust position for negative values.
ss.newLine() << "dep += (f < 0.) ? 32768.0 : 0.0;";

// At this point 'dep' contains the raw half
// At this point 'dep' contains the raw half.
// Note: Raw halfs for NaN floats cannot be computed using
// floating-point operations.
}
Expand Down
32 changes: 31 additions & 1 deletion tests/cpu/Processor_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
namespace OCIO = OCIO_NAMESPACE;


OCIO_ADD_TEST(Processor, basic)
OCIO_ADD_TEST(Processor, basic_cache)
{
OCIO::ConfigRcPtr config = OCIO::Config::Create();
OCIO::GroupTransformRcPtr group = OCIO::GroupTransform::Create();
Expand Down Expand Up @@ -56,6 +56,36 @@ OCIO_ADD_TEST(Processor, basic)
OCIO_CHECK_EQUAL(std::string(processorMat->getCacheID()), "1b1880136f7669351adb0dcae0f4f9fd");
}

OCIO_ADD_TEST(Processor, basic_cache_lut)
{
OCIO::ConfigRcPtr config = OCIO::Config::Create();
OCIO::GroupTransformRcPtr group = OCIO::GroupTransform::Create();

auto processorEmptyGroup = config->getProcessor(group);
OCIO_CHECK_EQUAL(processorEmptyGroup->getNumTransforms(), 0);
OCIO_CHECK_EQUAL(std::string(processorEmptyGroup->getCacheID()), "<NOOP>");

auto lut = OCIO::Lut3DTransform::Create(3);
// Make sure it's not an identity.
lut->setValue(2, 2, 2, 2.f, 3.f, 4.f);

auto processorLut = config->getProcessor(lut);
OCIO_CHECK_EQUAL(processorLut->getNumTransforms(), 1);
OCIO_CHECK_EQUAL(std::string(processorLut->getCacheID()), "2b26d0097cdcf8f141fe3b3d6e21b5ec");

// Check behaviour of the cacheID

// Change a value and check that the cacheID changes.
lut->setValue(2, 2, 2, 1.f, 3.f, 4.f);
processorLut = config->getProcessor(lut);
OCIO_CHECK_EQUAL(std::string(processorLut->getCacheID()), "288ec8ea132adaca5b5aed24a296a1a2");

// Restore the original value, check that the cache ID matches what it used to be.
lut->setValue(2, 2, 2, 2.f, 3.f, 4.f);
processorLut = config->getProcessor(lut);
OCIO_CHECK_EQUAL(std::string(processorLut->getCacheID()), "2b26d0097cdcf8f141fe3b3d6e21b5ec");
}

OCIO_ADD_TEST(Processor, unique_dynamic_properties)
{
OCIO::TransformDirection direction = OCIO::TRANSFORM_DIR_FORWARD;
Expand Down
47 changes: 47 additions & 0 deletions tests/gpu/Lut1DOp_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,53 @@ OCIO_ADD_GPU_TEST(Lut1DOp, lut1d_half_domain_unequal_channels)
test.setTestInfinity(false);
}

OCIO_ADD_GPU_TEST(Lut1DOp, lut1d_half_domain_negative_zero)
{
// This is an edge case, but this test documents that the behavior of CPU & GPU
// are different with respect to where in the LUT negative zero looks up at.
// This is only visible with half-domain LUTs that set different values for
// positive and negative zero, which really should be considered a bug in the LUT.
// Given that IEEE arithmetic specifies that -0 == +0 in comparisons, this does
// not seem to be worth fixing in OCIO at the cost of reduced performance.

// Create a half-domain LUT1D.
const auto lut = OCIO::Lut1DTransform::Create(65536, true);

// Set the positive and negative denorms to large values to make it easy
// to check that the processing is correct.
for (unsigned i=0; i<1024; i++)
{
const float x = static_cast<float>(i);
// Positive denorms.
lut->setValue(0 + i, x, x, x);
// Negative denorms. Create a jump between +0 and -0.
lut->setValue(32768 + i, x + 10.f, x + 10.f, x + 10.f);
}

test.setProcessor(lut);

// TODO: Would like this to be lower.
test.setErrorThreshold(2e-3f);

OCIOGPUTest::CustomValues values;
values.m_inputValues =
{
// Negative zero uses the positive 0 LUT value on the GPU, and negative 0 LUT on CPU.
// -0.00f, -0.00f, -0.000f, 0.0f,
0.00f, 0.00f, 0.000f, 1.0f,
// Use values that fall in the middle of the first, second, and third LUT segments
// to test accuracy in the denormals.
3e-8f, 9e-8f, 15e-8f, 0.0f,
-3e-8f, -9e-8f, -15e-8f, 0.0f,
// Throw in a more typical value.
0.50f, 0.05f, 0.005f, 0.5f,
};
test.setCustomValues(values);

test.setTestNaN(false);
test.setTestInfinity(false);
}

OCIO_ADD_GPU_TEST(Lut1DOp, lut1d_file2_test)
{
OCIO::FileTransformRcPtr file = GetFileTransform("lut1d_green.ctf");
Expand Down

0 comments on commit 9864d75

Please sign in to comment.