From 1ea0c0ac4279f9daffa615c50ffaf031a2b3f69e Mon Sep 17 00:00:00 2001 From: stringhandler Date: Tue, 26 Nov 2024 09:04:26 +0200 Subject: [PATCH] fix: undo accidental crippling (#27) A previous attempt to fix a bug on AMD GPUs accidentally crippled all miners. The main problem was trying to initialize the output to 0. This would not change the hashrate but would result in valid nonces being missed --- src/main.rs | 2 +- src/opencl_engine.rs | 30 +++-- src/opencl_sha3.cl | 298 +++++++++++++++++++++---------------------- 3 files changed, 163 insertions(+), 167 deletions(-) diff --git a/src/main.rs b/src/main.rs index 88d2d82..689f3bf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -514,7 +514,7 @@ fn run_thread( debug!(target: LOG_TARGET, "Elapsed {:?} > {:?}", elapsed.elapsed().as_secs(), config.template_refresh_secs ); break; } - let num_iterations = 16; + let num_iterations = 1; let result = gpu_engine.mine( &gpu_function, &context, diff --git a/src/opencl_engine.rs b/src/opencl_engine.rs index 4f2aa54..a8f3a3a 100644 --- a/src/opencl_engine.rs +++ b/src/opencl_engine.rs @@ -1,3 +1,4 @@ +use core::ffi::c_void; use std::{ io::Read, ptr, @@ -11,7 +12,7 @@ use opencl3::{ context::Context, device::{Device, CL_DEVICE_TYPE_GPU}, kernel::{ExecuteKernel, Kernel}, - memory::{Buffer, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY}, + memory::{Buffer, CL_MEM_COPY_HOST_PTR, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY}, platform::{get_platforms, Platform}, program::Program, types::{cl_ulong, CL_TRUE}, @@ -169,8 +170,8 @@ impl EngineImpl for OpenClEngine { debug!(target: LOG_TARGET, "OpenClEngine: created queue"); - let batch_size = 1 << 19; // According to tests, but we can try work this out - let global_dimensions = [batch_size as usize]; + // let batch_size = 1 << 19; // According to tests, but we can try work this out + // let global_dimensions = [batch_size as usize]; // let max_workgroups = Device::new(context.context.devices()[0]).max_work_group_size().unwrap(); // dbg!(max_compute); // let max_work_items = queue.max_work_item_dimensions(); @@ -195,14 +196,19 @@ impl EngineImpl for OpenClEngine { }; debug!(target: LOG_TARGET, "OpenClEngine: buffer created",); - let output_buffer = - match Buffer::::create(&context.context, CL_MEM_WRITE_ONLY, 2, ptr::null_mut()) { - Ok(buffer) => buffer, - Err(e) => { - error!(target: LOG_TARGET, "OpenClEngine: failed to create output buffer: {}", e); - return Err(e.into()); - }, - }; + let initial_output = vec![0u64, 0u64]; + let output_buffer = match Buffer::::create( + &context.context, + CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + 2, + initial_output.as_ptr() as *mut c_void, + ) { + Ok(buffer) => buffer, + Err(e) => { + error!(target: LOG_TARGET, "OpenClEngine: failed to create output buffer: {}", e); + return Err(e.into()); + }, + }; // dbg!(block_size); // dbg!(grid_size); debug!(target: LOG_TARGET, "OpenClEngine: output buffer created",); @@ -243,9 +249,9 @@ impl EngineImpl for OpenClEngine { queue.finish()?; let mut output = vec![0u64, 0u64]; - debug!(target: LOG_TARGET, "OpenClEngine: mine output {:?}", output[0] > 0); queue.enqueue_read_buffer(&output_buffer, CL_TRUE, 0, output.as_mut_slice(), &[])?; if output[0] > 0 { + println!("output and diff {:?} {:?}", output[0], u64::MAX / output[1]); return Ok(( Some(output[0]), grid_size * block_size * num_iterations, diff --git a/src/opencl_sha3.cl b/src/opencl_sha3.cl index c33da19..ad41d6f 100644 --- a/src/opencl_sha3.cl +++ b/src/opencl_sha3.cl @@ -17,167 +17,156 @@ constant static const ulong RC[] = { 0x8000000000008080ul, 0x0000000080000001ul, 0x8000000080008008ul, }; - ulong swap_endian_64(ulong value) { - return ((value & 0x00000000000000FFULL) << 56) | - ((value & 0x000000000000FF00ULL) << 40) | - ((value & 0x0000000000FF0000ULL) << 24) | - ((value & 0x00000000FF000000ULL) << 8) | - ((value & 0x000000FF00000000ULL) >> 8) | - ((value & 0x0000FF0000000000ULL) >> 24) | - ((value & 0x00FF000000000000ULL) >> 40) | - ((value & 0xFF00000000000000ULL) >> 56); + return ((value & 0x00000000000000FFULL) << 56) | + ((value & 0x000000000000FF00ULL) << 40) | + ((value & 0x0000000000FF0000ULL) << 24) | + ((value & 0x00000000FF000000ULL) << 8) | + ((value & 0x000000FF00000000ULL) >> 8) | + ((value & 0x0000FF0000000000ULL) >> 24) | + ((value & 0x00FF000000000000ULL) >> 40) | + ((value & 0xFF00000000000000ULL) >> 56); } +kernel void sha3(global ulong *buffer, ulong nonce_start, ulong difficulty, + uint num_rounds, global ulong *output_1) { + + // output_1[0] = 0; + // output_1[1] = 0; + ulong state[25]; + for (uint i = 0; i < num_rounds; i++) { + + for (uint j = 0; j < 25; j++) { + state[j] = 0; + } + state[0] = nonce_start + get_global_id(0) + i * get_global_size(0); + state[1] = buffer[1]; + state[2] = buffer[2]; + state[3] = buffer[3]; + + state[4] = buffer[4]; + state[5] = buffer[5]; + + state[16] ^= 0x8000000000000000ul; + + uint r, x, y, t; + ulong tmp, current, C[5]; + for (r = 0; r < 24; ++r) { + for (x = 0; x < 5; ++x) { + C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^ + state[x + 20]; + } + for (x = 0; x < 5; ++x) { + tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ul); + for (y = 0; y < 5; ++y) { + state[x + y * 5] ^= tmp; + } + } + current = state[1]; + for (t = 0; t < 24; ++t) { + tmp = state[pos[t]]; + state[pos[t]] = rotate(current, rot[t]); + current = tmp; + } + for (y = 0; y < 25; y += 5) { + for (x = 0; x < 5; ++x) + C[x] = state[y + x]; + for (x = 0; x < 5; ++x) { + state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]); + } + } + state[0] ^= RC[r]; + } + + for (uint j = 4; j < 25; j++) { + state[j] = 0; + } + state[4] = 0x06; + state[16] = 0x8000000000000000ul; + + for (r = 0; r < 24; ++r) { + for (x = 0; x < 5; ++x) { + C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^ + state[x + 20]; + } + for (x = 0; x < 5; ++x) { + tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ul); + for (y = 0; y < 5; ++y) { + state[x + y * 5] ^= tmp; + } + } + current = state[1]; + for (t = 0; t < 24; ++t) { + tmp = state[pos[t]]; + state[pos[t]] = rotate(current, rot[t]); + current = tmp; + } + for (y = 0; y < 25; y += 5) { + for (x = 0; x < 5; ++x) + C[x] = state[y + x]; + for (x = 0; x < 5; ++x) { + state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]); + } + } + state[0] ^= RC[r]; + } + + for (uint j = 4; j < 25; j++) { + state[j] = 0; + } + state[4] = 0x06; + state[16] = 0x8000000000000000ul; + + // round 3 + for (r = 0; r < 24; ++r) { + for (x = 0; x < 5; ++x) { + C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^ + state[x + 20]; + } + for (x = 0; x < 5; ++x) { + tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ul); + for (y = 0; y < 5; ++y) { + state[x + y * 5] ^= tmp; + } + } + current = state[1]; + for (t = 0; t < 24; ++t) { + tmp = state[pos[t]]; + state[pos[t]] = rotate(current, rot[t]); + current = tmp; + } + for (y = 0; y < 25; y += 5) { + for (x = 0; x < 5; ++x) + C[x] = state[y + x]; + for (x = 0; x < 5; ++x) { + state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]); + } + } + state[0] ^= RC[r]; + } -kernel void sha3(global ulong *buffer, - ulong nonce_start, ulong difficulty, - uint num_rounds, global ulong *output_1 - ) { - -output_1[0] = 0; -output_1[1] = 0; -ulong state[25]; -for (uint i = 0;i< num_rounds; i++) { - - for (uint j = 0; j < 25; j++) { - state[j] = 0; - } - state[0] = nonce_start + get_global_id(0) + i * get_global_size(0); - state[1] = buffer[1]; - state[2] = buffer[2]; - state[3] = buffer[3]; - - state[4] = buffer[4]; - state[5] = buffer[5]; - - state[16] ^= 0x8000000000000000ul; - - - - - - uint r, x, y, t; - ulong tmp, current, C[5]; - for (r = 0; r < 24; ++r) { - for (x = 0; x < 5; ++x) { - C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^ - state[x + 20]; - } - for (x = 0; x < 5; ++x) { - tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ul); - for (y = 0; y < 5; ++y) { - state[x + y * 5] ^= tmp; - } - } - current = state[1]; - for (t = 0; t < 24; ++t) { - tmp = state[pos[t]]; - state[pos[t]] = rotate(current, rot[t]); - current = tmp; - } - for (y = 0; y < 25; y += 5) { - for (x = 0; x < 5; ++x) - C[x] = state[y + x]; - for (x = 0; x < 5; ++x) { - state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]); - } - } - state[0] ^= RC[r]; - } - - - for (uint j = 4; j < 25; j++) { - state[j] = 0; - } - state[4] = 0x06; - state[16] = 0x8000000000000000ul; - - for (r = 0; r < 24; ++r) { - for (x = 0; x < 5; ++x) { - C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^ - state[x + 20]; - } - for (x = 0; x < 5; ++x) { - tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ul); - for (y = 0; y < 5; ++y) { - state[x + y * 5] ^= tmp; - } - } - current = state[1]; - for (t = 0; t < 24; ++t) { - tmp = state[pos[t]]; - state[pos[t]] = rotate(current, rot[t]); - current = tmp; - } - for (y = 0; y < 25; y += 5) { - for (x = 0; x < 5; ++x) - C[x] = state[y + x]; - for (x = 0; x < 5; ++x) { - state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]); - } - } - state[0] ^= RC[r]; - } - - - for (uint j = 4; j < 25; j++) { - state[j] = 0; - } - state[4] = 0x06; - state[16] = 0x8000000000000000ul; - - // round 3 - for (r = 0; r < 24; ++r) { - for (x = 0; x < 5; ++x) { - C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^ - state[x + 20]; - } - for (x = 0; x < 5; ++x) { - tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ul); - for (y = 0; y < 5; ++y) { - state[x + y * 5] ^= tmp; - } - } - current = state[1]; - for (t = 0; t < 24; ++t) { - tmp = state[pos[t]]; - state[pos[t]] = rotate(current, rot[t]); - current = tmp; - } - for (y = 0; y < 25; y += 5) { - for (x = 0; x < 5; ++x) - C[x] = state[y + x]; - for (x = 0; x < 5; ++x) { - state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]); - } - } - state[0] ^= RC[r]; - } - - - // check difficulty - ulong swap = swap_endian_64(state[0]); - if (swap < difficulty) { - output_1[0] = nonce_start + get_global_id(0) + i * get_global_size(0); - output_1[1] = swap; - } - else { + // check difficulty + ulong swap = swap_endian_64(state[0]); + if (swap < difficulty) { if (output_1[1] == 0 || output_1[1] > swap) { - output_1[1] = swap; + output_1[0] = nonce_start + get_global_id(0) + i * get_global_size(0); + output_1[1] = swap; } - // if (output_1[1] < nonce_start+ get_global_id(0)) { - // output_1[1] = nonce_start + get_global_id(0); - // } - } + } else { + if (output_1[1] == 0 || output_1[1] > swap) { + // output_1[0] = nonce_start + get_global_id(0) + i * + // get_global_size(0); + output_1[1] = swap; + } + // if (output_1[1] < nonce_start+ get_global_id(0)) { + // output_1[1] = nonce_start + get_global_id(0); + // } + } - //output_1[0] = difficulty; - // output_1[0] = nonce_start + get_global_id(0) ; + // output_1[0] = difficulty; + // output_1[0] = nonce_start + get_global_id(0) ; // output_1[0] = 1; -} - - - + } // // Compare difficulty // bool le = true; @@ -225,7 +214,8 @@ for (uint i = 0;i< num_rounds; i++) { // // n -= d // int r = 0; // for (int j = 31; j >= 0; --j) { - // // There is no temporary overflow, because in OpenCL uchar + uchar is + // // There is no temporary overflow, because in OpenCL uchar + uchar + // is // // ulong (not really sure, but it's bigger than uchar) // if (n[j] < output_buffer[j] + r) { // n[j] = n[j] - r - output_buffer[j];