Skip to content

Commit

Permalink
fix: undo accidental crippling (#27)
Browse files Browse the repository at this point in the history
A previous attempt to fix a bug on AMD GPUs accidentally crippled all
miners.

The main problem was initializing the output buffer to 0 inside the kernel.
This did not change the hashrate, but it caused valid nonces to be missed.
  • Loading branch information
stringhandler authored Nov 26, 2024
1 parent 59d074a commit 1ea0c0a
Show file tree
Hide file tree
Showing 3 changed files with 163 additions and 167 deletions.
2 changes: 1 addition & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,7 @@ fn run_thread<T: EngineImpl>(
debug!(target: LOG_TARGET, "Elapsed {:?} > {:?}", elapsed.elapsed().as_secs(), config.template_refresh_secs );
break;
}
let num_iterations = 16;
let num_iterations = 1;
let result = gpu_engine.mine(
&gpu_function,
&context,
Expand Down
30 changes: 18 additions & 12 deletions src/opencl_engine.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use core::ffi::c_void;
use std::{
io::Read,
ptr,
Expand All @@ -11,7 +12,7 @@ use opencl3::{
context::Context,
device::{Device, CL_DEVICE_TYPE_GPU},
kernel::{ExecuteKernel, Kernel},
memory::{Buffer, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY},
memory::{Buffer, CL_MEM_COPY_HOST_PTR, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY},
platform::{get_platforms, Platform},
program::Program,
types::{cl_ulong, CL_TRUE},
Expand Down Expand Up @@ -169,8 +170,8 @@ impl EngineImpl for OpenClEngine {

debug!(target: LOG_TARGET, "OpenClEngine: created queue");

let batch_size = 1 << 19; // According to tests, but we can try work this out
let global_dimensions = [batch_size as usize];
// let batch_size = 1 << 19; // According to tests, but we can try work this out
// let global_dimensions = [batch_size as usize];
// let max_workgroups = Device::new(context.context.devices()[0]).max_work_group_size().unwrap();
// dbg!(max_compute);
// let max_work_items = queue.max_work_item_dimensions();
Expand All @@ -195,14 +196,19 @@ impl EngineImpl for OpenClEngine {
};

debug!(target: LOG_TARGET, "OpenClEngine: buffer created",);
let output_buffer =
match Buffer::<cl_ulong>::create(&context.context, CL_MEM_WRITE_ONLY, 2, ptr::null_mut()) {
Ok(buffer) => buffer,
Err(e) => {
error!(target: LOG_TARGET, "OpenClEngine: failed to create output buffer: {}", e);
return Err(e.into());
},
};
let initial_output = vec![0u64, 0u64];
let output_buffer = match Buffer::<cl_ulong>::create(
&context.context,
CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
2,
initial_output.as_ptr() as *mut c_void,
) {
Ok(buffer) => buffer,
Err(e) => {
error!(target: LOG_TARGET, "OpenClEngine: failed to create output buffer: {}", e);
return Err(e.into());
},
};
// dbg!(block_size);
// dbg!(grid_size);
debug!(target: LOG_TARGET, "OpenClEngine: output buffer created",);
Expand Down Expand Up @@ -243,9 +249,9 @@ impl EngineImpl for OpenClEngine {
queue.finish()?;

let mut output = vec![0u64, 0u64];
debug!(target: LOG_TARGET, "OpenClEngine: mine output {:?}", output[0] > 0);
queue.enqueue_read_buffer(&output_buffer, CL_TRUE, 0, output.as_mut_slice(), &[])?;
if output[0] > 0 {
println!("output and diff {:?} {:?}", output[0], u64::MAX / output[1]);
return Ok((
Some(output[0]),
grid_size * block_size * num_iterations,
Expand Down
298 changes: 144 additions & 154 deletions src/opencl_sha3.cl
Original file line number Diff line number Diff line change
Expand Up @@ -17,167 +17,156 @@ constant static const ulong RC[] = {
0x8000000000008080ul, 0x0000000080000001ul, 0x8000000080008008ul,
};


ulong swap_endian_64(ulong value) {
return ((value & 0x00000000000000FFULL) << 56) |
((value & 0x000000000000FF00ULL) << 40) |
((value & 0x0000000000FF0000ULL) << 24) |
((value & 0x00000000FF000000ULL) << 8) |
((value & 0x000000FF00000000ULL) >> 8) |
((value & 0x0000FF0000000000ULL) >> 24) |
((value & 0x00FF000000000000ULL) >> 40) |
((value & 0xFF00000000000000ULL) >> 56);
return ((value & 0x00000000000000FFULL) << 56) |
((value & 0x000000000000FF00ULL) << 40) |
((value & 0x0000000000FF0000ULL) << 24) |
((value & 0x00000000FF000000ULL) << 8) |
((value & 0x000000FF00000000ULL) >> 8) |
((value & 0x0000FF0000000000ULL) >> 24) |
((value & 0x00FF000000000000ULL) >> 40) |
((value & 0xFF00000000000000ULL) >> 56);
}

kernel void sha3(global ulong *buffer, ulong nonce_start, ulong difficulty,
uint num_rounds, global ulong *output_1) {

// output_1[0] = 0;
// output_1[1] = 0;
ulong state[25];
for (uint i = 0; i < num_rounds; i++) {

for (uint j = 0; j < 25; j++) {
state[j] = 0;
}
state[0] = nonce_start + get_global_id(0) + i * get_global_size(0);
state[1] = buffer[1];
state[2] = buffer[2];
state[3] = buffer[3];

state[4] = buffer[4];
state[5] = buffer[5];

state[16] ^= 0x8000000000000000ul;

uint r, x, y, t;
ulong tmp, current, C[5];
for (r = 0; r < 24; ++r) {
for (x = 0; x < 5; ++x) {
C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^
state[x + 20];
}
for (x = 0; x < 5; ++x) {
tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ul);
for (y = 0; y < 5; ++y) {
state[x + y * 5] ^= tmp;
}
}
current = state[1];
for (t = 0; t < 24; ++t) {
tmp = state[pos[t]];
state[pos[t]] = rotate(current, rot[t]);
current = tmp;
}
for (y = 0; y < 25; y += 5) {
for (x = 0; x < 5; ++x)
C[x] = state[y + x];
for (x = 0; x < 5; ++x) {
state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]);
}
}
state[0] ^= RC[r];
}

for (uint j = 4; j < 25; j++) {
state[j] = 0;
}
state[4] = 0x06;
state[16] = 0x8000000000000000ul;

for (r = 0; r < 24; ++r) {
for (x = 0; x < 5; ++x) {
C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^
state[x + 20];
}
for (x = 0; x < 5; ++x) {
tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ul);
for (y = 0; y < 5; ++y) {
state[x + y * 5] ^= tmp;
}
}
current = state[1];
for (t = 0; t < 24; ++t) {
tmp = state[pos[t]];
state[pos[t]] = rotate(current, rot[t]);
current = tmp;
}
for (y = 0; y < 25; y += 5) {
for (x = 0; x < 5; ++x)
C[x] = state[y + x];
for (x = 0; x < 5; ++x) {
state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]);
}
}
state[0] ^= RC[r];
}

for (uint j = 4; j < 25; j++) {
state[j] = 0;
}
state[4] = 0x06;
state[16] = 0x8000000000000000ul;

// round 3
for (r = 0; r < 24; ++r) {
for (x = 0; x < 5; ++x) {
C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^
state[x + 20];
}
for (x = 0; x < 5; ++x) {
tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ul);
for (y = 0; y < 5; ++y) {
state[x + y * 5] ^= tmp;
}
}
current = state[1];
for (t = 0; t < 24; ++t) {
tmp = state[pos[t]];
state[pos[t]] = rotate(current, rot[t]);
current = tmp;
}
for (y = 0; y < 25; y += 5) {
for (x = 0; x < 5; ++x)
C[x] = state[y + x];
for (x = 0; x < 5; ++x) {
state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]);
}
}
state[0] ^= RC[r];
}

kernel void sha3(global ulong *buffer,
ulong nonce_start, ulong difficulty,
uint num_rounds, global ulong *output_1
) {

output_1[0] = 0;
output_1[1] = 0;
ulong state[25];
for (uint i = 0;i< num_rounds; i++) {

for (uint j = 0; j < 25; j++) {
state[j] = 0;
}
state[0] = nonce_start + get_global_id(0) + i * get_global_size(0);
state[1] = buffer[1];
state[2] = buffer[2];
state[3] = buffer[3];

state[4] = buffer[4];
state[5] = buffer[5];

state[16] ^= 0x8000000000000000ul;





uint r, x, y, t;
ulong tmp, current, C[5];
for (r = 0; r < 24; ++r) {
for (x = 0; x < 5; ++x) {
C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^
state[x + 20];
}
for (x = 0; x < 5; ++x) {
tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ul);
for (y = 0; y < 5; ++y) {
state[x + y * 5] ^= tmp;
}
}
current = state[1];
for (t = 0; t < 24; ++t) {
tmp = state[pos[t]];
state[pos[t]] = rotate(current, rot[t]);
current = tmp;
}
for (y = 0; y < 25; y += 5) {
for (x = 0; x < 5; ++x)
C[x] = state[y + x];
for (x = 0; x < 5; ++x) {
state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]);
}
}
state[0] ^= RC[r];
}


for (uint j = 4; j < 25; j++) {
state[j] = 0;
}
state[4] = 0x06;
state[16] = 0x8000000000000000ul;

for (r = 0; r < 24; ++r) {
for (x = 0; x < 5; ++x) {
C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^
state[x + 20];
}
for (x = 0; x < 5; ++x) {
tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ul);
for (y = 0; y < 5; ++y) {
state[x + y * 5] ^= tmp;
}
}
current = state[1];
for (t = 0; t < 24; ++t) {
tmp = state[pos[t]];
state[pos[t]] = rotate(current, rot[t]);
current = tmp;
}
for (y = 0; y < 25; y += 5) {
for (x = 0; x < 5; ++x)
C[x] = state[y + x];
for (x = 0; x < 5; ++x) {
state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]);
}
}
state[0] ^= RC[r];
}


for (uint j = 4; j < 25; j++) {
state[j] = 0;
}
state[4] = 0x06;
state[16] = 0x8000000000000000ul;

// round 3
for (r = 0; r < 24; ++r) {
for (x = 0; x < 5; ++x) {
C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^
state[x + 20];
}
for (x = 0; x < 5; ++x) {
tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ul);
for (y = 0; y < 5; ++y) {
state[x + y * 5] ^= tmp;
}
}
current = state[1];
for (t = 0; t < 24; ++t) {
tmp = state[pos[t]];
state[pos[t]] = rotate(current, rot[t]);
current = tmp;
}
for (y = 0; y < 25; y += 5) {
for (x = 0; x < 5; ++x)
C[x] = state[y + x];
for (x = 0; x < 5; ++x) {
state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]);
}
}
state[0] ^= RC[r];
}


// check difficulty
ulong swap = swap_endian_64(state[0]);
if (swap < difficulty) {
output_1[0] = nonce_start + get_global_id(0) + i * get_global_size(0);
output_1[1] = swap;
}
else {
// check difficulty
ulong swap = swap_endian_64(state[0]);
if (swap < difficulty) {
if (output_1[1] == 0 || output_1[1] > swap) {
output_1[1] = swap;
output_1[0] = nonce_start + get_global_id(0) + i * get_global_size(0);
output_1[1] = swap;
}
// if (output_1[1] < nonce_start+ get_global_id(0)) {
// output_1[1] = nonce_start + get_global_id(0);
// }
}
} else {
if (output_1[1] == 0 || output_1[1] > swap) {
// output_1[0] = nonce_start + get_global_id(0) + i *
// get_global_size(0);
output_1[1] = swap;
}
// if (output_1[1] < nonce_start+ get_global_id(0)) {
// output_1[1] = nonce_start + get_global_id(0);
// }
}

//output_1[0] = difficulty;
// output_1[0] = nonce_start + get_global_id(0) ;
// output_1[0] = difficulty;
// output_1[0] = nonce_start + get_global_id(0) ;
// output_1[0] = 1;
}



}

// // Compare difficulty
// bool le = true;
Expand Down Expand Up @@ -225,7 +214,8 @@ for (uint i = 0;i< num_rounds; i++) {
// // n -= d
// int r = 0;
// for (int j = 31; j >= 0; --j) {
// // There is no temporary overflow, because in OpenCL uchar + uchar is
// // There is no temporary overflow, because in OpenCL uchar + uchar
// is
// // ulong (not really sure, but it's bigger than uchar)
// if (n[j] < output_buffer[j] + r) {
// n[j] = n[j] - r - output_buffer[j];
Expand Down

0 comments on commit 1ea0c0a

Please sign in to comment.