diff --git a/CMakeLists.txt b/CMakeLists.txt index a29ee1f..a69eaa5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -492,7 +492,8 @@ if (WITH_HIP) # target_link_libraries(${PROJECT_NAME}-${PROJECT_VERSION} ${CMAKE_BINARY_DIR}/tnn_hip.lib) # target_link_libraries(${PROJECT_NAME}-${PROJECT_VERSION} ${CMAKE_BINARY_DIR}/libtnn_hip.a) else() - set(TNN_OUTPUT_NAME "tnn-miner-${INTERNAL_TNN_VERSION}_cpu") + # set(TNN_OUTPUT_NAME "tnn-miner-${INTERNAL_TNN_VERSION}_cpu") + set(TNN_OUTPUT_NAME "tnn-miner-cpu") add_executable(${TNN_OUTPUT_NAME} ${SOURCES_CORE} ${SOURCES_NET} ${SOURCES_CRYPTO} ${HEADERS_CRYPTO}) setup_target_libraries(${TNN_OUTPUT_NAME}) @@ -504,4 +505,6 @@ else() # Set output name set_target_properties(${TNN_OUTPUT_NAME} PROPERTIES OUTPUT_NAME ${TNN_OUTPUT_NAME}) -endif() \ No newline at end of file +endif() + +unset(HIP_LANG CACHE) \ No newline at end of file diff --git a/cmake/hip-crypto/hip-crypto.cmake b/cmake/hip-crypto/hip-crypto.cmake index b753f81..46edfe6 100644 --- a/cmake/hip-crypto/hip-crypto.cmake +++ b/cmake/hip-crypto/hip-crypto.cmake @@ -41,13 +41,15 @@ if (WITH_HIP) # DESTINATION ${CMAKE_INSTALL_PREFIX} # ) - if (CMAKE_HIP_PLATFORM MATCHES nvidia OR CMAKE_HIP_PLATFORM MATCHES nvcc) + if (HIP_PLATFORM MATCHES nvidia OR HIP_PLATFORM MATCHES nvcc) # set(TNN_RDC "-rdc=false") else() set(TNN_RDC "-fno-gpu-rdc") endif() set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} ${TNN_RDC}") + unset(TNN_RDC CACHE) + list(APPEND SOURCES_CRYPTO ${hipSources} ) diff --git a/include/hex.h b/include/hex.h index a2af615..3994022 100644 --- a/include/hex.h +++ b/include/hex.h @@ -10,7 +10,6 @@ // #include // #include -// __host__ __forceinline__ CUDA hints inline std::string hexStr(const unsigned char *data, int len) { static const char characters[] = "0123456789abcdef"; @@ -38,7 +37,6 @@ inline std::string hexStr(const unsigned char *data, int len) // return result; // } -// __host__ __device__ __forceinline__ CUDA hints inline void hexstrToBytes(std::string s, unsigned char *b) { for (unsigned int i = 0; i < s.length(); i += 2) diff --git a/include/terminal.h b/include/terminal.h index 403f0c3..49e184a 100644 --- a/include/terminal.h +++ b/include/terminal.h @@ -177,12 +177,10 @@ inline po::options_description get_prog_opts() ("test-randomx", "Run Tevador's reference RandomX tests") ; - // po::options_description astrix("Astrix", col_width); - // astrix.add_options() - // ("astrix", "Required for mining to Xatum pools on Xelis") - // ("xelis-bench", "Run a benchmark of xelis-hash with 1 thread") - // ("test-xelis", "Run the xelis-hash tests from the official source code") - // ; + po::options_description astrix("Astrix", col_width); + astrix.add_options() + ("test-astrix", "Run a basic astrix-hash validation test") + ; po::options_description advanced("Advanced", col_width); advanced.add_options() @@ -203,6 +201,7 @@ inline po::options_description get_prog_opts() general.add(spectre); general.add(xelis); general.add(randomX); + general.add(astrix); general.add(advanced); general.add(debug); return general; diff --git a/include/tnn-common.hpp b/include/tnn-common.hpp index 17a814a..40be118 100644 --- a/include/tnn-common.hpp +++ b/include/tnn-common.hpp @@ -66,6 +66,12 @@ extern int64_t difficultyDev; extern double doubleDiff; extern double doubleDiffDev; +extern std::string HIP_names[32]; +extern std::vector> HIP_counters; +extern std::vector> HIP_rates5min; +extern std::vector> HIP_rates1min; +extern std::vector> HIP_rates30sec; + extern std::vector rate5min; extern std::vector rate1min; extern std::vector rate30sec; diff --git a/src/coins/mine_astrix.cpp b/src/coins/mine_astrix.cpp index daa57f6..7f5ccab 100644 --- a/src/coins/mine_astrix.cpp +++ b/src/coins/mine_astrix.cpp @@ -5,286 +5,283 @@ void mineAstrix(int tid) { - for(;;) { - boost::this_thread::yield(); - } -// int64_t localJobCounter; -// int64_t localOurHeight = 0; -// int64_t localDevHeight = 0; + int64_t localJobCounter; + int64_t localOurHeight = 0; + int64_t localDevHeight = 0; -// uint64_t i = 0; -// uint64_t i_dev = 0; + uint64_t i = 0; + uint64_t i_dev = 0; -// byte powHash[32]; -// byte work[AstrixHash::INPUT_SIZE] = {0}; -// byte devWork[AstrixHash::INPUT_SIZE] = {0}; + byte powHash[32]; + byte work[AstrixHash::INPUT_SIZE] = {0}; + byte devWork[AstrixHash::INPUT_SIZE] = {0}; -// AstrixHash::worker *worker = (AstrixHash::worker *)malloc_huge_pages(sizeof(AstrixHash::worker)); -// AstrixHash::worker *devWorker = (AstrixHash::worker *)malloc_huge_pages(sizeof(AstrixHash::worker)); + AstrixHash::worker *worker = (AstrixHash::worker *)malloc_huge_pages(sizeof(AstrixHash::worker)); + AstrixHash::worker *devWorker = (AstrixHash::worker *)malloc_huge_pages(sizeof(AstrixHash::worker)); -// waitForJob: +waitForJob: -// while (!isConnected) -// { -// boost::this_thread::sleep_for(boost::chrono::milliseconds(100)); -// } + while (!isConnected) + { + boost::this_thread::sleep_for(boost::chrono::milliseconds(100)); + } -// while (true) -// { -// try -// { -// bool assigned = false; -// boost::json::value myJob; -// boost::json::value myJobDev; -// { -// std::scoped_lock lockGuard(mutex); -// myJob = job; -// myJobDev = devJob; -// localJobCounter = jobCounter; -// } + while (true) + { + try + { + bool assigned = false; + boost::json::value myJob; + boost::json::value myJobDev; + { + std::scoped_lock lockGuard(mutex); + myJob = job; + myJobDev = devJob; + localJobCounter = jobCounter; + } -// if (!myJob.at("template").is_string()) { -// continue; -// } -// if (ourHeight == 0 && devHeight == 0) -// continue; + if (!myJob.at("template").is_string()) { + continue; + } + if (ourHeight == 0 && devHeight == 0) + continue; -// if (ourHeight == 0 || localOurHeight != ourHeight) -// { -// byte *b2 = new byte[AstrixHash::INPUT_SIZE]; -// switch (protocol) -// { -// case SPECTRE_SOLO: -// hexstrToBytes(std::string(myJob.at("template").as_string()), b2); -// break; -// case SPECTRE_STRATUM: -// hexstrToBytes(std::string(myJob.at("template").as_string()), b2); -// break; -// } -// memcpy(work, b2, AstrixHash::INPUT_SIZE); -// AstrixHash::newMatrix(work, worker->matBuffer, *worker); -// // AstrixHash::genPrePowHash(b2, *worker);/ -// // AstrixHash::newMatrix(b2, worker->mat); -// delete[] b2; -// localOurHeight = ourHeight; -// i = 0; -// } + if (ourHeight == 0 || localOurHeight != ourHeight) + { + byte *b2 = new byte[AstrixHash::INPUT_SIZE]; + switch (protocol) + { + case ASTRIX_SOLO: + hexstrToBytes(std::string(myJob.at("template").as_string()), b2); + break; + case ASTRIX_STRATUM: + hexstrToBytes(std::string(myJob.at("template").as_string()), b2); + break; + } + memcpy(work, b2, AstrixHash::INPUT_SIZE); + AstrixHash::newMatrix(work, worker->matBuffer, *worker); + // AstrixHash::genPrePowHash(b2, *worker);/ + // AstrixHash::newMatrix(b2, worker->mat); + delete[] b2; + localOurHeight = ourHeight; + i = 0; + } -// if (devConnected && myJobDev.at("template").is_string()) -// { -// if (devHeight == 0 || localDevHeight != devHeight) -// { -// byte *b2d = new byte[AstrixHash::INPUT_SIZE]; -// switch (protocol) -// { -// case SPECTRE_SOLO: -// hexstrToBytes(std::string(myJobDev.at("template").as_string()), b2d); -// break; -// case SPECTRE_STRATUM: -// hexstrToBytes(std::string(myJobDev.at("template").as_string()), b2d); -// break; -// } -// memcpy(devWork, b2d, AstrixHash::INPUT_SIZE); -// AstrixHash::newMatrix(devWork, devWorker->matBuffer, *devWorker); -// // AstrixHash::genPrePowHash(b2d, *devWorker); -// // AstrixHash::newMatrix(b2d, devWorker->mat); -// delete[] b2d; -// localDevHeight = devHeight; -// i_dev = 0; -// } -// } + if (devConnected && myJobDev.at("template").is_string()) + { + if (devHeight == 0 || localDevHeight != devHeight) + { + byte *b2d = new byte[AstrixHash::INPUT_SIZE]; + switch (protocol) + { + case ASTRIX_SOLO: + hexstrToBytes(std::string(myJobDev.at("template").as_string()), b2d); + break; + case ASTRIX_STRATUM: + hexstrToBytes(std::string(myJobDev.at("template").as_string()), b2d); + break; + } + memcpy(devWork, b2d, AstrixHash::INPUT_SIZE); + AstrixHash::newMatrix(devWork, devWorker->matBuffer, *devWorker); + // AstrixHash::genPrePowHash(b2d, *devWorker); + // AstrixHash::newMatrix(b2d, devWorker->mat); + delete[] b2d; + localDevHeight = devHeight; + i_dev = 0; + } + } -// bool devMine = false; -// double which; -// bool submit = false; -// double DIFF = 1; -// Num cmpDiff; + bool devMine = false; + double which; + bool submit = false; + double DIFF = 1; + Num cmpDiff; -// // printf("end of job application\n"); -// while (localJobCounter == jobCounter) -// { -// which = (double)(rand() % 10000); -// devMine = (devConnected && devHeight > 0 && which < devFee * 100.0); -// DIFF = devMine ? doubleDiffDev : doubleDiff; -// if (DIFF == 0) -// continue; + // printf("end of job application\n"); + while (localJobCounter == jobCounter) + { + which = (double)(rand() % 10000); + devMine = (devConnected && devHeight > 0 && which < devFee * 100.0); + DIFF = devMine ? doubleDiffDev : doubleDiff; + if (DIFF == 0) + continue; -// // cmpDiff = ConvertDifficultyToBig(DIFF, SPECTRE_X); -// cmpDiff = AstrixHash::diffToTarget(DIFF); + // cmpDiff = ConvertDifficultyToBig(DIFF, ASTRIX_X); + cmpDiff = AstrixHash::diffToTarget(DIFF); -// uint64_t *nonce = devMine ? &i_dev : &i; -// (*nonce)++; + uint64_t *nonce = devMine ? &i_dev : &i; + (*nonce)++; -// // printf("nonce = %llu\n", *nonce); + // printf("nonce = %llu\n", *nonce); -// byte *WORK = (devMine && devConnected) ? &devWork[0] : &work[0]; -// byte *nonceBytes = &WORK[72]; -// uint64_t n; + byte *WORK = (devMine && devConnected) ? &devWork[0] : &work[0]; + byte *nonceBytes = &WORK[72]; + uint64_t n; -// int enLen = 0; + int enLen = 0; -// boost::json::value &J = devMine ? myJobDev : myJob; -// if (!J.as_object().if_contains("extraNonce") || J.at("extraNonce").as_string().size() == 0) -// n = ((tid - 1) % (256 * 256)) | ((rand() % 256) << 16) | ((*nonce) << 24); -// else { -// uint64_t eN = std::stoull(std::string(J.at("extraNonce").as_string().c_str()), NULL, 16); -// enLen = std::string(J.at("extraNonce").as_string()).size()/2; -// int offset = (64 - enLen*8); -// n = ((tid - 1) % (256 * 256)) | (((*nonce) << 16) & ((1ULL << offset)-1)) | (eN << offset); -// } -// memcpy(nonceBytes, (byte *)&n, 8); + boost::json::value &J = devMine ? myJobDev : myJob; + if (!J.as_object().if_contains("extraNonce") || J.at("extraNonce").as_string().size() == 0) + n = ((tid - 1) % (256 * 256)) | ((rand() % 256) << 16) | ((*nonce) << 24); + else { + uint64_t eN = std::stoull(std::string(J.at("extraNonce").as_string().c_str()), NULL, 16); + enLen = std::string(J.at("extraNonce").as_string()).size()/2; + int offset = (64 - enLen*8); + n = ((tid - 1) % (256 * 256)) | (((*nonce) << 16) & ((1ULL << offset)-1)) | (eN << offset); + } + memcpy(nonceBytes, (byte *)&n, 8); -// // printf("after nonce: %s\n", hexStr(WORK, AstrixHash::INPUT_SIZE).c_str()); + // printf("after nonce: %s\n", hexStr(WORK, AstrixHash::INPUT_SIZE).c_str()); -// if (localJobCounter != jobCounter) { -// // printf("thread %d updating job before hash\n", tid); -// break; -// } + if (localJobCounter != jobCounter) { + // printf("thread %d updating job before hash\n", tid); + break; + } -// AstrixHash::worker &usedWorker = devMine ? *devWorker : *worker; -// AstrixHash::hash(usedWorker, WORK, AstrixHash::INPUT_SIZE, powHash); + AstrixHash::worker &usedWorker = devMine ? *devWorker : *worker; + AstrixHash::hash(usedWorker, WORK, AstrixHash::INPUT_SIZE, powHash); -// // if (littleEndian()) -// // { -// // std::reverse(powHash, powHash + 32); -// // } + if (littleEndian()) + { + std::reverse(usedWorker.scratchData, usedWorker.scratchData + 32); + } -// counter.fetch_add(1); -// submit = (devMine && devConnected) ? !submittingDev : !submitting; + counter.fetch_add(1); + submit = (devMine && devConnected) ? !submittingDev : !submitting; -// if (localJobCounter != jobCounter || localOurHeight != ourHeight) { -// // printf("thread %d updating job after hash\n", tid); -// break; -// } + if (localJobCounter != jobCounter || localOurHeight != ourHeight) { + // printf("thread %d updating job after hash\n", tid); + break; + } -// if (Num(hexStr(powHash, 32).c_str(), 16) <= cmpDiff) -// { -// // printf("thread %d entered submission process\n", tid); -// if (!submit) { -// for(;;) { -// submit = (devMine && devConnected) ? !submittingDev : !submitting; -// if (submit || localJobCounter != jobCounter || localOurHeight != ourHeight) -// break; -// boost::this_thread::yield(); -// } -// } -// if (localJobCounter != jobCounter) { -// // printf("thread %d updating job after check\n", tid); -// break; -// } -// // if (littleEndian()) -// // { -// // std::reverse(powHash, powHash + 32); -// // } -// // std::string b64 = base64::to_base64(std::string((char *)&WORK[0], XELIS_TEMPLATE_SIZE)); -// // boost::lock_guard lock(mutex); -// if (devMine) -// { -// submittingDev = true; -// // std::scoped_lock lockGuard(devMutex); -// // if (localJobCounter != jobCounter || localDevHeight != devHeight) -// // { -// // break; -// // } -// setcolor(CYAN); -// std::cout << "\n(DEV) Thread " << tid << " found a dev share\n" << std::flush; -// setcolor(BRIGHT_WHITE); -// switch (protocol) -// { -// case SPECTRE_SOLO: -// devShare = {{"block_template", hexStr(&WORK[0], AstrixHash::INPUT_SIZE).c_str()}}; -// break; -// case SPECTRE_STRATUM: -// std::vector nonceStr; -// // Num(std::to_string((n << enLen*8) >> enLen*8).c_str(),10).print(nonceStr, 16); -// Num(std::to_string(n).c_str(),10).print(nonceStr, 16); -// devShare = {{{"id", SpectreStratum::submitID}, -// {"method", SpectreStratum::submit.method.c_str()}, -// {"params", {devWorkerName, // WORKER -// myJobDev.at("jobId").as_string().c_str(), // JOB ID -// std::string(nonceStr.data()).c_str()}}}}; + if (Num(hexStr(usedWorker.scratchData, 32).c_str(), 16) <= cmpDiff) + { + // printf("thread %d entered submission process\n", tid); + if (!submit) { + for(;;) { + submit = (devMine && devConnected) ? !submittingDev : !submitting; + if (submit || localJobCounter != jobCounter || localOurHeight != ourHeight) + break; + boost::this_thread::yield(); + } + } + if (localJobCounter != jobCounter) { + // printf("thread %d updating job after check\n", tid); + break; + } + // if (littleEndian()) + // { + // std::reverse(powHash, powHash + 32); + // } + // std::string b64 = base64::to_base64(std::string((char *)&WORK[0], XELIS_TEMPLATE_SIZE)); + // boost::lock_guard lock(mutex); + if (devMine) + { + submittingDev = true; + // std::scoped_lock lockGuard(devMutex); + // if (localJobCounter != jobCounter || localDevHeight != devHeight) + // { + // break; + // } + setcolor(CYAN); + std::cout << "\n(DEV) Thread " << tid << " found a dev share\n" << std::flush; + setcolor(BRIGHT_WHITE); + switch (protocol) + { + case ASTRIX_SOLO: + devShare = {{"block_template", hexStr(&WORK[0], AstrixHash::INPUT_SIZE).c_str()}}; + break; + case ASTRIX_STRATUM: + std::vector nonceStr; + // Num(std::to_string((n << enLen*8) >> enLen*8).c_str(),10).print(nonceStr, 16); + Num(std::to_string(n).c_str(),10).print(nonceStr, 16); + devShare = {{{"id", SpectreStratum::submitID}, + {"method", SpectreStratum::submit.method.c_str()}, + {"params", {devWorkerName, // WORKER + myJobDev.at("jobId").as_string().c_str(), // JOB ID + std::string(nonceStr.data()).c_str()}}}}; -// break; -// } -// data_ready = true; -// } -// else -// { -// submitting = true; -// // std::scoped_lock lockGuard(userMutex); -// // if (localJobCounter != jobCounter || localOurHeight != ourHeight) -// // { -// // break; -// // } -// setcolor(BRIGHT_YELLOW); -// std::cout << "\nThread " << tid << " found a nonce!\n" << std::flush; -// setcolor(BRIGHT_WHITE); -// switch (protocol) -// { -// case SPECTRE_SOLO: -// share = {{"block_template", hexStr(&WORK[0], AstrixHash::INPUT_SIZE).c_str()}}; -// break; -// case SPECTRE_STRATUM: -// std::vector nonceStr; -// // Num(std::to_string((n << enLen*8) >> enLen*8).c_str(),10).print(nonceStr, 16); -// Num(std::to_string(n).c_str(),10).print(nonceStr, 16); -// share = {{{"id", SpectreStratum::submitID}, -// {"method", SpectreStratum::submit.method.c_str()}, -// {"params", {workerName, // WORKER -// myJob.at("jobId").as_string().c_str(), // JOB ID -// std::string(nonceStr.data()).c_str()}}}}; + break; + } + data_ready = true; + } + else + { + submitting = true; + // std::scoped_lock lockGuard(userMutex); + // if (localJobCounter != jobCounter || localOurHeight != ourHeight) + // { + // break; + // } + setcolor(BRIGHT_YELLOW); + std::cout << "\nThread " << tid << " found a nonce!\n" << std::flush; + setcolor(BRIGHT_WHITE); + switch (protocol) + { + case ASTRIX_SOLO: + share = {{"block_template", hexStr(&WORK[0], AstrixHash::INPUT_SIZE).c_str()}}; + break; + case ASTRIX_STRATUM: + std::vector nonceStr; + // Num(std::to_string((n << enLen*8) >> enLen*8).c_str(),10).print(nonceStr, 16); + Num(std::to_string(n).c_str(),10).print(nonceStr, 16); + share = {{{"id", SpectreStratum::submitID}, + {"method", SpectreStratum::submit.method.c_str()}, + {"params", {workerName, // WORKER + myJob.at("jobId").as_string().c_str(), // JOB ID + std::string(nonceStr.data()).c_str()}}}}; -// // std::cout << "blob: " << hexStr(&WORK[0], AstrixHash::INPUT_SIZE).c_str() << std::endl; -// // std::cout << "nonce: " << nonceStr.data() << std::endl; -// // std::cout << "extraNonce: " << hexStr(&WORK[AstrixHash::INPUT_SIZE - 48], enLen).c_str() << std::endl; -// // std::cout << "hash: " << hexStr(&powHash[0], 32) << std::endl; -// // std::vector diffHex; -// // cmpDiff.print(diffHex, 16); -// // std::cout << "difficulty (LE): " << std::string(diffHex.data()).c_str() << std::endl; -// // std::cout << "powValue: " << Num(hexStr(powHash, 32).c_str(), 16) << std::endl; -// // std::cout << "target (decimal): " << cmpDiff << std::endl; + // std::cout << "blob: " << hexStr(&WORK[0], AstrixHash::INPUT_SIZE).c_str() << std::endl; + // std::cout << "nonce: " << nonceStr.data() << std::endl; + // std::cout << "extraNonce: " << hexStr(&WORK[AstrixHash::INPUT_SIZE - 48], enLen).c_str() << std::endl; + // std::cout << "hash: " << hexStr(&powHash[0], 32) << std::endl; + // std::vector diffHex; + // cmpDiff.print(diffHex, 16); + // std::cout << "difficulty (LE): " << std::string(diffHex.data()).c_str() << std::endl; + // std::cout << "powValue: " << Num(hexStr(powHash, 32).c_str(), 16) << std::endl; + // std::cout << "target (decimal): " << cmpDiff << std::endl; -// // printf("blob: %s\n", foundBlob.c_str()); -// // printf("hash (BE): %s\n", hexStr(&powHash[0], 32).c_str()); -// // printf("nonce (Full bytes for injection): %s\n", hexStr((byte *)&n, 8).c_str()); + // printf("blob: %s\n", foundBlob.c_str()); + // printf("hash (BE): %s\n", hexStr(&powHash[0], 32).c_str()); + // printf("nonce (Full bytes for injection): %s\n", hexStr((byte *)&n, 8).c_str()); -// break; -// } -// data_ready = true; -// } -// // printf("thread %d finished submission process\n", tid); -// cv.notify_all(); -// } + break; + } + data_ready = true; + } + // printf("thread %d finished submission process\n", tid); + cv.notify_all(); + } -// if (!isConnected) { -// data_ready = true; -// cv.notify_all(); -// break; -// } -// } -// if (!isConnected) { -// data_ready = true; -// cv.notify_all(); -// break; -// } -// } -// catch (std::exception& e) -// { -// setcolor(RED); -// std::cerr << "Error in POW Function" << std::endl; -// std::cerr << e.what() << std::endl; -// setcolor(BRIGHT_WHITE); + if (!isConnected) { + data_ready = true; + cv.notify_all(); + break; + } + } + if (!isConnected) { + data_ready = true; + cv.notify_all(); + break; + } + } + catch (std::exception& e) + { + setcolor(RED); + std::cerr << "Error in POW Function" << std::endl; + std::cerr << e.what() << std::endl; + setcolor(BRIGHT_WHITE); -// localJobCounter = -1; -// localOurHeight = -1; -// localDevHeight = -1; -// } -// if (!isConnected) { -// data_ready = true; -// cv.notify_all(); -// break; -// } -// } -// goto waitForJob; + localJobCounter = -1; + localOurHeight = -1; + localDevHeight = -1; + } + if (!isConnected) { + data_ready = true; + cv.notify_all(); + break; + } + } + goto waitForJob; } diff --git a/src/coins/mine_spectre.cpp b/src/coins/mine_spectre.cpp index 095a39a..3299782 100644 --- a/src/coins/mine_spectre.cpp +++ b/src/coins/mine_spectre.cpp @@ -166,7 +166,7 @@ void mineSpectre(int tid) } - if (Num(hexStr(powHash, 32).c_str(), 16) <= cmpDiff) + if (Num(hexStr(usedWorker.scratchData, 32).c_str(), 16) <= cmpDiff) { // printf("thread %d entered submission process\n", tid); if (!submit) { diff --git a/src/core/gpulibs.h b/src/core/gpulibs.h index f5843cf..2bdb62b 100644 --- a/src/core/gpulibs.h +++ b/src/core/gpulibs.h @@ -10,9 +10,8 @@ inline int GPUTest() { if (is_hip_supported()) { helloTest(); benchAstrixHip(); + AstrixHash::test(); } #endif - - AstrixHash::hipCompare(); return 0; } \ No newline at end of file diff --git a/src/core/miner.cpp b/src/core/miner.cpp index 5fd6ab0..d36c4b3 100644 --- a/src/core/miner.cpp +++ b/src/core/miner.cpp @@ -58,6 +58,13 @@ int miningAlgo = DERO_HASH; int reportCounter = 0; int reportInterval = 3; + +std::string HIP_names[32]; +std::vector> HIP_counters(32); +std::vector> HIP_rates5min(32); +std::vector> HIP_rates1min(32); +std::vector> HIP_rates30sec(32); + std::atomic counter = 0; std::atomic benchCounter = 0; boost::asio::io_context my_context; @@ -200,6 +207,9 @@ int main(int argc, char **argv) // test_cshake256(); GPUTest(); + #ifdef TNN_HIP + return 0; + #endif std::atexit(onExit); signal(SIGTERM, sigterm); @@ -320,7 +330,7 @@ int main(int argc, char **argv) { #if defined(TNN_ASTRIXHASH) symbol = "AIX"; - protocol = ASTRIX_SOLO; + protocol = ASTRIX_STRATUM; #else setcolor(RED); printf(unsupported_astrix); @@ -392,6 +402,19 @@ int main(int argc, char **argv) #endif } + if (vm.count("test-astrix")) + { + #if defined(TNN_RANDOMX) + return AstrixHash::test(); + #else + setcolor(RED); + printf(unsupported_astrix); + fflush(stdout); + setcolor(BRIGHT_WHITE); + return 1; + #endif + } + if (vm.count("xelis-bench")) { #if defined(TNN_XELISHASH) @@ -479,7 +502,7 @@ int main(int argc, char **argv) } if(wallet.find("astrix", 0) != std::string::npos) { symbol = "AIX"; - protocol = ASTRIX_SOLO; + protocol = ASTRIX_STRATUM; } if(wallet.find("ZEPHYR", 0) != std::string::npos) { symbol = "ZEPH"; diff --git a/src/core/miner.h b/src/core/miner.h index 64d1df2..e3a25d9 100644 --- a/src/core/miner.h +++ b/src/core/miner.h @@ -68,7 +68,7 @@ std::string defaultHost[] = { "monerohash.com", "", // verus "", - "34150" // aix + "127.0.0.1" // aix }; std::string devPort[] = { diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp index 3ea3332..f9d2dd9 100644 --- a/src/core/reporter.cpp +++ b/src/core/reporter.cpp @@ -85,6 +85,12 @@ int update_handler(const boost::system::error_code& error) case RX0: dPrint = difficulty; break; + case VERUSHASH: + dPrint = difficulty; + break; + case ASTRIX_HASH: + dPrint = doubleDiff; + break; } std::cout << std::setw(2) << "ACCEPTED " << accepted << std::setw(2) << " | REJECTED " << rejected diff --git a/src/crypto/astrix-hash/astrix-hash.cpp b/src/crypto/astrix-hash/astrix-hash.cpp index f640007..ae1f91b 100644 --- a/src/crypto/astrix-hash/astrix-hash.cpp +++ b/src/crypto/astrix-hash/astrix-hash.cpp @@ -4,26 +4,34 @@ #include #include -extern "C"{ -#include -#include -} +#include + +#include namespace AstrixHash { + constexpr int HALF_MATRIX_SIZE = 64/2; + constexpr int QUARTER_MATRIX_SIZE = 64/4; + const char *pwHashDomain = "ProofOfWorkHash"; const char *heavyHashDomain = "HeavyHash"; const Num trueMax("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", 16); - const Num maxTarget("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", 16); + const Num maxTarget("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", 16); const Num minHash = (Num(1) << 256) / maxTarget; - const Num bigKilo(1e3); + const Num bigGig(1e9); + + static const uint8_t powP[Plen] = {0x3d, 0xd8, 0xf6, 0xa1, 0x0d, 0xff, 0x3c, 0x11, 0x3c, 0x7e, 0x02, 0xb7, 0x55, 0x88, 0xbf, 0x29, 0xd2, 0x44, 0xfb, 0x0e, 0x72, 0x2e, 0x5f, 0x1e, 0xa0, 0x69, 0x98, 0xf5, 0xa3, 0xa4, 0xa5, 0x1b, 0x65, 0x2d, 0x5e, 0x87, 0xca, 0xaf, 0x2f, 0x7b, 0x46, 0xe2, 0xdc, 0x29, 0xd6, 0x61, 0xef, 0x4a, 0x10, 0x5b, 0x41, 0xad, 0x1e, 0x98, 0x3a, 0x18, 0x9c, 0xc2, 0x9b, 0x78, 0x0c, 0xf6, 0x6b, 0x77, 0x40, 0x31, 0x66, 0x88, 0x33, 0xf1, 0xeb, 0xf8, 0xf0, 0x5f, 0x28, 0x43, 0x3c, 0x1c, 0x65, 0x2e, 0x0a, 0x4a, 0xf1, 0x40, 0x05, 0x07, 0x96, 0x0f, 0x52, 0x91, 0x29, 0x5b, 0x87, 0x67, 0xe3, 0x44, 0x15, 0x37, 0xb1, 0x25, 0xa4, 0xf1, 0x70, 0xec, 0x89, 0xda, 0xe9, 0x82, 0x8f, 0x5d, 0xc8, 0xe6, 0x23, 0xb2, 0xb4, 0x85, 0x1f, 0x60, 0x1a, 0xb2, 0x46, 0x6a, 0xa3, 0x64, 0x90, 0x54, 0x85, 0x34, 0x1a, 0x85, 0x2f, 0x7a, 0x1c, 0xdd, 0x06, 0x0f, 0x42, 0xb1, 0x3b, 0x56, 0x1d, 0x02, 0xa2, 0xc1, 0xe4, 0x68, 0x16, 0x45, 0xe4, 0xe5, 0x1d, 0xba, 0x8d, 0x5f, 0x09, 0x05, 0x41, 0x57, 0x02, 0xd1, 0x4a, 0xcf, 0xce, 0x9b, 0x84, 0x4e, 0xca, 0x89, 0xdb, 0x2e, 0x74, 0xa8, 0x27, 0x94, 0xb0, 0x48, 0x72, 0x52, 0x8b, 0xe7, 0x9c, 0xce, 0xfc, 0xb1, 0xbc, 0xa5, 0xaf, 0x82, 0xcf, 0x29, 0x11, 0x5d, 0x83, 0x43, 0x82, 0x6f, 0x78, 0x7c, 0xb9, 0x02}; + static const uint8_t heavyP[Plen] = {0x09, 0x85, 0x24, 0xb2, 0x52, 0x4c, 0xd7, 0x3a, 0x16, 0x42, 0x9f, 0x2f, 0x0e, 0x9b, 0x62, 0x79, 0xee, 0xf8, 0xc7, 0x16, 0x48, 0xff, 0x14, 0x7a, 0x98, 0x64, 0x05, 0x80, 0x4c, 0x5f, 0xa7, 0x11, 0xda, 0xce, 0xee, 0x44, 0xdf, 0xe0, 0x20, 0xe7, 0x69, 0x40, 0xf3, 0x14, 0x2e, 0xd8, 0xc7, 0x72, 0xba, 0x35, 0x89, 0x93, 0x2a, 0xff, 0x00, 0xc1, 0x62, 0xc4, 0x0f, 0x25, 0x40, 0x90, 0x21, 0x5e, 0x48, 0x6a, 0xcf, 0x0d, 0xa6, 0xf9, 0x39, 0x80, 0x0c, 0x3d, 0x2a, 0x79, 0x9f, 0xaa, 0xbc, 0xa0, 0x26, 0xa2, 0xa9, 0xd0, 0x5d, 0xc0, 0x31, 0xf4, 0x3f, 0x8c, 0xc1, 0x54, 0xc3, 0x4c, 0x1f, 0xd3, 0x3d, 0xcc, 0x69, 0xa7, 0x01, 0x7d, 0x6b, 0x6c, 0xe4, 0x93, 0x24, 0x56, 0xd3, 0x5b, 0xc6, 0x2e, 0x44, 0xb0, 0xcd, 0x99, 0x3a, 0x4b, 0xf7, 0x4e, 0xb0, 0xf2, 0x34, 0x54, 0x83, 0x86, 0x4c, 0x77, 0x16, 0x94, 0xbc, 0x36, 0xb0, 0x61, 0xe9, 0x07, 0x07, 0xcc, 0x65, 0x77, 0xb1, 0x1d, 0x8f, 0x7e, 0x39, 0x6d, 0xc4, 0xba, 0x80, 0xdb, 0x8f, 0xea, 0x58, 0xca, 0x34, 0x7b, 0xd3, 0xf2, 0x92, 0xb9, 0x57, 0xb9, 0x81, 0x84, 0x04, 0xc5, 0x76, 0xc7, 0x2e, 0xc2, 0x12, 0x51, 0x67, 0x9f, 0xc3, 0x47, 0x0a, 0x0c, 0x29, 0xb5, 0x9d, 0x39, 0xbb, 0x92, 0x15, 0xc6, 0x9f, 0x2f, 0x31, 0xe0, 0x9a, 0x54, 0x35, 0xda, 0xb9, 0x10, 0x7d, 0x32, 0x19, 0x16}; bool checkPow(Num in, Num diff) { if (trueMax >> (diff.to_double()) < in) return false; return true; } + #define make_uchar4(a,b,c,d) \ + (((uint32_t)(a << 24) | (uint32_t)(b << 16) | (uint16_t)(c << 8) | d)) + Num diffToTarget(double diff) { // Create a Num object representing the difficulty @@ -38,24 +46,45 @@ namespace AstrixHash // Calculate the target by dividing maxTarget by difficulty Num hv = Num::mul(diff, minHash); - Num target = Num::div(hv, bigKilo); + Num target = Num::div(hv, bigGig); return target; } - void heavyHash(byte *hash, matrix &mat, byte *out) + static inline void amul4bit(uint32_t packed_vec1[32], uint32_t packed_vec2[32], uint32_t *ret) + { + unsigned int res = 0; + + // Loop through each 32-bit element (containing four 4-bit values) + for (int i = 0; i < QUARTER_MATRIX_SIZE; i++) + { + // Extract 4-bit values from packed_vec1 and packed_vec2 + for (int j = 0; j < 8; j++) + { + uint8_t val1 = (packed_vec1[i] >> (4 * j)) & 0xF; // Extract 4 bits + uint8_t val2 = (packed_vec2[i] >> (4 * j)) & 0xF; // Extract 4 bits + res += val1 * val2; // Perform the dot product + } + } + + *ret = res; + } + + void heavyHash(byte *scratch, matrix &mat, byte *out) { std::array v{}, p{}; for (int i = 0; i < matSize / 2; i++) { - v[i * 2] = uint16_t(hash[i] >> 4); - v[i * 2 + 1] = uint16_t(hash[i] & 0x0f); + v[i * 2] = uint16_t(scratch[i] >> 4); + v[i * 2 + 1] = uint16_t(scratch[i] & 0x0f); } // build the product array + #pragma unroll(4) for (int i = 0; i < 64; i++) { uint16_t s = 0; + #pragma unroll(4) for (int j = 0; j < 64; j++) { s += mat[i][j] * v[j]; @@ -66,26 +95,42 @@ namespace AstrixHash // calculate the digest for (size_t i = 0; i < 32; i++) { - out[i] = hash[i] ^ (static_cast(p[i * 2] << 4) | static_cast(p[i * 2 + 1])); + scratch[i] = scratch[i] ^ (static_cast(p[i * 2] << 4) | static_cast(p[i * 2 + 1])); } // hash the digest a final time, reverse bytes + #pragma unroll(5) + for (int i=0; i<4; i++) ((uint64_t *)scratch)[i] = ((uint64_t *)heavyP)[i] ^ ((uint64_t *)scratch)[i]; + + #pragma unroll + for (int i = 4; i < 25; i++) ((uint64_t *)scratch)[i] = ((uint64_t *)heavyP)[i]; + + keccakf(scratch); + // std::reverse(scratch, scratch+32); - cshake256_nil_function_name(out, 32, "HeavyHash", out, 32*8); - // std::reverse(out, out+32); + // memcpy(out, scratch, 32); + } + + static inline void sha3_256_astrix(byte *in, byte *scratch) { + scratch[80] = 0x06; + scratch[135] = 0x80; + + memcpy(scratch, in, 80); + + keccakf(scratch); } void hash(worker &worker, byte *in, int len, byte *out) { + memset(worker.scratchData, 0, 200); // cshake256("ProofOfWorkHash", in, len, worker.sha3Hash, 32); // newMatrix(in, worker.mat, worker); memcpy(worker.mat, worker.matBuffer, sizeof(matrix)); - sha3_256(in, len, worker.sha3Hash); + sha3_256_astrix(in, worker.scratchData); - printf("SHA3 Result: %s\n", hexStr(worker.sha3Hash, 32).c_str()); // cshake256_nil_function_name(in, len, "ProofOfWorkHash", worker.sha3Hash, 32*8); // AstroBWTv3(worker.sha3Hash, 32, worker.astrobwtv3Hash, *worker.astroWorker, false); - heavyHash(worker.sha3Hash, worker.mat, out); + heavyHash(worker.scratchData, worker.mat, out); } void testWithInput(const char* input, byte *out) { @@ -119,8 +164,10 @@ namespace AstrixHash // free(aw); } - int hipCompare() { + int test() { const char* input = "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f40414243444546470000000000000000"; + const char* expected = "40c377567fbc1d33f564493fb670eca26ba02c8a05c4dc61ab3bf86415006276"; + byte in[80]; memset(in, 0, 80); @@ -133,8 +180,8 @@ namespace AstrixHash newMatrix(in, w.matBuffer, w); hash(w, in, 80, out); - printf("CPU Result: %s\n", hexStr(out, 32).c_str()); - return 0; + printf("CPU Astrix Result:\n%s\nWant:\n%s\n", hexStr(w.scratchData, 32).c_str(), expected); + return strcmp(hexStr(w.scratchData, 32).c_str(), expected) == 0; } // int test() { diff --git a/src/crypto/astrix-hash/astrix-hash.h b/src/crypto/astrix-hash/astrix-hash.h index fb9af40..79012d3 100644 --- a/src/crypto/astrix-hash/astrix-hash.h +++ b/src/crypto/astrix-hash/astrix-hash.h @@ -4,6 +4,7 @@ #include #include #include +#include // NOTES @@ -25,6 +26,7 @@ namespace AstrixHash { matrix matBuffer; matrix mat; + uint8_t scratchData[200]; double copied[matSize][matSize]; std::bitset rowsSelected; byte sha3Hash[32]; @@ -152,5 +154,5 @@ namespace AstrixHash bool checkPow(Num in, Num diff); Num diffToTarget(double diff); Num diffToHash(double diff); - int hipCompare(); + int test(); } \ No newline at end of file diff --git a/src/crypto/spectrex/spectrex.cpp b/src/crypto/spectrex/spectrex.cpp index 3a0d3a8..60c86a4 100644 --- a/src/crypto/spectrex/spectrex.cpp +++ b/src/crypto/spectrex/spectrex.cpp @@ -4,9 +4,7 @@ #include #include -extern "C"{ -#include "cshake/cshake.h" -} +#include namespace SpectreX { @@ -23,6 +21,9 @@ namespace SpectreX return true; } + static const uint8_t powP[Plen] = {0x3d, 0xd8, 0xf6, 0xa1, 0x0d, 0xff, 0x3c, 0x11, 0x3c, 0x7e, 0x02, 0xb7, 0x55, 0x88, 0xbf, 0x29, 0xd2, 0x44, 0xfb, 0x0e, 0x72, 0x2e, 0x5f, 0x1e, 0xa0, 0x69, 0x98, 0xf5, 0xa3, 0xa4, 0xa5, 0x1b, 0x65, 0x2d, 0x5e, 0x87, 0xca, 0xaf, 0x2f, 0x7b, 0x46, 0xe2, 0xdc, 0x29, 0xd6, 0x61, 0xef, 0x4a, 0x10, 0x5b, 0x41, 0xad, 0x1e, 0x98, 0x3a, 0x18, 0x9c, 0xc2, 0x9b, 0x78, 0x0c, 0xf6, 0x6b, 0x77, 0x40, 0x31, 0x66, 0x88, 0x33, 0xf1, 0xeb, 0xf8, 0xf0, 0x5f, 0x28, 0x43, 0x3c, 0x1c, 0x65, 0x2e, 0x0a, 0x4a, 0xf1, 0x40, 0x05, 0x07, 0x96, 0x0f, 0x52, 0x91, 0x29, 0x5b, 0x87, 0x67, 0xe3, 0x44, 0x15, 0x37, 0xb1, 0x25, 0xa4, 0xf1, 0x70, 0xec, 0x89, 0xda, 0xe9, 0x82, 0x8f, 0x5d, 0xc8, 0xe6, 0x23, 0xb2, 0xb4, 0x85, 0x1f, 0x60, 0x1a, 0xb2, 0x46, 0x6a, 0xa3, 0x64, 0x90, 0x54, 0x85, 0x34, 0x1a, 0x85, 0x2f, 0x7a, 0x1c, 0xdd, 0x06, 0x0f, 0x42, 0xb1, 0x3b, 0x56, 0x1d, 0x02, 0xa2, 0xc1, 0xe4, 0x68, 0x16, 0x45, 0xe4, 0xe5, 0x1d, 0xba, 0x8d, 0x5f, 0x09, 0x05, 0x41, 0x57, 0x02, 0xd1, 0x4a, 0xcf, 0xce, 0x9b, 0x84, 0x4e, 0xca, 0x89, 0xdb, 0x2e, 0x74, 0xa8, 0x27, 0x94, 0xb0, 0x48, 0x72, 0x52, 0x8b, 0xe7, 0x9c, 0xce, 0xfc, 0xb1, 0xbc, 0xa5, 0xaf, 0x82, 0xcf, 0x29, 0x11, 0x5d, 0x83, 0x43, 0x82, 0x6f, 0x78, 0x7c, 0xb9, 0x02}; + static const uint8_t heavyP[Plen] = {0x09, 0x85, 0x24, 0xb2, 0x52, 0x4c, 0xd7, 0x3a, 0x16, 0x42, 0x9f, 0x2f, 0x0e, 0x9b, 0x62, 0x79, 0xee, 0xf8, 0xc7, 0x16, 0x48, 0xff, 0x14, 0x7a, 0x98, 0x64, 0x05, 0x80, 0x4c, 0x5f, 0xa7, 0x11, 0xda, 0xce, 0xee, 0x44, 0xdf, 0xe0, 0x20, 0xe7, 0x69, 0x40, 0xf3, 0x14, 0x2e, 0xd8, 0xc7, 0x72, 0xba, 0x35, 0x89, 0x93, 0x2a, 0xff, 0x00, 0xc1, 0x62, 0xc4, 0x0f, 0x25, 0x40, 0x90, 0x21, 0x5e, 0x48, 0x6a, 0xcf, 0x0d, 0xa6, 0xf9, 0x39, 0x80, 0x0c, 0x3d, 0x2a, 0x79, 0x9f, 0xaa, 0xbc, 0xa0, 0x26, 0xa2, 0xa9, 0xd0, 0x5d, 0xc0, 0x31, 0xf4, 0x3f, 0x8c, 0xc1, 0x54, 0xc3, 0x4c, 0x1f, 0xd3, 0x3d, 0xcc, 0x69, 0xa7, 0x01, 0x7d, 0x6b, 0x6c, 0xe4, 0x93, 0x24, 0x56, 0xd3, 0x5b, 0xc6, 0x2e, 0x44, 0xb0, 0xcd, 0x99, 0x3a, 0x4b, 0xf7, 0x4e, 0xb0, 0xf2, 0x34, 0x54, 0x83, 0x86, 0x4c, 0x77, 0x16, 0x94, 0xbc, 0x36, 0xb0, 0x61, 0xe9, 0x07, 0x07, 0xcc, 0x65, 0x77, 0xb1, 0x1d, 0x8f, 0x7e, 0x39, 0x6d, 0xc4, 0xba, 0x80, 0xdb, 0x8f, 0xea, 0x58, 0xca, 0x34, 0x7b, 0xd3, 0xf2, 0x92, 0xb9, 0x57, 0xb9, 0x81, 0x84, 0x04, 0xc5, 0x76, 0xc7, 0x2e, 0xc2, 0x12, 0x51, 0x67, 0x9f, 0xc3, 0x47, 0x0a, 0x0c, 0x29, 0xb5, 0x9d, 0x39, 0xbb, 0x92, 0x15, 0xc6, 0x9f, 0x2f, 0x31, 0xe0, 0x9a, 0x54, 0x35, 0xda, 0xb9, 0x10, 0x7d, 0x32, 0x19, 0x16}; + Num diffToTarget(double diff) { // Create a Num object representing the difficulty @@ -42,19 +43,21 @@ namespace SpectreX return target; } - void heavyHash(byte *hash, matrix &mat, byte *out) + void heavyHash(byte *scratch, matrix &mat, byte *out) { std::array v{}, p{}; for (int i = 0; i < matSize / 2; i++) { - v[i * 2] = uint16_t(hash[i] >> 4); - v[i * 2 + 1] = uint16_t(hash[i] & 0x0f); + v[i * 2] = uint16_t(scratch[i] >> 4); + v[i * 2 + 1] = uint16_t(scratch[i] & 0x0f); } // build the product array + #pragma unroll(4) for (int i = 0; i < 64; i++) { uint16_t s = 0; + #pragma unroll(4) for (int j = 0; j < 64; j++) { s += mat[i][j] * v[j]; @@ -65,13 +68,20 @@ namespace SpectreX // calculate the digest for (size_t i = 0; i < 32; i++) { - out[i] = hash[i] ^ (static_cast(p[i * 2] << 4) | static_cast(p[i * 2 + 1])); + scratch[i] = scratch[i] ^ (static_cast(p[i * 2] << 4) | static_cast(p[i * 2 + 1])); } // hash the digest a final time, reverse bytes + #pragma unroll(5) + for (int i=0; i<4; i++) ((uint64_t *)scratch)[i] = ((uint64_t *)heavyP)[i] ^ ((uint64_t *)scratch)[i]; + + #pragma unroll + for (int i = 4; i < 25; i++) ((uint64_t *)scratch)[i] = ((uint64_t *)heavyP)[i]; + + keccakf(scratch); + std::reverse(scratch, scratch+32); - cshake256_nil_function_name(out, 32, "HeavyHash", out, 32*8); - std::reverse(out, out+32); + // memcpy(out, scratch, 32); } void hash(worker &worker, byte *in, int len, byte *out) @@ -79,10 +89,19 @@ namespace SpectreX // cshake256("ProofOfWorkHash", in, len, worker.sha3Hash, 32); // newMatrix(in, worker.mat, worker); memcpy(worker.mat, worker.matBuffer, sizeof(matrix)); - cshake256_nil_function_name(in, len, "ProofOfWorkHash", worker.sha3Hash, 32*8); - AstroBWTv3(worker.sha3Hash, 32, worker.astrobwtv3Hash, *(worker.astroWorker), false); + + #pragma unroll(5) + for (int i=0; i<10; i++) ((uint64_t *)worker.scratchData)[i] = ((uint64_t *)powP)[i] ^ ((uint64_t *)in)[i]; + + #pragma unroll + for (int i = 10; i < 25; i++) ((uint64_t *)worker.scratchData)[i] = ((uint64_t *)powP)[i]; + + keccakf(worker.scratchData); + + AstroBWTv3(worker.scratchData, 32, worker.astrobwtv3Hash, *(worker.astroWorker), false); // AstroBWTv3(worker.sha3Hash, 32, worker.astrobwtv3Hash, *worker.astroWorker, false); - heavyHash(worker.astrobwtv3Hash, worker.mat, out); + memcpy(worker.scratchData, worker.astrobwtv3Hash, 32); + heavyHash(worker.scratchData, worker.mat, out); } void testWithInput(const char* input, byte *out) { @@ -137,21 +156,15 @@ namespace SpectreX // std::reverse(out, out+32); int pieces_failed = 0; - const char *pow_expected = "ae63221b94390528bd5a092be6247f7173099978bf6b150031c034ed22b37cea"; - printf("POW hash: %s\n", hexStr(w->sha3Hash, 32).c_str()); - printf("WANT : %s\n\n", pow_expected); const char *bwt_expected = "271bd27bf393fc8854e4ada0f255cef19c0e86c9b7245088bdafc01318172dc5"; printf("BWT hash: %s\n", hexStr(w->astrobwtv3Hash, 32).c_str()); printf("WANT : %s\n\n", bwt_expected); const char *heavy_expected = "0b68c38a0d359b9ef74fecfae4b2b0a0ea026fdcee22c1d48bcc824f32050ef5"; - printf("Heavy hash: %s\n", hexStr(out, 32).c_str()); + printf("Heavy hash: %s\n", hexStr(w->scratchData, 32).c_str()); printf("WANT : %s\n\n", heavy_expected); - if(memcmp(hexStr(w->sha3Hash, 32).c_str(), pow_expected, 32) != 0) { - pieces_failed += 1; - } if(memcmp(hexStr(w->astrobwtv3Hash, 32).c_str(), bwt_expected, 32) != 0) { pieces_failed += 1; } diff --git a/src/crypto/spectrex/spectrex.h b/src/crypto/spectrex/spectrex.h index 9e885fd..d5f30a7 100644 --- a/src/crypto/spectrex/spectrex.h +++ b/src/crypto/spectrex/spectrex.h @@ -27,6 +27,7 @@ namespace SpectreX matrix mat; double copied[matSize][matSize]; std::bitset rowsSelected; + byte scratchData[200]; byte sha3Hash[32]; byte astrobwtv3Hash[32]; workerData *astroWorker; diff --git a/src/crypto/tiny-keccak/tiny-keccak.h b/src/crypto/tiny-keccak/tiny-keccak.h new file mode 100644 index 0000000..a4eb823 --- /dev/null +++ b/src/crypto/tiny-keccak/tiny-keccak.h @@ -0,0 +1,162 @@ +/** libkeccak-tiny + * + * A single-file implementation of SHA-3 and SHAKE. + * + * Implementor: David Leon Gil + * License: CC0, attribution kindly requested. Blame taken too, + * but not liability. + */ +#pragma once + +#include +#include +#include +#include + +/******** The Keccak-f[1600] permutation ********/ + +/*** Constants. ***/ +static const uint8_t rho[24] = \ + { 1, 3, 6, 10, 15, 21, + 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, + 62, 18, 39, 61, 20, 44}; +static const uint8_t pi[24] = \ + {10, 7, 11, 17, 18, 3, + 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, + 20, 14, 22, 9, 6, 1}; +static const uint64_t RC[24] = \ + {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, + 0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, + 0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL, + 0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, + 0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL, + 0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL}; + +/*** Helper macros to unroll the permutation. ***/ +#define rol(x, s) (((x) << s) | ((x) >> (64 - s))) +#define REPEAT6(e) e e e e e e +#define REPEAT24(e) REPEAT6(e e e e) +#define REPEAT5(e) e e e e e +#define FOR5(v, s, e) \ + v = 0; \ + REPEAT5(e; v += s;) + +/*** Keccak-f[1600] ***/ +inline static void keccakf(void* state) { + uint64_t* a = (uint64_t*)state; + uint64_t b[5] = {0}; + uint64_t t = 0; + uint8_t x, y; + + #pragma unroll + for (int i = 0; i < 24; i++) { + // Theta + FOR5(x, 1, + b[x] = 0; + FOR5(y, 5, + b[x] ^= a[x + y]; )) + FOR5(x, 1, + FOR5(y, 5, + a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); )) + // Rho and pi + t = a[1]; + x = 0; + REPEAT24(b[0] = a[pi[x]]; + a[pi[x]] = rol(t, rho[x]); + t = b[0]; + x++; ) + // Chi + FOR5(y, + 5, + FOR5(x, 1, + b[x] = a[y + x];) + FOR5(x, 1, + a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); )) + // Iota + a[0] ^= RC[i]; + } +} + +/******** The FIPS202-defined functions. ********/ + +/*** Some helper macros. ***/ + +#define _(S) do { S } while (0) +#define FOR(i, ST, L, S) \ + _(for (size_t i = 0; i < L; i += ST) { S; }) +#define mkapply_ds(NAME, S) \ + static inline void NAME(uint8_t* dst, \ + const uint8_t* src, \ + size_t len) { \ + FOR(i, 1, len, S); \ + } +#define mkapply_sd(NAME, S) \ + static inline void NAME(const uint8_t* src, \ + uint8_t* dst, \ + size_t len) { \ + FOR(i, 1, len, S); \ + } + +mkapply_ds(xorin, dst[i] ^= src[i]) // xorin +mkapply_sd(setout, dst[i] = src[i]) // setout + +#define P keccakf +#define Plen 200 + +#define foldP(I, L, F) \ + while (L >= rate) { \ + F(a, I, rate); \ + P(a); \ + I += rate; \ + L -= rate; \ + } + +/** The sponge-based hash construction. **/ +static inline int hash(uint8_t* out, size_t outlen, + const uint8_t* in, size_t inlen, + size_t rate, uint8_t delim) { + if ((out == NULL) || ((in == NULL) && inlen != 0) || (rate >= Plen)) { + return -1; + } + uint8_t a[Plen] = {0}; + // Absorb input. + foldP(in, inlen, xorin); + // Xor in the DS and pad frame. + a[inlen] ^= delim; + a[rate - 1] ^= 0x80; + // Xor in the last block. + xorin(a, in, inlen); + // Apply P + P(a); + // Squeeze output. + foldP(out, outlen, setout); + setout(a, out, outlen); + return 0; +} + +/*** Helper macros to define SHA3 and SHAKE instances. ***/ +#define defshake(bits) \ + static inline int shake##bits(uint8_t* out, size_t outlen, \ + const uint8_t* in, size_t inlen) { \ + return hash(out, outlen, in, inlen, 200 - (bits / 4), 0x1f); \ + } +#define defsha3(bits) \ + static inline int sha3_##bits(uint8_t* out, size_t outlen, \ + const uint8_t* in, size_t inlen) { \ + if (outlen > (bits/8)) { \ + return -1; \ + } \ + return hash(out, outlen, in, inlen, 200 - (bits / 4), 0x06); \ + } + +/*** FIPS202 SHAKE VOFs ***/ +defshake(128) +defshake(256) + +/*** FIPS202 SHA3 FOFs ***/ +defsha3(224) +defsha3(256) +defsha3(384) +defsha3(512) \ No newline at end of file diff --git a/src/net/astrix/net_astrix_stratum.cpp b/src/net/astrix/net_astrix_stratum.cpp index 9683b71..c4e6229 100644 --- a/src/net/astrix/net_astrix_stratum.cpp +++ b/src/net/astrix/net_astrix_stratum.cpp @@ -111,7 +111,7 @@ int handleAstrixStratumPacket(boost::json::object packet, AstrixStratum::jobCach } else if (M.compare(AstrixStratum::s_setDifficulty) == 0) { - // std::cout << boost::json::serialize(packet).c_str() << std::endl; + std::cout << boost::json::serialize(packet).c_str() << std::endl; double *d = isDev ? &doubleDiffDev : &doubleDiff; (*d) = packet.at("params").as_array()[0].get_double(); if ((*d) < 0.00000000001) (*d) = packet.at("params").as_array()[0].get_uint64(); @@ -526,7 +526,7 @@ void astrix_stratum_session( // Consume the data from the buffer after processing it response.consume(trans); - std::cout << "received: " << data << std::endl << std::flush; + // std::cout << "received: " << data << std::endl << std::flush; // printf("received data\n"); fflush(stdout); diff --git a/src/tnn_hip/coins/mine_astrix.hip b/src/tnn_hip/coins/mine_astrix.hip new file mode 100644 index 0000000..ee73524 --- /dev/null +++ b/src/tnn_hip/coins/mine_astrix.hip @@ -0,0 +1,353 @@ +#include +#include "miners.hip.hpp" +#include "tnn-hugepages.h" +#include +#include + +#define GPU_FOR(d, count) for (d = 0; d < count; d++) + +void mineAstrix_hip() +{ + int d = 0; + + int GPUCount = 0; + hipError_t err = hipGetDeviceCount(&GPUCount); + + size_t *blocks, *threads, *batchSizes; + blocks = (size_t*)malloc(sizeof(size_t)*GPUCount); + threads = (size_t*)malloc(sizeof(size_t)*GPUCount); + batchSizes = (size_t*)malloc(sizeof(size_t)*GPUCount); + + GPU_FOR(d, GPUCount) { + hipDeviceProp_t deviceProps; + hipGetDeviceProperties(&deviceProps, d); // Query device properties + int smCount = deviceProps.multiProcessorCount; + + int numBlocksPerSm; + hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocksPerSm, astrixHash_hip, Astrix_HIP::THREAD_DIM, 0); + blocks[d] = numBlocksPerSm*smCount*128; + batchSize[d] = blocks*Astrix_HIP::THREAD_DIM; + } + + if (err != hipSuccess) { + std::cerr << "Failed to get device count: " << hipGetErrorString(err) << std::endl; + return; + } + + int64_t localJobCounter; + int64_t localOurHeight = 0; + int64_t localDevHeight = 0; + + uint64_t i = 0; + uint64_t i_dev = 0; + + byte powHash[32]; + byte work[Astrix_HIP::INPUT_SIZE] = {0}; + byte devWork[Astrix_HIP::INPUT_SIZE] = {0}; +waitForJob: + + while (!isConnected) + { + boost::this_thread::sleep_for(boost::chrono::milliseconds(100)); + } + + uint64_t *nonceCache = (uint64_t*)malloc(sizeof(uint64_t)*GPUCount); + + uint16_t *h_nonceCounts = (uint16_t*)malloc(sizeof(uint16_t)*GPUCount); + uint64_t *h_nonceBuffers = (uint64_t*)malloc(sizeof(uint64_t)*Astrix_HIP::MAX_NONCES*GPUCount); + + uint16_t *d_nonceCount; + uint64_t *d_nonceBuffer; + + GPU_FOR(d, GPUCount) { + hipSetDevice(d); + + hipMalloc(&d_nonceCount[d], sizeof(uint16_t)); + hipMalloc(&d_nonceBuffer[d], sizeof(uint64_t)*Astrix_HIP::MAX_NONCES); + + hipError_t err = hipGetLastError(); + if (err != hipSuccess) { + std::cerr << "hipMalloc failed on GPU " << d << ": " << hipGetErrorString(err) << std::endl; + return; + } + } + + while (true) + { + try + { + boost::json::value myJob; + boost::json::value myJobDev; + { + std::scoped_lock lockGuard(mutex); + myJob = job; + myJobDev = devJob; + localJobCounter = jobCounter; + } + + if (!myJob.at("template").is_string()) { + continue; + } + if (ourHeight == 0 && devHeight == 0) + continue; + + if (ourHeight == 0 || localOurHeight != ourHeight) + { + byte *b2 = new byte[SpectreX::INPUT_SIZE]; + switch (protocol) + { + case ASTRIX_SOLO: + hexstrToBytes(std::string(myJob.at("template").as_string()), b2); + break; + case ASTRIX_STRATUM: + hexstrToBytes(std::string(myJob.at("template").as_string()), b2); + break; + } + memcpy(work, b2, SpectreX::INPUT_SIZE); + delete[] b2; + + GPU_FOR(d, GPUCount) { + hipSetDevice(d); + Astrix_HIP::newMatrix(work); + hipDeviceSynchronize(); + } + + localOurHeight = ourHeight; + i = 0; + } + + if (devConnected && myJobDev.at("template").is_string()) + { + if (devHeight == 0 || localDevHeight != devHeight) + { + byte *b2d = new byte[SpectreX::INPUT_SIZE]; + switch (protocol) + { + case ASTRIX_SOLO: + hexstrToBytes(std::string(myJobDev.at("template").as_string()), b2d); + break; + case ASTRIX_STRATUM: + hexstrToBytes(std::string(myJobDev.at("template").as_string()), b2d); + break; + } + memcpy(devWork, b2d, SpectreX::INPUT_SIZE); + delete[] b2d; + + GPU_FOR(d, GPUCount) { + hipSetDevice(d); + Astrix_HIP::newMatrix(work, true); + hipDeviceSynchronize(); + } + + localDevHeight = devHeight; + i_dev = 0; + } + } + + bool devMine = false; + double which; + bool submit = false; + double DIFF = 1; + Num cmpDiff; + + memset(h_nonceCounts, 0, sizeof(uint16_t)*GPUCount); + + boost::thread_group workers; + + // printf("end of job application\n"); + GPU_FOR(d, GPUCount) { + threads.create_thread([&]() { + hipSetDevice(d); + + int kernelIndex = 0; + while (localJobCounter == jobCounter) + { + which = (double)(rand() % 10000); + devMine = (devConnected && devHeight > 0 && which < devFee * 100.0); + DIFF = devMine ? doubleDiffDev : doubleDiff; + if (DIFF == 0) + continue; + + // cmpDiff = ConvertDifficultyToBig(DIFF, ASTRIX_X); + cmpDiff = SpectreX::diffToTarget(DIFF); + + uint64_t *nonce = devMine ? &i_dev : &i; + (*nonce)++; + + // printf("nonce = %llu\n", *nonce); + + byte *WORK = (devMine && devConnected) ? &devWork[0] : &work[0]; + byte *nonceBytes = &WORK[72]; + uint64_t n; + + int enLen = 0; + uint64_t nonceMask = -1ULL; + + boost::json::value &J = devMine ? myJobDev : myJob; + if (!J.as_object().if_contains("extraNonce") || J.at("extraNonce").as_string().size() == 0) { + n = (rand() % 65536) << 48; + nonceMask >>= 16; + } else { + uint64_t eN = std::stoull(std::string(J.at("extraNonce").as_string().c_str()), NULL, 16); + enLen = std::string(J.at("extraNonce").as_string()).size()/2; + int offset = (64 - enLen*8); + n = (eN << offset); + nonceMask >>= enLen*8; + } + memcpy(nonceBytes, (byte *)&n, 8); + + // printf("after nonce: %s\n", hexStr(WORK, SpectreX::INPUT_SIZE).c_str()); + + if (localJobCounter != jobCounter) { + // printf("thread %d updating job before hash\n", tid); + break; + } + + Astrix_HIP::astrixHash_wrapper(blocks[d], nonceMask, n, d_nonceBuffer, d_nonceCount, kernelIndex, batchSizes[d], d, devMine); + + kernelIndex++; + + // if (littleEndian()) + // { + // std::reverse(powHash, powHash + 32); + // } + + HIP_counters[d].fetch_add(batchSizes[d]); + submit = (devMine && devConnected) ? !submittingDev : !submitting; + + if (localJobCounter != jobCounter || localOurHeight != ourHeight) { + // printf("thread %d updating job after hash\n", tid); + break; + } + + // if (Num(hexStr(powHash, 32).c_str(), 16) <= cmpDiff) + // { + // // printf("thread %d entered submission process\n", tid); + // if (!submit) { + // for(;;) { + // submit = (devMine && devConnected) ? !submittingDev : !submitting; + // if (submit || localJobCounter != jobCounter || localOurHeight != ourHeight) + // break; + // boost::this_thread::yield(); + // } + // } + // if (localJobCounter != jobCounter) { + // // printf("thread %d updating job after check\n", tid); + // break; + // } + // // if (littleEndian()) + // // { + // // std::reverse(powHash, powHash + 32); + // // } + // // std::string b64 = base64::to_base64(std::string((char *)&WORK[0], XELIS_TEMPLATE_SIZE)); + // // boost::lock_guard lock(mutex); + // if (devMine) + // { + // submittingDev = true; + // std::scoped_lock lockGuard(devMutex); + // // if (localJobCounter != jobCounter || localDevHeight != devHeight) + // // { + // // break; + // // } + // setcolor(CYAN); + // std::cout << "\n(DEV) Thread " << tid << " found a dev share\n" << std::flush; + // setcolor(BRIGHT_WHITE); + // switch (protocol) + // { + // case ASTRIX_SOLO: + // devShare = {{"block_template", hexStr(&WORK[0], SpectreX::INPUT_SIZE).c_str()}}; + // break; + // case ASTRIX_STRATUM: + // std::vector nonceStr; + // // Num(std::to_string((n << enLen*8) >> enLen*8).c_str(),10).print(nonceStr, 16); + // Num(std::to_string(n).c_str(),10).print(nonceStr, 16); + // devShare = {{{"id", SpectreStratum::submitID}, + // {"method", SpectreStratum::submit.method.c_str()}, + // {"params", {devWorkerName, // WORKER + // myJobDev.at("jobId").as_string().c_str(), // JOB ID + // std::string(nonceStr.data()).c_str()}}}}; + + // break; + // } + // data_ready = true; + // } + // else + // { + // submitting = true; + // std::scoped_lock lockGuard(userMutex); + // // if (localJobCounter != jobCounter || localOurHeight != ourHeight) + // // { + // // break; + // // } + // setcolor(BRIGHT_YELLOW); + // std::cout << "\nThread " << tid << " found a nonce!\n" << std::flush; + // setcolor(BRIGHT_WHITE); + // switch (protocol) + // { + // case ASTRIX_SOLO: + // share = {{"block_template", hexStr(&WORK[0], SpectreX::INPUT_SIZE).c_str()}}; + // break; + // case ASTRIX_STRATUM: + // std::vector nonceStr; + // // Num(std::to_string((n << enLen*8) >> enLen*8).c_str(),10).print(nonceStr, 16); + // Num(std::to_string(n).c_str(),10).print(nonceStr, 16); + // share = {{{"id", SpectreStratum::submitID}, + // {"method", SpectreStratum::submit.method.c_str()}, + // {"params", {workerName, // WORKER + // myJob.at("jobId").as_string().c_str(), // JOB ID + // std::string(nonceStr.data()).c_str()}}}}; + + // // std::cout << "blob: " << hexStr(&WORK[0], SpectreX::INPUT_SIZE).c_str() << std::endl; + // // std::cout << "nonce: " << nonceStr.data() << std::endl; + // // std::cout << "extraNonce: " << hexStr(&WORK[SpectreX::INPUT_SIZE - 48], enLen).c_str() << std::endl; + // // std::cout << "hash: " << hexStr(&powHash[0], 32) << std::endl; + // // std::vector diffHex; + // // cmpDiff.print(diffHex, 16); + // // std::cout << "difficulty (LE): " << std::string(diffHex.data()).c_str() << std::endl; + // // std::cout << "powValue: " << Num(hexStr(powHash, 32).c_str(), 16) << std::endl; + // // std::cout << "target (decimal): " << cmpDiff << std::endl; + + // // printf("blob: %s\n", foundBlob.c_str()); + // // printf("hash (BE): %s\n", hexStr(&powHash[0], 32).c_str()); + // // printf("nonce (Full bytes for injection): %s\n", hexStr((byte *)&n, 8).c_str()); + + // break; + // } + // data_ready = true; + // } + // // printf("thread %d finished submission process\n", tid); + // cv.notify_all(); + // } + + if (!isConnected) { + data_ready = true; + cv.notify_all(); + break; + } + } + }); + } + + workers.join_all(); + } + catch (std::exception& e) + { + setcolor(RED); + std::cerr << "Error in POW Function" << std::endl; + std::cerr << e.what() << std::endl; + setcolor(BRIGHT_WHITE); + + localJobCounter = -1; + localOurHeight = -1; + localDevHeight = -1; + } + if (!isConnected) { + data_ready = true; + cv.notify_all(); + break; + } + } + goto waitForJob; +} + +#undef GPU_FOR \ No newline at end of file diff --git a/src/tnn_hip/coins/miners.hip.hpp b/src/tnn_hip/coins/miners.hip.hpp new file mode 100644 index 0000000..a205209 --- /dev/null +++ b/src/tnn_hip/coins/miners.hip.hpp @@ -0,0 +1,71 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +using byte = unsigned char; + +inline Num ConvertDifficultyToBig(Num d, int algo) +{ + switch(algo) { + case DERO_HASH: + return oneLsh256 / d; + case XELIS_HASH: + return maxU256 / d; + case SPECTRE_X: + return oneLsh256 / (d+1); + default: + return 0; + } +} + +inline bool CheckHash(unsigned char *hash, int64_t diff, int algo) +{ + if (littleEndian()) std::reverse(hash, hash+32); + bool cmp = Num(hexStr(hash, 32).c_str(), 16) <= ConvertDifficultyToBig(diff, algo); + if (littleEndian()) std::reverse(hash, hash+32); + return (cmp); +} + +inline bool CheckHash(unsigned char *hash, Num diff, int algo) +{ + if (littleEndian()) std::reverse(hash, hash+32); + bool cmp = Num(hexStr(hash, 32).c_str(), 16) <= diff; + if (littleEndian()) std::reverse(hash, hash+32); + return (cmp); +} + +inline std::string uint32ToHex(uint32_t value) { + std::stringstream ss; + ss << std::hex << std::setw(8) << std::setfill('0') << value; + return ss.str(); +} + +void mineAstrix_hip(); + +static inline void unsupported() { + printf("This coin is not supported on GPUs\n"); +} + +typedef void (*mineFunc_hip)(); +const mineFunc_hip POW[] = { + unsupported, // 0 + unsupported, + unsupported, + unsupported, + unsupported, + unsupported, + unsupported, // 5 + unsupported, + unsupported, + unsupported, + unsupported, + unsupported, // 10 + unsupported, + mineAstrix_hip +}; + diff --git a/src/tnn_hip/crypto/astrix-hash/astrix-hash.hip b/src/tnn_hip/crypto/astrix-hash/astrix-hash.hip index b8dd452..b48c5f5 100644 --- a/src/tnn_hip/crypto/astrix-hash/astrix-hash.hip +++ b/src/tnn_hip/crypto/astrix-hash/astrix-hash.hip @@ -3,6 +3,7 @@ #include #include "astrix_archdef.h" +#include "astrix-hash.hip.h" #include #include @@ -25,16 +26,15 @@ hipDeviceProp_t deviceProps; #define RANDOM_LEAN 0 #define RANDOM_XOSHIRO 1 -#define HIP_ASTRIX_MAX_NONCES 640 - #define LT_U256(X, Y) (X.number[3] != Y.number[3] ? X.number[3] < Y.number[3] : X.number[2] != Y.number[2] ? X.number[2] < Y.number[2] \ : X.number[1] != Y.number[1] ? X.number[1] < Y.number[1] \ : X.number[0] < Y.number[0]) - -__constant__ uint8_t matrix[MATRIX_SIZE][MATRIX_SIZE]; -__constant__ uint8_t hash_header[HASH_HEADER_SIZE]; +__align__(32) __constant__ uint8_t matrix[MATRIX_SIZE][MATRIX_SIZE]; +__align__(32) __constant__ uint8_t matrix_dev[MATRIX_SIZE][MATRIX_SIZE]; +__align__(32) __constant__ uint8_t hash_header[HASH_HEADER_SIZE]; +__align__(32) __constant__ uint8_t hash_header_dev[HASH_HEADER_SIZE]; __constant__ uint256_t target; -__constant__ static const uint8_t powP[Plen] = {0x3d, 0xd8, 0xf6, 0xa1, 0x0d, 0xff, 0x3c, 0x11, 0x3c, 0x7e, 0x02, 0xb7, 0x55, 0x88, 0xbf, 0x29, 0xd2, 0x44, 0xfb, 0x0e, 0x72, 0x2e, 0x5f, 0x1e, 0xa0, 0x69, 0x98, 0xf5, 0xa3, 0xa4, 0xa5, 0x1b, 0x65, 0x2d, 0x5e, 0x87, 0xca, 0xaf, 0x2f, 0x7b, 0x46, 0xe2, 0xdc, 0x29, 0xd6, 0x61, 0xef, 0x4a, 0x10, 0x5b, 0x41, 0xad, 0x1e, 0x98, 0x3a, 0x18, 0x9c, 0xc2, 0x9b, 0x78, 0x0c, 0xf6, 0x6b, 0x77, 0x40, 0x31, 0x66, 0x88, 0x33, 0xf1, 0xeb, 0xf8, 0xf0, 0x5f, 0x28, 0x43, 0x3c, 0x1c, 0x65, 0x2e, 0x0a, 0x4a, 0xf1, 0x40, 0x05, 0x07, 0x96, 0x0f, 0x52, 0x91, 0x29, 0x5b, 0x87, 0x67, 0xe3, 0x44, 0x15, 0x37, 0xb1, 0x25, 0xa4, 0xf1, 0x70, 0xec, 0x89, 0xda, 0xe9, 0x82, 0x8f, 0x5d, 0xc8, 0xe6, 0x23, 0xb2, 0xb4, 0x85, 0x1f, 0x60, 0x1a, 0xb2, 0x46, 0x6a, 0xa3, 0x64, 0x90, 0x54, 0x85, 0x34, 0x1a, 0x85, 0x2f, 0x7a, 0x1c, 0xdd, 0x06, 0x0f, 0x42, 0xb1, 0x3b, 0x56, 0x1d, 0x02, 0xa2, 0xc1, 0xe4, 0x68, 0x16, 0x45, 0xe4, 0xe5, 0x1d, 0xba, 0x8d, 0x5f, 0x09, 0x05, 0x41, 0x57, 0x02, 0xd1, 0x4a, 0xcf, 0xce, 0x9b, 0x84, 0x4e, 0xca, 0x89, 0xdb, 0x2e, 0x74, 0xa8, 0x27, 0x94, 0xb0, 0x48, 0x72, 0x52, 0x8b, 0xe7, 0x9c, 0xce, 0xfc, 0xb1, 0xbc, 0xa5, 0xaf, 0x82, 0xcf, 0x29, 0x11, 0x5d, 0x83, 0x43, 0x82, 0x6f, 0x78, 0x7c, 0xb9, 0x02}; +__align__(32) __constant__ static const uint8_t powP[Plen] = {0x3d, 0xd8, 0xf6, 0xa1, 0x0d, 0xff, 0x3c, 0x11, 0x3c, 0x7e, 0x02, 0xb7, 0x55, 0x88, 0xbf, 0x29, 0xd2, 0x44, 0xfb, 0x0e, 0x72, 0x2e, 0x5f, 0x1e, 0xa0, 0x69, 0x98, 0xf5, 0xa3, 0xa4, 0xa5, 0x1b, 0x65, 0x2d, 0x5e, 0x87, 0xca, 0xaf, 0x2f, 0x7b, 0x46, 0xe2, 0xdc, 0x29, 0xd6, 0x61, 0xef, 0x4a, 0x10, 0x5b, 0x41, 0xad, 0x1e, 0x98, 0x3a, 0x18, 0x9c, 0xc2, 0x9b, 0x78, 0x0c, 0xf6, 0x6b, 0x77, 0x40, 0x31, 0x66, 0x88, 0x33, 0xf1, 0xeb, 0xf8, 0xf0, 0x5f, 0x28, 0x43, 0x3c, 0x1c, 0x65, 0x2e, 0x0a, 0x4a, 0xf1, 0x40, 0x05, 0x07, 0x96, 0x0f, 0x52, 0x91, 0x29, 0x5b, 0x87, 0x67, 0xe3, 0x44, 0x15, 0x37, 0xb1, 0x25, 0xa4, 0xf1, 0x70, 0xec, 0x89, 0xda, 0xe9, 0x82, 0x8f, 0x5d, 0xc8, 0xe6, 0x23, 0xb2, 0xb4, 0x85, 0x1f, 0x60, 0x1a, 0xb2, 0x46, 0x6a, 0xa3, 0x64, 0x90, 0x54, 0x85, 0x34, 0x1a, 0x85, 0x2f, 0x7a, 0x1c, 0xdd, 0x06, 0x0f, 0x42, 0xb1, 0x3b, 0x56, 0x1d, 0x02, 0xa2, 0xc1, 0xe4, 0x68, 0x16, 0x45, 0xe4, 0xe5, 0x1d, 0xba, 0x8d, 0x5f, 0x09, 0x05, 0x41, 0x57, 0x02, 0xd1, 0x4a, 0xcf, 0xce, 0x9b, 0x84, 0x4e, 0xca, 0x89, 0xdb, 0x2e, 0x74, 0xa8, 0x27, 0x94, 0xb0, 0x48, 0x72, 0x52, 0x8b, 0xe7, 0x9c, 0xce, 0xfc, 0xb1, 0xbc, 0xa5, 0xaf, 0x82, 0xcf, 0x29, 0x11, 0x5d, 0x83, 0x43, 0x82, 0x6f, 0x78, 0x7c, 0xb9, 0x02}; __align__(32) __constant__ static const uint8_t heavyP[Plen] = {0x09, 0x85, 0x24, 0xb2, 0x52, 0x4c, 0xd7, 0x3a, 0x16, 0x42, 0x9f, 0x2f, 0x0e, 0x9b, 0x62, 0x79, 0xee, 0xf8, 0xc7, 0x16, 0x48, 0xff, 0x14, 0x7a, 0x98, 0x64, 0x05, 0x80, 0x4c, 0x5f, 0xa7, 0x11, 0xda, 0xce, 0xee, 0x44, 0xdf, 0xe0, 0x20, 0xe7, 0x69, 0x40, 0xf3, 0x14, 0x2e, 0xd8, 0xc7, 0x72, 0xba, 0x35, 0x89, 0x93, 0x2a, 0xff, 0x00, 0xc1, 0x62, 0xc4, 0x0f, 0x25, 0x40, 0x90, 0x21, 0x5e, 0x48, 0x6a, 0xcf, 0x0d, 0xa6, 0xf9, 0x39, 0x80, 0x0c, 0x3d, 0x2a, 0x79, 0x9f, 0xaa, 0xbc, 0xa0, 0x26, 0xa2, 0xa9, 0xd0, 0x5d, 0xc0, 0x31, 0xf4, 0x3f, 0x8c, 0xc1, 0x54, 0xc3, 0x4c, 0x1f, 0xd3, 0x3d, 0xcc, 0x69, 0xa7, 0x01, 0x7d, 0x6b, 0x6c, 0xe4, 0x93, 0x24, 0x56, 0xd3, 0x5b, 0xc6, 0x2e, 0x44, 0xb0, 0xcd, 0x99, 0x3a, 0x4b, 0xf7, 0x4e, 0xb0, 0xf2, 0x34, 0x54, 0x83, 0x86, 0x4c, 0x77, 0x16, 0x94, 0xbc, 0x36, 0xb0, 0x61, 0xe9, 0x07, 0x07, 0xcc, 0x65, 0x77, 0xb1, 0x1d, 0x8f, 0x7e, 0x39, 0x6d, 0xc4, 0xba, 0x80, 0xdb, 0x8f, 0xea, 0x58, 0xca, 0x34, 0x7b, 0xd3, 0xf2, 0x92, 0xb9, 0x57, 0xb9, 0x81, 0x84, 0x04, 0xc5, 0x76, 0xc7, 0x2e, 0xc2, 0x12, 0x51, 0x67, 0x9f, 0xc3, 0x47, 0x0a, 0x0c, 0x29, 0xb5, 0x9d, 0x39, 0xbb, 0x92, 0x15, 0xc6, 0x9f, 0x2f, 0x31, 0xe0, 0x9a, 0x54, 0x35, 0xda, 0xb9, 0x10, 0x7d, 0x32, 0x19, 0x16}; __device__ int deviceArch; @@ -55,10 +55,6 @@ __device__ __forceinline__ void amul4bit(uint32_t packed_vec1[32], uint32_t pack { // We assume each 32 bits have four values: A0 B0 C0 D0 unsigned int res = 0; -#if __CUDA_ARCH__ < 610 - char4 *a4 = (char4 *)packed_vec1; - char4 *b4 = (char4 *)packed_vec2; -#endif #pragma unroll for (int i = 0; i < QUARTER_MATRIX_SIZE; i++) { @@ -66,6 +62,8 @@ __device__ __forceinline__ void amul4bit(uint32_t packed_vec1[32], uint32_t pack #if (HIP_ARCH >= 1030) res = v_dot4_u32_u8(packed_vec1[i], packed_vec2[i], res); #else + char4 *a4 = (char4 *)packed_vec1; + char4 *b4 = (char4 *)packed_vec2; res += a4[i].x * b4[i].x; res += a4[i].y * b4[i].y; res += a4[i].z * b4[i].z; @@ -75,6 +73,8 @@ __device__ __forceinline__ void amul4bit(uint32_t packed_vec1[32], uint32_t pack #if __CUDA_ARCH__ >= 610 res = __dp4a(packed_vec1[i], packed_vec2[i], res); #else + char4 *a4 = (char4 *)packed_vec1; + char4 *b4 = (char4 *)packed_vec2; res += a4[i].x * b4[i].x; res += a4[i].y * b4[i].y; res += a4[i].z * b4[i].z; @@ -86,6 +86,7 @@ __device__ __forceinline__ void amul4bit(uint32_t packed_vec1[32], uint32_t pack *ret = res; } +template __global__ void astrixHash_hip(const uint64_t nonce_mask, const uint64_t nonce_fixed, uint64_t *final_nonces, int *nonce_count, int kIndex, size_t batchSize, uint8_t device = 0) { // assuming header_len is 72 @@ -93,16 +94,26 @@ __global__ void astrixHash_hip(const uint64_t nonce_mask, const uint64_t nonce_f uint64_t nonce; nonce = nonceId + kIndex * batchSize; - // nonce = (nonce << 4) | device; - // nonce = (nonce & nonce_mask) | nonce_fixed; + nonce = (nonce << 4) | device; + nonce = (nonce & nonce_mask) | nonce_fixed; + // header __align__(32) uint8_t input[200] = {0}; - memcpy(input, hash_header, HASH_HEADER_SIZE); + input[80] = 0x06; + input[135] = 0x80; + + if constexpr (isDev) { + memcpy(input, hash_header, HASH_HEADER_SIZE); + } else { + memcpy(input, hash_header_dev, HASH_HEADER_SIZE); + } + // data // TODO: check endianity? // uint256_t hash_; - memcpy(&input[HASH_HEADER_SIZE], (uint8_t *)(&nonce), 8); + ((uint64_t*)input)[9] = nonce; + // memcpy(&input[HASH_HEADER_SIZE], (uint8_t *)(&nonce), 8); // if (nonceId == 0 && kIndex == 0) { // printf("input 0:\n"); @@ -114,8 +125,6 @@ __global__ void astrixHash_hip(const uint64_t nonce_mask, const uint64_t nonce_f // __syncthreads(); // sha3_256_astrix(hash_.hash, input); - input[80] = 0x06; - input[135] = 0x80; keccakf(input); @@ -140,11 +149,17 @@ __global__ void astrixHash_hip(const uint64_t nonce_mask, const uint64_t nonce_f (input[2 * i + 1] & 0x0F)); } uint32_t product1, product2; + #pragma unroll for (int rowId = 0; rowId < HALF_MATRIX_SIZE; rowId++) { - amul4bit((uint32_t *)(matrix[(2 * rowId)]), (uint32_t *)(packed_hash), &product1); - amul4bit((uint32_t *)(matrix[(2 * rowId + 1)]), (uint32_t *)(packed_hash), &product2); + if constexpr (isDev) { + amul4bit((uint32_t *)(matrix_dev[(2 * rowId)]), (uint32_t *)(packed_hash), &product1); + amul4bit((uint32_t *)(matrix_dev[(2 * rowId + 1)]), (uint32_t *)(packed_hash), &product2); + } else { + amul4bit((uint32_t *)(matrix[(2 * rowId)]), (uint32_t *)(packed_hash), &product1); + amul4bit((uint32_t *)(matrix[(2 * rowId + 1)]), (uint32_t *)(packed_hash), &product2); + } product1 >>= 6; product1 &= 0xF0; product2 >>= 10; @@ -162,30 +177,12 @@ __global__ void astrixHash_hip(const uint64_t nonce_mask, const uint64_t nonce_f input[rowId] = input[rowId] ^ ((uint8_t)(product1) | (uint8_t)(product2)); #endif } - memset(&input[32], 0, 80-32); - // shake32_heavy(heavyP, hash_.hash, input); #pragma unroll for (int i=0; i<4; i++) ((uint64_t *)input)[i] = ((uint64_t *)heavyP)[i] ^ ((uint64_t *)input)[i]; - // ((ulonglong4 *)input)[0] ^= ((ulonglong4 *)heavyP)[0]; #pragma unroll - for (int i = 1; i < 6; i++) ((ulonglong4 *)input)[i] = ((ulonglong4 *)heavyP)[i]; - - ((uint64_t *)input)[24] = ((uint64_t *)heavyP)[24]; - // ((ulonglong4 *)a)[0] = ((ulonglong4 *)initP)[0] ^ ((ulonglong4 *)in)[0]; - // ((ulonglong4 *)a)[1] = ((ulonglong4 *)initP)[1] ^ ((ulonglong4 *)in)[1]; - // ((ulonglong2 *)a)[4] = ((ulonglong2 *)initP)[4] ^ ((ulonglong4 *)in)[4]; - - // #pragma unroll - // for (int i=10; i<25; i++) ((uint64_t *)input)[i] = ((uint64_t *)heavyP)[i]; - // ((ulonglong2 *)a)[5] = ((ulonglong2 *)initP)[5]; - - // #pragma unroll - // for (int i = 3; i < 6; i++) ((ulonglong4 *)a)[i] = ((ulonglong4 *)initP)[i]; - - // ((ulonglong2 *)a)[12] = ((ulonglong2 *)initP)[12]; - // ((uint64_t *)a)[24] = ((uint64_t *)initP)[24]; + for (int i = 4; i < 25; i++) ((uint64_t *)input)[i] = ((uint64_t *)heavyP)[i]; keccakf(input); @@ -194,13 +191,15 @@ __global__ void astrixHash_hip(const uint64_t nonce_mask, const uint64_t nonce_f // hash_.hash[i] = hash_.hash[31 - i]; // hash_.hash[31 - i] = temp; // } - + // int index = cond ? atomicAdd(nonce_count, 1) : -1; if (LT_U256(((uint256_t*)input)[0], target)) { int index = atomicAdd(nonce_count, 1); - index = index >= HIP_ASTRIX_MAX_NONCES ? HIP_ASTRIX_MAX_NONCES - 1 : index; + index = index >= Astrix_HIP::MAX_NONCES ? Astrix_HIP::MAX_NONCES - 1 : index; final_nonces[index] = nonce; + // atomicCAS(&((unsigned long long int*)final_nonces)[index], 0, (unsigned long long int)nonce); } + // if (nonceId == 0 && kIndex == 0) { // printf("GPU result: "); @@ -215,7 +214,7 @@ __global__ void astrixHash_hip(const uint64_t nonce_mask, const uint64_t nonce_f inline int calculateRank(uint8_t mat[MATRIX_SIZE][MATRIX_SIZE]) { double copied[MATRIX_SIZE][MATRIX_SIZE]; - bool rowsSelected[MATRIX_SIZE]; + bool rowsSelected[MATRIX_SIZE] = {false}; const double epsilon = 1e-9; for (int i = 0; i < MATRIX_SIZE; i++) @@ -265,6 +264,82 @@ inline int calculateRank(uint8_t mat[MATRIX_SIZE][MATRIX_SIZE]) return rank; } +namespace Astrix_HIP { + void getHashBlocksPerSM(int *numBlocksPerSm, bool isDev) { + if (isDev) { + hipOccupancyMaxActiveBlocksPerMultiprocessor(numBlocksPerSm, astrixHash_hip, Astrix_HIP::THREAD_DIM, 0); + } else { + hipOccupancyMaxActiveBlocksPerMultiprocessor(numBlocksPerSm, astrixHash_hip, Astrix_HIP::THREAD_DIM, 0); + } + } + + void astrixHash_wrapper( + int blocks, + const uint64_t nonce_mask, + const uint64_t nonce_fixed, + uint64_t *final_nonces, + int *nonce_count, + int kIndex, + size_t batchSize, + uint8_t device, + bool isDev + ) { + int h_nonce_count = 0; // Initialize on the host + + hipMemcpy(nonce_count, &h_nonce_count, sizeof(int), hipMemcpyHostToDevice); + + if (isDev) { + hipLaunchKernelGGL(astrixHash_hip, dim3(blocks), dim3(Astrix_HIP::THREAD_DIM), 0, 0, + nonce_mask, nonce_fixed, final_nonces, nonce_count, kIndex, batchSize, device); + } else { + hipLaunchKernelGGL(astrixHash_hip, dim3(blocks), dim3(Astrix_HIP::THREAD_DIM), 0, 0, + nonce_mask, nonce_fixed, final_nonces, nonce_count, kIndex, batchSize, device); + } + + // Check for errors after the kernel launch + hipError_t err = hipGetLastError(); + if (err != hipSuccess) + { + printf("Kernel launch failed: %s\n", hipGetErrorString(err)); + } + } + + void newMatrix(uint8_t *in, bool isDev) { + // Prepare host-side data for the matrix, hash_header, and target + uint8_t h_matrix[MATRIX_SIZE][MATRIX_SIZE]; // Host-side matrix + + // Fill the matrix + for (int i = 0; i < MATRIX_SIZE; i++) { + memset(h_matrix[i], 0, MATRIX_SIZE); + } + + alignas(64) uint64_t s0 = *(uint64_t*)&in[0]; + alignas(64) uint64_t s1 = *(uint64_t*)&in[8]; + alignas(64) uint64_t s2 = *(uint64_t*)&in[16]; + alignas(64) uint64_t s3 = *(uint64_t*)&in[24]; + + Xoshiro256PlusPlusHasher hasher(s0, s1, s2, s3); + + while (calculateRank(h_matrix) != MATRIX_SIZE) + { + for (int i = 0; i < MATRIX_SIZE; i++) + { + for (int j = 0; j < MATRIX_SIZE; j += QUARTER_MATRIX_SIZE) + { + uint64_t value = hasher.next(); + for (int k = 0; k < 16; k++) + { + h_matrix[i][j + k] = uint16_t((value >> (4 * k)) & 0x0f); + } + } + } + } + + hipMemcpyToSymbol(isDev ? matrix : matrix_dev, h_matrix, sizeof(h_matrix)); + } + +} + void benchAstrixHip() { int device; @@ -272,6 +347,7 @@ void benchAstrixHip() hipDeviceProp_t deviceProps; hipGetDeviceProperties(&deviceProps, device); // Query device properties + int smCount = deviceProps.multiProcessorCount; // Get the architecture version (gcnArch) and store it for device code // int arch = deviceProps.gcnArch; @@ -279,19 +355,19 @@ void benchAstrixHip() // Retrieve architecture-specific dimensions size_t blocks, threads, batchSize; - getArchDims(blocks, threads, batchSize); + // getArchDims(blocks, threads, batchSize); // Example input data setup uint64_t nonce_mask = 0xFFFFFFFFFFFFFFFF; // Example nonce mask uint64_t nonce_fixed = 0x0000000000000000; // Example nonce fixed value // Allocate memory for final_nonce on the CPU - uint64_t h_final_nonces[HIP_ASTRIX_MAX_NONCES] = {0}; + uint64_t h_final_nonces[Astrix_HIP::MAX_NONCES] = {0}; // Allocate space for final_nonce on the GPU uint64_t *d_final_nonces; - hipMalloc((void **)&d_final_nonces, sizeof(uint64_t) * HIP_ASTRIX_MAX_NONCES); + hipMalloc((void **)&d_final_nonces, sizeof(uint64_t) * Astrix_HIP::MAX_NONCES); // Timing events hipEvent_t start, stop; @@ -299,7 +375,7 @@ void benchAstrixHip() hipEventCreate(&stop); // Launch the kernel and perform the benchmark - size_t numRuns = 1000; + size_t numRuns = 500; int *d_nonce_count; int h_nonce_count = 0; // Initialize on the host @@ -318,6 +394,7 @@ void benchAstrixHip() for (int i = 0; i < 4; i++) { h_target.number[i] = 0xFFFFFFFFFFFFFFFFULL; // Example target + // h_target.number[i] = 0; // Example target } // Fill the matrix @@ -350,15 +427,29 @@ void benchAstrixHip() // Transfer the matrix, hash_header, and target data to device constant memory hipMemcpyToSymbol(matrix, h_matrix, sizeof(h_matrix)); hipMemcpyToSymbol(hash_header, h_hash_header, sizeof(h_hash_header)); - hipMemcpyToSymbol(target, &h_target, sizeof(h_target)); + hipMemcpyToSymbol(&target, &h_target, sizeof(h_target)); + + + int numBlocksPerSm; + hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocksPerSm, astrixHash_hip, Astrix_HIP::THREAD_DIM, 0); + blocks = numBlocksPerSm*smCount*128; + batchSize = blocks*Astrix_HIP::THREAD_DIM; + + if (batchSize == 0) { + blocks = 1; + batchSize = Astrix_HIP::THREAD_DIM; + } + + printf("batchSize: %llu\nsmCount: %llu\n", batchSize, smCount); + fflush(stdout); // Record start event hipEventRecord(start, 0); for (int i = 0; i < numRuns; ++i) { - hipMemcpy(d_nonce_count, &h_nonce_count, sizeof(int), hipMemcpyHostToDevice); - hipLaunchKernelGGL(astrixHash_hip, dim3(blocks), dim3(threads), 0, 0, + hipMemset(d_nonce_count, 0, sizeof(int)); + hipLaunchKernelGGL(astrixHash_hip, dim3(blocks), dim3(Astrix_HIP::THREAD_DIM), 0, 0, nonce_mask, nonce_fixed, d_final_nonces, d_nonce_count, i, batchSize, 0); // Check for errors after the kernel launch @@ -380,7 +471,7 @@ void benchAstrixHip() // Calculate throughput float seconds = milliseconds / 1000.0f; - size_t totalComputations = blocks * threads * numRuns; + size_t totalComputations = batchSize * numRuns; float runsPerSecond = totalComputations / seconds; printf("Total time for %d runs: %.3f ms\n", numRuns, milliseconds); diff --git a/src/tnn_hip/crypto/astrix-hash/astrix-hash.hip.h b/src/tnn_hip/crypto/astrix-hash/astrix-hash.hip.h new file mode 100644 index 0000000..50f7a53 --- /dev/null +++ b/src/tnn_hip/crypto/astrix-hash/astrix-hash.hip.h @@ -0,0 +1,34 @@ +#pragma once + +#include + +namespace Astrix_HIP { + constexpr int INPUT_SIZE = 80; + constexpr int MAX_NONCES = 640; + constexpr int THREAD_DIM = 1024; + /** + * Update the Astrix matrix data on the currently active GPU + * @param in A host pointer to the start of the new prePowHash + */ + void newMatrix(uint8_t *in, bool isDev = false); + + /** + * Call the astrixHash_hip kernel with launch parameters provided + * + * @param blocks The amount of blocks to launch the POW kernel with + * @param final_nonces A device pointer to a buffer for storing all found nonces for this POW round + * @param nonce_count A device pointer to a modifiable tally of found nonces for this POW round + * @param kIndex The current run count/index for the current job on the active GPU + */ + void astrixHash_wrapper( + int blocks, + const uint64_t nonce_mask, + const uint64_t nonce_fixed, + uint64_t *final_nonces, + int *nonce_count, + int kIndex, + size_t batchSize, + uint8_t device = 0, + bool isDev = false + ); +} \ No newline at end of file diff --git a/src/tnn_hip/crypto/astrix-hash/astrix_archdef.h b/src/tnn_hip/crypto/astrix-hash/astrix_archdef.h index 314f2ab..424c5c2 100644 --- a/src/tnn_hip/crypto/astrix-hash/astrix_archdef.h +++ b/src/tnn_hip/crypto/astrix-hash/astrix_archdef.h @@ -31,7 +31,7 @@ using ArchDims = std::tuple; // RDNA 3 // RX 7900+ - archDim(HIP_ASTRIX_gfx1100, 61440*256, 1024); + archDim(HIP_ASTRIX_gfx1100, 61440*192, 1024); // RX 7800+ archDim(HIP_ASTRIX_gfx1101, 38400*128, 256); // RX 7700+ @@ -41,7 +41,6 @@ using ArchDims = std::tuple; #elif defined(__HIP_PLATFORM_NVIDIA__) // NVIDIA platform: Define grid dimensions and shared memory size based on architecture - #if defined(__CUDA_ARCH__) #if __CUDA_ARCH__ >= 900 // Hopper archDim(HIP_ASTRIX, 2048, 256); @@ -57,7 +56,6 @@ using ArchDims = std::tuple; #else // Older NVIDIA architectures archDim(HIP_ASTRIX, 2048, 128); #endif - #endif #else #error "Unsupported platform" #endif @@ -65,31 +63,37 @@ using ArchDims = std::tuple; ArchDims defaultDims = {HIP_ASTRIX_BLOCKS, HIP_ASTRIX_THREADS, HIP_ASTRIX_BATCH_SIZE}; // Define a lookup table (map) that holds the block/thread/batch sizes for each architecture -static inline const std::unordered_map archDimsMap = { - ARCH_DIM_ENTRY(HIP_ASTRIX, gfx900), - ARCH_DIM_ENTRY(HIP_ASTRIX, gfx906), - ARCH_DIM_ENTRY(HIP_ASTRIX, gfx1010), - ARCH_DIM_ENTRY(HIP_ASTRIX, gfx1030), - ARCH_DIM_ENTRY(HIP_ASTRIX, gfx1031), - ARCH_DIM_ENTRY(HIP_ASTRIX, gfx1032), - ARCH_DIM_ENTRY(HIP_ASTRIX, gfx1034), - ARCH_DIM_ENTRY(HIP_ASTRIX, gfx1100) -}; +// #if defined(__HIP_PLATFORM_AMD__) +// static inline const std::unordered_map archDimsMap = { +// ARCH_DIM_ENTRY(HIP_ASTRIX, gfx900), +// ARCH_DIM_ENTRY(HIP_ASTRIX, gfx906), +// ARCH_DIM_ENTRY(HIP_ASTRIX, gfx1010), +// ARCH_DIM_ENTRY(HIP_ASTRIX, gfx1030), +// // ARCH_DIM_ENTRY(HIP_ASTRIX, gfx1031), +// // ARCH_DIM_ENTRY(HIP_ASTRIX, gfx1032), +// // ARCH_DIM_ENTRY(HIP_ASTRIX, gfx1034), +// ARCH_DIM_ENTRY(HIP_ASTRIX, gfx1100) +// }; +// #elif defined(__HIP_PLATFORM_NVIDIA__) +// static inline const std::unordered_map archDimsMap = { + +// } +// #endif // Function to retrieve architecture dimensions -static inline void getArchDims(size_t &blocks, size_t &threads, size_t &batchSize) { - int device; - hipGetDevice(&device); +// static inline void getArchDims(size_t &blocks, size_t &threads, size_t &batchSize) { +// int device; +// hipGetDevice(&device); - hipDeviceProp_t props; - hipGetDeviceProperties(&props, device); +// hipDeviceProp_t props; +// hipGetDeviceProperties(&props, device); - const char* archName = props.gcnArchName; +// const char* archName = props.gcnArchName; - // Find the architecture in the lookup table - auto it = archDimsMap.find(std::string(archName)); - const ArchDims& dims = (it != archDimsMap.end()) ? it->second : defaultDims; +// // Find the architecture in the lookup table +// auto it = archDimsMap.find(std::string(archName)); +// const ArchDims& dims = (it != archDimsMap.end()) ? it->second : defaultDims; - // Unpack the tuple into blocks, threads, and batchSize - std::tie(blocks, threads, batchSize) = dims; -} \ No newline at end of file +// // Unpack the tuple into blocks, threads, and batchSize +// std::tie(blocks, threads, batchSize) = dims; +// } \ No newline at end of file diff --git a/src/tnn_hip/crypto/keccak-tiny.hip.inc b/src/tnn_hip/crypto/keccak-tiny.hip.inc index 9730f93..990e450 100644 --- a/src/tnn_hip/crypto/keccak-tiny.hip.inc +++ b/src/tnn_hip/crypto/keccak-tiny.hip.inc @@ -44,7 +44,7 @@ __device__ static const uint64_t RC[24] = \ REPEAT5(e; v += s;) /*** Keccak-f[1600] ***/ -__device__ __forceinline__ static void keccakf(void* state) { +static inline void keccakf(void* state) { uint64_t* a = (uint64_t*)state; uint64_t b[5] = {0}; uint64_t t = 0;