Skip to content

Commit

Permalink
Merge branch 'amd-develop' into amd-master
Browse files Browse the repository at this point in the history
Change-Id: I81429e5f3f55a71498da6cece9d08a8b1c170057
  • Loading branch information
mangupta committed Jan 6, 2017
2 parents 8f31ad6 + 4fd4808 commit 9199f95
Show file tree
Hide file tree
Showing 10 changed files with 1,309 additions and 754 deletions.
9 changes: 7 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ if(HIP_PLATFORM STREQUAL "hcc")
add_to_config(_buildInfo HCC_VERSION)
string(REPLACE "-" ";" HCC_VERSION_LIST ${HCC_VERSION})
list(GET HCC_VERSION_LIST 0 HCC_PACKAGE_VERSION)
string(REPLACE "." ";" HCC_VERSION_LIST ${HCC_PACKAGE_VERSION})
list(GET HCC_VERSION_LIST 0 HCC_VERSION_MAJOR)
list(GET HCC_VERSION_LIST 1 HCC_VERSION_MINOR)

# Determine HSA_PATH
if(NOT DEFINED HSA_PATH)
Expand Down Expand Up @@ -180,9 +183,11 @@ if(HIP_PLATFORM STREQUAL "hcc")
src/hip_fp16.cpp
src/device_functions.cpp)

set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${HCC_HOME}/lib -lmcwamp -Wl,-Bsymbolic")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${HCC_HOME}/lib -lmcwamp -Wl,-Bsymbolic -Wl,-rpath ${HCC_HOME}/lib")
add_library(hip_hcc SHARED ${SOURCE_FILES_RUNTIME})
target_link_libraries(hip_hcc c++ c++abi hc_am)
add_library(hip_hcc_static STATIC ${SOURCE_FILES_RUNTIME})
target_link_libraries(hip_hcc_static c++ c++abi hc_am)
add_dependencies(hip_hcc_static hip_hcc)
add_library(hip_device STATIC ${SOURCE_FILES_DEVICE})
add_dependencies(hip_device hip_hcc)
Expand Down Expand Up @@ -269,7 +274,7 @@ add_custom_target(pkg_hip_hcc COMMAND ${CMAKE_COMMAND} .
COMMAND cp *.rpm ${PROJECT_BINARY_DIR}
COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR}
WORKING_DIRECTORY ${BUILD_DIR}
DEPENDS hip_hcc)
DEPENDS hip_hcc hip_device hip_hcc_static)

# Package: hip_nvcc
set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip_nvcc)
Expand Down
6 changes: 3 additions & 3 deletions bin/hipcc
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ my $needHipHcc = ($HIP_PLATFORM eq 'hcc'); # set if we need to link hip_hcc
my $printHipVersion = 0; # print HIP version
my $runCmd = 1;
my $buildDeps = 0;
my $linkType = 0;
my $linkType = 1;
my $setLinkType = 0;

my @options = ();
Expand Down Expand Up @@ -339,9 +339,9 @@ if ($setStdLib eq 0 and $HIP_PLATFORM eq 'hcc')

if ($needHipHcc) {
if ($linkType eq 0) {
substr($HIPLDFLAGS,0,0) = " -L$HIP_PATH/lib -lhip_hcc_static -lhip_device " ;
substr($HIPLDFLAGS,0,0) = " $HIP_PATH/lib/libhip_hcc_static.a $HIP_PATH/lib/libhip_device.a " ;
} else {
substr($HIPLDFLAGS,0,0) = " -L$HIP_PATH/lib -Wl,--rpath=$HIP_PATH/lib -lhip_hcc -lhip_device ";
substr($HIPLDFLAGS,0,0) = " -Wl,--rpath=$HIP_PATH/lib $HIP_PATH/lib/libhip_hcc.so $HIP_PATH/lib/libhip_device.a ";
}
}

Expand Down
24 changes: 24 additions & 0 deletions bin/hipconvertinplace2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash

#usage : hipconvertinplace.sh DIRNAME [hipify options] [--] [clang options]

#hipify "inplace" all code files in specified directory.
# This can be quite handy when dealing with an existing CUDA code base since the script
# preserves the existing directory structure.

SCRIPT_DIR=`dirname $0`
SEARCH_DIR=$1

hipify_args=''
while (( "$#" )); do
shift
if [ "$1" != "--" ]; then
hipify_args="$hipify_args $1"
else
shift
break
fi
done
clang_args="$@"

$SCRIPT_DIR/hipify-clang -inplace -print-stats $hipify_args `$SCRIPT_DIR/findcode.sh $SEARCH_DIR` -- -x cuda $clang_args
22 changes: 22 additions & 0 deletions bin/hipexamine2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash

#usage : hipexamine2.sh DIRNAME [hipify options] [--] [clang options]

# Generate CUDA->HIP conversion statistics for all the code files in the specified directory.

SCRIPT_DIR=`dirname $0`
SEARCH_DIR=$1

hipify_args=''
while (( "$#" )); do
shift
if [ "$1" != "--" ]; then
hipify_args="$hipify_args $1"
else
shift
break
fi
done
clang_args="$@"

$SCRIPT_DIR/hipify-clang -examine $hipify_args `$SCRIPT_DIR/findcode.sh $SEARCH_DIR` -- -x cuda $clang_args
1,922 changes: 1,205 additions & 717 deletions hipify-clang/src/Cuda2Hip.cpp

Large diffs are not rendered by default.

5 changes: 0 additions & 5 deletions include/hip/nvcc_detail/hip_runtime_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -770,11 +770,6 @@ inline static hipError_t hipDeviceGetName(char *name,int len,hipDevice_t device)
return hipCUResultTohipError(cuDeviceGetName(name,len,device));
}

inline static hipError_t hipDeviceGetPCIBusId(char* pciBusId,int len,int device)
{
return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId,len,device));
}

inline static hipError_t hipDeviceGetPCIBusId(char* pciBusId,int len,hipDevice_t device)
{
return hipCUResultTohipError(cuDeviceGetPCIBusId(pciBusId,len,device));
Expand Down
21 changes: 16 additions & 5 deletions packaging/hip_hcc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,15 @@ install(FILES @hip_SOURCE_DIR@/src/hip_ir.ll DESTINATION lib)
#############################
set(CPACK_SET_DESTDIR TRUE)
set(CPACK_INSTALL_PREFIX "/opt/rocm/hip")
set(CPACK_PACKAGE_NAME "hip_hcc")
if(@HCC_VERSION_MAJOR@ EQUAL 0)
set(CPACK_PACKAGE_NAME "hip_hcc")
set(HCC_PACKAGE_NAME "hcc_lc")
set(HIP_PACKAGE_CONFLICTS "hip_hcc_exp")
else()
set(CPACK_PACKAGE_NAME "hip_hcc_exp")
set(HCC_PACKAGE_NAME "hcc")
set(HIP_PACKAGE_CONFLICTS "hip_hcc")
endif()
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "HIP: Heterogenous-computing Interface for Portability [HCC]")
set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.")
set(CPACK_PACKAGE_CONTACT "Maneesh Gupta <maneesh.gupta@amd.com>")
Expand All @@ -25,19 +33,22 @@ set(CPACK_GENERATOR "TGZ;DEB;RPM")
set(CPACK_BINARY_DEB "ON")
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm")
if(@COMPILE_HIP_ATP_MARKER@)
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION}), hcc_lc (= @HCC_PACKAGE_VERSION@), rocm-profiler")
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION}), ${HCC_PACKAGE_NAME} (= @HCC_PACKAGE_VERSION@), rocm-profiler")
else()
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION}), hcc_lc (= @HCC_PACKAGE_VERSION@)")
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION}), ${HCC_PACKAGE_NAME} (= @HCC_PACKAGE_VERSION@)")
endif()
set(CPACK_DEBIAN_PACKAGE_CONFLICTS ${HIP_PACKAGE_CONFLICTS})
set(CPACK_DEBIAN_PACKAGE_REPLACES ${HIP_PACKAGE_CONFLICTS})
set(CPACK_BINARY_RPM "ON")
set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64")
set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst")
set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm")
set(CPACK_RPM_PACKAGE_AUTOREQPROV " no")
set(CPACK_RPM_PACKAGE_REQUIRES_PREUN ${HIP_PACKAGE_CONFLICTS})
if(@COMPILE_HIP_ATP_MARKER@)
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, hcc_lc = @HCC_PACKAGE_VERSION@, rocm-profiler")
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, ${HCC_PACKAGE_NAME} = @HCC_PACKAGE_VERSION@, rocm-profiler")
else()
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, hcc_lc = @HCC_PACKAGE_VERSION@")
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, ${HCC_PACKAGE_NAME} = @HCC_PACKAGE_VERSION@")
endif()
set(CPACK_SOURCE_GENERATOR "TGZ")
include(CPack)
18 changes: 13 additions & 5 deletions src/hip_hcc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ THE SOFTWARE.
#define USE_COPY_EXT_V2 1
#endif

#ifndef USE_ROCR_1_4
#define USE_ROCR_1_4 1
#endif

//=================================================================================================
//Global variables:
//=================================================================================================
Expand Down Expand Up @@ -123,7 +127,7 @@ std::vector<ProfTrigger> g_dbStopTriggers;
thread_local hipError_t tls_lastHipError = hipSuccess;


thread_local ShortTid tls_shortTid;
thread_local TidInfo tls_tidInfo;



Expand All @@ -133,8 +137,8 @@ thread_local ShortTid tls_shortTid;
//=================================================================================================
void recordApiTrace(std::string *fullStr, const std::string &apiStr)
{
auto apiSeqNum = tls_shortTid.incApiSeqNum();
auto tid = tls_shortTid.tid();
auto apiSeqNum = tls_tidInfo.apiSeqNum();
auto tid = tls_tidInfo.tid();

if ((tid < g_dbStartTriggers.size()) && (apiSeqNum >= g_dbStartTriggers[tid].nextTrigger())) {
printf ("info: resume profiling at %lu\n", apiSeqNum);
Expand Down Expand Up @@ -214,7 +218,7 @@ hipError_t ihipSynchronize(void)
//=================================================================================================
// ihipStream_t:
//=================================================================================================
ShortTid::ShortTid() :
TidInfo::TidInfo() :
_apiSeqNum(0)
{
_shortTid = g_lastShortTid.fetch_add(1);
Expand Down Expand Up @@ -733,7 +737,11 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop)

// Get Max Threads Per Multiprocessor
uint32_t max_waves_per_cu;
#if USE_ROCR_1_4
err = hsa_agent_get_info(_hsaAgent,(hsa_agent_info_t) HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU, &max_waves_per_cu);
#else
max_waves_per_cu = 10;
#endif
DeviceErrorCheck(err);
prop-> maxThreadsPerMultiProcessor = prop->warpSize*max_waves_per_cu;

Expand Down Expand Up @@ -1373,7 +1381,7 @@ void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, c
std::stringstream os_pre;
std::stringstream os;
os_pre << "<<hip-api tid:";
os << tls_shortTid.tid() << "." << tls_shortTid.incApiSeqNum()
os << tls_tidInfo.tid() << "." << tls_tidInfo.apiSeqNum()
<< " hipLaunchKernel '" << kernelName << "'"
<< " gridDim:" << lp->grid_dim
<< " groupDim:" << lp->group_dim
Expand Down
16 changes: 9 additions & 7 deletions src/hip_hcc.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ extern int HIP_DISABLE_HW_KERNEL_DEP;


// Class to assign a short TID to each new thread, for HIP debugging purposes.
class ShortTid {
class TidInfo {
public:

ShortTid() ;
TidInfo() ;

int tid() const { return _shortTid; };
uint64_t incApiSeqNum() { return ++_apiSeqNum; };
Expand Down Expand Up @@ -106,7 +106,7 @@ struct ProfTrigger {
//---
//Extern tls
extern thread_local hipError_t tls_lastHipError;
extern thread_local ShortTid tls_shortTid;
extern thread_local TidInfo tls_tidInfo;

extern std::vector<ProfTrigger> g_dbStartTriggers;
extern std::vector<ProfTrigger> g_dbStopTriggers;
Expand Down Expand Up @@ -162,7 +162,7 @@ extern const char *API_COLOR_END;


// Compile support for trace markers that are displayed on CodeXL GUI at start/stop of each function boundary.
// TODO - currently we print the trace message at the beginning. if we waited, we could also include return codes, and any values returned
// TODO - currently we print the trace message at the beginning. if we waited, we could also tls_tidInfo return codes, and any values returned
// through ptr-to-args (ie the pointers allocated by hipMalloc).
#if COMPILE_HIP_ATP_MARKER
#include "CXLActivityLogger.h"
Expand All @@ -184,6 +184,7 @@ extern void recordApiTrace(std::string *fullStr, const std::string &apiStr);
#if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1)
#define API_TRACE(...)\
{\
tls_tidInfo.incApiSeqNum();\
if (HIP_PROFILE_API || (COMPILE_HIP_DB && HIP_TRACE_API)) {\
std::string apiStr = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')';\
std::string fullStr;\
Expand All @@ -194,7 +195,8 @@ extern void recordApiTrace(std::string *fullStr, const std::string &apiStr);
}
#else
// Swallow API_TRACE
#define API_TRACE(...)
#define API_TRACE(...)\
tls_tidInfo.incApiSeqNum();
#endif


Expand All @@ -217,7 +219,7 @@ extern void recordApiTrace(std::string *fullStr, const std::string &apiStr);
tls_lastHipError = localHipStatus;\
\
if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API) {\
fprintf(stderr, " %ship-api tid:%d.%lu %-30s ret=%2d (%s)>>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, tls_shortTid.tid(),tls_shortTid.apiSeqNum(), __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\
fprintf(stderr, " %ship-api tid:%d.%lu %-30s ret=%2d (%s)>>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, tls_tidInfo.tid(),tls_tidInfo.apiSeqNum(), __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\
}\
if (HIP_PROFILE_API) { MARKER_END(); }\
localHipStatus;\
Expand Down Expand Up @@ -258,7 +260,7 @@ static const DbName dbName [] =
if (HIP_DB & (1<<(trace_level))) {\
char msgStr[1000];\
snprintf(msgStr, 2000, __VA_ARGS__);\
fprintf (stderr, " %ship-%s tid:%d:%s%s", dbName[trace_level]._color, dbName[trace_level]._shortName, tls_shortTid.tid(), msgStr, KNRM); \
fprintf (stderr, " %ship-%s tid:%d:%s%s", dbName[trace_level]._color, dbName[trace_level]._shortName, tls_tidInfo.tid(), msgStr, KNRM); \
}\
}
#else
Expand Down
20 changes: 10 additions & 10 deletions src/hip_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -765,10 +765,11 @@ hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset,

// TODO - make member function of stream?
template <typename T>
hc::completion_future
void
ihipMemsetKernel(hipStream_t stream,
LockedAccessor_StreamCrit_t &crit,
T * ptr, T val, size_t sizeBytes)
T * ptr, T val, size_t sizeBytes,
hc::completion_future *cf)
{
int wg = std::min((unsigned)8, stream->getDevice()->_computeUnits);
const int threads_per_wg = 256;
Expand All @@ -782,7 +783,7 @@ ihipMemsetKernel(hipStream_t stream,
hc::extent<1> ext(threads);
auto ext_tile = ext.tile(threads_per_wg);

hc::completion_future cf =
*cf =
hc::parallel_for_each(
crit->_av,
ext_tile,
Expand All @@ -798,7 +799,6 @@ ihipMemsetKernel(hipStream_t stream,
}
});

return cf;
}

// TODO-sync: function is async unless target is pinned host memory - then these are fully sync.
Expand All @@ -819,16 +819,16 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t s
// use a faster dword-per-workitem copy:
try {
value = value & 0xff;
unsigned value32 = (value << 24) | (value << 16) | (value << 8) | (value) ;
cf = ihipMemsetKernel<unsigned> (stream, crit, static_cast<unsigned*> (dst), value32, sizeBytes/sizeof(unsigned));
uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value) ;
ihipMemsetKernel<uint32_t> (stream, crit, static_cast<uint32_t*> (dst), value32, sizeBytes/sizeof(uint32_t), &cf);
}
catch (std::exception &ex) {
e = hipErrorInvalidValue;
}
} else {
// use a slow byte-per-workitem copy:
try {
cf = ihipMemsetKernel<char> (stream, crit, static_cast<char*> (dst), value, sizeBytes);
ihipMemsetKernel<char> (stream, crit, static_cast<char*> (dst), value, sizeBytes, &cf);
}
catch (std::exception &ex) {
e = hipErrorInvalidValue;
Expand Down Expand Up @@ -870,16 +870,16 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes )
// use a faster dword-per-workitem copy:
try {
value = value & 0xff;
unsigned value32 = (value << 24) | (value << 16) | (value << 8) | (value) ;
cf = ihipMemsetKernel<unsigned> (stream, crit, static_cast<unsigned*> (dst), value32, sizeBytes/sizeof(unsigned));
uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value) ;
ihipMemsetKernel<uint32_t> (stream, crit, static_cast<uint32_t*> (dst), value32, sizeBytes/sizeof(uint32_t), &cf);
}
catch (std::exception &ex) {
e = hipErrorInvalidValue;
}
} else {
// use a slow byte-per-workitem copy:
try {
cf = ihipMemsetKernel<char> (stream, crit, static_cast<char*> (dst), value, sizeBytes);
ihipMemsetKernel<char> (stream, crit, static_cast<char*> (dst), value, sizeBytes, &cf);
}
catch (std::exception &ex) {
e = hipErrorInvalidValue;
Expand Down

0 comments on commit 9199f95

Please sign in to comment.