Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update the SyncDB tests to use multi-threading #2407

Merged
merged 39 commits into from
Nov 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
3ad9b7e
update test to take advantage of multi-threading
JehandadKhan Sep 21, 2023
3712e2e
remove unused header, disabled the test by default
JehandadKhan Sep 21, 2023
4869f3e
addressed review comments
JehandadKhan Sep 25, 2023
e662784
tidy up raw loop
JehandadKhan Oct 2, 2023
dfeb1ea
enable static db check and fix copy bug
JehandadKhan Oct 13, 2023
c03ecb1
Merge branch 'develop' into jd/db_sync_speed
JehandadKhan Oct 13, 2023
7041447
add test suite for all CU variations
JehandadKhan Oct 13, 2023
0480e07
override CU count in TestHandle
JehandadKhan Oct 18, 2023
5dcda99
Merge branch 'develop' into jd/db_sync_speed
junliume Oct 18, 2023
e9d367a
corrections for db_sync weight tensor initialization
cderb Oct 19, 2023
8da7d7a
clang-format-12
cderb Oct 20, 2023
cff9ec2
Merge branch 'develop' into jd/db_sync_speed
cderb Oct 24, 2023
2f1e3a0
update fin build stage
cderb Oct 24, 2023
aaeade3
tidy
cderb Oct 25, 2023
bde1bfd
perf tuning patch
cderb Oct 26, 2023
a5f54a1
mi100 kdb patch
cderb Oct 26, 2023
ca135f4
mi200 kdb patch
cderb Oct 26, 2023
811e899
Merge remote-tracking branch 'origin/develop' into jd/db_sync_speed
cderb Oct 26, 2023
203b764
Merge branch 'jd/db_sync_speed' of https://github.com/ROCmSoftwarePla…
cderb Oct 26, 2023
58b355b
CI update
cderb Oct 26, 2023
bdbb70a
fix args
cderb Oct 26, 2023
3deb1d6
add lfs pull for kdb test
cderb Oct 26, 2023
4a04cfa
add lfs to checkout behavior
cderb Oct 27, 2023
8b83f49
lfs
cderb Oct 27, 2023
90243b8
lfs
cderb Oct 28, 2023
7fd7519
archive ukdb
cderb Oct 30, 2023
9da6369
archive ukdb
cderb Oct 30, 2023
fcef6a3
archive kdb
cderb Oct 30, 2023
ebce26b
cache dir
cderb Oct 30, 2023
c126749
kdb cache
cderb Oct 30, 2023
79a8095
flags
cderb Oct 31, 2023
45bddf9
gfx90a68 kdb update
cderb Nov 1, 2023
5759f32
cleanup
cderb Nov 1, 2023
ce4ec3d
Merge branch 'cderb/db_sync_ci' of https://github.com/ROCmSoftwarePla…
cderb Nov 1, 2023
4d40a5b
disable test_db_sync by default
cderb Nov 1, 2023
bece737
no reboot for clang format
cderb Nov 1, 2023
7f507da
fix test skipping
cderb Nov 2, 2023
79c060a
Merge branch 'develop' into jd/db_sync_speed
cderb Nov 3, 2023
c654780
Merge branch 'develop' into jd/db_sync_speed
cderb Nov 6, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
doxygen \
gdb \
git \
git-lfs \
lbzip2 \
lcov \
libncurses5-dev \
Expand Down
74 changes: 53 additions & 21 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ def buildHipClangJob(Map conf=[:]){

def codecov = conf.get("codecov", false)
def needs_gpu = conf.get("needs_gpu", true)
def lfs_pull = conf.get("lfs_pull", false)

def retimage
gitStatusWrapper(credentialsId: "${env.status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCmSoftwarePlatform', repo: 'MIOpen') {
Expand Down Expand Up @@ -284,6 +285,10 @@ def buildHipClangJob(Map conf=[:]){
withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
timeout(time: 150, unit:'MINUTES')
{
if (lfs_pull) {
sh "git lfs pull --exclude="
}

cmake_build(conf)

if (codecov) {
Expand Down Expand Up @@ -586,27 +591,7 @@ pipeline {
| xargs -n 1 -P 1 -I{} -t sh -c \'clang-format-12 -style=file {} | diff - {}\'"
}
steps{
buildHipClangJobAndReboot(setup_cmd: "", build_cmd: "", execute_cmd: execute_cmd, needs_gpu:false)
}
}
stage('Tuna Fin Build Test') {
agent{ label rocmnode("nogpu") }
environment{
setup_cmd = "CXX='/opt/rocm/llvm/bin/clang++' cmake -DCMAKE_PREFIX_PATH=/opt/rocm -DCMAKE_BUILD_TYPE=DEBUG -DMIOPEN_BACKEND=HIPNOGPU -DBUILD_SHARED_LIBS=Off -DMIOPEN_INSTALL_CXX_HEADERS=On .. "
build_cmd = "make -j\$(nproc) "
}
steps{
buildHipClangJobAndReboot(build_fin: "ON", needs_gpu:false, needs_reboot:false, build_install: "true")
}
}
stage('Perf DB Validity Test') {
agent{ label rocmnode("nogpu") }
environment{
fin_flags = "-DMIOPEN_BACKEND=HIPNOGPU" //-DCMAKE_BUILD_TYPE=DEBUG -DBUILD_SHARED_LIBS=Off -DMIOPEN_INSTALL_CXX_HEADERS=On"

}
steps{
CheckPerfDbValid(setup_flags: fin_flags, config_targets: "all", build_fin: "ON", needs_gpu:false, needs_reboot:false, build_install: "true")
buildHipClangJobAndReboot(setup_cmd: "", build_cmd: "", execute_cmd: execute_cmd, needs_gpu:false, needs_reboot:false)
}
}
stage('HipNoGPU Debug Build Test') {
Expand All @@ -623,6 +608,15 @@ pipeline {
buildHipClangJob( build_type: 'debug', setup_flags: HipNoGPU_flags, build_cmd: build_cmd, needs_gpu:false, needs_reboot:false)
}
}
stage('Tuna Fin Build Test') {
agent{ label rocmnode("nogpu") }
environment{
fin_flags = "-DMIOPEN_BACKEND=HIPNOGPU"
}
steps{
buildHipClangJobAndReboot(setup_flags: fin_flags, config_targets: "all", build_fin: "ON", needs_gpu:false, needs_reboot:false, build_install: "true")
}
}
}
}
stage("Smoke Fp32") {
Expand Down Expand Up @@ -918,6 +912,44 @@ pipeline {
Navi21_build_cmd = "LLVM_PATH=/opt/rocm/llvm CTEST_PARALLEL_LEVEL=2 MIOPEN_CONV_PRECISE_ROCBLAS_TIMING=0 MIOPEN_LOG_LEVEL=5 make -j\$(nproc) check"
}
parallel{
stage('dbsync gfx908') {
when {
beforeAgent true
expression { params.TARGET_GFX908 }
}
options {
retry(2)
}
agent{ label rocmnode("gfx908") }
environment{
setup_flags="-DMIOPEN_TEST_DBSYNC=1"
config_targets='test_db_sync'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JehandadKhan What is the purpose of things like

config_targets='test_db_sync'

and then

buildHipClangJobAndReboot(... config_targets: config_targets ...)

What if we simply write

buildHipClangJobAndReboot(... config_targets: 'test_db_sync' ...)

?

execute_cmd='./bin/test_db_sync'
Comment on lines +926 to +927
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[formatting] Tabs -> spaces

Comment on lines +925 to +927
Copy link
Contributor

@atamazov atamazov Nov 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Recommendation] @JehandadKhan These variables are used only once, and trivially, like setup_flags: setup_flags. So maybe it is better to remove them and write simply setup_flags: "-DMIOPEN_TEST_DBSYNC=1"?

}
steps{
buildHipClangJobAndReboot(lfs_pull: true, setup_flags: setup_flags, config_targets: config_targets, execute_cmd: execute_cmd,
needs_gpu:false, needs_reboot:false, build_install: "true")
Comment on lines +930 to +931
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Recommendation] I would reformat this like

                        buildHipClangJobAndReboot(lfs_pull: true,
                                                  setup_flags: setup_flags,
                                                  config_targets: config_targets,
                                                  execute_cmd: execute_cmd,
                                                  needs_gpu:false,
                                                  needs_reboot:false,
                                                  build_install: "true")

}
}
stage('dbsync gfx90a') {
when {
beforeAgent true
expression { params.TARGET_GFX90A }
}
options {
retry(2)
}
agent{ label rocmnode("gfx90a") }
environment{
setup_flags="-DMIOPEN_TEST_DBSYNC=1"
config_targets='test_db_sync'
execute_cmd='./bin/test_db_sync'
Comment on lines +945 to +946
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[formatting] Tabs -> spaces

}
steps{
buildHipClangJobAndReboot(lfs_pull: true, setup_flags: setup_flags, config_targets: config_targets, execute_cmd: execute_cmd,
needs_gpu:false, needs_reboot:false, build_install: "true")
Comment on lines +949 to +950
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto

}
}
stage('Int8 HIP All Vega20') {
when {
beforeAgent true
Expand Down
2 changes: 2 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -906,7 +906,9 @@ if(NOT MIOPEN_EMBED_DB STREQUAL "")
else()
file(GLOB FIND_DB_FILES kernels/*.fdb.txt)
file(GLOB PERF_DB_FILES kernels/*.db)
file(GLOB KERN_DB_FILES kernels/*.kdb)
list(APPEND FIND_DB_FILES ${PERF_DB_FILES})
list(APPEND FIND_DB_FILES ${KERN_DB_FILES})
if(NOT MIOPEN_DISABLE_SYSDB)
if( NOT ENABLE_ASAN_PACKAGING )
install(FILES
Expand Down
4 changes: 2 additions & 2 deletions src/kernels/gfx908.kdb.bz2
Git LFS file not shown
Binary file modified src/kernels/gfx90878.HIP.fdb.txt.bz2
Binary file not shown.
Binary file modified src/kernels/gfx90878.db.bz2
Binary file not shown.
4 changes: 2 additions & 2 deletions src/kernels/gfx90a.kdb.bz2
Git LFS file not shown
Binary file modified src/kernels/gfx90a68.HIP.fdb.txt.bz2
Binary file not shown.
Binary file modified src/kernels/gfx90a68.db.bz2
Binary file not shown.
Binary file modified src/kernels/gfx90a6e.HIP.fdb.txt.bz2
Binary file not shown.
Binary file modified src/kernels/gfx90a6e.db.bz2
Binary file not shown.
5 changes: 5 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,11 @@ if(MIOPEN_TEST_GFX900 OR MIOPEN_TEST_GFX906 OR MIOPEN_TEST_GFX908)
list(APPEND SKIP_TESTS test_bn_3d_spatial_test)
endif()

#Don't run db verification by default
if(NOT MIOPEN_TEST_DBSYNC)
list(APPEND SKIP_TESTS db_sync)
endif()

# The usage is non-trivial, see function add_test_command.
if(SKIP_TESTS)
list(REMOVE_DUPLICATES SKIP_TESTS)
Expand Down
Loading