-
-
Notifications
You must be signed in to change notification settings - Fork 205
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #7 from CNugteren/development
Update to version 0.2.0
- Loading branch information
Showing
51 changed files
with
2,021 additions
and
272 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,18 @@ | ||
|
||
Version 0.2.0 | ||
- Added support for complex conjugate transpose | ||
- Several host-code performance improvements | ||
- Improved testing infrastructure and coverage | ||
- Added level-2 routines: | ||
* SGEMV/DGEMV/CGEMV/ZGEMV | ||
- Added level-3 routines: | ||
* CGEMM/ZGEMM | ||
* CSYMM/ZSYMM | ||
|
||
Version 0.1.0 | ||
- Initial preview version release to GitHub | ||
- Supported level-1 routines: | ||
SAXPY/DAXPY/CAXPY/ZAXPY | ||
* SAXPY/DAXPY/CAXPY/ZAXPY | ||
- Supported level-3 routines: | ||
SGEMM/DGEMM, SSYMM/DSYMM | ||
* SGEMM/DGEMM | ||
* SSYMM/DSYMM |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
|
||
// ================================================================================================= | ||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This | ||
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- | ||
// width of 100 characters per line. | ||
// | ||
// Author(s): | ||
// Cedric Nugteren <www.cedricnugteren.nl> | ||
// | ||
// This file populates the database with best-found tuning parameters for the Xgemv kernels. | ||
// | ||
// ================================================================================================= | ||
|
||
namespace clblast { | ||
// ================================================================================================= | ||
|
||
// Tuning parameters of the "Xgemv" kernels for Precision::kSingle. | ||
// Layout: kernel name, precision, then one group per (device type, vendor) pair. Each group | ||
// lists named devices, each with its own set of {parameter name, value} pairs. | ||
// How a device is matched against these strings is not visible in this file -- presumably by | ||
// its OpenCL vendor and device name; confirm in the Database class. | ||
// NOTE(review): WGSn/WPTn/VWn look like work-group size, work-per-thread and vector width of | ||
// three kernel variants -- confirm against the Xgemv kernel source. | ||
// Only the "Tesla K40m" and "Iris" entries differ from the values in the Default group. | ||
const Database::DatabaseEntry Database::XgemvSingle = { | ||
"Xgemv", Precision::kSingle, { | ||
{ // NVIDIA GPUs | ||
CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { | ||
{ "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
{ "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
{ "Tesla K40m", { {"WGS1",256}, {"WPT1",1}, {"WGS2",256}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",4} } }, | ||
} | ||
}, | ||
{ // AMD GPUs | ||
CL_DEVICE_TYPE_GPU, "AMD", { | ||
{ "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
} | ||
}, | ||
{ // Intel GPUs | ||
CL_DEVICE_TYPE_GPU, "Intel", { | ||
{ "Iris", { {"WGS1",256}, {"WPT1",2}, {"WGS2",64}, {"WPT2",4}, {"VW2",4}, {"WGS3",256}, {"WPT3",2}, {"VW3",8} } }, | ||
} | ||
}, | ||
{ // Default | ||
CL_DEVICE_TYPE_ALL, kDefault, { | ||
{ kDefault, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
} | ||
}, | ||
} | ||
}; | ||
|
||
// ================================================================================================= | ||
|
||
// Tuning parameters of the "Xgemv" kernels for Precision::kDouble. | ||
// Layout: kernel name, precision, then one group per (device type, vendor) pair. Each group | ||
// lists named devices, each with its own set of {parameter name, value} pairs. | ||
// Every device listed here carries the same values as the Default group. | ||
// NOTE(review): WGSn/WPTn/VWn look like work-group size, work-per-thread and vector width of | ||
// three kernel variants -- confirm against the Xgemv kernel source. | ||
const Database::DatabaseEntry Database::XgemvDouble = { | ||
"Xgemv", Precision::kDouble, { | ||
{ // NVIDIA GPUs | ||
CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { | ||
{ "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
{ "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
{ "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
} | ||
}, | ||
{ // AMD GPUs | ||
CL_DEVICE_TYPE_GPU, "AMD", { | ||
{ "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
} | ||
}, | ||
{ // Intel GPUs | ||
CL_DEVICE_TYPE_GPU, "Intel", { | ||
// No device entries for Intel GPUs at this precision. Presumably the Default group below | ||
// is used instead -- confirm against the database lookup code. | ||
} | ||
}, | ||
{ // Default | ||
CL_DEVICE_TYPE_ALL, kDefault, { | ||
{ kDefault, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
} | ||
}, | ||
} | ||
}; | ||
// ================================================================================================= | ||
|
||
// Tuning parameters of the "Xgemv" kernels for Precision::kComplexSingle. | ||
// Layout: kernel name, precision, then one group per (device type, vendor) pair. Each group | ||
// lists named devices, each with its own set of {parameter name, value} pairs. | ||
// Only the "Iris" entry differs from the values in the Default group. | ||
// NOTE(review): WGSn/WPTn/VWn look like work-group size, work-per-thread and vector width of | ||
// three kernel variants -- confirm against the Xgemv kernel source. | ||
const Database::DatabaseEntry Database::XgemvComplexSingle = { | ||
"Xgemv", Precision::kComplexSingle, { | ||
{ // NVIDIA GPUs | ||
CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { | ||
{ "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
{ "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
{ "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
} | ||
}, | ||
{ // AMD GPUs | ||
CL_DEVICE_TYPE_GPU, "AMD", { | ||
{ "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
} | ||
}, | ||
{ // Intel GPUs | ||
CL_DEVICE_TYPE_GPU, "Intel", { | ||
{ "Iris", { {"WGS1",256}, {"WPT1",1}, {"WGS2",64}, {"WPT2",4}, {"VW2",2}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
} | ||
}, | ||
{ // Default | ||
CL_DEVICE_TYPE_ALL, kDefault, { | ||
{ kDefault, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
} | ||
}, | ||
} | ||
}; | ||
|
||
// ================================================================================================= | ||
|
||
// Tuning parameters of the "Xgemv" kernels for Precision::kComplexDouble. | ||
// Layout: kernel name, precision, then one group per (device type, vendor) pair. Each group | ||
// lists named devices, each with its own set of {parameter name, value} pairs. | ||
// Every device listed here carries the same values as the Default group. | ||
// NOTE(review): WGSn/WPTn/VWn look like work-group size, work-per-thread and vector width of | ||
// three kernel variants -- confirm against the Xgemv kernel source. | ||
const Database::DatabaseEntry Database::XgemvComplexDouble = { | ||
"Xgemv", Precision::kComplexDouble, { | ||
{ // NVIDIA GPUs | ||
CL_DEVICE_TYPE_GPU, "NVIDIA Corporation", { | ||
{ "GeForce GTX 480", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
{ "Tesla K20m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
{ "Tesla K40m", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
} | ||
}, | ||
{ // AMD GPUs | ||
CL_DEVICE_TYPE_GPU, "AMD", { | ||
{ "Tahiti", { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
} | ||
}, | ||
{ // Intel GPUs | ||
CL_DEVICE_TYPE_GPU, "Intel", { | ||
// No device entries for Intel GPUs at this precision. Presumably the Default group below | ||
// is used instead -- confirm against the database lookup code. | ||
} | ||
}, | ||
{ // Default | ||
CL_DEVICE_TYPE_ALL, kDefault, { | ||
{ kDefault, { {"WGS1",64}, {"WPT1",1}, {"WGS2",64}, {"WPT2",1}, {"VW2",1}, {"WGS3",64}, {"WPT3",1}, {"VW3",1} } }, | ||
} | ||
}, | ||
} | ||
}; | ||
|
||
// ================================================================================================= | ||
} // namespace clblast |
Oops, something went wrong.