{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":279579416,"defaultBranch":"master","name":"GemmKernels.jl","ownerLogin":"JuliaGPU","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2020-07-14T12:30:23.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/7346142?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1727438967.0","currentOid":""},"activityList":{"items":[{"before":"0fe0fc039217676d9df55b8a7a392c54a4481af2","after":"07817814f1f08cc867c1c448f1649fce0c91c025","ref":"refs/heads/profile","pushedAt":"2024-09-27T12:28:17.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Fix","shortMessageHtmlLink":"Fix"}},{"before":null,"after":"0fe0fc039217676d9df55b8a7a392c54a4481af2","ref":"refs/heads/profile","pushedAt":"2024-09-27T12:09:27.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Profile","shortMessageHtmlLink":"Profile"}},{"before":"59aec4440e2aa5a6e78a5975d4ba9063c4d020a2","after":null,"ref":"refs/heads/compathelper/new_version/2024-05-08-00-29-36-045-03533719879","pushedAt":"2024-08-26T11:31:52.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"}},{"before":"51836133134c79b7909909e0781d09141eaea2f4","after":"8c894fd6a6739cc6405515f54f8454abcd2787f6","ref":"refs/heads/master","pushedAt":"2024-08-26T11:31:51.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"CompatHelper: bump compat for LLVM to 7, (keep existing compat) (#197)\n\nCo-authored-by: CompatHelper Julia \r\nCo-authored-by: Thomas Faingnaert ","shortMessageHtmlLink":"CompatHelper: bump compat for LLVM to 7, (keep existing compat) (#197)"}},{"before":"c22842bcfcd59bac160945ac129fc1f6ded413ed","after":"59aec4440e2aa5a6e78a5975d4ba9063c4d020a2","ref":"refs/heads/compathelper/new_version/2024-05-08-00-29-36-045-03533719879","pushedAt":"2024-08-26T11:31:36.000Z","pushType":"push","commitsCount":3,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Merge branch 'master' into compathelper/new_version/2024-05-08-00-29-36-045-03533719879","shortMessageHtmlLink":"Merge branch 'master' into compathelper/new_version/2024-05-08-00-29-…"}},{"before":"72344e6aad02049d02ad8672d0fe5ecdde2b754c","after":null,"ref":"refs/heads/compathelper/new_version/2024-08-23-00-39-43-639-03084061231","pushedAt":"2024-08-26T11:30:24.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"}},{"before":"da4f2bfc7a1712372cd95c04afec03ccff342e21","after":"51836133134c79b7909909e0781d09141eaea2f4","ref":"refs/heads/master","pushedAt":"2024-08-26T11:30:22.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"CompatHelper: bump compat for LLVM to 9, (keep existing compat) (#199)\n\nCo-authored-by: CompatHelper Julia ","shortMessageHtmlLink":"CompatHelper: bump compat for LLVM to 9, (keep existing compat) (#199)"}},{"before":null,"after":"72344e6aad02049d02ad8672d0fe5ecdde2b754c","ref":"refs/heads/compathelper/new_version/2024-08-23-00-39-43-639-03084061231","pushedAt":"2024-08-23T00:39:44.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"github-actions[bot]","name":null,"path":"/apps/github-actions","primaryAvatarUrl":"https://avatars.githubusercontent.com/in/15368?s=80&v=4"},"commit":{"message":"CompatHelper: bump compat for LLVM to 9, (keep existing compat)","shortMessageHtmlLink":"CompatHelper: bump compat for LLVM to 9, (keep existing compat)"}},{"before":"1bdddf582187fa7035655bf393cae1a0ef9120fe","after":"bf10d91fbba8100c61c1a5153f4ace70af61c03f","ref":"refs/heads/benchmark-results","pushedAt":"2024-06-28T15:23:56.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"maleadt","name":"Tim Besard","path":"/maleadt","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/383068?s=80&v=4"},"commit":{"message":"Results for da4f2bfc7a1712372cd95c04afec03ccff342e21.","shortMessageHtmlLink":"Results for da4f2bf."}},{"before":"59e109fbd683c79497a623d8f10ae13849b94bd6","after":null,"ref":"refs/heads/compathelper/new_version/2024-06-27-00-37-35-699-03029371406","pushedAt":"2024-06-28T13:59:02.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"maleadt","name":"Tim Besard","path":"/maleadt","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/383068?s=80&v=4"}},{"before":"d3be41cb9c3da5424c29fa7c356412604ef86a5b","after":"da4f2bfc7a1712372cd95c04afec03ccff342e21","ref":"refs/heads/master","pushedAt":"2024-06-28T13:59:00.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"maleadt","name":"Tim Besard","path":"/maleadt","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/383068?s=80&v=4"},"commit":{"message":"CompatHelper: bump compat for LLVM to 8, (keep existing compat) (#198)\n\nCo-authored-by: CompatHelper Julia ","shortMessageHtmlLink":"CompatHelper: bump compat for LLVM to 8, (keep existing compat) (#198)"}},{"before":null,"after":"59e109fbd683c79497a623d8f10ae13849b94bd6","ref":"refs/heads/compathelper/new_version/2024-06-27-00-37-35-699-03029371406","pushedAt":"2024-06-27T00:37:36.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"github-actions[bot]","name":null,"path":"/apps/github-actions","primaryAvatarUrl":"https://avatars.githubusercontent.com/in/15368?s=80&v=4"},"commit":{"message":"CompatHelper: bump compat for LLVM to 8, (keep existing compat)","shortMessageHtmlLink":"CompatHelper: bump compat for LLVM to 8, (keep existing compat)"}},{"before":null,"after":"c22842bcfcd59bac160945ac129fc1f6ded413ed","ref":"refs/heads/compathelper/new_version/2024-05-08-00-29-36-045-03533719879","pushedAt":"2024-05-08T00:29:37.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"github-actions[bot]","name":null,"path":"/apps/github-actions","primaryAvatarUrl":"https://avatars.githubusercontent.com/in/15368?s=80&v=4"},"commit":{"message":"CompatHelper: bump compat for LLVM to 7, (keep existing compat)","shortMessageHtmlLink":"CompatHelper: bump compat for LLVM to 7, (keep existing compat)"}},{"before":"0f8c25d2f3110e0cd02868a5b1508c48d820b172","after":"ff117b357c13ebe43aae8a9a6ff589c0ca9ea2e1","ref":"refs/heads/tf/new-pipelining-kernel","pushedAt":"2024-05-06T12:50:48.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Add new pipelined kernel\n\nAdd alternative pipelining kernel. Compared to the old pipelining\nkernel, the loads/stores are reordered somewhat, and shared memory\nis split in two stages.\n\nThis reduces the number of necessary bar.syncs to 1/3, but\nnecessitates halving the BLOCK_K tile size.","shortMessageHtmlLink":"Add new pipelined kernel"}},{"before":"61052b8fe00b38cfde7bd7ae3881399e4eff5a89","after":"0f8c25d2f3110e0cd02868a5b1508c48d820b172","ref":"refs/heads/tf/new-pipelining-kernel","pushedAt":"2024-05-06T12:43:56.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"TEMP","shortMessageHtmlLink":"TEMP"}},{"before":"f921fa997755b2ed1b28e35f814d94d841488634","after":null,"ref":"refs/heads/tf/cta-swizzle","pushedAt":"2024-05-06T12:41:35.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"}},{"before":"43399267fba813f79571a52aa5718292217734c7","after":"d3be41cb9c3da5424c29fa7c356412604ef86a5b","ref":"refs/heads/master","pushedAt":"2024-05-06T12:41:34.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Add support for CTA swizzling (#195)\n\nApply a swizzling function to the mapping between tiles of the D output\r\nmatrix and the CTA ID. The goal is to maximise the probability that CTAs\r\nthat access the same tile of A/B are scheduled on neighbouring SMs at\r\nthe same time, thereby increasing L2 hit rate.","shortMessageHtmlLink":"Add support for CTA swizzling (#195)"}},{"before":"7d8e3c855388205c54bf61db36110c4d3c60eb6c","after":null,"ref":"refs/heads/tf/zero-shared-c-layout","pushedAt":"2024-05-06T12:41:14.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"}},{"before":"c98fa10dab26afdb5dbe2441ceee173548c290e4","after":"43399267fba813f79571a52aa5718292217734c7","ref":"refs/heads/master","pushedAt":"2024-05-06T12:41:13.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Use a zero layout for C in shared memory if beta=0 (#194)\n\nWhile we already avoid a global load from C in case beta == 0, we still\r\nemit stores to shared memory and loads from shared memory for C.\r\n\r\nInstead, we should also use a zero layout for C in shared memory, which\r\neliminates these extra loads and stores.\r\n\r\nThis does not seem to influence the performance of GEMM, even for small\r\nmatrices, or highly rectangular GEMMs with small K, but it does make a\r\ndifference for some TCs, I've noticed, so let's do this, anyway.","shortMessageHtmlLink":"Use a zero layout for C in shared memory if beta=0 (#194)"}},{"before":"5f29724b7899d2e88c9e61b89049f23bd3bc4eba","after":null,"ref":"refs/heads/tf/fix-inlining-vstorea!","pushedAt":"2024-05-06T12:40:57.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"}},{"before":"26cbc57277781bf2a205fef6bb7470ddb8ad0549","after":"c98fa10dab26afdb5dbe2441ceee173548c290e4","ref":"refs/heads/master","pushedAt":"2024-05-06T12:40:56.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Fix vstorea! not being inlined (#193)\n\nIn some Tensor Contractions, calls to `vstorea!` were not being inlined.\r\nAdding `$(Expr(:meta, :inline))` to `vstorea!` fixes this. For good\r\nmeasure, we might as well add it to `vloada` as well.","shortMessageHtmlLink":"Fix vstorea! not being inlined (#193)"}},{"before":"df973e7b0533b5ae311befb2650aa6936b65a171","after":"f921fa997755b2ed1b28e35f814d94d841488634","ref":"refs/heads/tf/cta-swizzle","pushedAt":"2024-05-02T11:08:37.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Add support for CTA swizzling\n\nApply a swizzling function to the mapping between tiles of the D output\nmatrix and the CTA ID. The goal is to maximise the probability that CTAs\nthat access the same tile of A/B are scheduled on neighbouring SMs at\nthe same time, thereby increasing L2 hit rate.","shortMessageHtmlLink":"Add support for CTA swizzling"}},{"before":null,"after":"df973e7b0533b5ae311befb2650aa6936b65a171","ref":"refs/heads/tf/cta-swizzle","pushedAt":"2024-05-02T11:02:18.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Add support for CTA swizzling\n\nApply a swizzling function to the mapping between tiles of the D output\nmatrix and the CTA ID. The goal is to maximise the probability that CTAs\nthat access the same tile of A/B are scheduled on neighbouring SMs at\nthe same time, thereby increasing L2 hit rate.","shortMessageHtmlLink":"Add support for CTA swizzling"}},{"before":null,"after":"61052b8fe00b38cfde7bd7ae3881399e4eff5a89","ref":"refs/heads/tf/new-pipelining-kernel","pushedAt":"2024-04-26T17:17:26.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"WIP","shortMessageHtmlLink":"WIP"}},{"before":null,"after":"7d8e3c855388205c54bf61db36110c4d3c60eb6c","ref":"refs/heads/tf/zero-shared-c-layout","pushedAt":"2024-04-23T11:13:13.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Use a zero layout for C in shared memory if beta=0\n\nWhile we already avoid a global load from C in case beta == 0, we still\nemit stores to shared memory and loads from shared memory for C.\n\nInstead, we should also use a zero layout for C in shared memory, which\neliminates these extra loads and stores.\n\nThis does not seem to influence the performance of GEMM, even for small\nmatrices, or highly rectangular GEMMs with small K, but it does make a\ndifference for some TCs, I've noticed, so let's do this, anyway.","shortMessageHtmlLink":"Use a zero layout for C in shared memory if beta=0"}},{"before":null,"after":"5f29724b7899d2e88c9e61b89049f23bd3bc4eba","ref":"refs/heads/tf/fix-inlining-vstorea!","pushedAt":"2024-04-23T09:31:40.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Fix vstorea! not being inlined\n\nIn some Tensor Contractions, calls to `vstorea!` were not being inlined.\nAdding `$(Expr(:meta, :inline))` to `vstorea!` fixes this. For good\nmeasure, we might as well add it to `vloada` as well.","shortMessageHtmlLink":"Fix vstorea! not being inlined"}},{"before":"24dff3e0ee9615ca27afa594963ad94addc60f32","after":"1bdddf582187fa7035655bf393cae1a0ef9120fe","ref":"refs/heads/benchmark-results","pushedAt":"2024-04-18T08:33:11.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"maleadt","name":"Tim Besard","path":"/maleadt","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/383068?s=80&v=4"},"commit":{"message":"Results for 26cbc57277781bf2a205fef6bb7470ddb8ad0549.","shortMessageHtmlLink":"Results for 26cbc57."}},{"before":"e3b12f22df09e20c37ff61977fb5ac210e8596a1","after":"26cbc57277781bf2a205fef6bb7470ddb8ad0549","ref":"refs/heads/master","pushedAt":"2024-04-18T07:20:03.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"maleadt","name":"Tim Besard","path":"/maleadt","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/383068?s=80&v=4"},"commit":{"message":"Bump version.","shortMessageHtmlLink":"Bump version."}},{"before":"4afa16d9d408d2b7a6c2e362bafddcf80bbf9759","after":"7230c9fbb288a605a83072b55816603395f49d52","ref":"refs/heads/tf/2024-04-05-profile","pushedAt":"2024-04-06T13:11:47.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Profile all TCs","shortMessageHtmlLink":"Profile all TCs"}},{"before":"55ee2070b72ff3938f4b03c73bd4bac578959293","after":"4afa16d9d408d2b7a6c2e362bafddcf80bbf9759","ref":"refs/heads/tf/2024-04-05-profile","pushedAt":"2024-04-05T13:51:43.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"thomasfaingnaert","name":"Thomas Faingnaert","path":"/thomasfaingnaert","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10748726?s=80&v=4"},"commit":{"message":"Collect all metrics","shortMessageHtmlLink":"Collect all metrics"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"startCursor":"Y3Vyc29yOnYyOpK7MjAyNC0wOS0yN1QxMjoyODoxNy4wMDAwMDBazwAAAATClI07","endCursor":"Y3Vyc29yOnYyOpK7MjAyNC0wNC0wNVQxMzo1MTo0My4wMDAwMDBazwAAAAQo7V8O"}},"title":"Activity · JuliaGPU/GemmKernels.jl"}