Skip to content

Commit

Permalink
cpu: x64: fix postops handling in brgconv postops kernel
Browse files (browse the repository at this point in the history)
  • Loading branch information
kwiersch authored and tprimak committed Jan 9, 2023
1 parent 60f1727 commit d28f2c1
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 3 deletions.
5 changes: 2 additions & 3 deletions src/cpu/x64/jit_brgemm_conv.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2021-2022 Intel Corporation
* Copyright 2021-2023 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -379,8 +379,7 @@ status_t brgemm_convolution_fwd_t<isa, use_inversion>::add_po_kernel(
bcfg->LDD = (is_init && jcp.use_buffer) ? jcp.LDC : jcp.LDD;
bcfg->dt_c = (!is_init && jcp.use_buffer) ? jcp.acc_dt : jcp.dst_dt; // inp
bcfg->dt_d = (is_init && jcp.use_buffer) ? jcp.acc_dt : jcp.dst_dt; // out
bcfg->alpha = (!is_init)
&& (IMPLICATION(jcp.with_sum, jcp.use_buffer) || jcp.with_eltwise);
bcfg->alpha = is_init ? 0 : 1;
bcfg->beta = is_init ? 0 : 1;
CHECK(safe_ptr_assign(kernels_po_[ker_idx],
new jit_brgemm_kernel_post_ops(jcp, *bcfg, *_pd->attr())));
Expand Down
5 changes: 5 additions & 0 deletions src/cpu/x64/jit_brgemm_post_ops.hpp
Original file line number Diff line number Diff line change
@@ -611,6 +611,11 @@ struct jit_brgemm_kernel_post_ops : public jit_generator {
// if sum then have to init zmm each time
vpxord(vector(m, n), vector(m, n), vector(m, n));
}
} else if (!IMPLICATION(jcp.with_sum, jcp.use_buffer)) {
if (sum_idx != -1 && brg.beta != 0) {
// if sum without buffer then have to init vmm each time
uni_vpxor(vector(m, n), vector(m, n), vector(m, n));
}
} else {
auto inp_addr = ptr[aux_reg_in
+ inp_typesize_ * (m * brg.LDC + n * brg.ld_block)];
Expand Down

0 comments on commit d28f2c1

Please sign in to comment.