From 05e9ab9504dfd2063b5fe8e3d0c6a38790f1f619 Mon Sep 17 00:00:00 2001
From: zifeihan
Date: Fri, 15 Mar 2024 21:32:09 +0800
Subject: [PATCH] C2_MacroAssembler::arrays_equals

---
 .../cpu/riscv/c2_MacroAssembler_riscv.cpp     | 147 +++++++++++-------
 .../cpu/riscv/c2_MacroAssembler_riscv.hpp     |   7 +-
 src/hotspot/cpu/riscv/riscv.ad                |  33 ++---
 3 files changed, 110 insertions(+), 77 deletions(-)

diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
index 89d69102d47bd..6c4c3ec78b936 100644
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
@@ -1536,74 +1536,121 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
   BLOCK_COMMENT("} string_compare");
 }
 
-void C2_MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
-                                      Register tmp4, Register tmp5, Register tmp6, Register result,
-                                      Register cnt1, int elem_size) {
-  Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR;
-  Register tmp1 = t0;
-  Register tmp2 = t1;
-  Register cnt2 = tmp2; // cnt2 only used in array length compare
-  Register elem_per_word = tmp6;
+// Compare two byte[] (elem_size == 1) or char[] (elem_size == 2) arrays for
+// equality and set 'result' to true or false accordingly.
+//
+//   a1, a2           - the arrays to compare; either may be null. Both
+//                      registers are overwritten.
+//   tmp1, tmp2, tmp3 - scratch registers (tmp3 holds the element count).
+//   result           - receives true if the arrays are equal, else false.
+//   elem_size        - element size in bytes, must be 1 or 2.
+//
+// All registers must be distinct from each other and from t0.
+void C2_MacroAssembler::arrays_equals(Register a1, Register a2,
+                                      Register tmp1, Register tmp2, Register tmp3,
+                                      Register result, int elem_size) {
+  assert(elem_size == 1 || elem_size == 2, "must be char or byte");
+  assert_different_registers(a1, a2, result, tmp1, tmp2, tmp3, t0);
+
+  int elem_per_word = wordSize/elem_size;
   int log_elem_size = exact_log2(elem_size);
   int length_offset = arrayOopDesc::length_offset_in_bytes();
   int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
 
-  assert(elem_size == 1 || elem_size == 2, "must be char or byte");
-  assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6);
-  mv(elem_per_word, wordSize / elem_size);
+  Register cnt1 = tmp3;
+  Register cnt2 = tmp1; // cnt2 only used in array length compare
+  Label DONE, SAME;
+  Label NEXT_WORD, SHORT, TAIL03, TAIL01, A_MIGHT_BE_NULL, A_IS_NOT_NULL;
 
   BLOCK_COMMENT("arrays_equals {");
 
   // if (a1 == a2), return true
   beq(a1, a2, SAME);
 
+  // if (a1 == nullptr || a2 == nullptr)
+  //     return false;
+  // a1 & a2 == 0 means (some-pointer is null) or
+  // (very-rare-or-even-probably-impossible-pointer-values)
+  // so, we can save one branch in most cases
   mv(result, false);
-  beqz(a1, DONE);
-  beqz(a2, DONE);
+  andr(tmp1, a1, a2);
+  beqz(tmp1, A_MIGHT_BE_NULL);
+
+  // if (a1.length != a2.length)
+  //     return false;
+  bind(A_IS_NOT_NULL);
   lwu(cnt1, Address(a1, length_offset));
   lwu(cnt2, Address(a2, length_offset));
-  bne(cnt2, cnt1, DONE);
-  beqz(cnt1, SAME);
-
-  slli(tmp5, cnt1, 3 + log_elem_size);
-  sub(tmp5, zr, tmp5);
-  add(a1, a1, base_offset);
-  add(a2, a2, base_offset);
-  ld(tmp3, Address(a1, 0));
-  ld(tmp4, Address(a2, 0));
-  ble(cnt1, elem_per_word, SHORT); // short or same
-
-  // Main 16 byte comparison loop with 2 exits
-  bind(NEXT_DWORD); {
-    ld(tmp1, Address(a1, wordSize));
-    ld(tmp2, Address(a2, wordSize));
-    sub(cnt1, cnt1, 2 * wordSize / elem_size);
-    blez(cnt1, TAIL);
-    bne(tmp3, tmp4, DONE);
-    ld(tmp3, Address(a1, 2 * wordSize));
-    ld(tmp4, Address(a2, 2 * wordSize));
-    add(a1, a1, 2 * wordSize);
-    add(a2, a2, 2 * wordSize);
-    ble(cnt1, elem_per_word, TAIL2);
-  } beq(tmp1, tmp2, NEXT_DWORD);
-  j(DONE);
+  bne(cnt1, cnt2, DONE);
 
-  bind(TAIL);
-  xorr(tmp4, tmp3, tmp4);
-  xorr(tmp2, tmp1, tmp2);
-  sll(tmp2, tmp2, tmp5);
-  orr(tmp5, tmp4, tmp2);
-  j(IS_TMP5_ZR);
+  la(a1, Address(a1, base_offset));
+  la(a2, Address(a2, base_offset));
+  // Check for short arrays, i.e. smaller than wordSize.
+  addi(cnt1, cnt1, -elem_per_word);
+  bltz(cnt1, SHORT);
 
-  bind(TAIL2);
+  // Main 8 byte comparison loop.
+  bind(NEXT_WORD); {
+    ld(tmp1, Address(a1));
+    ld(tmp2, Address(a2));
+    addi(cnt1, cnt1, -elem_per_word);
+    addi(a1, a1, wordSize);
+    addi(a2, a2, wordSize);
+    bne(tmp1, tmp2, DONE);
+  } bgtz(cnt1, NEXT_WORD);
+
+  // Last longword. In the case where length == 4 we compare the
+  // same longword twice, but that's still faster than another
+  // conditional branch.
+  // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
+  // length == 4.
+  if (log_elem_size > 0) {
+    slli(cnt1, cnt1, log_elem_size);
+  }
+  add(a1, a1, cnt1);
+  ld(tmp1, Address(a1));
+  add(a2, a2, cnt1);
+  ld(tmp2, Address(a2));
   bne(tmp1, tmp2, DONE);
+  j(SAME);
 
-  bind(SHORT);
-  xorr(tmp4, tmp3, tmp4);
-  sll(tmp5, tmp4, tmp5);
+  bind(A_MIGHT_BE_NULL);
+  // in case both a1 and a2 are not-null, proceed with loads
+  beqz(a1, DONE);
+  beqz(a2, DONE);
+  j(A_IS_NOT_NULL);
 
-  bind(IS_TMP5_ZR);
-  bnez(tmp5, DONE);
+  bind(SHORT);
+  addi(cnt1, cnt1, elem_per_word); // undo the bias above: cnt1 is the length again
+  test_bit(tmp1, cnt1, 2 - log_elem_size);
+  beqz(tmp1, TAIL03); // 0-7 bytes left.
+  {
+    lwu(tmp1, Address(a1));
+    lwu(tmp2, Address(a2));
+    bne(tmp1, tmp2, DONE);
+    addi(a1, a1, 4);
+    addi(a2, a2, 4);
+  }
+  bind(TAIL03);
+  test_bit(tmp1, cnt1, 1 - log_elem_size);
+  beqz(tmp1, TAIL01); // 0-3 bytes left.
+  {
+    lhu(tmp1, Address(a1));
+    lhu(tmp2, Address(a2));
+    bne(tmp1, tmp2, DONE);
+    addi(a1, a1, 2);
+    addi(a2, a2, 2);
+  }
+  bind(TAIL01);
+  if (elem_size == 1) { // Only needed when comparing byte arrays.
+    test_bit(tmp1, cnt1, 0);
+    beqz(tmp1, SAME); // 0-1 bytes left.
+    {
+      lbu(tmp1, Address(a1));
+      lbu(tmp2, Address(a2));
+      bne(tmp1, tmp2, DONE);
+    }
+  }
 
   bind(SAME);
   mv(result, true);
diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
index 150a8e4f8bc50..944025ffb2f4c 100644
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
@@ -79,16 +79,15 @@
                      int needle_con_cnt, Register result, int ae);
 
   void arrays_equals(Register r1, Register r2,
-                     Register tmp3, Register tmp4,
-                     Register tmp5, Register tmp6,
-                     Register result, Register cnt1,
-                     int elem_size);
+                     Register tmp1, Register tmp2, Register tmp3,
+                     Register result, int elem_size);
 
   void arrays_hashcode(Register ary, Register cnt, Register result,
                        Register tmp1, Register tmp2,
                        Register tmp3, Register tmp4,
                        Register tmp5, Register tmp6,
                        BasicType eltype);
+
   // helper function for arrays_hashcode
   int arrays_hashcode_elsize(BasicType eltype);
   void arrays_hashcode_elload(Register dst, Address src, BasicType eltype);
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
index 9e5f233ce17ea..56a2d71bb5f15 100644
--- a/src/hotspot/cpu/riscv/riscv.ad
+++ b/src/hotspot/cpu/riscv/riscv.ad
@@ -3286,17 +3286,6 @@ operand iRegP_R15()
   interface(REG_INTER);
 %}
 
-operand iRegP_R16()
-%{
-  constraint(ALLOC_IN_RC(r16_reg));
-  match(RegP);
-  // match(iRegP);
-  match(iRegPNoSp);
-  op_cost(0);
-  format %{ %}
-  interface(REG_INTER);
-%}
-
 // Pointer 64 bit Register R28 only
 operand iRegP_R28()
 %{
@@ -10336,35 +10325,33 @@ instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
 %}
 
 instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
-                       iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3,
-                       iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr)
+                       iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3)
 %{
   predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
   match(Set result (AryEq ary1 ary2));
-  effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr);
+  effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3);
 
-  format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp5" %}
+  format %{ "Array Equals $ary1, $ary2 -> $result\t#@array_equalsB // KILL all" %}
   ins_encode %{
     __ arrays_equals($ary1$$Register, $ary2$$Register,
-                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register,
-                     $result$$Register, $tmp5$$Register, 1);
+                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+                     $result$$Register, 1);
   %}
   ins_pipe(pipe_class_memory);
 %}
 
 instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
-                       iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3,
-                       iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr)
+                       iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3)
 %{
   predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
   match(Set result (AryEq ary1 ary2));
-  effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr);
+  effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3);
 
-  format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp5" %}
+  format %{ "Array Equals $ary1, $ary2 -> $result\t#@array_equalsC // KILL all" %}
   ins_encode %{
     __ arrays_equals($ary1$$Register, $ary2$$Register,
-                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register,
-                     $result$$Register, $tmp5$$Register, 2);
+                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+                     $result$$Register, 2);
   %}
   ins_pipe(pipe_class_memory);
 %}