diff --git a/nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/utils/NebulaUtils.scala b/nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/utils/NebulaUtils.scala index 203d567c..0b4f7579 100644 --- a/nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/utils/NebulaUtils.scala +++ b/nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/utils/NebulaUtils.scala @@ -5,6 +5,8 @@ package com.vesoft.nebula.exchange.utils +import java.nio.charset.Charset + import com.google.common.primitives.UnsignedLong import com.vesoft.nebula.exchange.{MetaProvider, VidType} import com.vesoft.nebula.exchange.config.{SchemaConfigEntry, Type} @@ -84,7 +86,9 @@ object NebulaUtils { def getPartitionId(id: String, partitionSize: Int, vidType: VidType.Value): Int = { val hashValue: Long = if (vidType == VidType.STRING) { - MurmurHash2.hash64(id.getBytes, id.length, 0xc70f6907) + // todo charset must be the same with Nebula Space + val byteId = id.getBytes(Charset.forName("UTF-8")) + MurmurHash2.hash64(byteId, byteId.length, 0xc70f6907) } else { id.toLong } diff --git a/nebula-exchange/src/test/scala/com/vesoft/nebula/exchange/utils/NebulaUtilsSuite.scala b/nebula-exchange/src/test/scala/com/vesoft/nebula/exchange/utils/NebulaUtilsSuite.scala index 1c5dd675..3f66ac49 100644 --- a/nebula-exchange/src/test/scala/com/vesoft/nebula/exchange/utils/NebulaUtilsSuite.scala +++ b/nebula-exchange/src/test/scala/com/vesoft/nebula/exchange/utils/NebulaUtilsSuite.scala @@ -152,6 +152,15 @@ class NebulaUtilsSuite { assert(NebulaUtils.getPartitionId("-1", 10, VidType.INT) == 6) assert(NebulaUtils.getPartitionId("-2", 10, VidType.INT) == 5) assert(NebulaUtils.getPartitionId("-3", 10, VidType.INT) == 4) + + // for chinese + assert(NebulaUtils.getPartitionId("中文", 10, VidType.STRING) == 5) + assert(NebulaUtils.getPartitionId("北京", 10, VidType.STRING) == 7) + assert(NebulaUtils.getPartitionId("北京123", 10, VidType.STRING) == 1) + assert(NebulaUtils.getPartitionId("北A12ABC", 10, VidType.STRING) == 3) + assert(NebulaUtils.getPartitionId("蒙DPP8EC", 10, VidType.STRING) == 4) + assert(NebulaUtils.getPartitionId("赣F6893_Vehicle", 10, VidType.STRING) == 2) + assert(NebulaUtils.getPartitionId("湘3Z4A1E_vehicle", 10, VidType.STRING) == 1) } @Test