diff --git a/ch_128.go b/ch_128.go
index 5524ccf..a69c015 100644
--- a/ch_128.go
+++ b/ch_128.go
@@ -11,7 +11,7 @@ func chMurmur(s []byte, seed U128) U128 {
 	c := uint64(0)
 	d := uint64(0)
 	l := length - 16
-	if l <= 0 { // length <= 16
+	if len(s) <= 16 { // length <= 16
 		a = shiftMix(a*k1) * k1
 		c = b*k1 + ch0to16(s, length)
 
@@ -25,18 +25,31 @@ func chMurmur(s []byte, seed U128) U128 {
 		d = ch16(b+uint64(length), c+binary.LittleEndian.Uint64(s[length-16:]))
 		a += d
 
-		for {
-			a ^= shiftMix(binary.LittleEndian.Uint64(s)*k1) * k1
+		{
+			a ^= shiftMix(binary.LittleEndian.Uint64(s[0:8:8])*k1) * k1
 			a *= k1
 			b ^= a
-			c ^= shiftMix(binary.LittleEndian.Uint64(s[8:])*k1) * k1
+			c ^= shiftMix(binary.LittleEndian.Uint64(s[8:8+8:8+8])*k1) * k1
 			c *= k1
 			d ^= c
 			s = s[16:]
 			l -= 16
+		}
 
-			if l <= 0 {
-				break
+		if l > 0 {
+			for len(s) >= 16 {
+				a ^= shiftMix(binary.LittleEndian.Uint64(s[0:8:8])*k1) * k1
+				a *= k1
+				b ^= a
+				c ^= shiftMix(binary.LittleEndian.Uint64(s[8:8+8:8+8])*k1) * k1
+				c *= k1
+				d ^= c
+				s = s[16:]
+				l -= 16
+
+				if l <= 0 {
+					break
+				}
 			}
 		}
 	}
@@ -49,8 +62,8 @@ func chMurmur(s []byte, seed U128) U128 {
 func CH128(s []byte) U128 {
 	if len(s) >= 16 {
 		return CH128Seed(s[16:], U128{
-			Low:  binary.LittleEndian.Uint64(s) ^ k3,
-			High: binary.LittleEndian.Uint64(s[8:]),
+			Low:  binary.LittleEndian.Uint64(s[0:8:8]) ^ k3,
+			High: binary.LittleEndian.Uint64(s[8 : 8+8 : 8+8]),
 		})
 	}
 	if len(s) >= 8 {
@@ -79,17 +92,20 @@ func CH128Seed(s []byte, seed U128) U128 {
 	y := seed.High
 	z := uint64(len(s)) * k1
 
-	v.Low = rot64(y^k1, 49)*k1 + binary.LittleEndian.Uint64(s)
-	v.High = rot64(v.Low, 42)*k1 + binary.LittleEndian.Uint64(s[8:])
-	w.Low = rot64(y+z, 35)*k1 + x
-	w.High = rot64(x+binary.LittleEndian.Uint64(s[88:]), 53) * k1
+	{
+		subSlice := (*[96]byte)(s[0:])
+		v.Low = rot64(y^k1, 49)*k1 + binary.LittleEndian.Uint64(subSlice[0:])
+		v.High = rot64(v.Low, 42)*k1 + binary.LittleEndian.Uint64(subSlice[8:])
+		w.Low = rot64(y+z, 35)*k1 + x
+		w.High = rot64(x+binary.LittleEndian.Uint64(subSlice[88:]), 53) * k1
+	}
 
 	// This is the same inner loop as CH64(), manually unrolled.
 	for len(s) >= 128 {
 		// Roll 1.
 		{
-			x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[16:]), 37) * k1
-			y = rot64(y+v.High+binary.LittleEndian.Uint64(s[48:]), 42) * k1
+			x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[16:16+8:16+8]), 37) * k1
+			y = rot64(y+v.High+binary.LittleEndian.Uint64(s[48:48+8:48+8]), 42) * k1
 
 			x ^= w.High
 			y ^= v.Low
@@ -103,8 +119,8 @@ func CH128Seed(s []byte, seed U128) U128 {
 		// Roll 2.
 		{
 			const offset = 64
-			x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[offset+16:]), 37) * k1
-			y = rot64(y+v.High+binary.LittleEndian.Uint64(s[offset+48:]), 42) * k1
+			x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[offset+16:offset+16+8:offset+16+8]), 37) * k1
+			y = rot64(y+v.High+binary.LittleEndian.Uint64(s[offset+48:offset+48+8:offset+48+8]), 42) * k1
 			x ^= w.High
 			y ^= v.Low
 
diff --git a/go.mod b/go.mod
index 34825ba..24c6360 100644
--- a/go.mod
+++ b/go.mod
@@ -1,3 +1,3 @@
 module github.com/go-faster/city
 
-go 1.16
+go 1.17