Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[improvement] Some performance related changes to evaluate #1426

Merged
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
module github.com/ClickHouse/clickhouse-go/v2

go 1.21
go 1.22.0

toolchain go1.23.1

require (
github.com/ClickHouse/ch-go v0.63.1
Expand All @@ -17,6 +19,7 @@ require (
github.com/stretchr/testify v1.10.0
github.com/testcontainers/testcontainers-go v0.33.0
go.opentelemetry.io/otel/trace v1.26.0
golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c
golang.org/x/net v0.33.0
gopkg.in/yaml.v3 v3.0.1
)
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,8 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c h1:7dEasQXItcW1xKJ2+gg5VOiBnqWrJc+rq0DPKyvvdbY=
golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c/go.mod h1:NQtJDoLvd6faHhE7m4T/1IY708gDefGGjR/iUW8yQQ8=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
Expand Down
2 changes: 1 addition & 1 deletion lib/column/column_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions lib/column/column_gen_option.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ package column
import "github.com/ClickHouse/ch-go/proto"

// ColStrProvider defines provider of proto.ColStr
type ColStrProvider func() proto.ColStr
type ColStrProvider func(name string) proto.ColStr

// colStrProvider provide proto.ColStr for Column() when type is String
var colStrProvider ColStrProvider = defaultColStrProvider

// defaultColStrProvider defines sample provider for proto.ColStr
func defaultColStrProvider() proto.ColStr {
func defaultColStrProvider(string) proto.ColStr {
return proto.ColStr{}
}

Expand All @@ -35,7 +35,7 @@ func defaultColStrProvider() proto.ColStr {
//
// It is more suitable for scenarios where a lot of data is written in batches
func WithAllocBufferColStrProvider(cap int) {
colStrProvider = func() proto.ColStr {
colStrProvider = func(string) proto.ColStr {
return proto.ColStr{Buf: make([]byte, 0, cap)}
}
}
Expand Down
20 changes: 18 additions & 2 deletions lib/column/enum.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@ import (
"bytes"
"errors"
"math"
"slices"
"strconv"

"github.com/ClickHouse/ch-go/proto"
"golang.org/x/exp/maps"
)

func Enum(chType Type, name string) (Interface, error) {
Expand All @@ -47,19 +49,33 @@ func Enum(chType Type, name string) (Interface, error) {
enum.iv[values[i]] = proto.Enum8(v)
enum.vi[proto.Enum8(v)] = values[i]
}
enum.minEnum = int8(slices.Min(maps.Keys(enum.vi)))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to create slice of keys (& bring in exp/maps dependency)

Instead you can have one loop over keys (either using plain range enum.vi or range maps.Key(enum.vi) from golang stdlib, not sure if the latter is still too recent in golang for us to use in this library), & calculate min/max at the same time

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree, I can make those changes if the solution in general is accepted (something I wasn't sure about).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just noticed range values loop above, can calc min/max in there

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed this for Enum16, but will wait a bit for the Enum8 case for a response here: https://github.com/ClickHouse/clickhouse-go/pull/1426/files#r1905195485

enum.maxEnum = int8(slices.Max(maps.Keys(enum.vi)))
enum.continuous = (enum.maxEnum-enum.minEnum)+1 == int8(len(enum.vi))
Copy link
Member

@serprex serprex Dec 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For Enum8 non-continuous & continous could both use a 256 bit bitmap (only 32 bytes, could skip heap by being in struct as [4]uint64)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I understand your suggestion. Do you have some (semi-code) example of how you would use that, and which library? (Since I don't know of any bitmap implementations in the Golang standard library itself.)

Copy link
Member

@serprex serprex Dec 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to use library, can be done in loop above, something like

var bitset [4]uint64
for i := range values {
  bitIndex := uint8(indexes[i])
  bitset[bitIndex>>6] |= 1 << (bitIndex&63)
}

then when checking existence bitset[bitIndex>>6] & (1 << (bitIndex&63)) != 0

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gave this solution some thought, but I think its defeating the purpose. The whole idea was to skip the map lookup (col.vi[v]) which is the thing causing lots of overhead.
Using the bitmap as you proposed, it still needs to do a map lookup: bitset[bitIndex>>6]. I think whether the map appears on the heap or not doesn't matter.

If you think it does, I can try creating a benchmark and compare the different solutions. But would need to find a bit of time to put in that effort.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe the expense on the CPU would be different between a standard map type and the bitmap. It should be similarly as fast as the integer bounds check.

If the solution works as-is then I'm okay with keeping it, if it's invalid then the server will throw an error. Most clients are already fairly in sync with their data model, especially with enums. I wouldn't want to hold the PR back due to this, we can iterate on it later

Copy link
Member

@serprex serprex Jan 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[4]uint64 isn't a map. Array access will be about as fast as field access on continuous/min/max

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤦‍♂️ of course. Somehow I had a map in my mind due to the bitmap

I'll make the changes accordingly and verify

return &enum, nil
}
enum := Enum16{
iv: make(map[string]proto.Enum16, len(values)),
vi: make(map[proto.Enum16]string, len(values)),
chType: chType,
name: name,
// to be updated below, when ranging over all index/enum values
minEnum: math.MaxInt16,
maxEnum: math.MinInt16,
}

for i := range values {
enum.iv[values[i]] = proto.Enum16(indexes[i])
enum.vi[proto.Enum16(indexes[i])] = values[i]
k := int16(indexes[i])
Dismissed Show dismissed Hide dismissed
enum.iv[values[i]] = proto.Enum16(k)
enum.vi[proto.Enum16(k)] = values[i]
if k < enum.minEnum {
enum.minEnum = k
}
if k > enum.maxEnum {
enum.maxEnum = k
}
}
enum.continuous = (enum.maxEnum-enum.minEnum)+1 == int16(len(enum.vi))
return &enum, nil
}

Expand Down
16 changes: 14 additions & 2 deletions lib/column/enum16.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ type Enum16 struct {
chType Type
col proto.ColEnum16
name string

continuous bool
minEnum int16
maxEnum int16
}

func (col *Enum16) Reset() {
Expand Down Expand Up @@ -179,9 +183,17 @@ func (col *Enum16) Append(v any) (nulls []uint8, err error) {
func (col *Enum16) AppendRow(elem any) error {
switch elem := elem.(type) {
case int16:
return col.AppendRow(int(elem))
if col.continuous && elem >= col.minEnum && elem <= col.maxEnum {
col.col.Append(proto.Enum16(elem))
} else {
return col.AppendRow(int(elem))
}
case *int16:
return col.AppendRow(int(*elem))
if col.continuous && *elem >= col.minEnum && *elem <= col.maxEnum {
col.col.Append(proto.Enum16(*elem))
} else {
return col.AppendRow(int(*elem))
}
case int:
v := proto.Enum16(elem)
_, ok := col.vi[v]
Expand Down
16 changes: 14 additions & 2 deletions lib/column/enum8.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ type Enum8 struct {
chType Type
name string
col proto.ColEnum8

continuous bool
minEnum int8
maxEnum int8
}

func (col *Enum8) Reset() {
Expand Down Expand Up @@ -179,9 +183,17 @@ func (col *Enum8) Append(v any) (nulls []uint8, err error) {
func (col *Enum8) AppendRow(elem any) error {
switch elem := elem.(type) {
case int8:
return col.AppendRow(int(elem))
if col.continuous && elem >= col.minEnum && elem <= col.maxEnum {
col.col.Append(proto.Enum8(elem))
} else {
return col.AppendRow(int(elem))
}
case *int8:
return col.AppendRow(int(*elem))
if col.continuous && *elem >= col.minEnum && *elem <= col.maxEnum {
col.col.Append(proto.Enum8(*elem))
} else {
return col.AppendRow(int(*elem))
}
case int:
v := proto.Enum8(elem)
_, ok := col.vi[v]
Expand Down