Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add binary COPY encoders and decoders and binary timestamp encoders #511

Merged
merged 13 commits into from
Mar 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,9 @@ to convert single values to/from their string representation.
The following PostgreSQL column types are supported by ruby-pg (TE = Text Encoder, TD = Text Decoder, BE = Binary Encoder, BD = Binary Decoder):

* Integer: [TE](rdoc-ref:PG::TextEncoder::Integer), [TD](rdoc-ref:PG::TextDecoder::Integer), [BD](rdoc-ref:PG::BinaryDecoder::Integer) 💡 No links? Switch to [here](https://deveiate.org/code/pg/README_md.html#label-Type+Casts) 💡
* BE: [Int2](rdoc-ref:PG::BinaryEncoder::Int2), [Int4](rdoc-ref:PG::BinaryEncoder::Int4), [Int8](rdoc-ref:PG::BinaryEncoder::Int8)
* BE: [Int2](rdoc-ref:PG::BinaryEncoder::Int2), [Int4](rdoc-ref:PG::BinaryEncoder::Int4), [Int8](rdoc-ref:PG::BinaryEncoder::Int8)
* Float: [TE](rdoc-ref:PG::TextEncoder::Float), [TD](rdoc-ref:PG::TextDecoder::Float), [BD](rdoc-ref:PG::BinaryDecoder::Float)
* BE: [Float4](rdoc-ref:PG::BinaryEncoder::Float4), [Float8](rdoc-ref:PG::BinaryEncoder::Float8)
* Numeric: [TE](rdoc-ref:PG::TextEncoder::Numeric), [TD](rdoc-ref:PG::TextDecoder::Numeric)
* Boolean: [TE](rdoc-ref:PG::TextEncoder::Boolean), [TD](rdoc-ref:PG::TextDecoder::Boolean), [BE](rdoc-ref:PG::BinaryEncoder::Boolean), [BD](rdoc-ref:PG::BinaryDecoder::Boolean)
* String: [TE](rdoc-ref:PG::TextEncoder::String), [TD](rdoc-ref:PG::TextDecoder::String), [BE](rdoc-ref:PG::BinaryEncoder::String), [BD](rdoc-ref:PG::BinaryDecoder::String)
Expand All @@ -135,16 +136,17 @@ The following PostgreSQL column types are supported by ruby-pg (TE = Text Encode
* Timestamp:
* TE: [local](rdoc-ref:PG::TextEncoder::TimestampWithoutTimeZone), [UTC](rdoc-ref:PG::TextEncoder::TimestampUtc), [with-TZ](rdoc-ref:PG::TextEncoder::TimestampWithTimeZone)
* TD: [local](rdoc-ref:PG::TextDecoder::TimestampLocal), [UTC](rdoc-ref:PG::TextDecoder::TimestampUtc), [UTC-to-local](rdoc-ref:PG::TextDecoder::TimestampUtcToLocal)
* BE: [local](rdoc-ref:PG::BinaryEncoder::TimestampLocal), [UTC](rdoc-ref:PG::BinaryEncoder::TimestampUtc)
* BD: [local](rdoc-ref:PG::BinaryDecoder::TimestampLocal), [UTC](rdoc-ref:PG::BinaryDecoder::TimestampUtc), [UTC-to-local](rdoc-ref:PG::BinaryDecoder::TimestampUtcToLocal)
* Date: [TE](rdoc-ref:PG::TextEncoder::Date), [TD](rdoc-ref:PG::TextDecoder::Date)
* JSON and JSONB: [TE](rdoc-ref:PG::TextEncoder::JSON), [TD](rdoc-ref:PG::TextDecoder::JSON)
* Inet: [TE](rdoc-ref:PG::TextEncoder::Inet), [TD](rdoc-ref:PG::TextDecoder::Inet)
* Array: [TE](rdoc-ref:PG::TextEncoder::Array), [TD](rdoc-ref:PG::TextDecoder::Array)
* Composite Type (also called "Row" or "Record"): [TE](rdoc-ref:PG::TextEncoder::Record), [TD](rdoc-ref:PG::TextDecoder::Record)

The following text formats can also be encoded although they are not used as column type:
The following text and binary formats can also be encoded although they are not used as column type:

* COPY input and output data: [TE](rdoc-ref:PG::TextEncoder::CopyRow), [TD](rdoc-ref:PG::TextDecoder::CopyRow)
* COPY input and output data: [TE](rdoc-ref:PG::TextEncoder::CopyRow), [TD](rdoc-ref:PG::TextDecoder::CopyRow), [BE](rdoc-ref:PG::BinaryEncoder::CopyRow), [BD](rdoc-ref:PG::BinaryDecoder::CopyRow)
* Literal for insertion into SQL string: [TE](rdoc-ref:PG::TextEncoder::QuotedLiteral)
* SQL-Identifier: [TE](rdoc-ref:PG::TextEncoder::Identifier), [TD](rdoc-ref:PG::TextDecoder::Identifier)

Expand Down
1 change: 0 additions & 1 deletion ext/pg.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
*
* - PQfreemem -- unnecessary: copied to ruby object, then freed. Ruby object's
* memory is freed when it is garbage collected.
* - PQbinaryTuples -- better to use PQfformat
* - PQprint -- not very useful
* - PQsetdb -- not very useful
* - PQoidStatus -- deprecated, use PQoidValue
Expand Down
2 changes: 1 addition & 1 deletion ext/pg.h
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ int pg_coder_enc_to_s _(( t_pg_coder*, VALUE, c
int pg_text_enc_identifier _(( t_pg_coder*, VALUE, char *, VALUE *, int));
t_pg_coder_enc_func pg_coder_enc_func _(( t_pg_coder* ));
t_pg_coder_dec_func pg_coder_dec_func _(( t_pg_coder*, int ));
void pg_define_coder _(( const char *, void *, VALUE, VALUE ));
VALUE pg_define_coder _(( const char *, void *, VALUE, VALUE ));
VALUE pg_obj_to_i _(( VALUE ));
VALUE pg_tmbc_allocate _(( void ));
void pg_coder_init_encoder _(( VALUE ));
Expand Down
131 changes: 131 additions & 0 deletions ext/pg_binary_encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,131 @@ pg_bin_enc_int8(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, i
return 8;
}

/*
* Document-class: PG::BinaryEncoder::Float4 < PG::SimpleEncoder
*
* This is the binary encoder class for the PostgreSQL +float4+ type.
*
*/
static int
pg_bin_enc_float4(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, int enc_idx)
{
	/* Gives access to the bit pattern of the float as a 32 bit integer. */
	union {
		float f;
		int32_t i;
	} bits;

	if( !out ){
		/* first call -> only remember the value, the size is always 4 bytes */
		*intermediate = value;
		return 4;
	}

	/* second call -> convert the remembered value and write it to *out */
	bits.f = NUM2DBL(*intermediate);
	write_nbo32(bits.i, out);
	return 4;
}

/*
* Document-class: PG::BinaryEncoder::Float8 < PG::SimpleEncoder
*
* This is the binary encoder class for the PostgreSQL +float8+ type.
*
*/
static int
pg_bin_enc_float8(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, int enc_idx)
{
	/* Gives access to the bit pattern of the double as a 64 bit integer. */
	union {
		double f;
		int64_t i;
	} bits;

	if( !out ){
		/* first call -> only remember the value, the size is always 8 bytes */
		*intermediate = value;
		return 8;
	}

	/* second call -> convert the remembered value and write it to *out */
	bits.f = NUM2DBL(*intermediate);
	write_nbo64(bits.i, out);
	return 8;
}

/* Largest and smallest value of a signed 64 bit integer.
 * The minimum is derived from the maximum, since its magnitude can not be
 * written as a positive literal. */
#define PG_INT64_MAX	0x7FFFFFFFFFFFFFFFL
#define PG_INT64_MIN	(-PG_INT64_MAX - 1)

/*
* Document-class: PG::BinaryEncoder::Timestamp < PG::SimpleEncoder
*
* This is an encoder class for conversion of Ruby Time objects to PostgreSQL binary timestamps.
*
* The following flags can be used to specify timezone interpretation:
* * +PG::Coder::TIMESTAMP_DB_UTC+ : Send timestamp as UTC time (default)
* * +PG::Coder::TIMESTAMP_DB_LOCAL+ : Send timestamp as local time (slower)
*
* Example:
* enco = PG::BinaryEncoder::Timestamp.new(flags: PG::Coder::TIMESTAMP_DB_UTC)
* enco.encode(Time.utc(2000, 1, 1)) # => "\x00\x00\x00\x00\x00\x00\x00\x00"
*
* String values are expected to contain binary data with a length of 8 bytes.
*
*/
static int
pg_bin_enc_timestamp(t_pg_coder *this, VALUE value, char *out, VALUE *intermediate, int enc_idx)
{
	/*
	 * Two-pass encoder: it is called first with out == NULL to determine the
	 * required length and to stash the value in *intermediate, and a second
	 * time with a buffer to write the data to.
	 *
	 * *intermediate carries one of:
	 *   - Qtrue / Qfalse : a String starting with "I"/"i" resp. "-I"/"-i"
	 *                      (e.g. "infinity" / "-infinity") was given; it is
	 *                      sent as the largest resp. smallest 64 bit integer
	 *   - a String       : passed through pg_coder_enc_to_s
	 *   - any other value: handed to rb_time_timespec() in the second call
	 *
	 * Returns the number of bytes (to be) written.
	 */
	if(out){
		int64_t timestamp;
		struct timespec ts;

		/* second call -> write data to *out */
		switch(TYPE(*intermediate)){
			case T_STRING:
				return pg_coder_enc_to_s(this, value, out, intermediate, enc_idx);
			case T_TRUE:
				write_nbo64(PG_INT64_MAX, out);
				return 8;
			case T_FALSE:
				write_nbo64(PG_INT64_MIN, out);
				return 8;
			default:
				/* anything else is converted as a time value below */
				break;
		}

		ts = rb_time_timespec(*intermediate);
		/* PostgreSQL's timestamp is based on year 2000 and Ruby's time is based on 1970.
		 * Adjust the 30 years difference.
		 *
		 * Widen to 64 bit before multiplying: time_t and long can be 32 bit
		 * wide, in which case seconds * 1000000 would overflow before the
		 * result is assigned to the 64 bit timestamp. */
		timestamp = ((int64_t)ts.tv_sec - 10957L * 24L * 3600L) * 1000000 + ((int64_t)ts.tv_nsec / 1000L);

		if( this->flags & PG_CODER_TIMESTAMP_DB_LOCAL ) {
			/* send as local time: shift by the UTC offset (seconds -> microseconds) */
			timestamp += NUM2LL(rb_funcall(*intermediate, rb_intern("utc_offset"), 0)) * 1000000;
		}

		write_nbo64(timestamp, out);
	}else{
		/* first call -> determine the required length */
		if(TYPE(value) == T_STRING){
			char *pstr = RSTRING_PTR(value);
			if(RSTRING_LEN(value) >= 1){
				switch(pstr[0]) {
					case 'I':
					case 'i':
						*intermediate = Qtrue;
						return 8;
					case '-':
						if (RSTRING_LEN(value) >= 2 && (pstr[1] == 'I' || pstr[1] == 'i')) {
							*intermediate = Qfalse;
							return 8;
						}
						break;
					default:
						break;
				}
			}

			/* any other string is sent as is */
			return pg_coder_enc_to_s(this, value, out, intermediate, enc_idx);
		}

		if( this->flags & PG_CODER_TIMESTAMP_DB_LOCAL ) {
			/* make a local time, so that utc_offset is set */
			value = rb_funcall(value, rb_intern("getlocal"), 0);
		}
		*intermediate = value;
	}
	return 8;
}

/*
* Document-class: PG::BinaryEncoder::FromBase64 < PG::CompositeEncoder
*
Expand Down Expand Up @@ -153,10 +278,16 @@ init_pg_binary_encoder(void)
pg_define_coder( "Int4", pg_bin_enc_int4, rb_cPG_SimpleEncoder, rb_mPG_BinaryEncoder );
/* dummy = rb_define_class_under( rb_mPG_BinaryEncoder, "Int8", rb_cPG_SimpleEncoder ); */
pg_define_coder( "Int8", pg_bin_enc_int8, rb_cPG_SimpleEncoder, rb_mPG_BinaryEncoder );
/* dummy = rb_define_class_under( rb_mPG_BinaryEncoder, "Float4", rb_cPG_SimpleEncoder ); */
pg_define_coder( "Float4", pg_bin_enc_float4, rb_cPG_SimpleEncoder, rb_mPG_BinaryEncoder );
/* dummy = rb_define_class_under( rb_mPG_BinaryEncoder, "Float8", rb_cPG_SimpleEncoder ); */
pg_define_coder( "Float8", pg_bin_enc_float8, rb_cPG_SimpleEncoder, rb_mPG_BinaryEncoder );
/* dummy = rb_define_class_under( rb_mPG_BinaryEncoder, "String", rb_cPG_SimpleEncoder ); */
pg_define_coder( "String", pg_coder_enc_to_s, rb_cPG_SimpleEncoder, rb_mPG_BinaryEncoder );
/* dummy = rb_define_class_under( rb_mPG_BinaryEncoder, "Bytea", rb_cPG_SimpleEncoder ); */
pg_define_coder( "Bytea", pg_coder_enc_to_s, rb_cPG_SimpleEncoder, rb_mPG_BinaryEncoder );
/* dummy = rb_define_class_under( rb_mPG_BinaryEncoder, "Timestamp", rb_cPG_SimpleEncoder ); */
pg_define_coder( "Timestamp", pg_bin_enc_timestamp, rb_cPG_SimpleEncoder, rb_mPG_BinaryEncoder );

/* dummy = rb_define_class_under( rb_mPG_BinaryEncoder, "FromBase64", rb_cPG_CompositeEncoder ); */
pg_define_coder( "FromBase64", pg_bin_enc_from_base64, rb_cPG_CompositeEncoder, rb_mPG_BinaryEncoder );
Expand Down
3 changes: 2 additions & 1 deletion ext/pg_coder.c
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ static const rb_data_type_t pg_coder_cfunc_type = {
RUBY_TYPED_FREE_IMMEDIATELY,
};

void
VALUE
pg_define_coder( const char *name, void *func, VALUE base_klass, VALUE nsp )
{
VALUE cfunc_obj = TypedData_Wrap_Struct( rb_cObject, &pg_coder_cfunc_type, func );
Expand All @@ -471,6 +471,7 @@ pg_define_coder( const char *name, void *func, VALUE base_klass, VALUE nsp )
rb_define_const( coder_klass, "CFUNC", cfunc_obj );

RB_GC_GUARD(cfunc_obj);
return coder_klass;
}


Expand Down
Loading