From c13fb881f128971ad38397ad346792b66abffc3c Mon Sep 17 00:00:00 2001 From: Lars Kanis Date: Sat, 14 Jan 2017 19:49:17 +0100 Subject: [PATCH 1/7] Add PG::RowCoder for en/decoding of Composite Types This implements proper encoder and decoder for Composite Types as described in https://www.postgresql.org/docs/11/rowtypes.html and as requested in https://bitbucket.org/ged/ruby-pg/issues/258 This adds the following classes: * PG::RowCoder < PG::Coder * PG::RowEncoder < PG::RowCoder * PG::RowDecoder < PG::RowCoder * PG::TextEncoder::Row < PG::RowEncoder * PG::TextDecoder::Row < PG::RowDecoder Fixes #258 --- Manifest.txt | 1 + ext/pg.c | 1 + ext/pg.h | 1 + ext/pg_row_coder.c | 444 +++++++++++++++++++++++++++++++++++++++++++ lib/pg/coder.rb | 9 +- spec/pg/type_spec.rb | 102 ++++++++++ 6 files changed, 557 insertions(+), 1 deletion(-) create mode 100644 ext/pg_row_coder.c diff --git a/Manifest.txt b/Manifest.txt index 97ced89d8..24532fb77 100644 --- a/Manifest.txt +++ b/Manifest.txt @@ -27,6 +27,7 @@ ext/pg_connection.c ext/pg_copy_coder.c ext/pg_errors.c ext/pg_result.c +ext/pg_row_coder.c ext/pg_text_decoder.c ext/pg_text_encoder.c ext/pg_tuple.c diff --git a/ext/pg.c b/ext/pg.c index b893ab9cd..174d4af0d 100644 --- a/ext/pg.c +++ b/ext/pg.c @@ -635,6 +635,7 @@ Init_pg_ext() init_pg_binary_encoder(); init_pg_binary_decoder(); init_pg_copycoder(); + init_pg_rowcoder(); init_pg_tuple(); } diff --git a/ext/pg.h b/ext/pg.h index 97fd2ac08..aa336c35b 100644 --- a/ext/pg.h +++ b/ext/pg.h @@ -272,6 +272,7 @@ void init_pg_type_map_by_oid _(( void )); void init_pg_type_map_in_ruby _(( void )); void init_pg_coder _(( void )); void init_pg_copycoder _(( void )); +void init_pg_rowcoder _(( void )); void init_pg_text_encoder _(( void )); void init_pg_text_decoder _(( void )); void init_pg_binary_encoder _(( void )); diff --git a/ext/pg_row_coder.c b/ext/pg_row_coder.c new file mode 100644 index 000000000..38039c0a8 --- /dev/null +++ b/ext/pg_row_coder.c @@ -0,0 +1,444 @@ +/* + * 
pg_row_coder.c - PG::Coder class extension + * + */ + +#include "pg.h" + +VALUE rb_cPG_RowCoder; +VALUE rb_cPG_RowEncoder; +VALUE rb_cPG_RowDecoder; + +typedef struct { + t_pg_coder comp; + VALUE typemap; +} t_pg_rowcoder; + + +static void +pg_rowcoder_mark( t_pg_rowcoder *this ) +{ + rb_gc_mark(this->typemap); +} + +static VALUE +pg_rowcoder_encoder_allocate( VALUE klass ) +{ + t_pg_rowcoder *this; + VALUE self = Data_Make_Struct( klass, t_pg_rowcoder, pg_rowcoder_mark, -1, this ); + pg_coder_init_encoder( self ); + this->typemap = pg_typemap_all_strings; + return self; +} + +static VALUE +pg_rowcoder_decoder_allocate( VALUE klass ) +{ + t_pg_rowcoder *this; + VALUE self = Data_Make_Struct( klass, t_pg_rowcoder, pg_rowcoder_mark, -1, this ); + pg_coder_init_decoder( self ); + this->typemap = pg_typemap_all_strings; + return self; +} + +/* + * call-seq: + * coder.type_map = map + * + * +map+ must be a kind of PG::TypeMap . + * + * Defaults to a PG::TypeMapAllStrings , so that PG::TextEncoder::String respectively + * PG::TextDecoder::String is used for encoding/decoding of all columns. + * + */ +static VALUE +pg_rowcoder_type_map_set(VALUE self, VALUE type_map) +{ + t_pg_rowcoder *this = DATA_PTR( self ); + + if ( !rb_obj_is_kind_of(type_map, rb_cTypeMap) ){ + rb_raise( rb_eTypeError, "wrong elements type %s (expected some kind of PG::TypeMap)", + rb_obj_classname( type_map ) ); + } + this->typemap = type_map; + + return type_map; +} + +/* + * call-seq: + * coder.type_map -> PG::TypeMap + * + */ +static VALUE +pg_rowcoder_type_map_get(VALUE self) +{ + t_pg_rowcoder *this = DATA_PTR( self ); + + return this->typemap; +} + + +/* + * Document-class: PG::TextEncoder::RowRow < PG::RowEncoder + * + * This class encodes one row of arbitrary columns for transmission as COPY data in text format. + * See the {COPY command}[http://www.postgresql.org/docs/current/static/sql-copy.html] + * for description of the format. 
+ * + * It is intended to be used in conjunction with PG::Connection#put_copy_data . + * + * The columns are expected as Array of values. The single values are encoded as defined + * in the assigned #type_map. If no type_map was assigned, all values are converted to + * strings by PG::TextEncoder::String. + * + * Example with default type map ( TypeMapAllStrings ): + * conn.exec "create table my_table (a text,b int,c bool)" + * enco = PG::TextEncoder::RowRow.new + * conn.copy_data "COPY my_table FROM STDIN", enco do + * conn.put_copy_data ["astring", 7, false] + * conn.put_copy_data ["string2", 42, true] + * end + * This creates +my_table+ and inserts two rows. + * + * It is possible to manually assign a type encoder for each column per PG::TypeMapByColumn, + * or to make use of PG::BasicTypeMapBasedOnResult to assign them based on the table OIDs. + * + * See also PG::TextDecoder::RowRow for the decoding direction with + * PG::Connection#get_copy_data . + */ +static int +pg_text_enc_row(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, int enc_idx) +{ + t_pg_rowcoder *this = (t_pg_rowcoder *)conv; + t_pg_coder_enc_func enc_func; + static t_pg_coder *p_elem_coder; + int i; + t_typemap *p_typemap; + char *current_out; + char *end_capa_ptr; + + p_typemap = DATA_PTR( this->typemap ); + p_typemap->funcs.fit_to_query( this->typemap, value ); + + /* Allocate a new string with embedded capacity and realloc exponential when needed. */ + PG_RB_STR_NEW( *intermediate, current_out, end_capa_ptr ); + PG_ENCODING_SET_NOCHECK(*intermediate, enc_idx); + PG_RB_STR_ENSURE_CAPA( *intermediate, 1, current_out, end_capa_ptr ); + *current_out++ = '('; + + for( i=0; i<RARRAY_LEN(value); i++){ + char *ptr1; + char *ptr2; + int strlen; + int backslashs; + VALUE subint; + VALUE entry; + + entry = rb_ary_entry(value, i); + + if( i > 0 ){ + PG_RB_STR_ENSURE_CAPA( *intermediate, 1, current_out, end_capa_ptr ); + *current_out++ = ','; + } + + switch(TYPE(entry)){ + case T_NIL: + /* emit nothing... 
*/ + break; + default: + p_elem_coder = p_typemap->funcs.typecast_query_param(p_typemap, entry, i); + enc_func = pg_coder_enc_func(p_elem_coder); + + /* 1st pass for retiving the required memory space */ + strlen = enc_func(p_elem_coder, entry, NULL, &subint, enc_idx); + + if( strlen == -1 ){ + /* we can directly use String value in subint */ + strlen = RSTRING_LEN(subint); + + /* size of string assuming the worst case, that every character must be escaped. */ + PG_RB_STR_ENSURE_CAPA( *intermediate, strlen * 2 + 2, current_out, end_capa_ptr ); + + *current_out++ = '"'; + /* Row string from subint with backslash escaping */ + for(ptr1 = RSTRING_PTR(subint); ptr1 < RSTRING_PTR(subint) + strlen; ptr1++) { + if (*ptr1 == '"' || *ptr1 == '\\') { + *current_out++ = *ptr1; + } + *current_out++ = *ptr1; + } + *current_out++ = '"'; + } else { + /* 2nd pass for writing the data to prepared buffer */ + /* size of string assuming the worst case, that every character must be escaped. */ + PG_RB_STR_ENSURE_CAPA( *intermediate, strlen * 2 + 2, current_out, end_capa_ptr ); + + *current_out++ = '"'; + /* Place the unescaped string at current output position. */ + strlen = enc_func(p_elem_coder, entry, current_out, &subint, enc_idx); + + ptr1 = current_out; + ptr2 = current_out + strlen; + + /* count required backlashs */ + for(backslashs = 0; ptr1 != ptr2; ptr1++) { + /* Escape backslash itself, newline, carriage return, and the current delimiter character. */ + if(*ptr1 == '"' || *ptr1 == '\\'){ + backslashs++; + } + } + + ptr1 = current_out + strlen; + ptr2 = current_out + strlen + backslashs; + current_out = ptr2; + + /* Then store the escaped string on the final position, walking + * right to left, until all backslashs are placed. 
*/ + while( ptr1 != ptr2 ) { + *--ptr2 = *--ptr1; + if(*ptr1 == '"' || *ptr1 == '\\'){ + *--ptr2 = *ptr1; + } + } + *current_out++ = '"'; + } + } + } + PG_RB_STR_ENSURE_CAPA( *intermediate, 1, current_out, end_capa_ptr ); + *current_out++ = ')'; + + rb_str_set_len( *intermediate, current_out - RSTRING_PTR(*intermediate) ); + + return -1; +} + +/* + * row_isspace() --- a non-locale-dependent isspace() + * + * We used to use isspace() for parsing array values, but that has + * undesirable results: an array value might be silently interpreted + * differently depending on the locale setting. Now we just hard-wire + * the traditional ASCII definition of isspace(). + */ +static int +row_isspace(char ch) +{ + if (ch == ' ' || + ch == '\t' || + ch == '\n' || + ch == '\r' || + ch == '\v' || + ch == '\f') + return 1; + return 0; +} + +/* + * Document-class: PG::TextDecoder::RowRow < PG::RowDecoder + * + * This class decodes one row of arbitrary columns received as COPY data in text format. + * See the {COPY command}[http://www.postgresql.org/docs/current/static/sql-copy.html] + * for description of the format. + * + * It is intended to be used in conjunction with PG::Connection#get_copy_data . + * + * The columns are retrieved as Array of values. The single values are decoded as defined + * in the assigned #type_map. If no type_map was assigned, all values are converted to + * strings by PG::TextDecoder::String. 
+ * + * Example with default type map ( TypeMapAllStrings ): + * conn.exec("CREATE TABLE my_table AS VALUES('astring', 7, FALSE), ('string2', 42, TRUE) ") + * + * deco = PG::TextDecoder::RowRow.new + * conn.copy_data "COPY my_table TO STDOUT", deco do + * while row=conn.get_copy_data + * p row + * end + * end + * This prints all rows of +my_table+ : + * ["astring", "7", "f"] + * ["string2", "42", "t"] + * + * Example with column based type map: + * tm = PG::TypeMapByColumn.new( [ + * PG::TextDecoder::String.new, + * PG::TextDecoder::Integer.new, + * PG::TextDecoder::Boolean.new] ) + * deco = PG::TextDecoder::RowRow.new( type_map: tm ) + * conn.copy_data "COPY my_table TO STDOUT", deco do + * while row=conn.get_copy_data + * p row + * end + * end + * This prints the rows with type casted columns: + * ["astring", 7, false] + * ["string2", 42, true] + * + * Instead of manually assigning a type decoder for each column, PG::BasicTypeMapForResults + * can be used to assign them based on the table OIDs. + * + * See also PG::TextEncoder::RowRow for the encoding direction with + * PG::Connection#put_copy_data . + */ +/* + * Parse the current line into separate attributes (fields), + * performing de-escaping as needed. + * + * All fields are gathered into a ruby Array. The de-escaped field data is written + * into to a ruby String. This object is reused for non string columns. + * For String columns the field value is directly used as return value and no + * reuse of the memory is done. 
+ * + * The parser is thankfully borrowed from the PostgreSQL sources: + * src/backend/utils/adt/rowtypes.c + */ +static VALUE +pg_text_dec_row(t_pg_coder *conv, char *input_line, int len, int _tuple, int _field, int enc_idx) +{ + t_pg_rowcoder *this = (t_pg_rowcoder *)conv; + + /* Return value: array */ + VALUE array; + + /* Current field */ + VALUE field_str; + + int fieldno; + int expected_fields; + char *output_ptr; + char *cur_ptr; + char *end_capa_ptr; + t_typemap *p_typemap; + + p_typemap = DATA_PTR( this->typemap ); + expected_fields = p_typemap->funcs.fit_to_copy_get( this->typemap ); + + /* The received input string will probably have this->nfields fields. */ + array = rb_ary_new2(expected_fields); + + /* Allocate a new string with embedded capacity and realloc later with + * exponential growing size when needed. */ + PG_RB_TAINTED_STR_NEW( field_str, output_ptr, end_capa_ptr ); + + /* set pointer variables for loop */ + cur_ptr = input_line; + + /* + * Scan the string. We use "buf" to accumulate the de-quoted data for + * each column, which is then fed to the appropriate input converter. 
+ */ + /* Allow leading whitespace */ + while (*cur_ptr && row_isspace(*cur_ptr)) + cur_ptr++; + if (*cur_ptr++ != '(') + rb_raise( rb_eArgError, "malformed record literal: \"%s\" - Missing left parenthesis.", input_line ); + + for (fieldno = 0; ; fieldno++) + { + /* Check for null: completely empty input means null */ + if (*cur_ptr == ',' || *cur_ptr == ')') + { + rb_ary_push(array, Qnil); + } + else + { + /* Extract string for this column */ + int inquote = 0; + VALUE field_value; + + while (inquote || !(*cur_ptr == ',' || *cur_ptr == ')')) + { + char ch = *cur_ptr++; + + if (ch == '\0') + rb_raise( rb_eArgError, "malformed record literal: \"%s\" - Unexpected end of input.", input_line ); + if (ch == '\\') + { + if (*cur_ptr == '\0') + rb_raise( rb_eArgError, "malformed record literal: \"%s\" - Unexpected end of input.", input_line ); + PG_RB_STR_ENSURE_CAPA( field_str, 1, output_ptr, end_capa_ptr ); + *output_ptr++ = *cur_ptr++; + } + else if (ch == '"') + { + if (!inquote) + inquote = 1; + else if (*cur_ptr == '"') + { + /* doubled quote within quote sequence */ + PG_RB_STR_ENSURE_CAPA( field_str, 1, output_ptr, end_capa_ptr ); + *output_ptr++ = *cur_ptr++; + } + else + inquote = 0; + } else { + PG_RB_STR_ENSURE_CAPA( field_str, 1, output_ptr, end_capa_ptr ); + /* Add ch to output string */ + *output_ptr++ = ch; + } + } + + /* Convert the column value */ + rb_str_set_len( field_str, output_ptr - RSTRING_PTR(field_str) ); + field_value = p_typemap->funcs.typecast_copy_get( p_typemap, field_str, fieldno, 0, enc_idx ); + + rb_ary_push(array, field_value); + + if( field_value == field_str ){ + /* Our output string will be send to the user, so we can not reuse + * it for the next field. */ + PG_RB_TAINTED_STR_NEW( field_str, output_ptr, end_capa_ptr ); + } + /* Reset the pointer to the start of the output/buffer string. 
*/ + output_ptr = RSTRING_PTR(field_str); + } + + /* Skip comma that separates prior field from this one */ + if (*cur_ptr == ',') { + cur_ptr++; + } else if (*cur_ptr == ')') { + cur_ptr++; + /* Done if we hit closing parenthesis */ + break; + } else { + rb_raise( rb_eArgError, "malformed record literal: \"%s\" - Too few columns.", input_line ); + } + } + + /* Allow trailing whitespace */ + while (*cur_ptr && row_isspace(*cur_ptr)) + cur_ptr++; + if (*cur_ptr) + rb_raise( rb_eArgError, "malformed record literal: \"%s\" - Junk after right parenthesis.", input_line ); + + return array; +} + + +void +init_pg_rowcoder() +{ + /* Document-class: PG::RowCoder < PG::Coder + * + * This is the base class for all type cast classes for COPY data, + */ + rb_cPG_RowCoder = rb_define_class_under( rb_mPG, "RowCoder", rb_cPG_Coder ); + rb_define_method( rb_cPG_RowCoder, "type_map=", pg_rowcoder_type_map_set, 1 ); + rb_define_method( rb_cPG_RowCoder, "type_map", pg_rowcoder_type_map_get, 0 ); + + /* Document-class: PG::RowEncoder < PG::RowCoder */ + rb_cPG_RowEncoder = rb_define_class_under( rb_mPG, "RowEncoder", rb_cPG_RowCoder ); + rb_define_alloc_func( rb_cPG_RowEncoder, pg_rowcoder_encoder_allocate ); + /* Document-class: PG::RowDecoder < PG::RowCoder */ + rb_cPG_RowDecoder = rb_define_class_under( rb_mPG, "RowDecoder", rb_cPG_RowCoder ); + rb_define_alloc_func( rb_cPG_RowDecoder, pg_rowcoder_decoder_allocate ); + + /* Make RDoc aware of the encoder classes... 
*/ + /* rb_mPG_TextEncoder = rb_define_module_under( rb_mPG, "TextEncoder" ); */ + /* dummy = rb_define_class_under( rb_mPG_TextEncoder, "RowRow", rb_cPG_RowEncoder ); */ + pg_define_coder( "Row", pg_text_enc_row, rb_cPG_RowEncoder, rb_mPG_TextEncoder ); + /* rb_mPG_TextDecoder = rb_define_module_under( rb_mPG, "TextDecoder" ); */ + /* dummy = rb_define_class_under( rb_mPG_TextDecoder, "RowRow", rb_cPG_RowDecoder ); */ + pg_define_coder( "Row", pg_text_dec_row, rb_cPG_RowDecoder, rb_mPG_TextDecoder ); +} diff --git a/lib/pg/coder.rb b/lib/pg/coder.rb index a2963c948..1b5d79660 100644 --- a/lib/pg/coder.rb +++ b/lib/pg/coder.rb @@ -92,5 +92,12 @@ def to_h }) end end -end # module PG + class RowCoder < Coder + def to_h + super.merge!({ + type_map: type_map, + }) + end + end +end # module PG diff --git a/spec/pg/type_spec.rb b/spec/pg/type_spec.rb index 6e2548fbe..2f4a2fb19 100644 --- a/spec/pg/type_spec.rb +++ b/spec/pg/type_spec.rb @@ -956,4 +956,106 @@ def textdec_timestamptz_decode_should_fail(str) end end end + + describe PG::RowCoder do + describe PG::TextEncoder::Row do + context "with default typemap" do + let!(:encoder) do + PG::TextEncoder::Row.new + end + + it "should encode different types of Ruby objects" do + expect( encoder.encode([:xyz, 123, 2456, 34567, 456789, 5678901, [1,2,3], 12.1, "abcdefg", nil]) ). 
+ to eq('("xyz","123","2456","34567","456789","5678901","[1, 2, 3]","12.1","abcdefg",)') + end + + it 'should output a string with correct character encoding' do + v = encoder.encode(["Héllo"], "iso-8859-1") + expect( v.encoding ).to eq( Encoding::ISO_8859_1 ) + expect( v ).to eq( '("Héllo")'.encode(Encoding::ISO_8859_1) ) + end + end + + context "with TypeMapByClass" do + let!(:tm) do + tm = PG::TypeMapByClass.new + tm[Integer] = textenc_int + tm[Float] = intenc_incrementer + tm[Array] = PG::TextEncoder::Array.new elements_type: textenc_string + tm + end + let!(:encoder) do + PG::TextEncoder::Row.new type_map: tm + end + + it "should have reasonable default values" do + expect( encoder.name ).to be_nil + end + + it "copies all attributes with #dup" do + encoder.name = "test" + encoder.type_map = PG::TypeMapByColumn.new [] + encoder2 = encoder.dup + expect( encoder.object_id ).to_not eq( encoder2.object_id ) + expect( encoder2.name ).to eq( "test" ) + expect( encoder2.type_map ).to be_a_kind_of( PG::TypeMapByColumn ) + end + + describe '#encode' do + it "should encode different types of Ruby objects" do + expect( encoder.encode([]) ).to eq("()") + expect( encoder.encode(["a"]) ).to eq('("a")') + expect( encoder.encode([:xyz, 123, 2456, 34567, 456789, 5678901, [1,2,3], 12.1, "abcdefg", nil]) ). 
+ to eq('("xyz","123","2456","34567","456789","5678901","{1,2,3}","13 ","abcdefg",)') + end + + it "should escape special characters" do + expect( encoder.encode([" \"\t\n\\\r"]) ).to eq("(\" \"\"\t\n##\r\")".gsub("#", "\\")) + end + end + end + end + + describe PG::TextDecoder::Row do + context "with default typemap" do + let!(:decoder) do + PG::TextDecoder::Row.new + end + + describe '#decode' do + it "should decode composite text format to array of strings" do + expect( decoder.decode('("fuzzy dice",,"",42,)') ).to eq( ["fuzzy dice",nil, "", "42", nil] ) + end + + it 'should respect input character encoding' do + v = decoder.decode("(Héllo)".encode("iso-8859-1")).first + expect( v.encoding ).to eq(Encoding::ISO_8859_1) + expect( v ).to eq("Héllo".encode("iso-8859-1")) + end + + it 'should raise an error on malformed input' do + expect{ decoder.decode('') }.to raise_error(ArgumentError, /"" - Missing left parenthesis/) + expect{ decoder.decode('(') }.to raise_error(ArgumentError, /"\(" - Unexpected end of input/) + expect{ decoder.decode('(\\') }.to raise_error(ArgumentError, /"\(\\" - Unexpected end of input/) + expect{ decoder.decode('()x') }.to raise_error(ArgumentError, /"\(\)x" - Junk after right parenthesis/) + end + end + end + + context "with TypeMapByColumn" do + let!(:tm) do + PG::TypeMapByColumn.new [textdec_int, textdec_string, intdec_incrementer, nil] + end + let!(:decoder) do + PG::TextDecoder::Row.new type_map: tm + end + + describe '#decode' do + it "should decode different types of Ruby objects" do + expect( decoder.decode("(123,\" #,#\n#\r#\\ \",234,#\x01#\002)".gsub("#", "\\"))).to eq( [123, " ,\n\r\\ ", 235, "\x01\x02"] ) + end + end + end + end + end end From c8e6ef8ec5515518774439110dbd247cdc8dd31c Mon Sep 17 00:00:00 2001 From: Lars Kanis Date: Sat, 20 Apr 2019 23:35:38 +0200 Subject: [PATCH 2/7] Rename coder for composite types from Row to Record I think this makes the purpose of the en/decoder more clear and distances from CopyRow coders. 
So this renames the classes to these: * PG::RecordCoder < PG::Coder * PG::RecordEncoder < PG::RecordCoder * PG::RecordDecoder < PG::RecordCoder * PG::TextEncoder::Record < PG::RecordEncoder * PG::TextDecoder::Record < PG::RecordDecoder --- Manifest.txt | 2 +- ext/pg.c | 2 +- ext/pg.h | 2 +- ext/{pg_row_coder.c => pg_record_coder.c} | 112 +++++++++++----------- lib/pg/coder.rb | 2 +- spec/pg/type_spec.rb | 14 +-- 6 files changed, 67 insertions(+), 67 deletions(-) rename ext/{pg_row_coder.c => pg_record_coder.c} (76%) diff --git a/Manifest.txt b/Manifest.txt index 24532fb77..320a61b04 100644 --- a/Manifest.txt +++ b/Manifest.txt @@ -26,8 +26,8 @@ ext/pg_coder.c ext/pg_connection.c ext/pg_copy_coder.c ext/pg_errors.c +ext/pg_record_coder.c ext/pg_result.c -ext/pg_row_coder.c ext/pg_text_decoder.c ext/pg_text_encoder.c ext/pg_tuple.c diff --git a/ext/pg.c b/ext/pg.c index 174d4af0d..bca7c7126 100644 --- a/ext/pg.c +++ b/ext/pg.c @@ -635,7 +635,7 @@ Init_pg_ext() init_pg_binary_encoder(); init_pg_binary_decoder(); init_pg_copycoder(); - init_pg_rowcoder(); + init_pg_recordcoder(); init_pg_tuple(); } diff --git a/ext/pg.h b/ext/pg.h index aa336c35b..f61d8a2ef 100644 --- a/ext/pg.h +++ b/ext/pg.h @@ -272,7 +272,7 @@ void init_pg_type_map_by_oid _(( void )); void init_pg_type_map_in_ruby _(( void )); void init_pg_coder _(( void )); void init_pg_copycoder _(( void )); -void init_pg_rowcoder _(( void )); +void init_pg_recordcoder _(( void )); void init_pg_text_encoder _(( void )); void init_pg_text_decoder _(( void )); void init_pg_binary_encoder _(( void )); diff --git a/ext/pg_row_coder.c b/ext/pg_record_coder.c similarity index 76% rename from ext/pg_row_coder.c rename to ext/pg_record_coder.c index 38039c0a8..8991cce3b 100644 --- a/ext/pg_row_coder.c +++ b/ext/pg_record_coder.c @@ -1,41 +1,41 @@ /* - * pg_row_coder.c - PG::Coder class extension + * pg_record_coder.c - PG::Coder class extension * */ #include "pg.h" -VALUE rb_cPG_RowCoder; -VALUE rb_cPG_RowEncoder; -VALUE 
rb_cPG_RowDecoder; +VALUE rb_cPG_RecordCoder; +VALUE rb_cPG_RecordEncoder; +VALUE rb_cPG_RecordDecoder; typedef struct { t_pg_coder comp; VALUE typemap; -} t_pg_rowcoder; +} t_pg_recordcoder; static void -pg_rowcoder_mark( t_pg_rowcoder *this ) +pg_recordcoder_mark( t_pg_recordcoder *this ) { rb_gc_mark(this->typemap); } static VALUE -pg_rowcoder_encoder_allocate( VALUE klass ) +pg_recordcoder_encoder_allocate( VALUE klass ) { - t_pg_rowcoder *this; - VALUE self = Data_Make_Struct( klass, t_pg_rowcoder, pg_rowcoder_mark, -1, this ); + t_pg_recordcoder *this; + VALUE self = Data_Make_Struct( klass, t_pg_recordcoder, pg_recordcoder_mark, -1, this ); pg_coder_init_encoder( self ); this->typemap = pg_typemap_all_strings; return self; } static VALUE -pg_rowcoder_decoder_allocate( VALUE klass ) +pg_recordcoder_decoder_allocate( VALUE klass ) { - t_pg_rowcoder *this; - VALUE self = Data_Make_Struct( klass, t_pg_rowcoder, pg_rowcoder_mark, -1, this ); + t_pg_recordcoder *this; + VALUE self = Data_Make_Struct( klass, t_pg_recordcoder, pg_recordcoder_mark, -1, this ); pg_coder_init_decoder( self ); this->typemap = pg_typemap_all_strings; return self; @@ -52,9 +52,9 @@ pg_rowcoder_decoder_allocate( VALUE klass ) * */ static VALUE -pg_rowcoder_type_map_set(VALUE self, VALUE type_map) +pg_recordcoder_type_map_set(VALUE self, VALUE type_map) { - t_pg_rowcoder *this = DATA_PTR( self ); + t_pg_recordcoder *this = DATA_PTR( self ); if ( !rb_obj_is_kind_of(type_map, rb_cTypeMap) ){ rb_raise( rb_eTypeError, "wrong elements type %s (expected some kind of PG::TypeMap)", @@ -71,18 +71,18 @@ pg_rowcoder_type_map_set(VALUE self, VALUE type_map) * */ static VALUE -pg_rowcoder_type_map_get(VALUE self) +pg_recordcoder_type_map_get(VALUE self) { - t_pg_rowcoder *this = DATA_PTR( self ); + t_pg_recordcoder *this = DATA_PTR( self ); return this->typemap; } /* - * Document-class: PG::TextEncoder::RowRow < PG::RowEncoder + * Document-class: PG::TextEncoder::Record < PG::RecordEncoder * - * This 
class encodes one row of arbitrary columns for transmission as COPY data in text format. + * This class encodes one record of arbitrary columns for transmission as COPY data in text format. * See the {COPY command}[http://www.postgresql.org/docs/current/static/sql-copy.html] * for description of the format. * @@ -94,23 +94,23 @@ pg_rowcoder_type_map_get(VALUE self) * * Example with default type map ( TypeMapAllStrings ): * conn.exec "create table my_table (a text,b int,c bool)" - * enco = PG::TextEncoder::RowRow.new + * enco = PG::TextEncoder::Record.new * conn.copy_data "COPY my_table FROM STDIN", enco do * conn.put_copy_data ["astring", 7, false] * conn.put_copy_data ["string2", 42, true] * end - * This creates +my_table+ and inserts two rows. + * This creates +my_table+ and inserts two records. * * It is possible to manually assign a type encoder for each column per PG::TypeMapByColumn, * or to make use of PG::BasicTypeMapBasedOnResult to assign them based on the table OIDs. * - * See also PG::TextDecoder::RowRow for the decoding direction with + * See also PG::TextDecoder::Record for the decoding direction with * PG::Connection#get_copy_data . 
*/ static int -pg_text_enc_row(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, int enc_idx) +pg_text_enc_record(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, int enc_idx) { - t_pg_rowcoder *this = (t_pg_rowcoder *)conv; + t_pg_recordcoder *this = (t_pg_recordcoder *)conv; t_pg_coder_enc_func enc_func; static t_pg_coder *p_elem_coder; int i; @@ -161,7 +161,7 @@ pg_text_enc_row(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, i PG_RB_STR_ENSURE_CAPA( *intermediate, strlen * 2 + 2, current_out, end_capa_ptr ); *current_out++ = '"'; - /* Row string from subint with backslash escaping */ + /* Record string from subint with backslash escaping */ for(ptr1 = RSTRING_PTR(subint); ptr1 < RSTRING_PTR(subint) + strlen; ptr1++) { if (*ptr1 == '"' || *ptr1 == '\\') { *current_out++ = *ptr1; @@ -214,7 +214,7 @@ pg_text_enc_row(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, i } /* - * row_isspace() --- a non-locale-dependent isspace() + * record_isspace() --- a non-locale-dependent isspace() * * We used to use isspace() for parsing array values, but that has * undesirable results: an array value might be silently interpreted @@ -222,7 +222,7 @@ pg_text_enc_row(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, i * the traditional ASCII definition of isspace(). */ static int -row_isspace(char ch) +record_isspace(char ch) { if (ch == ' ' || ch == '\t' || @@ -235,9 +235,9 @@ row_isspace(char ch) } /* - * Document-class: PG::TextDecoder::RowRow < PG::RowDecoder + * Document-class: PG::TextDecoder::Record < PG::RecordDecoder * - * This class decodes one row of arbitrary columns received as COPY data in text format. + * This class decodes one record of arbitrary columns received as COPY data in text format. * See the {COPY command}[http://www.postgresql.org/docs/current/static/sql-copy.html] * for description of the format. 
* @@ -250,13 +250,13 @@ row_isspace(char ch) * Example with default type map ( TypeMapAllStrings ): * conn.exec("CREATE TABLE my_table AS VALUES('astring', 7, FALSE), ('string2', 42, TRUE) ") * - * deco = PG::TextDecoder::RowRow.new + * deco = PG::TextDecoder::Record.new * conn.copy_data "COPY my_table TO STDOUT", deco do - * while row=conn.get_copy_data - * p row + * while record=conn.get_copy_data + * p record * end * end - * This prints all rows of +my_table+ : + * This prints all records of +my_table+ : * ["astring", "7", "f"] * ["string2", "42", "t"] * @@ -265,20 +265,20 @@ row_isspace(char ch) * PG::TextDecoder::String.new, * PG::TextDecoder::Integer.new, * PG::TextDecoder::Boolean.new] ) - * deco = PG::TextDecoder::RowRow.new( type_map: tm ) + * deco = PG::TextDecoder::Record.new( type_map: tm ) * conn.copy_data "COPY my_table TO STDOUT", deco do - * while row=conn.get_copy_data - * p row + * while record=conn.get_copy_data + * p record * end * end - * This prints the rows with type casted columns: + * This prints the records with type casted columns: * ["astring", 7, false] * ["string2", 42, true] * * Instead of manually assigning a type decoder for each column, PG::BasicTypeMapForResults * can be used to assign them based on the table OIDs. * - * See also PG::TextEncoder::RowRow for the encoding direction with + * See also PG::TextEncoder::Record for the encoding direction with * PG::Connection#put_copy_data . 
*/ /* @@ -294,9 +294,9 @@ row_isspace(char ch) * src/backend/utils/adt/rowtypes.c */ static VALUE -pg_text_dec_row(t_pg_coder *conv, char *input_line, int len, int _tuple, int _field, int enc_idx) +pg_text_dec_record(t_pg_coder *conv, char *input_line, int len, int _tuple, int _field, int enc_idx) { - t_pg_rowcoder *this = (t_pg_rowcoder *)conv; + t_pg_recordcoder *this = (t_pg_recordcoder *)conv; /* Return value: array */ VALUE array; @@ -329,7 +329,7 @@ pg_text_dec_row(t_pg_coder *conv, char *input_line, int len, int _tuple, int _fi * each column, which is then fed to the appropriate input converter. */ /* Allow leading whitespace */ - while (*cur_ptr && row_isspace(*cur_ptr)) + while (*cur_ptr && record_isspace(*cur_ptr)) cur_ptr++; if (*cur_ptr++ != '(') rb_raise( rb_eArgError, "malformed record literal: \"%s\" - Missing left parenthesis.", input_line ); @@ -407,7 +407,7 @@ pg_text_dec_row(t_pg_coder *conv, char *input_line, int len, int _tuple, int _fi } /* Allow trailing whitespace */ - while (*cur_ptr && row_isspace(*cur_ptr)) + while (*cur_ptr && record_isspace(*cur_ptr)) cur_ptr++; if (*cur_ptr) rb_raise( rb_eArgError, "malformed record literal: \"%s\" - Junk after right parenthesis.", input_line ); @@ -417,28 +417,28 @@ pg_text_dec_row(t_pg_coder *conv, char *input_line, int len, int _tuple, int _fi void -init_pg_rowcoder() +init_pg_recordcoder() { - /* Document-class: PG::RowCoder < PG::Coder + /* Document-class: PG::RecordCoder < PG::Coder * * This is the base class for all type cast classes for COPY data, */ - rb_cPG_RowCoder = rb_define_class_under( rb_mPG, "RowCoder", rb_cPG_Coder ); - rb_define_method( rb_cPG_RowCoder, "type_map=", pg_rowcoder_type_map_set, 1 ); - rb_define_method( rb_cPG_RowCoder, "type_map", pg_rowcoder_type_map_get, 0 ); + rb_cPG_RecordCoder = rb_define_class_under( rb_mPG, "RecordCoder", rb_cPG_Coder ); + rb_define_method( rb_cPG_RecordCoder, "type_map=", pg_recordcoder_type_map_set, 1 ); + rb_define_method( rb_cPG_RecordCoder, 
"type_map", pg_recordcoder_type_map_get, 0 ); - /* Document-class: PG::RowEncoder < PG::RowCoder */ - rb_cPG_RowEncoder = rb_define_class_under( rb_mPG, "RowEncoder", rb_cPG_RowCoder ); - rb_define_alloc_func( rb_cPG_RowEncoder, pg_rowcoder_encoder_allocate ); - /* Document-class: PG::RowDecoder < PG::RowCoder */ - rb_cPG_RowDecoder = rb_define_class_under( rb_mPG, "RowDecoder", rb_cPG_RowCoder ); - rb_define_alloc_func( rb_cPG_RowDecoder, pg_rowcoder_decoder_allocate ); + /* Document-class: PG::RecordEncoder < PG::RecordCoder */ + rb_cPG_RecordEncoder = rb_define_class_under( rb_mPG, "RecordEncoder", rb_cPG_RecordCoder ); + rb_define_alloc_func( rb_cPG_RecordEncoder, pg_recordcoder_encoder_allocate ); + /* Document-class: PG::RecordDecoder < PG::RecordCoder */ + rb_cPG_RecordDecoder = rb_define_class_under( rb_mPG, "RecordDecoder", rb_cPG_RecordCoder ); + rb_define_alloc_func( rb_cPG_RecordDecoder, pg_recordcoder_decoder_allocate ); /* Make RDoc aware of the encoder classes... */ /* rb_mPG_TextEncoder = rb_define_module_under( rb_mPG, "TextEncoder" ); */ - /* dummy = rb_define_class_under( rb_mPG_TextEncoder, "RowRow", rb_cPG_RowEncoder ); */ - pg_define_coder( "Row", pg_text_enc_row, rb_cPG_RowEncoder, rb_mPG_TextEncoder ); + /* dummy = rb_define_class_under( rb_mPG_TextEncoder, "Record", rb_cPG_RecordEncoder ); */ + pg_define_coder( "Record", pg_text_enc_record, rb_cPG_RecordEncoder, rb_mPG_TextEncoder ); /* rb_mPG_TextDecoder = rb_define_module_under( rb_mPG, "TextDecoder" ); */ - /* dummy = rb_define_class_under( rb_mPG_TextDecoder, "RowRow", rb_cPG_RowDecoder ); */ - pg_define_coder( "Row", pg_text_dec_row, rb_cPG_RowDecoder, rb_mPG_TextDecoder ); + /* dummy = rb_define_class_under( rb_mPG_TextDecoder, "Record", rb_cPG_RecordDecoder ); */ + pg_define_coder( "Record", pg_text_dec_record, rb_cPG_RecordDecoder, rb_mPG_TextDecoder ); } diff --git a/lib/pg/coder.rb b/lib/pg/coder.rb index 1b5d79660..dca1005b5 100644 --- a/lib/pg/coder.rb +++ b/lib/pg/coder.rb @@ 
-93,7 +93,7 @@ def to_h end end - class RowCoder < Coder + class RecordCoder < Coder def to_h super.merge!({ type_map: type_map, diff --git a/spec/pg/type_spec.rb b/spec/pg/type_spec.rb index 2f4a2fb19..550d89c70 100644 --- a/spec/pg/type_spec.rb +++ b/spec/pg/type_spec.rb @@ -957,11 +957,11 @@ def textdec_timestamptz_decode_should_fail(str) end end - describe PG::RowCoder do - describe PG::TextEncoder::Row do + describe PG::RecordCoder do + describe PG::TextEncoder::Record do context "with default typemap" do let!(:encoder) do - PG::TextEncoder::Row.new + PG::TextEncoder::Record.new end it "should encode different types of Ruby objects" do @@ -985,7 +985,7 @@ def textdec_timestamptz_decode_should_fail(str) tm end let!(:encoder) do - PG::TextEncoder::Row.new type_map: tm + PG::TextEncoder::Record.new type_map: tm end it "should have reasonable default values" do @@ -1016,10 +1016,10 @@ def textdec_timestamptz_decode_should_fail(str) end end - describe PG::TextDecoder::Row do + describe PG::TextDecoder::Record do context "with default typemap" do let!(:decoder) do - PG::TextDecoder::Row.new + PG::TextDecoder::Record.new end describe '#decode' do @@ -1047,7 +1047,7 @@ def textdec_timestamptz_decode_should_fail(str) PG::TypeMapByColumn.new [textdec_int, textdec_string, intdec_incrementer, nil] end let!(:decoder) do - PG::TextDecoder::Row.new type_map: tm + PG::TextDecoder::Record.new type_map: tm end describe '#decode' do From bb0875a3b1435e4358d5b6ae5dfafb5bda68cbe5 Mon Sep 17 00:00:00 2001 From: Lars Kanis Date: Sat, 20 Apr 2019 23:46:02 +0200 Subject: [PATCH 3/7] Add PG::BasicTypeRegistry#register_coder to register instances instead of classes --- lib/pg/basic_type_mapping.rb | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/pg/basic_type_mapping.rb b/lib/pg/basic_type_mapping.rb index d7b36155f..9fcc14b5e 100644 --- a/lib/pg/basic_type_mapping.rb +++ b/lib/pg/basic_type_mapping.rb @@ -154,14 +154,20 @@ def 
check_format_and_direction(format, direction) # objects as values. CODERS_BY_NAME = [] + def self.register_coder(coder) + h = CODERS_BY_NAME[coder.format] ||= { encoder: {}, decoder: {} } + name = coder.name || raise(ArgumentError, "name of #{coder.inspect} must be defined") + h[:encoder][name] = coder if coder.respond_to?(:encode) + h[:decoder][name] = coder if coder.respond_to?(:decode) + end + # Register an OID type named +name+ with a typecasting encoder and decoder object in # +type+. +name+ should correspond to the `typname` column in # the `pg_type` table. # +format+ can be 0 for text format and 1 for binary. def self.register_type(format, name, encoder_class, decoder_class) - CODERS_BY_NAME[format] ||= { encoder: {}, decoder: {} } - CODERS_BY_NAME[format][:encoder][name] = encoder_class.new(name: name, format: format) if encoder_class - CODERS_BY_NAME[format][:decoder][name] = decoder_class.new(name: name, format: format) if decoder_class + register_coder(encoder_class.new(name: name, format: format)) if encoder_class + register_coder(decoder_class.new(name: name, format: format)) if decoder_class end # Alias the +old+ type to the +new+ type. From e629ef63a7a5fa74af5db727c6c588d9d7054ddc Mon Sep 17 00:00:00 2001 From: Lars Kanis Date: Sun, 29 Sep 2019 16:09:16 +0200 Subject: [PATCH 4/7] Add different ways to interpret Array params to BasicTypeMapForQueries ... and add documentation to record encoder and decoder. --- ext/pg_record_coder.c | 106 ++++++++++++++++------------- lib/pg/basic_type_mapping.rb | 43 +++++++++++- spec/pg/basic_type_mapping_spec.rb | 67 +++++++++++++++++- 3 files changed, 162 insertions(+), 54 deletions(-) diff --git a/ext/pg_record_coder.c b/ext/pg_record_coder.c index 8991cce3b..20fc6915b 100644 --- a/ext/pg_record_coder.c +++ b/ext/pg_record_coder.c @@ -45,10 +45,11 @@ pg_recordcoder_decoder_allocate( VALUE klass ) * call-seq: * coder.type_map = map * + * Defines how single columns are encoded or decoded. 
* +map+ must be a kind of PG::TypeMap . * * Defaults to a PG::TypeMapAllStrings , so that PG::TextEncoder::String respectively - * PG::TextDecoder::String is used for encoding/decoding of all columns. + * PG::TextDecoder::String is used for encoding/decoding of each column. * */ static VALUE @@ -69,6 +70,7 @@ pg_recordcoder_type_map_set(VALUE self, VALUE type_map) * call-seq: * coder.type_map -> PG::TypeMap * + * The PG::TypeMap that will be used for encoding and decoding of columns. */ static VALUE pg_recordcoder_type_map_get(VALUE self) @@ -82,15 +84,18 @@ pg_recordcoder_type_map_get(VALUE self) /* * Document-class: PG::TextEncoder::Record < PG::RecordEncoder * - * This class encodes one record of arbitrary columns for transmission as COPY data in text format. - * See the {COPY command}[http://www.postgresql.org/docs/current/static/sql-copy.html] - * for description of the format. + * This class encodes one record of columns for transmission as query parameter in text format. + * See PostgreSQL {Composite Types}[https://www.postgresql.org/docs/current/rowtypes.html] for a description of the format and how it can be used. * - * It is intended to be used in conjunction with PG::Connection#put_copy_data . + * PostgreSQL allows composite types to be used in many of the same ways that simple types can be used. + * For example, a column of a table can be declared to be of a composite type. * - * The columns are expected as Array of values. The single values are encoded as defined - * in the assigned #type_map. If no type_map was assigned, all values are converted to - * strings by PG::TextEncoder::String. + * The columns are expected as Array of values. + * The single values are encoded as defined in the assigned #type_map. + * If no type_map was assigned, all values are converted to strings by PG::TextEncoder::String. 
+ * + * It is possible to manually assign a type encoder for each column per PG::TypeMapByColumn, + * or to make use of PG::BasicTypeMapBasedOnResult to assign them based on the table OIDs. * * Example with default type map ( TypeMapAllStrings ): * conn.exec "create table my_table (a text,b int,c bool)" @@ -101,11 +106,7 @@ pg_recordcoder_type_map_get(VALUE self) * end * This creates +my_table+ and inserts two records. * - * It is possible to manually assign a type encoder for each column per PG::TypeMapByColumn, - * or to make use of PG::BasicTypeMapBasedOnResult to assign them based on the table OIDs. - * - * See also PG::TextDecoder::Record for the decoding direction with - * PG::Connection#get_copy_data . + * See also PG::TextDecoder::Record for the decoding direction. */ static int pg_text_enc_record(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, int enc_idx) @@ -237,49 +238,56 @@ record_isspace(char ch) /* * Document-class: PG::TextDecoder::Record < PG::RecordDecoder * - * This class decodes one record of arbitrary columns received as COPY data in text format. - * See the {COPY command}[http://www.postgresql.org/docs/current/static/sql-copy.html] - * for description of the format. + * This class decodes one record of values received from a composite type column in text format. + * See PostgreSQL {Composite Types}[https://www.postgresql.org/docs/current/rowtypes.html] for a description of the format and how it can be used. * - * It is intended to be used in conjunction with PG::Connection#get_copy_data . + * PostgreSQL allows composite types to be used in many of the same ways that simple types can be used. + * For example, a column of a table can be declared to be of a composite type. * - * The columns are retrieved as Array of values. The single values are decoded as defined - * in the assigned #type_map. If no type_map was assigned, all values are converted to - * strings by PG::TextDecoder::String. 
+ * The columns are retrieved as Array of values. + * The single values are decoded as defined in the assigned #type_map. + * If no type_map was assigned, all values are converted to strings by PG::TextDecoder::String. * - * Example with default type map ( TypeMapAllStrings ): - * conn.exec("CREATE TABLE my_table AS VALUES('astring', 7, FALSE), ('string2', 42, TRUE) ") + * Decode a record from +String+ to +Array+ (uses default type map TypeMapAllStrings): + * PG::TextDecoder::Record.new.decode("(1,2)") # => ["1", "2"] * - * deco = PG::TextDecoder::Record.new - * conn.copy_data "COPY my_table TO STDOUT", deco do - * while record=conn.get_copy_data - * p record - * end - * end - * This prints all records of +my_table+ : - * ["astring", "7", "f"] - * ["string2", "42", "t"] + * Decode a record from +String+ to +Array+ : + * # Build a type map for two Floats + * tm = PG::TypeMapByColumn.new([PG::TextDecoder::Float.new]*2) + * # Use this type map to decode the record: + * PG::TextDecoder::Record.new(type_map: tm).decode("(1,2)") + * # => [1.0, 2.0] * - * Example with column based type map: - * tm = PG::TypeMapByColumn.new( [ - * PG::TextDecoder::String.new, - * PG::TextDecoder::Integer.new, - * PG::TextDecoder::Boolean.new] ) - * deco = PG::TextDecoder::Record.new( type_map: tm ) - * conn.copy_data "COPY my_table TO STDOUT", deco do - * while record=conn.get_copy_data - * p record - * end - * end - * This prints the records with type casted columns: - * ["astring", 7, false] - * ["string2", 42, true] + * Records can also be encoded and decoded directly to and from the database. + * This avoids intermediate String allocations and is very fast. 
+ * Take the following type and table definitions: + * conn.exec("CREATE TYPE complex AS (r float, i float) ") + * conn.exec("CREATE TABLE my_table (v1 complex, v2 complex) ") + * conn.exec("INSERT INTO my_table VALUES((2,3), (4,5)), ((6,7), (8,9)) ") + * + * The record can be decoded by applying a type map to the PG::Result object: + * # Build a type map for two floats "r" and "i" + * tm = PG::TypeMapByColumn.new([PG::TextDecoder::Float.new]*2) + * # Build a record decoder to decode this two-value type: + * deco = PG::TextDecoder::Record.new(type_map: tm) + * # Fetch table data and use the decoder to cast the two complex values "v1" and "v2": + * conn.exec("SELECT * FROM my_table").map_types!(PG::TypeMapByColumn.new([deco]*2)).to_a + * # => [{"v1"=>[2.0, 3.0], "v2"=>[4.0, 5.0]}, {"v1"=>[6.0, 7.0], "v2"=>[8.0, 9.0]}] * - * Instead of manually assigning a type decoder for each column, PG::BasicTypeMapForResults - * can be used to assign them based on the table OIDs. + * It's more very convenient to use the PG::BasicTypeRegistry, which is based on database OIDs. 
+ * # Fetch a NULL record of our type to retrieve the OIDs of the two record fields "r" and "i" + * oids = conn.exec( "SELECT (NULL::complex).*" ) + * # Build a type map (PG::TypeMapByColumn) for decoding the "complex" type + * dtm = PG::BasicTypeMapForResults.new(conn).build_column_map( oids ) + * # Register a record decoder for decoding our type "complex" + * PG::BasicTypeRegistry.register_coder(PG::TextDecoder::Record.new(type_map: dtm, name: "complex")) + * # Apply the basic type registry to all results retrieved from the server + * conn.type_map_for_results = PG::BasicTypeMapForResults.new(conn) + * # Now queries decode the "complex" type (and many basic types) automatically + * conn.exec("SELECT * FROM my_table").to_a + * # => [{"v1"=>[2.0, 3.0], "v2"=>[4.0, 5.0]}, {"v1"=>[6.0, 7.0], "v2"=>[8.0, 9.0]}] * - * See also PG::TextEncoder::Record for the encoding direction with - * PG::Connection#put_copy_data . + * See also PG::TextEncoder::Record for the encoding direction. */ /* * Parse the current line into separate attributes (fields), diff --git a/lib/pg/basic_type_mapping.rb b/lib/pg/basic_type_mapping.rb index 9fcc14b5e..e532281f1 100644 --- a/lib/pg/basic_type_mapping.rb +++ b/lib/pg/basic_type_mapping.rb @@ -386,19 +386,43 @@ def initialize(connection) # # Assign a default ruleset for type casts of input and output values. # conn.type_map_for_queries = PG::BasicTypeMapForQueries.new(conn) # # Execute a query. The Integer param value is typecasted internally by PG::BinaryEncoder::Int8. -# # The format of the parameter is set to 1 (binary) and the OID of this parameter is set to 20 (int8). +# # The format of the parameter is set to 0 (text) and the OID of this parameter is set to 20 (int8). 
# res = conn.exec_params( "SELECT $1", [5] ) class PG::BasicTypeMapForQueries < PG::TypeMapByClass include PG::BasicTypeRegistry def initialize(connection) @coder_maps = build_coder_maps(connection) + @array_encoders_by_klass = array_encoders_by_klass + @encode_array_as = :array + init_encoders + end + def init_encoders + coders.each { |kl, c| self[kl] = nil } # Clear type map populate_encoder_list - @array_encoders_by_klass = array_encoders_by_klass @anyarray_encoder = coder_by_name(0, :encoder, '_any') end + def encode_array_as=(pg_type) + case pg_type + when :array + when :json + when :record + when /\A_/ + else + raise ArgumentError, "invalid pg_type #{pg_type.inspect}" + end + + @encode_array_as = pg_type + + init_encoders + end + + def encode_array_as + @encode_array_as + end + private def coder_by_name(format, direction, name) @@ -418,7 +442,19 @@ def populate_encoder_list end self[klass] = coder else - self[klass] = selector + + case @encode_array_as + when :array + self[klass] = selector + when :json + self[klass] = PG::TextEncoder::JSON.new + when :record + self[klass] = PG::TextEncoder::Record.new type_map: self + when /\A_/ + self[klass] = coder_by_name(0, :encoder, @encode_array_as) || raise(ArgumentError, "unknown array type #{@encode_array_as.inspect}") + else + raise ArgumentError, "invalid pg_type #{@encode_array_as.inspect}" + end end end end @@ -451,6 +487,7 @@ def get_array_type(value) # We use text format and no type OID for IPAddr, because setting the OID can lead # to unnecessary inet/cidr conversions on the server side. 
IPAddr => [0, 'inet'], + Hash => [0, 'json'], Array => :get_array_type, } diff --git a/spec/pg/basic_type_mapping_spec.rb b/spec/pg/basic_type_mapping_spec.rb index f9b7477fc..7b64c072e 100644 --- a/spec/pg/basic_type_mapping_spec.rb +++ b/spec/pg/basic_type_mapping_spec.rb @@ -41,7 +41,7 @@ def expect_to_typecase_result_value_warning # it "should do basic param encoding", :ruby_19 do - res = @conn.exec_params( "SELECT $1::int8,$2::float,$3,$4::TEXT", + res = @conn.exec_params( "SELECT $1::int8, $2::float, $3, $4::TEXT", [1, 2.1, true, "b"], nil, basic_type_mapping ) expect( res.values ).to eq( [ @@ -51,7 +51,8 @@ def expect_to_typecase_result_value_warning expect( result_typenames(res) ).to eq( ['bigint', 'double precision', 'boolean', 'text'] ) end - it "should do array param encoding" do + it "should do default array-as-array param encoding" do + expect( basic_type_mapping.encode_array_as).to eq(:array) res = @conn.exec_params( "SELECT $1,$2,$3,$4", [ [1, 2, 3], [[1, 2], [3, nil]], [1.11, 2.21], @@ -67,6 +68,68 @@ def expect_to_typecase_result_value_warning expect( result_typenames(res) ).to eq( ['bigint[]', 'bigint[]', 'double precision[]', 'text[]'] ) end + it "should do array-as-json encoding" do + basic_type_mapping.encode_array_as = :json + expect( basic_type_mapping.encode_array_as).to eq(:json) + + res = @conn.exec_params( "SELECT $1::JSON, $2::JSON", [ + [1, {a: 5}, true, ["a", 2], [3.4, nil]], + ['/,"'.gsub("/", "\\"), nil, 'abcäöü'], + ], nil, basic_type_mapping ) + + expect( res.values ).to eq( [[ + '[1,{"a":5},true,["a",2],[3.4,null]]', + '["//,/"",null,"abcäöü"]'.gsub("/", "\\"), + ]] ) + + expect( result_typenames(res) ).to eq( ['json', 'json'] ) + end + + it "should do hash-as-json encoding" do + res = @conn.exec_params( "SELECT $1::JSON, $2::JSON", [ + {a: 5, b: ["a", 2], c: nil}, + {qu: '/,"'.gsub("/", "\\"), ni: nil, uml: 'abcäöü'}, + ], nil, basic_type_mapping ) + + expect( res.values ).to eq( [[ + '{"a":5,"b":["a",2],"c":null}', + 
'{"qu":"//,/"","ni":null,"uml":"abcäöü"}'.gsub("/", "\\"), + ]] ) + + expect( result_typenames(res) ).to eq( ['json', 'json'] ) + end + + describe "Record encoding" do + before :all do + @conn.exec("CREATE TYPE test_record1 AS (i int, d float, t text)") + @conn.exec("CREATE TYPE test_record2 AS (i int, r test_record1)") + end + + after :all do + @conn.exec("DROP TYPE IF EXISTS test_record2 CASCADE") + @conn.exec("DROP TYPE IF EXISTS test_record1 CASCADE") + end + + it "should do array-as-record encoding" do + basic_type_mapping.encode_array_as = :record + expect( basic_type_mapping.encode_array_as).to eq(:record) + + res = @conn.exec_params( "SELECT $1::test_record1, $2::test_record2, $3::text", [ + [5, 3.4, "txt"], + [1, [2, 4.5, "bcd"]], + [4, 5, 6], + ], nil, basic_type_mapping ) + + expect( res.values ).to eq( [[ + '(5,3.4,txt)', + '(1,"(2,4.5,bcd)")', + '("4","5","6")', + ]] ) + + expect( result_typenames(res) ).to eq( ['test_record1', 'test_record2', 'text'] ) + end + end + it "should do bigdecimal param encoding" do large = ('123456790'*10) << '.' << ('012345679') res = @conn.exec_params( "SELECT $1::numeric,$2::numeric", From 6756943886756d46ce63ca86ee823c54fc03b4bb Mon Sep 17 00:00:00 2001 From: Lars Kanis Date: Tue, 1 Oct 2019 22:13:56 +0200 Subject: [PATCH 5/7] More documentation to RecordCoder --- ext/pg_record_coder.c | 44 ++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/ext/pg_record_coder.c b/ext/pg_record_coder.c index 20fc6915b..3f6745478 100644 --- a/ext/pg_record_coder.c +++ b/ext/pg_record_coder.c @@ -90,21 +90,37 @@ pg_recordcoder_type_map_get(VALUE self) * PostgreSQL allows composite types to be used in many of the same ways that simple types can be used. * For example, a column of a table can be declared to be of a composite type. * - * The columns are expected as Array of values. + * The encoder expects the record columns as array of values. 
* The single values are encoded as defined in the assigned #type_map. * If no type_map was assigned, all values are converted to strings by PG::TextEncoder::String. * * It is possible to manually assign a type encoder for each column per PG::TypeMapByColumn, * or to make use of PG::BasicTypeMapBasedOnResult to assign them based on the table OIDs. * - * Example with default type map ( TypeMapAllStrings ): - * conn.exec "create table my_table (a text,b int,c bool)" - * enco = PG::TextEncoder::Record.new - * conn.copy_data "COPY my_table FROM STDIN", enco do - * conn.put_copy_data ["astring", 7, false] - * conn.put_copy_data ["string2", 42, true] - * end - * This creates +my_table+ and inserts two records. + * Encode a record from an Array to a +String+ in PostgreSQL Composite Type format (uses default type map TypeMapAllStrings): + * PG::TextEncoder::Record.new.decode("(1,2)") # => ["1", "2"] + * + * Encode a record from Array to +String+ : + * # Build a type map for two Floats + * tm = PG::TypeMapByColumn.new([PG::TextEncoder::Float.new]*2) + * # Use this type map to encode the record: + * PG::TextEncoder::Record.new(type_map: tm).encode([1,2]) + * # => "(\"1.0000000000000000E+00\",\"2.0000000000000000E+00\")" + * + * Records can also be encoded and decoded directly to and from the database. + * This avoids intermediate String allocations and is very fast. 
+ * Take the following type and table definitions: + * conn.exec("CREATE TYPE complex AS (r float, i float) ") + * conn.exec("CREATE TABLE my_table (v1 complex, v2 complex) ") + * + * The record can be encoded by applying a type map to the PG::Result object: + * # Build a type map for two floats "r" and "i" + * tm = PG::TypeMapByColumn.new([PG::TextEncoder::Float.new]*2) + * # Build a record encoder to encode this two-value type: + * deco = PG::TextEncoder::Record.new(type_map: tm) + * # Insert table data and use the encoder to cast the complex value "v1" from array: + * conn.exec_params("INSERT INTO my_table VALUES ($1) RETURNING v1", [[1,2]], 0, PG::TypeMapByColumn.new([deco])).to_a + * # => [{"v1"=>"(1,2)"}] * * See also PG::TextDecoder::Record for the decoding direction. */ @@ -244,14 +260,14 @@ record_isspace(char ch) * PostgreSQL allows composite types to be used in many of the same ways that simple types can be used. * For example, a column of a table can be declared to be of a composite type. * - * The columns are retrieved as Array of values. + * The columns are returned from the decoder as array of values. * The single values are decoded as defined in the assigned #type_map. * If no type_map was assigned, all values are converted to strings by PG::TextDecoder::String. 
* - * Decode a record from +String+ to +Array+ (uses default type map TypeMapAllStrings): + * Decode a record in Composite Type format from +String+ to Array (uses default type map TypeMapAllStrings): * PG::TextDecoder::Record.new.decode("(1,2)") # => ["1", "2"] * - * Decode a record from +String+ to +Array+ : + * Decode a record from +String+ to Array : * # Build a type map for two Floats * tm = PG::TypeMapByColumn.new([PG::TextDecoder::Float.new]*2) * # Use this type map to decode the record: @@ -287,7 +303,9 @@ record_isspace(char ch) * conn.exec("SELECT * FROM my_table").to_a * # => [{"v1"=>[2.0, 3.0], "v2"=>[4.0, 5.0]}, {"v1"=>[6.0, 7.0], "v2"=>[8.0, 9.0]}] * - * See also PG::TextEncoder::Record for the encoding direction. + * Records can also be nested or further wrapped into other encoders like PG::TextEncoder::CopyRow. + * + * See also PG::TextEncoder::Record for the encoding direction (data sent to the server). */ /* * Parse the current line into separate attributes (fields), From e18fd5796ba45182c183cfc816aa8d8fcc78c9b5 Mon Sep 17 00:00:00 2001 From: Lars Kanis Date: Fri, 4 Oct 2019 22:07:22 +0200 Subject: [PATCH 6/7] Finalize Record documentation --- ext/pg_record_coder.c | 40 +++++++++++++++++++++++++++--------- lib/pg/basic_type_mapping.rb | 15 +++++++++++--- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/ext/pg_record_coder.c b/ext/pg_record_coder.c index 3f6745478..c12faa7c8 100644 --- a/ext/pg_record_coder.c +++ b/ext/pg_record_coder.c @@ -98,7 +98,7 @@ pg_recordcoder_type_map_get(VALUE self) * or to make use of PG::BasicTypeMapBasedOnResult to assign them based on the table OIDs. 
* * Encode a record from an Array to a +String+ in PostgreSQL Composite Type format (uses default type map TypeMapAllStrings): - * PG::TextEncoder::Record.new.decode("(1,2)") # => ["1", "2"] + * PG::TextEncoder::Record.new.encode([1, 2]) # => "(\"1\",\"2\")" * * Encode a record from Array to +String+ : * # Build a type map for two Floats @@ -108,20 +108,40 @@ pg_recordcoder_type_map_get(VALUE self) * # => "(\"1.0000000000000000E+00\",\"2.0000000000000000E+00\")" * * Records can also be encoded and decoded directly to and from the database. - * This avoids intermediate String allocations and is very fast. + * This avoids intermediate string allocations and is very fast. * Take the following type and table definitions: * conn.exec("CREATE TYPE complex AS (r float, i float) ") * conn.exec("CREATE TABLE my_table (v1 complex, v2 complex) ") * - * The record can be encoded by applying a type map to the PG::Result object: - * # Build a type map for two floats "r" and "i" + * A record can be encoded by adding a type map to Connection#exec_params and siblings: + * # Build a type map for the two floats "r" and "i" as in our "complex" type * tm = PG::TypeMapByColumn.new([PG::TextEncoder::Float.new]*2) - * # Build a record encoder to encode this two-value type: - * deco = PG::TextEncoder::Record.new(type_map: tm) - * # Insert table data and use the encoder to cast the complex value "v1" from array: - * conn.exec_params("INSERT INTO my_table VALUES ($1) RETURNING v1", [[1,2]], 0, PG::TypeMapByColumn.new([deco])).to_a + * # Build a record encoder to encode this type as a record: + * enco = PG::TextEncoder::Record.new(type_map: tm) + * # Insert table data and use the encoder to cast the complex value "v1" from ruby array: + * conn.exec_params("INSERT INTO my_table VALUES ($1) RETURNING v1", [[1,2]], 0, PG::TypeMapByColumn.new([enco])).to_a * # => [{"v1"=>"(1,2)"}] * + * Alternatively the typemap can be built based on database OIDs rather than manually assigning encoders.
+ * # Fetch a NULL record of our type to retrieve the OIDs of the two fields "r" and "i" + * oids = conn.exec( "SELECT (NULL::complex).*" ) + * # Build a type map (PG::TypeMapByColumn) for encoding the "complex" type + * etm = PG::BasicTypeMapBasedOnResult.new(conn).build_column_map( oids ) + * + * It's also possible to use the BasicTypeMapForQueries to send records to the database server. + * In contrast to ORM libraries, PG doesn't have information regarding the type of data the server is expecting. + * So BasicTypeMapForQueries works based on the class of the values to be sent and it has to be instructed that a ruby array shall be cast to a record. + * # Retrieve OIDs of all basic types from the database + * etm = PG::BasicTypeMapForQueries.new(conn) + * etm.encode_array_as = :record + * # Apply the basic type registry to all values sent to the server + * conn.type_map_for_queries = etm + * # Send a complex number as an array of two integers + * conn.exec_params("INSERT INTO my_table VALUES ($1) RETURNING v1", [[1,2]]).to_a + * # => [{"v1"=>"(1,2)"}] + * + * Records can also be nested or further wrapped into other encoders like PG::TextEncoder::CopyRow. + * * See also PG::TextDecoder::Record for the decoding direction. */ static int @@ -291,7 +311,7 @@ record_isspace(char ch) * # => [{"v1"=>[2.0, 3.0], "v2"=>[4.0, 5.0]}, {"v1"=>[6.0, 7.0], "v2"=>[8.0, 9.0]}] * * It's more very convenient to use the PG::BasicTypeRegistry, which is based on database OIDs.
- * # Fetch a NULL record of our type to retrieve the OIDs of the two record fields "r" and "i" + * # Fetch a NULL record of our type to retrieve the OIDs of the two fields "r" and "i" * oids = conn.exec( "SELECT (NULL::complex).*" ) * # Build a type map (PG::TypeMapByColumn) for decoding the "complex" type * dtm = PG::BasicTypeMapForResults.new(conn).build_column_map( oids ) @@ -303,7 +323,7 @@ record_isspace(char ch) * conn.exec("SELECT * FROM my_table").to_a * # => [{"v1"=>[2.0, 3.0], "v2"=>[4.0, 5.0]}, {"v1"=>[6.0, 7.0], "v2"=>[8.0, 9.0]}] * - * Records can also be nested or further wrapped into other encoders like PG::TextEncoder::CopyRow. + * Records can also be nested or further wrapped into other decoders like PG::TextDecoder::CopyRow. * * See also PG::TextEncoder::Record for the encoding direction (data sent to the server). */ diff --git a/lib/pg/basic_type_mapping.rb b/lib/pg/basic_type_mapping.rb index e532281f1..a24f01188 100644 --- a/lib/pg/basic_type_mapping.rb +++ b/lib/pg/basic_type_mapping.rb @@ -404,6 +404,17 @@ def init_encoders @anyarray_encoder = coder_by_name(0, :encoder, '_any') end + # Change the mechanism that is used to encode ruby array values + # + # Possible values: + # * +:array+ : Encode the ruby array as a PostgreSQL array. + # The array element type is inferred from the class of the first array element. This is the default. + # * +:json+ : Encode the ruby array as a JSON document. + # * +:record+ : Encode the ruby array as a composite type row. + # * "_type" : Encode the ruby array as a particular PostgreSQL type. + # All PostgreSQL array types are supported. + # If there's an encoder registered for the element +type+, it will be used. + # Otherwise a string conversion (by +value.to_s+) is done.
def encode_array_as=(pg_type) case pg_type when :array @@ -419,9 +430,7 @@ def encode_array_as=(pg_type) init_encoders end - def encode_array_as - @encode_array_as - end + attr_reader :encode_array_as private From 8acf8fdc8a29c1efabd01f023ebad14840c60c2e Mon Sep 17 00:00:00 2001 From: Lars Kanis Date: Fri, 4 Oct 2019 22:07:56 +0200 Subject: [PATCH 7/7] Make init_encoders private It is internal only. --- lib/pg/basic_type_mapping.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/pg/basic_type_mapping.rb b/lib/pg/basic_type_mapping.rb index a24f01188..08bc0917d 100644 --- a/lib/pg/basic_type_mapping.rb +++ b/lib/pg/basic_type_mapping.rb @@ -398,12 +398,6 @@ def initialize(connection) init_encoders end - def init_encoders - coders.each { |kl, c| self[kl] = nil } # Clear type map - populate_encoder_list - @anyarray_encoder = coder_by_name(0, :encoder, '_any') - end - # Change the mechanism that is used to encode ruby array values # # Possible values: @@ -434,6 +428,12 @@ def encode_array_as=(pg_type) private + def init_encoders + coders.each { |kl, c| self[kl] = nil } # Clear type map + populate_encoder_list + @anyarray_encoder = coder_by_name(0, :encoder, '_any') + end + def coder_by_name(format, direction, name) check_format_and_direction(format, direction) @coder_maps[format][direction].coder_by_name(name)