@@ -418,6 +418,159 @@ Status insert_int_value(const rapidjson::Value& col, PrimitiveType type,
418
418
return parse_and_insert_data (col);
419
419
}
420
420
421
+ template <typename T>
422
+ Status handle_value (const rapidjson::Value& col, PrimitiveType sub_type, bool pure_doc_value,
423
+ T& val) {
424
+ RETURN_IF_ERROR (get_int_value<T>(col, sub_type, &val, pure_doc_value));
425
+ return Status::OK ();
426
+ }
427
+
428
+ template <>
429
+ Status handle_value<float >(const rapidjson::Value& col, PrimitiveType sub_type, bool pure_doc_value,
430
+ float & val) {
431
+ RETURN_IF_ERROR (get_float_value<float >(col, sub_type, &val, pure_doc_value));
432
+ return Status::OK ();
433
+ }
434
+
435
+ template <>
436
+ Status handle_value<double >(const rapidjson::Value& col, PrimitiveType sub_type,
437
+ bool pure_doc_value, double & val) {
438
+ RETURN_IF_ERROR (get_float_value<double >(col, sub_type, &val, pure_doc_value));
439
+ return Status::OK ();
440
+ }
441
+
442
+ template <>
443
+ Status handle_value<std::string>(const rapidjson::Value& col, PrimitiveType sub_type,
444
+ bool pure_doc_value, std::string& val) {
445
+ RETURN_ERROR_IF_COL_IS_ARRAY (col, sub_type, true );
446
+ if (!col.IsString ()) {
447
+ val = json_value_to_string (col);
448
+ } else {
449
+ val = col.GetString ();
450
+ }
451
+ return Status::OK ();
452
+ }
453
+
454
+ template <>
455
+ Status handle_value<bool >(const rapidjson::Value& col, PrimitiveType sub_type, bool pure_doc_value,
456
+ bool & val) {
457
+ if (col.IsBool ()) {
458
+ val = col.GetBool ();
459
+ return Status::OK ();
460
+ }
461
+
462
+ if (col.IsNumber ()) {
463
+ val = col.GetInt ();
464
+ return Status::OK ();
465
+ }
466
+
467
+ bool is_nested_str = false ;
468
+ if (pure_doc_value && col.IsArray () && !col.Empty () && col[0 ].IsBool ()) {
469
+ val = col[0 ].GetBool ();
470
+ return Status::OK ();
471
+ } else if (pure_doc_value && col.IsArray () && !col.Empty () && col[0 ].IsString ()) {
472
+ is_nested_str = true ;
473
+ } else if (pure_doc_value && col.IsArray ()) {
474
+ return Status::InternalError (ERROR_INVALID_COL_DATA, " BOOLEAN" );
475
+ }
476
+
477
+ const rapidjson::Value& str_col = is_nested_str ? col[0 ] : col;
478
+ const std::string& str_val = str_col.GetString ();
479
+ size_t val_size = str_col.GetStringLength ();
480
+ StringParser::ParseResult result;
481
+ val = StringParser::string_to_bool (str_val.c_str (), val_size, &result);
482
+ RETURN_ERROR_IF_PARSING_FAILED (result, str_col, sub_type);
483
+ return Status::OK ();
484
+ }
485
+
486
+ template <typename T>
487
+ Status process_single_column (const rapidjson::Value& col, PrimitiveType sub_type,
488
+ bool pure_doc_value, vectorized::Array& array) {
489
+ T val;
490
+ RETURN_IF_ERROR (handle_value<T>(col, sub_type, pure_doc_value, val));
491
+ array.push_back (val);
492
+ return Status::OK ();
493
+ }
494
+
495
+ template <typename T>
496
+ Status process_column_array (const rapidjson::Value& col, PrimitiveType sub_type,
497
+ bool pure_doc_value, vectorized::Array& array) {
498
+ for (const auto & sub_col : col.GetArray ()) {
499
+ RETURN_IF_ERROR (process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
500
+ }
501
+ return Status::OK ();
502
+ }
503
+
504
+ template <typename T>
505
+ Status process_column (const rapidjson::Value& col, PrimitiveType sub_type, bool pure_doc_value,
506
+ vectorized::Array& array) {
507
+ if (!col.IsArray ()) {
508
+ return process_single_column<T>(col, sub_type, pure_doc_value, array);
509
+ } else {
510
+ return process_column_array<T>(col, sub_type, pure_doc_value, array);
511
+ }
512
+ }
513
+
514
+ template <typename DateType, typename RT>
515
+ Status process_date_column (const rapidjson::Value& col, PrimitiveType sub_type, bool pure_doc_value,
516
+ vectorized::Array& array, const cctz::time_zone& time_zone) {
517
+ if (!col.IsArray ()) {
518
+ RT data;
519
+ RETURN_IF_ERROR (
520
+ (get_date_int<DateType, RT>(col, sub_type, pure_doc_value, &data, time_zone)));
521
+ array.push_back (data);
522
+ } else {
523
+ for (const auto & sub_col : col.GetArray ()) {
524
+ RT data;
525
+ RETURN_IF_ERROR ((get_date_int<DateType, RT>(sub_col, sub_type, pure_doc_value, &data,
526
+ time_zone)));
527
+ array.push_back (data);
528
+ }
529
+ }
530
+ return Status::OK ();
531
+ }
532
+
533
+ Status ScrollParser::parse_column (const rapidjson::Value& col, PrimitiveType sub_type,
534
+ bool pure_doc_value, vectorized::Array& array,
535
+ const cctz::time_zone& time_zone) {
536
+ switch (sub_type) {
537
+ case TYPE_CHAR:
538
+ case TYPE_VARCHAR:
539
+ case TYPE_STRING:
540
+ return process_column<std::string>(col, sub_type, pure_doc_value, array);
541
+ case TYPE_TINYINT:
542
+ return process_column<int8_t >(col, sub_type, pure_doc_value, array);
543
+ case TYPE_SMALLINT:
544
+ return process_column<int16_t >(col, sub_type, pure_doc_value, array);
545
+ case TYPE_INT:
546
+ return process_column<int32>(col, sub_type, pure_doc_value, array);
547
+ case TYPE_BIGINT:
548
+ return process_column<int64_t >(col, sub_type, pure_doc_value, array);
549
+ case TYPE_LARGEINT:
550
+ return process_column<__int128>(col, sub_type, pure_doc_value, array);
551
+ case TYPE_FLOAT:
552
+ return process_column<float >(col, sub_type, pure_doc_value, array);
553
+ case TYPE_DOUBLE:
554
+ return process_column<double >(col, sub_type, pure_doc_value, array);
555
+ case TYPE_BOOLEAN:
556
+ return process_column<bool >(col, sub_type, pure_doc_value, array);
557
+ // date/datetime v2 is the default type for catalog table,
558
+ // see https://github.com/apache/doris/pull/16304
559
+ // No need to support date and datetime types.
560
+ case TYPE_DATEV2: {
561
+ return process_date_column<DateV2Value<DateV2ValueType>, uint32_t >(
562
+ col, sub_type, pure_doc_value, array, time_zone);
563
+ }
564
+ case TYPE_DATETIMEV2: {
565
+ return process_date_column<DateV2Value<DateTimeV2ValueType>, uint64_t >(
566
+ col, sub_type, pure_doc_value, array, time_zone);
567
+ }
568
+ default :
569
+ LOG (ERROR) << " Do not support Array type: " << sub_type;
570
+ return Status::InternalError (" Unsupported type" );
571
+ }
572
+ }
573
+
421
574
ScrollParser::ScrollParser (bool doc_value_mode) : _size(0 ), _line_index(0 ) {}
422
575
423
576
ScrollParser::~ScrollParser () = default ;
@@ -687,125 +840,7 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
687
840
case TYPE_ARRAY: {
688
841
vectorized::Array array;
689
842
const auto & sub_type = tuple_desc->slots ()[i]->type ().children [0 ].type ;
690
- RETURN_ERROR_IF_COL_IS_ARRAY (col, type, false );
691
- for (const auto & sub_col : col.GetArray ()) {
692
- switch (sub_type) {
693
- case TYPE_CHAR:
694
- case TYPE_VARCHAR:
695
- case TYPE_STRING: {
696
- std::string val;
697
- RETURN_ERROR_IF_COL_IS_ARRAY (sub_col, sub_type, true );
698
- if (!sub_col.IsString ()) {
699
- val = json_value_to_string (sub_col);
700
- } else {
701
- val = sub_col.GetString ();
702
- }
703
- array.push_back (val);
704
- break ;
705
- }
706
- case TYPE_TINYINT: {
707
- int8_t val;
708
- RETURN_IF_ERROR (get_int_value<int8_t >(sub_col, sub_type, &val, pure_doc_value));
709
- array.push_back (val);
710
- break ;
711
- }
712
- case TYPE_SMALLINT: {
713
- int16_t val;
714
- RETURN_IF_ERROR (
715
- get_int_value<int16_t >(sub_col, sub_type, &val, pure_doc_value));
716
- array.push_back (val);
717
- break ;
718
- }
719
- case TYPE_INT: {
720
- int32 val;
721
- RETURN_IF_ERROR (get_int_value<int32>(sub_col, sub_type, &val, pure_doc_value));
722
- array.push_back (val);
723
- break ;
724
- }
725
- case TYPE_BIGINT: {
726
- int64_t val;
727
- RETURN_IF_ERROR (
728
- get_int_value<int64_t >(sub_col, sub_type, &val, pure_doc_value));
729
- array.push_back (val);
730
- break ;
731
- }
732
- case TYPE_LARGEINT: {
733
- __int128 val;
734
- RETURN_IF_ERROR (
735
- get_int_value<__int128>(sub_col, sub_type, &val, pure_doc_value));
736
- array.push_back (val);
737
- break ;
738
- }
739
- case TYPE_FLOAT: {
740
- float val {};
741
- RETURN_IF_ERROR (
742
- get_float_value<float >(sub_col, sub_type, &val, pure_doc_value));
743
- array.push_back (val);
744
- break ;
745
- }
746
- case TYPE_DOUBLE: {
747
- double val {};
748
- RETURN_IF_ERROR (
749
- get_float_value<double >(sub_col, sub_type, &val, pure_doc_value));
750
- array.push_back (val);
751
- break ;
752
- }
753
- case TYPE_BOOLEAN: {
754
- if (sub_col.IsBool ()) {
755
- array.push_back (sub_col.GetBool ());
756
- break ;
757
- }
758
-
759
- if (sub_col.IsNumber ()) {
760
- array.push_back (sub_col.GetInt ());
761
- break ;
762
- }
763
-
764
- bool is_nested_str = false ;
765
- if (pure_doc_value && sub_col.IsArray () && !sub_col.Empty () &&
766
- sub_col[0 ].IsBool ()) {
767
- array.push_back (sub_col[0 ].GetBool ());
768
- break ;
769
- } else if (pure_doc_value && sub_col.IsArray () && !sub_col.Empty () &&
770
- sub_col[0 ].IsString ()) {
771
- is_nested_str = true ;
772
- } else if (pure_doc_value && sub_col.IsArray ()) {
773
- return Status::InternalError (ERROR_INVALID_COL_DATA, " BOOLEAN" );
774
- }
775
-
776
- const rapidjson::Value& str_col = is_nested_str ? sub_col[0 ] : sub_col;
777
-
778
- const std::string& val = str_col.GetString ();
779
- size_t val_size = str_col.GetStringLength ();
780
- StringParser::ParseResult result;
781
- bool b = StringParser::string_to_bool (val.c_str (), val_size, &result);
782
- RETURN_ERROR_IF_PARSING_FAILED (result, str_col, type);
783
- array.push_back (b);
784
- break ;
785
- }
786
- // date/datetime v2 is the default type for catalog table,
787
- // see https://github.com/apache/doris/pull/16304
788
- // No need to support date and datetime types.
789
- case TYPE_DATEV2: {
790
- uint32_t data;
791
- RETURN_IF_ERROR ((get_date_int<DateV2Value<DateV2ValueType>, uint32_t >(
792
- sub_col, sub_type, pure_doc_value, &data, time_zone)));
793
- array.push_back (data);
794
- break ;
795
- }
796
- case TYPE_DATETIMEV2: {
797
- uint64_t data;
798
- RETURN_IF_ERROR ((get_date_int<DateV2Value<DateTimeV2ValueType>, uint64_t >(
799
- sub_col, sub_type, pure_doc_value, &data, time_zone)));
800
- array.push_back (data);
801
- break ;
802
- }
803
- default : {
804
- LOG (ERROR) << " Do not support Array type: " << sub_type;
805
- break ;
806
- }
807
- }
808
- }
843
+ RETURN_IF_ERROR (parse_column (col, sub_type, pure_doc_value, array, time_zone));
809
844
col_ptr->insert (array);
810
845
break ;
811
846
}
0 commit comments