From 6e38dd639ec84d9d2c8807e18013f73f4a50b255 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Fri, 23 Sep 2022 12:42:04 -0400 Subject: [PATCH 01/18] [FLINK-25756] [connectors/opensearch] Dedicated Opensearch connectors Signed-off-by: Andriy Redko --- .editorconfig | 276 ++++++ .gitignore | 38 + LICENSE | 201 +++++ NOTICE | 17 + README.md | 69 ++ flink-connector-opensearch-e2e-tests/pom.xml | 104 +++ .../tests/OpensearchSinkExample.java | 148 ++++ flink-connector-opensearch/pom.xml | 185 ++++ .../sink/BulkProcessorBuilderFactory.java | 36 + .../opensearch/sink/BulkProcessorConfig.java | 72 ++ .../sink/BulkRequestConsumerFactory.java | 35 + .../opensearch/sink/FlushBackoffType.java | 38 + .../opensearch/sink/NetworkClientConfig.java | 86 ++ .../opensearch/sink/OpensearchEmitter.java | 76 ++ .../opensearch/sink/OpensearchSink.java | 91 ++ .../sink/OpensearchSinkBuilder.java | 449 ++++++++++ .../opensearch/sink/OpensearchWriter.java | 348 ++++++++ .../opensearch/sink/RequestIndexer.java | 56 ++ .../table/AbstractTimeIndexGenerator.java | 41 + .../opensearch/table/IndexGenerator.java | 39 + .../opensearch/table/IndexGeneratorBase.java | 52 ++ .../table/IndexGeneratorFactory.java | 278 ++++++ .../opensearch/table/KeyExtractor.java | 97 +++ .../table/LogicalTypeWithIndex.java | 31 + .../table/OpensearchConfiguration.java | 168 ++++ .../table/OpensearchConnectorOptions.java | 161 ++++ .../table/OpensearchDynamicSink.java | 201 +++++ .../table/OpensearchDynamicSinkFactory.java | 222 +++++ .../table/OpensearchSinkBuilderSupplier.java | 25 + .../table/OpensearchValidationUtils.java | 92 ++ .../table/RowOpensearchEmitter.java | 123 +++ .../table/StaticIndexGenerator.java | 35 + .../ActionRequestFailureHandler.java | 77 ++ .../BufferingNoOpRequestIndexer.java | 75 ++ .../OpensearchBulkProcessorIndexer.java | 84 ++ .../connectors/opensearch/OpensearchSink.java | 807 ++++++++++++++++++ .../opensearch/OpensearchSinkFunction.java | 90 ++ .../connectors/opensearch/RequestIndexer.java | 84 ++ .../opensearch/RestClientFactory.java | 39 + .../util/IgnoringFailureHandler.java | 38 + .../opensearch/util/NoOpFailureHandler.java | 54 ++ .../RetryRejectedExecutionFailureHandler.java | 60 ++ .../org.apache.flink.table.factories.Factory | 16 + .../connector/opensearch/OpensearchUtil.java | 166 ++++ .../sink/OpensearchSinkBuilderTest.java | 102 +++ .../opensearch/sink/OpensearchSinkITCase.java | 201 +++++ .../opensearch/sink/OpensearchTestClient.java | 74 ++ .../sink/OpensearchWriterITCase.java | 337 ++++++++ .../opensearch/sink/TestEmitter.java | 75 ++ .../opensearch/table/IndexGeneratorTest.java | 264 ++++++ .../opensearch/table/KeyExtractorTest.java | 147 ++++ .../OpensearchDynamicSinkFactoryTest.java | 267 ++++++ .../table/OpensearchDynamicSinkITCase.java | 334 ++++++++ .../opensearch/table/TestContext.java | 72 ++ .../opensearch/test/DockerImageVersions.java | 26 + .../opensearch/OpensearchSinkITCase.java | 181 ++++ .../testutils/SourceSinkDataTestKit.java | 143 ++++ .../src/test/resources/log4j2-test.properties | 28 + flink-sql-connector-opensearch/pom.xml | 159 ++++ .../src/main/resources/META-INF/NOTICE | 50 ++ pom.xml | 451 ++++++++++ tools/ci/log4j.properties | 43 + tools/maven/checkstyle.xml | 562 ++++++++++++ tools/maven/suppressions.xml | 26 + 64 files changed, 9022 insertions(+) create mode 100644 .editorconfig create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 NOTICE create mode 100644 README.md create mode 100644 flink-connector-opensearch-e2e-tests/pom.xml create mode 100644 
flink-connector-opensearch-e2e-tests/src/main/java/org/apache/flink/streaming/tests/OpensearchSinkExample.java create mode 100644 flink-connector-opensearch/pom.xml create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/BulkProcessorBuilderFactory.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/BulkProcessorConfig.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/BulkRequestConsumerFactory.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/FlushBackoffType.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/NetworkClientConfig.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchEmitter.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSink.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilder.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchWriter.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/RequestIndexer.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/AbstractTimeIndexGenerator.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/IndexGenerator.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/IndexGeneratorBase.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/IndexGeneratorFactory.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/KeyExtractor.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/LogicalTypeWithIndex.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchConfiguration.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchConnectorOptions.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSink.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactory.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchSinkBuilderSupplier.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchValidationUtils.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/RowOpensearchEmitter.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/StaticIndexGenerator.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/ActionRequestFailureHandler.java create mode 100644 
flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/BufferingNoOpRequestIndexer.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchBulkProcessorIndexer.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSink.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkFunction.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/RequestIndexer.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/RestClientFactory.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/IgnoringFailureHandler.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/NoOpFailureHandler.java create mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/RetryRejectedExecutionFailureHandler.java create mode 100644 flink-connector-opensearch/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory create mode 100644 flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/OpensearchUtil.java create mode 100644 flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilderTest.java create mode 100644 flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkITCase.java create mode 100644 flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchTestClient.java create mode 100644 flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchWriterITCase.java create mode 100644 flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/TestEmitter.java create mode 100644 flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/IndexGeneratorTest.java create mode 100644 flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/KeyExtractorTest.java create mode 100644 flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactoryTest.java create mode 100644 flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkITCase.java create mode 100644 flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/TestContext.java create mode 100644 flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/test/DockerImageVersions.java create mode 100644 flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkITCase.java create mode 100644 flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/testutils/SourceSinkDataTestKit.java create mode 100644 flink-connector-opensearch/src/test/resources/log4j2-test.properties create mode 100644 flink-sql-connector-opensearch/pom.xml create mode 100644 flink-sql-connector-opensearch/src/main/resources/META-INF/NOTICE create mode 100644 pom.xml create mode 100644 tools/ci/log4j.properties create mode 100644 tools/maven/checkstyle.xml create mode 100644 
tools/maven/suppressions.xml diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..2e26074 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,276 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +max_line_length = 100 +# ij_formatter_off_tag = @formatter:off +# ij_formatter_on_tag = @formatter:on +# ij_formatter_tags_enabled = false +# ij_smart_tabs = false +# ij_wrap_on_typing = false + +[*.java] +indent_size = 4 +indent_style = space +tab_width = 4 +ij_continuation_indent_size = 8 +# ij_java_align_consecutive_assignments = false +# ij_java_align_consecutive_variable_declarations = false +# ij_java_align_group_field_declarations = false +# ij_java_align_multiline_annotation_parameters = false +# ij_java_align_multiline_array_initializer_expression = false +# ij_java_align_multiline_assignment = false +# ij_java_align_multiline_binary_operation = false +# ij_java_align_multiline_chained_methods = false +# ij_java_align_multiline_extends_list = false +# ij_java_align_multiline_for = true +# ij_java_align_multiline_method_parentheses = false +# ij_java_align_multiline_parameters = true +# ij_java_align_multiline_parameters_in_calls = false +# ij_java_align_multiline_parenthesized_expression = false +# ij_java_align_multiline_records = true +# ij_java_align_multiline_resources = true +# ij_java_align_multiline_ternary_operation = false +# ij_java_align_multiline_text_blocks = false +# ij_java_align_multiline_throws_list = false +# ij_java_align_subsequent_simple_methods = false +# ij_java_align_throws_keyword = false +# ij_java_annotation_parameter_wrap = off +# ij_java_array_initializer_new_line_after_left_brace = false +# ij_java_array_initializer_right_brace_on_new_line = false +# ij_java_array_initializer_wrap = off +# ij_java_assert_statement_colon_on_next_line = false +# ij_java_assert_statement_wrap = off +# ij_java_assignment_wrap = off +ij_java_binary_operation_sign_on_next_line = true +ij_java_binary_operation_wrap = normal +# ij_java_blank_lines_after_anonymous_class_header = 0 +# ij_java_blank_lines_after_class_header = 0 +# ij_java_blank_lines_after_imports = 1 +# ij_java_blank_lines_after_package = 1 +# ij_java_blank_lines_around_class = 1 +# ij_java_blank_lines_around_field = 0 +# ij_java_blank_lines_around_field_in_interface = 0 +# ij_java_blank_lines_around_initializer = 1 +# ij_java_blank_lines_around_method = 1 +# ij_java_blank_lines_around_method_in_interface = 1 +# ij_java_blank_lines_before_class_end = 0 +# ij_java_blank_lines_before_imports = 1 +# ij_java_blank_lines_before_method_body = 0 +# ij_java_blank_lines_before_package = 0 +# ij_java_block_brace_style = end_of_line +# ij_java_block_comment_at_first_column = true +ij_java_call_parameters_new_line_after_left_paren = true +# ij_java_call_parameters_right_paren_on_new_line = false +ij_java_call_parameters_wrap = on_every_item +# ij_java_case_statement_on_separate_line = true +# ij_java_catch_on_new_line = false +# ij_java_class_annotation_wrap = split_into_lines +# ij_java_class_brace_style = end_of_line +ij_java_class_count_to_use_import_on_demand = 9999 +# ij_java_class_names_in_javadoc = 1 +# ij_java_do_not_indent_top_level_class_members = false +# ij_java_do_not_wrap_after_single_annotation = false +# ij_java_do_while_brace_force = never +# ij_java_doc_add_blank_line_after_description = true +ij_java_doc_add_blank_line_after_param_comments = true +ij_java_doc_add_blank_line_after_return = true +# ij_java_doc_add_p_tag_on_empty_lines = true 
+ij_java_doc_align_exception_comments = false +ij_java_doc_align_param_comments = false +ij_java_doc_do_not_wrap_if_one_line = true +ij_java_doc_enable_formatting = true +# ij_java_doc_enable_leading_asterisks = true +ij_java_doc_indent_on_continuation = true +ij_java_doc_keep_empty_lines = true +# ij_java_doc_keep_empty_parameter_tag = true +# ij_java_doc_keep_empty_return_tag = true +# ij_java_doc_keep_empty_throws_tag = true +# ij_java_doc_keep_invalid_tags = true +# ij_java_doc_param_description_on_new_line = false +ij_java_doc_preserve_line_breaks = false +# ij_java_doc_use_throws_not_exception_tag = true +# ij_java_else_on_new_line = false +# ij_java_entity_dd_suffix = EJB +# ij_java_entity_eb_suffix = Bean +# ij_java_entity_hi_suffix = Home +# ij_java_entity_lhi_prefix = Local +# ij_java_entity_lhi_suffix = Home +# ij_java_entity_li_prefix = Local +# ij_java_entity_pk_class = java.lang.String +# ij_java_entity_vo_suffix = VO +# ij_java_enum_constants_wrap = off +# ij_java_extends_keyword_wrap = off +# ij_java_extends_list_wrap = off +# ij_java_field_annotation_wrap = split_into_lines +# ij_java_finally_on_new_line = false +# ij_java_for_brace_force = never +# ij_java_for_statement_new_line_after_left_paren = false +# ij_java_for_statement_right_paren_on_new_line = false +# ij_java_for_statement_wrap = off +# ij_java_generate_final_locals = false +# ij_java_generate_final_parameters = false +# ij_java_if_brace_force = never +ij_java_imports_layout = org.apache.flink.**,|,org.apache.flink.shaded.**,|,*,|,javax.**,|,java.**,|,scala.**,|,$* +# ij_java_indent_case_from_switch = true +# ij_java_insert_inner_class_imports = false +# ij_java_insert_override_annotation = true +# ij_java_keep_blank_lines_before_right_brace = 2 +# ij_java_keep_blank_lines_between_package_declaration_and_header = 2 +# ij_java_keep_blank_lines_in_code = 2 +# ij_java_keep_blank_lines_in_declarations = 2 +# ij_java_keep_control_statement_in_one_line = true +# ij_java_keep_first_column_comment = true +# ij_java_keep_indents_on_empty_lines = false +# ij_java_keep_line_breaks = true +# ij_java_keep_multiple_expressions_in_one_line = false +# ij_java_keep_simple_blocks_in_one_line = false +# ij_java_keep_simple_classes_in_one_line = false +# ij_java_keep_simple_lambdas_in_one_line = false +# ij_java_keep_simple_methods_in_one_line = false +# ij_java_label_indent_absolute = false +# ij_java_label_indent_size = 0 +# ij_java_lambda_brace_style = end_of_line +ij_java_layout_static_imports_separately = true +# ij_java_line_comment_add_space = false +# ij_java_line_comment_at_first_column = true +# ij_java_message_dd_suffix = EJB +# ij_java_message_eb_suffix = Bean +# ij_java_method_annotation_wrap = split_into_lines +# ij_java_method_brace_style = end_of_line +ij_java_method_call_chain_wrap = on_every_item +ij_java_method_parameters_new_line_after_left_paren = true +# ij_java_method_parameters_right_paren_on_new_line = false +ij_java_method_parameters_wrap = on_every_item +# ij_java_modifier_list_wrap = false +ij_java_names_count_to_use_import_on_demand = 9999 +# ij_java_new_line_after_lparen_in_record_header = false +# ij_java_packages_to_use_import_on_demand = java.awt.*,javax.swing.* +# ij_java_parameter_annotation_wrap = off +# ij_java_parentheses_expression_new_line_after_left_paren = false +# ij_java_parentheses_expression_right_paren_on_new_line = false +# ij_java_place_assignment_sign_on_next_line = false +# ij_java_prefer_longer_names = true +# ij_java_prefer_parameters_wrap = false +# 
ij_java_record_components_wrap = normal +# ij_java_repeat_synchronized = true +# ij_java_replace_instanceof_and_cast = false +# ij_java_replace_null_check = true +# ij_java_replace_sum_lambda_with_method_ref = true +# ij_java_resource_list_new_line_after_left_paren = false +# ij_java_resource_list_right_paren_on_new_line = false +# ij_java_resource_list_wrap = off +# ij_java_rparen_on_new_line_in_record_header = false +# ij_java_session_dd_suffix = EJB +# ij_java_session_eb_suffix = Bean +# ij_java_session_hi_suffix = Home +# ij_java_session_lhi_prefix = Local +# ij_java_session_lhi_suffix = Home +# ij_java_session_li_prefix = Local +# ij_java_session_si_suffix = Service +# ij_java_space_after_closing_angle_bracket_in_type_argument = false +# ij_java_space_after_colon = true +# ij_java_space_after_comma = true +# ij_java_space_after_comma_in_type_arguments = true +# ij_java_space_after_for_semicolon = true +# ij_java_space_after_quest = true +# ij_java_space_after_type_cast = true +# ij_java_space_before_annotation_array_initializer_left_brace = false +# ij_java_space_before_annotation_parameter_list = false +# ij_java_space_before_array_initializer_left_brace = false +# ij_java_space_before_catch_keyword = true +# ij_java_space_before_catch_left_brace = true +# ij_java_space_before_catch_parentheses = true +# ij_java_space_before_class_left_brace = true +# ij_java_space_before_colon = true +# ij_java_space_before_colon_in_foreach = true +# ij_java_space_before_comma = false +# ij_java_space_before_do_left_brace = true +# ij_java_space_before_else_keyword = true +# ij_java_space_before_else_left_brace = true +# ij_java_space_before_finally_keyword = true +# ij_java_space_before_finally_left_brace = true +# ij_java_space_before_for_left_brace = true +# ij_java_space_before_for_parentheses = true +# ij_java_space_before_for_semicolon = false +# ij_java_space_before_if_left_brace = true +# ij_java_space_before_if_parentheses = true +# ij_java_space_before_method_call_parentheses = false +# ij_java_space_before_method_left_brace = true +# ij_java_space_before_method_parentheses = false +# ij_java_space_before_opening_angle_bracket_in_type_parameter = false +# ij_java_space_before_quest = true +# ij_java_space_before_switch_left_brace = true +# ij_java_space_before_switch_parentheses = true +# ij_java_space_before_synchronized_left_brace = true +# ij_java_space_before_synchronized_parentheses = true +# ij_java_space_before_try_left_brace = true +# ij_java_space_before_try_parentheses = true +# ij_java_space_before_type_parameter_list = false +# ij_java_space_before_while_keyword = true +# ij_java_space_before_while_left_brace = true +# ij_java_space_before_while_parentheses = true +# ij_java_space_inside_one_line_enum_braces = false +# ij_java_space_within_empty_array_initializer_braces = false +# ij_java_space_within_empty_method_call_parentheses = false +# ij_java_space_within_empty_method_parentheses = false +# ij_java_spaces_around_additive_operators = true +# ij_java_spaces_around_assignment_operators = true +# ij_java_spaces_around_bitwise_operators = true +# ij_java_spaces_around_equality_operators = true +# ij_java_spaces_around_lambda_arrow = true +# ij_java_spaces_around_logical_operators = true +# ij_java_spaces_around_method_ref_dbl_colon = false +# ij_java_spaces_around_multiplicative_operators = true +# ij_java_spaces_around_relational_operators = true +# ij_java_spaces_around_shift_operators = true +# ij_java_spaces_around_type_bounds_in_type_parameters = true +# 
ij_java_spaces_around_unary_operator = false +# ij_java_spaces_within_angle_brackets = false +# ij_java_spaces_within_annotation_parentheses = false +# ij_java_spaces_within_array_initializer_braces = false +# ij_java_spaces_within_braces = false +# ij_java_spaces_within_brackets = false +# ij_java_spaces_within_cast_parentheses = false +# ij_java_spaces_within_catch_parentheses = false +# ij_java_spaces_within_for_parentheses = false +# ij_java_spaces_within_if_parentheses = false +# ij_java_spaces_within_method_call_parentheses = false +# ij_java_spaces_within_method_parentheses = false +# ij_java_spaces_within_parentheses = false +# ij_java_spaces_within_switch_parentheses = false +# ij_java_spaces_within_synchronized_parentheses = false +# ij_java_spaces_within_try_parentheses = false +# ij_java_spaces_within_while_parentheses = false +# ij_java_special_else_if_treatment = true +# ij_java_subclass_name_suffix = Impl +# ij_java_ternary_operation_signs_on_next_line = false +# ij_java_ternary_operation_wrap = off +# ij_java_test_name_suffix = Test +# ij_java_throws_keyword_wrap = off +# ij_java_throws_list_wrap = off +# ij_java_use_external_annotations = false +# ij_java_use_fq_class_names = false +# ij_java_use_relative_indents = false +# ij_java_use_single_class_imports = true +ij_java_variable_annotation_wrap = normal +# ij_java_visibility = public +# ij_java_while_brace_force = never +# ij_java_while_on_new_line = false +# ij_java_wrap_comments = false +ij_java_wrap_first_method_in_call_chain = true +# ij_java_wrap_long_lines = false + +[*.xml] +indent_style = tab +indent_size = 4 + +[*.scala] +indent_style = space +indent_size = 2 + +[*.py] +indent_style = space +indent_size = 4 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..acbe217 --- /dev/null +++ b/.gitignore @@ -0,0 +1,38 @@ +.eslintcache +.cache +scalastyle-output.xml +.classpath +.idea/* +!.idea/vcs.xml +.metadata +.settings +.project +.version.properties +filter.properties +logs.zip +.mvn/wrapper/*.jar +target +tmp +*.class +*.iml +*.swp +*.jar +*.zip +*.log +*.pyc +.DS_Store +build-target +atlassian-ide-plugin.xml +out/ +/docs/api +/docs/.bundle +/docs/.rubydeps +/docs/ruby2/.bundle +/docs/ruby2/.rubydeps +/docs/.jekyll-metadata +*.ipr +*.iws +tools/flink +tools/flink-* +tools/releasing/release +tools/japicmp-output diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..123f92a --- /dev/null +++ b/NOTICE @@ -0,0 +1,17 @@ +Apache Flink Opensearch Connector +Copyright 2014-2022 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). 
+ +Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby +granted, provided that this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING +ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE +USE OR PERFORMANCE OF THIS SOFTWARE. + + + diff --git a/README.md b/README.md new file mode 100644 index 0000000..3a29737 --- /dev/null +++ b/README.md @@ -0,0 +1,69 @@ +# Apache Flink OpenSearch Connector + +This repository contains the official Apache Flink OpenSearch connector. + +## Apache Flink + +Apache Flink is an open source stream processing framework with powerful stream- and batch-processing capabilities. + +Learn more about Flink at [https://flink.apache.org/](https://flink.apache.org/) + +## Building the Apache Flink OpenSearch Connector from Source + +Prerequisites: + +* Unix-like environment (we use Linux, Mac OS X) +* Git +* Maven (we recommend version 3.8.6) +* Java 11 + +``` +git clone https://github.com/apache/flink-connector-opensearch.git +cd flink-connector-opensearch +./mvn clean package -DskipTests +``` + +The resulting jars can be found in the `target` directory of the respective module. + +## Developing Flink + +The Flink committers use IntelliJ IDEA to develop the Flink codebase. +We recommend IntelliJ IDEA for developing projects that involve Scala code. + +Minimal requirements for an IDE are: +* Support for Java and Scala (also mixed projects) +* Support for Maven with Java and Scala + +### IntelliJ IDEA + +The IntelliJ IDE supports Maven out of the box and offers a plugin for Scala development. + +* IntelliJ download: [https://www.jetbrains.com/idea/](https://www.jetbrains.com/idea/) +* IntelliJ Scala Plugin: [https://plugins.jetbrains.com/plugin/?id=1347](https://plugins.jetbrains.com/plugin/?id=1347) + +Check out our [Setting up IntelliJ](https://nightlies.apache.org/flink/flink-docs-master/flinkDev/ide_setup.html#intellij-idea) guide for details. + +## Support + +Don’t hesitate to ask! + +Contact the developers and community on the [mailing lists](https://flink.apache.org/community.html#mailing-lists) if you need any help. + +[Open an issue](https://issues.apache.org/jira/browse/FLINK) if you found a bug in Flink. + +## Documentation + +The documentation of Apache Flink is located on the website: [https://flink.apache.org](https://flink.apache.org) +or in the `docs/` directory of the source code. + +## Fork and Contribute + +This is an active open-source project. We are always open to people who want to use the system or contribute to it. +Contact us if you are looking for implementation tasks that fit your skills. +This article describes [how to contribute to Apache Flink](https://flink.apache.org/contributing/how-to-contribute.html). + +## About + +Apache Flink is an open source project of The Apache Software Foundation (ASF). +The Apache Flink project originated from the [Stratosphere](http://stratosphere.eu) research project. 
+ diff --git a/flink-connector-opensearch-e2e-tests/pom.xml b/flink-connector-opensearch-e2e-tests/pom.xml new file mode 100644 index 0000000..242b579 --- /dev/null +++ b/flink-connector-opensearch-e2e-tests/pom.xml @@ -0,0 +1,104 @@ + + + + + 4.0.0 + + + org.apache.flink + flink-connector-opensearch-parent + 1.0.0-SNAPSHOT + .. + + + flink-connector-opensearch-e2e-tests + Flink : E2E Tests : Opensearch + jar + + + + org.apache.flink + flink-streaming-java + ${flink.version} + provided + + + org.apache.flink + flink-connector-opensearch + ${project.version} + + + org.apache.logging.log4j + log4j-api + ${log4j.version} + provided + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + package + + shade + + + opensearch-end-to-end-test + dependencies + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy + pre-integration-test + + copy + + + + + + + org.apache.flink + flink-connector-test-utils + ${flink.version} + flink-connector-test-utils.jar + jar + ${project.build.directory}/dependencies + + + + + + + + + diff --git a/flink-connector-opensearch-e2e-tests/src/main/java/org/apache/flink/streaming/tests/OpensearchSinkExample.java b/flink-connector-opensearch-e2e-tests/src/main/java/org/apache/flink/streaming/tests/OpensearchSinkExample.java new file mode 100644 index 0000000..c7fbca1 --- /dev/null +++ b/flink-connector-opensearch-e2e-tests/src/main/java/org/apache/flink/streaming/tests/OpensearchSinkExample.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.tests; + +import org.apache.flink.api.common.functions.FlatMapFunction; +import org.apache.flink.api.common.functions.RuntimeContext; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.api.java.utils.ParameterTool; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.streaming.connectors.opensearch.ActionRequestFailureHandler; +import org.apache.flink.streaming.connectors.opensearch.OpensearchSink; +import org.apache.flink.streaming.connectors.opensearch.RequestIndexer; +import org.apache.flink.util.Collector; + +import org.apache.http.HttpHost; +import org.opensearch.action.ActionRequest; +import org.opensearch.action.index.IndexRequest; +import org.opensearch.action.update.UpdateRequest; +import org.opensearch.client.Requests; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** End to end test for OpensearchSink. 
*/ +public class OpensearchSinkExample { + + public static void main(String[] args) throws Exception { + + final ParameterTool parameterTool = ParameterTool.fromArgs(args); + + if (parameterTool.getNumberOfParameters() < 2) { + System.out.println( + "Missing parameters!\n" + "Usage: --numRecords --index "); + return; + } + + final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + env.enableCheckpointing(5000); + + DataStream> source = + env.generateSequence(0, parameterTool.getInt("numRecords") - 1) + .flatMap( + new FlatMapFunction>() { + @Override + public void flatMap( + Long value, Collector> out) { + final String key = String.valueOf(value); + final String message = "message #" + value; + out.collect(Tuple2.of(key, message + "update #1")); + out.collect(Tuple2.of(key, message + "update #2")); + } + }); + + List httpHosts = new ArrayList<>(); + httpHosts.add(new HttpHost("127.0.0.1", 9200, "http")); + + OpensearchSink.Builder> osSinkBuilder = + new OpensearchSink.Builder<>( + httpHosts, + (Tuple2 element, + RuntimeContext ctx, + RequestIndexer indexer) -> { + indexer.add(createIndexRequest(element.f1, parameterTool)); + indexer.add(createUpdateRequest(element, parameterTool)); + }); + + osSinkBuilder.setFailureHandler( + new CustomFailureHandler(parameterTool.getRequired("index"))); + + // this instructs the sink to emit after every element, otherwise they would be buffered + osSinkBuilder.setBulkFlushMaxActions(1); + + source.addSink(osSinkBuilder.build()); + + env.execute("Opensearch end to end sink test example"); + } + + private static class CustomFailureHandler implements ActionRequestFailureHandler { + + private static final long serialVersionUID = 942269087742453482L; + + private final String index; + + CustomFailureHandler(String index) { + this.index = index; + } + + @Override + public void onFailure( + ActionRequest action, Throwable failure, int restStatusCode, RequestIndexer indexer) + throws Throwable { + if (action instanceof IndexRequest) { + Map json = new HashMap<>(); + json.put("data", ((IndexRequest) action).source()); + + indexer.add( + Requests.indexRequest() + .index(index) + .id(((IndexRequest) action).id()) + .source(json)); + } else { + throw new IllegalStateException("unexpected"); + } + } + } + + private static IndexRequest createIndexRequest(String element, ParameterTool parameterTool) { + Map json = new HashMap<>(); + json.put("data", element); + + String index; + if (element.startsWith("message #15")) { + index = ":intentional invalid index:"; + } else { + index = parameterTool.getRequired("index"); + } + + return Requests.indexRequest().index(index).id(element).source(json); + } + + private static UpdateRequest createUpdateRequest( + Tuple2 element, ParameterTool parameterTool) { + Map json = new HashMap<>(); + json.put("data", element.f1); + + return new UpdateRequest(parameterTool.getRequired("index"), element.f0) + .doc(json) + .upsert(json); + } +} diff --git a/flink-connector-opensearch/pom.xml b/flink-connector-opensearch/pom.xml new file mode 100644 index 0000000..662365b --- /dev/null +++ b/flink-connector-opensearch/pom.xml @@ -0,0 +1,185 @@ + + + + + 4.0.0 + + + org.apache.flink + flink-connector-opensearch-parent + 1.0.0-SNAPSHOT + .. 
+ + + flink-connector-opensearch + Flink : Connectors : Opensearch + + jar + + + + 1.3.0 + + + + + + + + org.apache.flink + flink-connector-base + ${flink.version} + + + org.apache.flink + flink-streaming-java + ${flink.version} + provided + + + + + + + org.apache.flink + flink-table-api-java-bridge + ${flink.version} + provided + true + + + + + + org.opensearch + opensearch + ${opensearch.version} + + + + org.opensearch.client + opensearch-rest-high-level-client + ${opensearch.version} + + + org.apache.httpcomponents + httpcore-nio + + + + + + + org.apache.httpcomponents + httpcore-nio + 4.4.12 + + + + + org.apache.flink + flink-test-utils + ${flink.version} + test + + + + org.apache.flink + flink-runtime + ${flink.version} + test-jar + test + + + + org.apache.flink + flink-streaming-java + ${flink.version} + test + test-jar + + + + + org.apache.flink + flink-table-common + ${flink.version} + test-jar + test + + + + + org.apache.flink + flink-json + ${flink.version} + test + + + + + org.apache.flink + flink-table-test-utils + ${flink.version} + test + + + + + + org.apache.logging.log4j + log4j-api + provided + + + + org.apache.logging.log4j + log4j-core + test + + + + org.opensearch + opensearch-testcontainers + 1.0.0 + test + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + 1 + + + + + + diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/BulkProcessorBuilderFactory.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/BulkProcessorBuilderFactory.java new file mode 100644 index 0000000..ff284a6 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/BulkProcessorBuilderFactory.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.connector.opensearch.sink; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.util.function.TriFunction; + +import org.opensearch.action.bulk.BulkProcessor; +import org.opensearch.client.RestHighLevelClient; + +import java.io.Serializable; + +@Internal +interface BulkProcessorBuilderFactory + extends Serializable, + TriFunction< + RestHighLevelClient, + BulkProcessorConfig, + BulkProcessor.Listener, + BulkProcessor.Builder> {} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/BulkProcessorConfig.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/BulkProcessorConfig.java new file mode 100644 index 0000000..07ed9a4 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/BulkProcessorConfig.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.connector.opensearch.sink; + +import java.io.Serializable; + +import static org.apache.flink.util.Preconditions.checkNotNull; + +class BulkProcessorConfig implements Serializable { + + private final int bulkFlushMaxActions; + private final int bulkFlushMaxMb; + private final long bulkFlushInterval; + private final FlushBackoffType flushBackoffType; + private final int bulkFlushBackoffRetries; + private final long bulkFlushBackOffDelay; + + BulkProcessorConfig( + int bulkFlushMaxActions, + int bulkFlushMaxMb, + long bulkFlushInterval, + FlushBackoffType flushBackoffType, + int bulkFlushBackoffRetries, + long bulkFlushBackOffDelay) { + this.bulkFlushMaxActions = bulkFlushMaxActions; + this.bulkFlushMaxMb = bulkFlushMaxMb; + this.bulkFlushInterval = bulkFlushInterval; + this.flushBackoffType = checkNotNull(flushBackoffType); + this.bulkFlushBackoffRetries = bulkFlushBackoffRetries; + this.bulkFlushBackOffDelay = bulkFlushBackOffDelay; + } + + public int getBulkFlushMaxActions() { + return bulkFlushMaxActions; + } + + public int getBulkFlushMaxMb() { + return bulkFlushMaxMb; + } + + public long getBulkFlushInterval() { + return bulkFlushInterval; + } + + public FlushBackoffType getFlushBackoffType() { + return flushBackoffType; + } + + public int getBulkFlushBackoffRetries() { + return bulkFlushBackoffRetries; + } + + public long getBulkFlushBackOffDelay() { + return bulkFlushBackOffDelay; + } +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/BulkRequestConsumerFactory.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/BulkRequestConsumerFactory.java new file mode 100644 index 0000000..0eb3f68 --- /dev/null +++ 
b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/BulkRequestConsumerFactory.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.connector.opensearch.sink; + +import org.apache.flink.annotation.Internal; + +import org.opensearch.action.ActionListener; +import org.opensearch.action.bulk.BulkRequest; +import org.opensearch.action.bulk.BulkResponse; + +import java.util.function.BiConsumer; + +/** + * {@link BulkRequestConsumerFactory} is used to bridge incompatible Opensearch Java API calls + * across different Opensearch versions. + */ +@Internal +interface BulkRequestConsumerFactory + extends BiConsumer> {} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/FlushBackoffType.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/FlushBackoffType.java new file mode 100644 index 0000000..5b015ec --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/FlushBackoffType.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.connector.opensearch.sink; + +import org.apache.flink.annotation.PublicEvolving; + +/** + * Used to control whether the sink should retry failed requests at all or with which kind back off + * strategy. + */ +@PublicEvolving +public enum FlushBackoffType { + /** After every failure, it waits a configured time until the retries are exhausted. */ + CONSTANT, + /** + * After every failure, it waits initially the configured time and increases the waiting time + * exponentially until the retries are exhausted. + */ + EXPONENTIAL, + /** The failure is not retried. 
*/ + NONE, +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/NetworkClientConfig.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/NetworkClientConfig.java new file mode 100644 index 0000000..cb9993d --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/NetworkClientConfig.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.connector.opensearch.sink; + +import javax.annotation.Nullable; + +import java.io.Serializable; + +class NetworkClientConfig implements Serializable { + + @Nullable private final String username; + @Nullable private final String password; + @Nullable private final String connectionPathPrefix; + @Nullable private final Integer connectionRequestTimeout; + @Nullable private final Integer connectionTimeout; + @Nullable private final Integer socketTimeout; + @Nullable private final Boolean allowInsecure; + + NetworkClientConfig( + @Nullable String username, + @Nullable String password, + @Nullable String connectionPathPrefix, + @Nullable Integer connectionRequestTimeout, + @Nullable Integer connectionTimeout, + @Nullable Integer socketTimeout, + @Nullable Boolean allowInsecure) { + this.username = username; + this.password = password; + this.connectionPathPrefix = connectionPathPrefix; + this.connectionRequestTimeout = connectionRequestTimeout; + this.connectionTimeout = connectionTimeout; + this.socketTimeout = socketTimeout; + this.allowInsecure = allowInsecure; + } + + @Nullable + public String getUsername() { + return username; + } + + @Nullable + public String getPassword() { + return password; + } + + @Nullable + public Integer getConnectionRequestTimeout() { + return connectionRequestTimeout; + } + + @Nullable + public Integer getConnectionTimeout() { + return connectionTimeout; + } + + @Nullable + public Integer getSocketTimeout() { + return socketTimeout; + } + + @Nullable + public String getConnectionPathPrefix() { + return connectionPathPrefix; + } + + @Nullable + public Boolean isAllowInsecure() { + return allowInsecure; + } +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchEmitter.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchEmitter.java new file mode 100644 index 0000000..1f40dc2 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchEmitter.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.connector.opensearch.sink; + +import org.apache.flink.annotation.PublicEvolving; +import org.apache.flink.api.common.functions.Function; +import org.apache.flink.api.connector.sink2.SinkWriter; + +import org.opensearch.action.ActionRequest; + +/** + * Creates none or multiple {@link ActionRequest ActionRequests} from the incoming elements. + * + *

<p>This is used by sinks to prepare elements for sending them to Opensearch.
+ *
+ * <p>Example:
+ *
+ * <pre>{@code
+ * private static class TestOpensearchEmitter
+ *         implements OpensearchEmitter<Tuple2<Integer, String>> {
+ *
+ *     public IndexRequest createIndexRequest(Tuple2<Integer, String> element) {
+ *         Map<String, Object> document = new HashMap<>();
+ *         document.put("data", element.f1);
+ *
+ *         return Requests.indexRequest()
+ *             .index("my-index")
+ *             .id(element.f0.toString())
+ *             .source(document);
+ *     }
+ *
+ *     public void emit(
+ *             Tuple2<Integer, String> element, SinkWriter.Context context, RequestIndexer indexer) {
+ *         indexer.add(createIndexRequest(element));
+ *     }
+ * }
+ * }</pre>
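+ *
+ * <p>A lambda can be used instead of a dedicated class; a minimal sketch (the index name
+ * "my-index" and the String-typed element are illustrative assumptions, not part of this API):
+ *
+ * <pre>{@code
+ * OpensearchEmitter<String> emitter =
+ *     (element, context, indexer) ->
+ *         indexer.add(Requests.indexRequest().index("my-index").source("data", element));
+ * }</pre>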
+ *
+ * @param <T> The type of the element handled by this {@link OpensearchEmitter}
+ */
+@PublicEvolving
+public interface OpensearchEmitter<T> extends Function {
+    /**
+     * Initialization method for the function. It is called once before the actual working process
+     * methods.
+     */
+    default void open() throws Exception {}
+
+    /** Tear-down method for the function. It is called when the sink closes. */
+    default void close() throws Exception {}
+
+    /**
+     * Process the incoming element to produce multiple {@link ActionRequest ActionRequests}. The
+     * produced requests should be added to the provided {@link RequestIndexer}.
+     *
+     * @param element incoming element to process
+     * @param context to access additional information about the record
+     * @param indexer request indexer that {@code ActionRequest} should be added to
+     */
+    void emit(T element, SinkWriter.Context context, RequestIndexer indexer);
+}
diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSink.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSink.java
new file mode 100644
index 0000000..fd0733a
--- /dev/null
+++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSink.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.connector.opensearch.sink;
+
+import org.apache.flink.annotation.PublicEvolving;
+import org.apache.flink.api.connector.sink2.Sink;
+import org.apache.flink.api.connector.sink2.SinkWriter;
+import org.apache.flink.connector.base.DeliveryGuarantee;
+
+import org.apache.http.HttpHost;
+
+import java.io.IOException;
+import java.util.List;
+
+import static org.apache.flink.util.Preconditions.checkArgument;
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * Flink Sink to insert or update data in an Opensearch index. The sink supports the following
+ * delivery guarantees.
+ *
+ * <ul>
+ *   <li>{@link DeliveryGuarantee#NONE} does not provide any guarantees: actions are flushed to
+ *       Opensearch only depending on the configuration of the bulk processor. In case of a
+ *       failure, actions might be lost if the bulk processor still has buffered actions.
+ *   <li>{@link DeliveryGuarantee#AT_LEAST_ONCE} on a checkpoint the sink will wait until all
+ *       buffered actions are flushed to and acknowledged by Opensearch. No actions will be lost,
+ *       but actions might be sent to Opensearch multiple times when Flink restarts. These
+ *       additional requests may cause inconsistent data in Opensearch right after the restart,
+ *       but eventually everything will be consistent again.
+ * </ul>
+ * + * @param type of the records converted to Opensearch actions + * @see OpensearchSinkBuilder on how to construct a OpensearchSink + */ +@PublicEvolving +public class OpensearchSink implements Sink { + + private final List hosts; + private final OpensearchEmitter emitter; + private final BulkProcessorConfig buildBulkProcessorConfig; + private final BulkProcessorBuilderFactory bulkProcessorBuilderFactory; + private final NetworkClientConfig networkClientConfig; + private final DeliveryGuarantee deliveryGuarantee; + + OpensearchSink( + List hosts, + OpensearchEmitter emitter, + DeliveryGuarantee deliveryGuarantee, + BulkProcessorBuilderFactory bulkProcessorBuilderFactory, + BulkProcessorConfig buildBulkProcessorConfig, + NetworkClientConfig networkClientConfig) { + this.hosts = checkNotNull(hosts); + this.bulkProcessorBuilderFactory = checkNotNull(bulkProcessorBuilderFactory); + checkArgument(!hosts.isEmpty(), "Hosts cannot be empty."); + this.emitter = checkNotNull(emitter); + this.deliveryGuarantee = checkNotNull(deliveryGuarantee); + this.buildBulkProcessorConfig = checkNotNull(buildBulkProcessorConfig); + this.networkClientConfig = checkNotNull(networkClientConfig); + } + + @Override + public SinkWriter createWriter(InitContext context) throws IOException { + return new OpensearchWriter<>( + hosts, + emitter, + deliveryGuarantee == DeliveryGuarantee.AT_LEAST_ONCE, + buildBulkProcessorConfig, + bulkProcessorBuilderFactory, + networkClientConfig, + context.metricGroup(), + context.getMailboxExecutor()); + } +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilder.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilder.java new file mode 100644 index 0000000..04ffbf7 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilder.java @@ -0,0 +1,449 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.connector.opensearch.sink; + +import org.apache.flink.annotation.PublicEvolving; +import org.apache.flink.api.common.ExecutionConfig; +import org.apache.flink.api.java.ClosureCleaner; +import org.apache.flink.connector.base.DeliveryGuarantee; +import org.apache.flink.util.InstantiationUtil; + +import org.apache.http.HttpHost; +import org.opensearch.action.ActionListener; +import org.opensearch.action.bulk.BackoffPolicy; +import org.opensearch.action.bulk.BulkProcessor; +import org.opensearch.action.bulk.BulkRequest; +import org.opensearch.action.bulk.BulkResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.common.unit.ByteSizeUnit; +import org.opensearch.common.unit.ByteSizeValue; +import org.opensearch.common.unit.TimeValue; + +import java.util.Arrays; +import java.util.List; + +import static org.apache.flink.util.Preconditions.checkArgument; +import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; + +/** + * Builder to construct an Opensearch compatible {@link OpensearchSink}. + * + *

<p>The following example shows the minimal setup to create an OpensearchSink that submits
+ * actions on checkpoint, or once the default number of buffered actions (1000) has been reached.
+ *
+ * <pre>{@code
+ * OpensearchSink<Tuple2<String, String>> sink = new OpensearchSinkBuilder<Tuple2<String, String>>()
+ *     .setHosts(new HttpHost("localhost:9200"))
+ *     .setEmitter((element, context, indexer) -> {
+ *          indexer.add(
+ *              new IndexRequest("my-index")
+ *              .id(element.f0.toString())
+ *              .source(element.f1)
+ *          );
+ *      })
+ *     .setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE)
+ *     .build();
+ * }</pre>
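+ *
+ * <p>Failed bulk requests are not retried by default; a backoff strategy can be configured on
+ * the builder, for example (a sketch; {@code builder} stands for an OpensearchSinkBuilder
+ * instance, and the retry count and delay values are illustrative):
+ *
+ * <pre>{@code
+ * builder.setBulkFlushBackoffStrategy(FlushBackoffType.EXPONENTIAL, 5, 1000);
+ * }</pre>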
+ * + * @param type of the records converted to Opensearch actions + */ +@PublicEvolving +public class OpensearchSinkBuilder { + + private int bulkFlushMaxActions = 1000; + private int bulkFlushMaxMb = -1; + private long bulkFlushInterval = -1; + private FlushBackoffType bulkFlushBackoffType = FlushBackoffType.NONE; + private int bulkFlushBackoffRetries = -1; + private long bulkFlushBackOffDelay = -1; + private DeliveryGuarantee deliveryGuarantee = DeliveryGuarantee.NONE; + private List hosts; + protected OpensearchEmitter emitter; + private String username; + private String password; + private String connectionPathPrefix; + private Integer connectionTimeout; + private Integer connectionRequestTimeout; + private Integer socketTimeout; + private Boolean allowInsecure; + + public OpensearchSinkBuilder() {} + + @SuppressWarnings("unchecked") + protected > S self() { + return (S) this; + } + + /** + * Sets the emitter which is invoked on every record to convert it to Opensearch actions. + * + * @param emitter to process records into Opensearch actions. + * @return this builder + */ + public OpensearchSinkBuilder setEmitter( + OpensearchEmitter emitter) { + checkNotNull(emitter); + checkState( + InstantiationUtil.isSerializable(emitter), + "The Opensearch emitter must be serializable."); + + final OpensearchSinkBuilder self = self(); + self.emitter = emitter; + return self; + } + + /** + * Sets the hosts where the Opensearch cluster nodes are reachable. + * + * @param hosts http addresses describing the node locations + * @return this builder + */ + public OpensearchSinkBuilder setHosts(HttpHost... hosts) { + checkNotNull(hosts); + checkState(hosts.length > 0, "Hosts cannot be empty."); + this.hosts = Arrays.asList(hosts); + return self(); + } + + /** + * Sets the wanted {@link DeliveryGuarantee}. The default delivery guarantee is {@link + * DeliveryGuarantee#NONE} + * + * @param deliveryGuarantee which describes the record emission behaviour + * @return this builder + */ + public OpensearchSinkBuilder setDeliveryGuarantee(DeliveryGuarantee deliveryGuarantee) { + checkState( + deliveryGuarantee != DeliveryGuarantee.EXACTLY_ONCE, + "Opensearch sink does not support the EXACTLY_ONCE guarantee."); + this.deliveryGuarantee = checkNotNull(deliveryGuarantee); + return self(); + } + + /** + * Sets the maximum number of actions to buffer for each bulk request. You can pass -1 to + * disable it. The default flush size 1000. + * + * @param numMaxActions the maximum number of actions to buffer per bulk request. + * @return this builder + */ + public OpensearchSinkBuilder setBulkFlushMaxActions(int numMaxActions) { + checkState( + numMaxActions == -1 || numMaxActions > 0, + "Max number of buffered actions must be larger than 0."); + this.bulkFlushMaxActions = numMaxActions; + return self(); + } + + /** + * Sets the maximum size of buffered actions, in mb, per bulk request. You can pass -1 to + * disable it. + * + * @param maxSizeMb the maximum size of buffered actions, in mb. + * @return this builder + */ + public OpensearchSinkBuilder setBulkFlushMaxSizeMb(int maxSizeMb) { + checkState( + maxSizeMb == -1 || maxSizeMb > 0, + "Max size of buffered actions must be larger than 0."); + this.bulkFlushMaxMb = maxSizeMb; + return self(); + } + + /** + * Sets the bulk flush interval, in milliseconds. You can pass -1 to disable it. + * + * @param intervalMillis the bulk flush interval, in milliseconds. 
+ * @return this builder + */ + public OpensearchSinkBuilder setBulkFlushInterval(long intervalMillis) { + checkState( + intervalMillis == -1 || intervalMillis >= 0, + "Interval (in milliseconds) between each flush must be larger than " + + "or equal to 0."); + this.bulkFlushInterval = intervalMillis; + return self(); + } + + /** + * Sets the type of back off to use when flushing bulk requests. The default bulk flush back off + * type is {@link FlushBackoffType#NONE}. + * + *

<p>Sets the amount of delay between each backoff attempt when flushing bulk requests, in
+ * milliseconds.
+ *
+ * <p>

Sets the maximum number of retries for a backoff attempt when flushing bulk requests. + * + * @param flushBackoffType the backoff type to use. + * @return this builder + */ + public OpensearchSinkBuilder setBulkFlushBackoffStrategy( + FlushBackoffType flushBackoffType, int maxRetries, long delayMillis) { + this.bulkFlushBackoffType = checkNotNull(flushBackoffType); + checkState( + flushBackoffType != FlushBackoffType.NONE, + "FlushBackoffType#NONE does not require a configuration it is the default, retries and delay are ignored."); + checkState(maxRetries > 0, "Max number of backoff attempts must be larger than 0."); + this.bulkFlushBackoffRetries = maxRetries; + checkState( + delayMillis >= 0, + "Delay (in milliseconds) between each backoff attempt must be larger " + + "than or equal to 0."); + this.bulkFlushBackOffDelay = delayMillis; + return self(); + } + + /** + * Sets the username used to authenticate the connection with the Opensearch cluster. + * + * @param username of the Opensearch cluster user + * @return this builder + */ + public OpensearchSinkBuilder setConnectionUsername(String username) { + checkNotNull(username); + this.username = username; + return self(); + } + + /** + * Sets the password used to authenticate the conection with the Opensearch cluster. + * + * @param password of the Opensearch cluster user + * @return this builder + */ + public OpensearchSinkBuilder setConnectionPassword(String password) { + checkNotNull(password); + this.password = password; + return self(); + } + + /** + * Sets a prefix which used for every REST communication to the Opensearch cluster. + * + * @param prefix for the communication + * @return this builder + */ + public OpensearchSinkBuilder setConnectionPathPrefix(String prefix) { + checkNotNull(prefix); + this.connectionPathPrefix = prefix; + return self(); + } + + /** + * Sets the timeout for requesting the connection of the Opensearch cluster from the connection + * manager. + * + * @param timeout for the connection request + * @return this builder + */ + public OpensearchSinkBuilder setConnectionRequestTimeout(int timeout) { + checkState(timeout >= 0, "Connection request timeout must be larger than or equal to 0."); + this.connectionRequestTimeout = timeout; + return self(); + } + + /** + * Sets the timeout for establishing a connection of the Opensearch cluster. + * + * @param timeout for the connection + * @return this builder + */ + public OpensearchSinkBuilder setConnectionTimeout(int timeout) { + checkState(timeout >= 0, "Connection timeout must be larger than or equal to 0."); + this.connectionTimeout = timeout; + return self(); + } + + /** + * Sets the timeout for waiting for data or, put differently, a maximum period inactivity + * between two consecutive data packets. + * + * @param timeout for the socket + * @return this builder + */ + public OpensearchSinkBuilder setSocketTimeout(int timeout) { + checkState(timeout >= 0, "Socket timeout must be larger than or equal to 0."); + this.socketTimeout = timeout; + return self(); + } + + /** + * Allows to bypass the certificates chain validation and connect to insecure network endpoints + * (for example, servers which use self-signed certificates). 
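+ *
+ * <p>A sketch for a local test cluster that uses a self-signed certificate (host and
+ * credentials are illustrative; {@code emitter} is assumed to be defined elsewhere):
+ *
+ * <pre>{@code
+ * OpensearchSink<String> sink = new OpensearchSinkBuilder<String>()
+ *     .setHosts(HttpHost.create("https://localhost:9200"))
+ *     .setConnectionUsername("admin")
+ *     .setConnectionPassword("admin")
+ *     .setAllowInsecure(true)
+ *     .setEmitter(emitter)
+ *     .build();
+ * }</pre>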
+ * + * @param allowInsecure allow or not to insecure network endpoints + * @return this builder + */ + public OpensearchSinkBuilder setAllowInsecure(boolean allowInsecure) { + this.allowInsecure = allowInsecure; + return self(); + } + + protected BulkProcessorBuilderFactory getBulkProcessorBuilderFactory() { + return new BulkProcessorBuilderFactory() { + @Override + public BulkProcessor.Builder apply( + RestHighLevelClient client, + BulkProcessorConfig bulkProcessorConfig, + BulkProcessor.Listener listener) { + BulkProcessor.Builder builder = + BulkProcessor.builder( + new BulkRequestConsumerFactory() { // This cannot be inlined as a + // lambda because then + // deserialization fails + @Override + public void accept( + BulkRequest bulkRequest, + ActionListener + bulkResponseActionListener) { + client.bulkAsync( + bulkRequest, + RequestOptions.DEFAULT, + bulkResponseActionListener); + } + }, + listener); + + if (bulkProcessorConfig.getBulkFlushMaxActions() != -1) { + builder.setBulkActions(bulkProcessorConfig.getBulkFlushMaxActions()); + } + + if (bulkProcessorConfig.getBulkFlushMaxMb() != -1) { + builder.setBulkSize( + new ByteSizeValue( + bulkProcessorConfig.getBulkFlushMaxMb(), ByteSizeUnit.MB)); + } + + if (bulkProcessorConfig.getBulkFlushInterval() != -1) { + builder.setFlushInterval( + new TimeValue(bulkProcessorConfig.getBulkFlushInterval())); + } + + BackoffPolicy backoffPolicy; + final TimeValue backoffDelay = + new TimeValue(bulkProcessorConfig.getBulkFlushBackOffDelay()); + final int maxRetryCount = bulkProcessorConfig.getBulkFlushBackoffRetries(); + switch (bulkProcessorConfig.getFlushBackoffType()) { + case CONSTANT: + backoffPolicy = BackoffPolicy.constantBackoff(backoffDelay, maxRetryCount); + break; + case EXPONENTIAL: + backoffPolicy = + BackoffPolicy.exponentialBackoff(backoffDelay, maxRetryCount); + break; + case NONE: + backoffPolicy = BackoffPolicy.noBackoff(); + break; + default: + throw new IllegalArgumentException( + "Received unknown backoff policy type " + + bulkProcessorConfig.getFlushBackoffType()); + } + builder.setBackoffPolicy(backoffPolicy); + return builder; + } + }; + } + + /** + * Constructs the {@link OpensearchSink} with the properties configured this builder. 
+ * + * @return {@link OpensearchSink} + */ + public OpensearchSink build() { + checkNotNull(emitter); + checkNotNull(hosts); + + NetworkClientConfig networkClientConfig = buildNetworkClientConfig(); + BulkProcessorConfig bulkProcessorConfig = buildBulkProcessorConfig(); + + BulkProcessorBuilderFactory bulkProcessorBuilderFactory = getBulkProcessorBuilderFactory(); + ClosureCleaner.clean( + bulkProcessorBuilderFactory, ExecutionConfig.ClosureCleanerLevel.RECURSIVE, true); + + return new OpensearchSink<>( + hosts, + emitter, + deliveryGuarantee, + bulkProcessorBuilderFactory, + bulkProcessorConfig, + networkClientConfig); + } + + private NetworkClientConfig buildNetworkClientConfig() { + checkArgument(!hosts.isEmpty(), "Hosts cannot be empty."); + + return new NetworkClientConfig( + username, + password, + connectionPathPrefix, + connectionRequestTimeout, + connectionTimeout, + socketTimeout, + allowInsecure); + } + + private BulkProcessorConfig buildBulkProcessorConfig() { + return new BulkProcessorConfig( + bulkFlushMaxActions, + bulkFlushMaxMb, + bulkFlushInterval, + bulkFlushBackoffType, + bulkFlushBackoffRetries, + bulkFlushBackOffDelay); + } + + @Override + public String toString() { + return "OpensearchSinkBuilder{" + + "bulkFlushMaxActions=" + + bulkFlushMaxActions + + ", bulkFlushMaxMb=" + + bulkFlushMaxMb + + ", bulkFlushInterval=" + + bulkFlushInterval + + ", bulkFlushBackoffType=" + + bulkFlushBackoffType + + ", bulkFlushBackoffRetries=" + + bulkFlushBackoffRetries + + ", bulkFlushBackOffDelay=" + + bulkFlushBackOffDelay + + ", deliveryGuarantee=" + + deliveryGuarantee + + ", hosts=" + + hosts + + ", emitter=" + + emitter + + ", username='" + + username + + '\'' + + ", password='" + + password + + '\'' + + ", connectionPathPrefix='" + + connectionPathPrefix + + '\'' + + ", allowInsecure='" + + allowInsecure + + '\'' + + '}'; + } +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchWriter.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchWriter.java new file mode 100644 index 0000000..17d1d42 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchWriter.java @@ -0,0 +1,348 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.connector.opensearch.sink; + +import org.apache.flink.annotation.VisibleForTesting; +import org.apache.flink.api.common.operators.MailboxExecutor; +import org.apache.flink.api.connector.sink2.SinkWriter; +import org.apache.flink.metrics.Counter; +import org.apache.flink.metrics.groups.SinkWriterMetricGroup; +import org.apache.flink.util.FlinkRuntimeException; +import org.apache.flink.util.function.ThrowingRunnable; + +import org.apache.http.HttpHost; +import org.apache.http.auth.AuthScope; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.client.CredentialsProvider; +import org.apache.http.conn.ssl.TrustAllStrategy; +import org.apache.http.impl.client.BasicCredentialsProvider; +import org.apache.http.ssl.SSLContexts; +import org.opensearch.action.DocWriteRequest; +import org.opensearch.action.bulk.BulkItemResponse; +import org.opensearch.action.bulk.BulkProcessor; +import org.opensearch.action.bulk.BulkRequest; +import org.opensearch.action.bulk.BulkResponse; +import org.opensearch.action.delete.DeleteRequest; +import org.opensearch.action.index.IndexRequest; +import org.opensearch.action.update.UpdateRequest; +import org.opensearch.client.RestClient; +import org.opensearch.client.RestClientBuilder; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.rest.RestStatus; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.security.KeyManagementException; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.util.List; + +import static org.apache.flink.util.ExceptionUtils.firstOrSuppressed; +import static org.apache.flink.util.Preconditions.checkNotNull; + +class OpensearchWriter implements SinkWriter { + + private static final Logger LOG = LoggerFactory.getLogger(OpensearchWriter.class); + + private final OpensearchEmitter emitter; + private final MailboxExecutor mailboxExecutor; + private final boolean flushOnCheckpoint; + private final BulkProcessor bulkProcessor; + private final RestHighLevelClient client; + private final RequestIndexer requestIndexer; + private final Counter numBytesOutCounter; + + private long pendingActions = 0; + private boolean checkpointInProgress = false; + private volatile long lastSendTime = 0; + private volatile long ackTime = Long.MAX_VALUE; + private volatile boolean closed = false; + + /** + * Constructor creating an Opensearch writer. 
+ * + * @param hosts the reachable Opensearch cluster nodes + * @param emitter converting incoming records to Opensearch actions + * @param flushOnCheckpoint if true all until now received records are flushed after every + * checkpoint + * @param bulkProcessorConfig describing the flushing and failure handling of the used {@link + * BulkProcessor} + * @param bulkProcessorBuilderFactory configuring the {@link BulkProcessor}'s builder + * @param networkClientConfig describing properties of the network connection used to connect to + * the Opensearch cluster + * @param metricGroup for the sink writer + * @param mailboxExecutor Flink's mailbox executor + */ + OpensearchWriter( + List hosts, + OpensearchEmitter emitter, + boolean flushOnCheckpoint, + BulkProcessorConfig bulkProcessorConfig, + BulkProcessorBuilderFactory bulkProcessorBuilderFactory, + NetworkClientConfig networkClientConfig, + SinkWriterMetricGroup metricGroup, + MailboxExecutor mailboxExecutor) { + this.emitter = checkNotNull(emitter); + this.flushOnCheckpoint = flushOnCheckpoint; + this.mailboxExecutor = checkNotNull(mailboxExecutor); + this.client = + new RestHighLevelClient( + configureRestClientBuilder( + RestClient.builder(hosts.toArray(new HttpHost[0])), + networkClientConfig)); + this.bulkProcessor = createBulkProcessor(bulkProcessorBuilderFactory, bulkProcessorConfig); + this.requestIndexer = new DefaultRequestIndexer(); + checkNotNull(metricGroup); + metricGroup.setCurrentSendTimeGauge(() -> ackTime - lastSendTime); + this.numBytesOutCounter = metricGroup.getIOMetricGroup().getNumBytesOutCounter(); + try { + emitter.open(); + } catch (Exception e) { + throw new FlinkRuntimeException("Failed to open the OpensearchEmitter", e); + } + } + + @Override + public void write(IN element, Context context) throws IOException, InterruptedException { + // do not allow new bulk writes until all actions are flushed + while (checkpointInProgress) { + mailboxExecutor.yield(); + } + emitter.emit(element, context, requestIndexer); + } + + @Override + public void flush(boolean flush) throws IOException, InterruptedException { + checkpointInProgress = true; + while (pendingActions != 0 && (flushOnCheckpoint || flush)) { + bulkProcessor.flush(); + LOG.info("Waiting for the response of {} pending actions.", pendingActions); + mailboxExecutor.yield(); + } + checkpointInProgress = false; + } + + @VisibleForTesting + void blockingFlushAllActions() throws InterruptedException { + while (pendingActions != 0) { + bulkProcessor.flush(); + LOG.info("Waiting for the response of {} pending actions.", pendingActions); + mailboxExecutor.yield(); + } + } + + @Override + public void close() throws Exception { + closed = true; + emitter.close(); + bulkProcessor.close(); + client.close(); + } + + private static RestClientBuilder configureRestClientBuilder( + RestClientBuilder builder, NetworkClientConfig networkClientConfig) { + if (networkClientConfig.getConnectionPathPrefix() != null) { + builder.setPathPrefix(networkClientConfig.getConnectionPathPrefix()); + } + + builder.setHttpClientConfigCallback( + httpClientBuilder -> { + if (networkClientConfig.getPassword() != null + && networkClientConfig.getUsername() != null) { + final CredentialsProvider credentialsProvider = + new BasicCredentialsProvider(); + credentialsProvider.setCredentials( + AuthScope.ANY, + new UsernamePasswordCredentials( + networkClientConfig.getUsername(), + networkClientConfig.getPassword())); + + httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider); + } + + if 
(networkClientConfig.isAllowInsecure() != null + && networkClientConfig.isAllowInsecure()) { + try { + httpClientBuilder.setSSLContext( + SSLContexts.custom() + .loadTrustMaterial(new TrustAllStrategy()) + .build()); + } catch (final NoSuchAlgorithmException + | KeyStoreException + | KeyManagementException ex) { + throw new IllegalStateException( + "Unable to create custom SSL context", ex); + } + } + + return httpClientBuilder; + }); + if (networkClientConfig.getConnectionRequestTimeout() != null + || networkClientConfig.getConnectionTimeout() != null + || networkClientConfig.getSocketTimeout() != null) { + builder.setRequestConfigCallback( + requestConfigBuilder -> { + if (networkClientConfig.getConnectionRequestTimeout() != null) { + requestConfigBuilder.setConnectionRequestTimeout( + networkClientConfig.getConnectionRequestTimeout()); + } + if (networkClientConfig.getConnectionTimeout() != null) { + requestConfigBuilder.setConnectTimeout( + networkClientConfig.getConnectionTimeout()); + } + if (networkClientConfig.getSocketTimeout() != null) { + requestConfigBuilder.setSocketTimeout( + networkClientConfig.getSocketTimeout()); + } + return requestConfigBuilder; + }); + } + return builder; + } + + private BulkProcessor createBulkProcessor( + BulkProcessorBuilderFactory bulkProcessorBuilderFactory, + BulkProcessorConfig bulkProcessorConfig) { + + BulkProcessor.Builder builder = + bulkProcessorBuilderFactory.apply(client, bulkProcessorConfig, new BulkListener()); + + // This makes flush() blocking + builder.setConcurrentRequests(0); + + return builder.build(); + } + + private class BulkListener implements BulkProcessor.Listener { + + @Override + public void beforeBulk(long executionId, BulkRequest request) { + LOG.info("Sending bulk of {} actions to Opensearch.", request.numberOfActions()); + lastSendTime = System.currentTimeMillis(); + numBytesOutCounter.inc(request.estimatedSizeInBytes()); + } + + @Override + public void afterBulk(long executionId, BulkRequest request, BulkResponse response) { + ackTime = System.currentTimeMillis(); + enqueueActionInMailbox( + () -> extractFailures(request, response), "opensearchSuccessCallback"); + } + + @Override + public void afterBulk(long executionId, BulkRequest request, Throwable failure) { + enqueueActionInMailbox( + () -> { + throw new FlinkRuntimeException("Complete bulk has failed.", failure); + }, + "opensearchErrorCallback"); + } + } + + private void enqueueActionInMailbox( + ThrowingRunnable action, String actionName) { + // If the writer is cancelled before the last bulk response (i.e. no flush on checkpoint + // configured or shutdown without a final + // checkpoint) the mailbox might already be shutdown, so we should not enqueue any + // actions. 
+ if (isClosed()) { + return; + } + mailboxExecutor.execute(action, actionName); + } + + private void extractFailures(BulkRequest request, BulkResponse response) { + if (!response.hasFailures()) { + pendingActions -= request.numberOfActions(); + return; + } + + Throwable chainedFailures = null; + for (int i = 0; i < response.getItems().length; i++) { + final BulkItemResponse itemResponse = response.getItems()[i]; + if (!itemResponse.isFailed()) { + continue; + } + final Throwable failure = itemResponse.getFailure().getCause(); + if (failure == null) { + continue; + } + final RestStatus restStatus = itemResponse.getFailure().getStatus(); + final DocWriteRequest actionRequest = request.requests().get(i); + + chainedFailures = + firstOrSuppressed( + wrapException(restStatus, failure, actionRequest), chainedFailures); + } + if (chainedFailures == null) { + return; + } + throw new FlinkRuntimeException(chainedFailures); + } + + private static Throwable wrapException( + RestStatus restStatus, Throwable rootFailure, DocWriteRequest actionRequest) { + if (restStatus == null) { + return new FlinkRuntimeException( + String.format("Single action %s of bulk request failed.", actionRequest), + rootFailure); + } else { + return new FlinkRuntimeException( + String.format( + "Single action %s of bulk request failed with status %s.", + actionRequest, restStatus.getStatus()), + rootFailure); + } + } + + private boolean isClosed() { + if (closed) { + LOG.warn("Writer was closed before all records were acknowledged by Opensearch."); + } + return closed; + } + + private class DefaultRequestIndexer implements RequestIndexer { + + @Override + public void add(DeleteRequest... deleteRequests) { + for (final DeleteRequest deleteRequest : deleteRequests) { + pendingActions++; + bulkProcessor.add(deleteRequest); + } + } + + @Override + public void add(IndexRequest... indexRequests) { + for (final IndexRequest indexRequest : indexRequests) { + pendingActions++; + bulkProcessor.add(indexRequest); + } + } + + @Override + public void add(UpdateRequest... updateRequests) { + for (final UpdateRequest updateRequest : updateRequests) { + pendingActions++; + bulkProcessor.add(updateRequest); + } + } + } +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/RequestIndexer.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/RequestIndexer.java new file mode 100644 index 0000000..227a526 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/RequestIndexer.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.connector.opensearch.sink; + +import org.apache.flink.annotation.Internal; + +import org.opensearch.action.delete.DeleteRequest; +import org.opensearch.action.index.IndexRequest; +import org.opensearch.action.update.UpdateRequest; + +/** + * Users add multiple delete, index or update requests to a {@link RequestIndexer} to prepare them + * for sending to an Opensearch cluster. + */ +@Internal +public interface RequestIndexer { + /** + * Add multiple {@link DeleteRequest} to the indexer to prepare for sending requests to + * Opensearch. + * + * @param deleteRequests The multiple {@link DeleteRequest} to add. + */ + void add(DeleteRequest... deleteRequests); + + /** + * Add multiple {@link IndexRequest} to the indexer to prepare for sending requests to + * Opensearch. + * + * @param indexRequests The multiple {@link IndexRequest} to add. + */ + void add(IndexRequest... indexRequests); + + /** + * Add multiple {@link UpdateRequest} to the indexer to prepare for sending requests to + * Opensearch. + * + * @param updateRequests The multiple {@link UpdateRequest} to add. + */ + void add(UpdateRequest... updateRequests); +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/AbstractTimeIndexGenerator.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/AbstractTimeIndexGenerator.java new file mode 100644 index 0000000..80a7a8c --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/AbstractTimeIndexGenerator.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.connector.opensearch.table; + +import org.apache.flink.annotation.Internal; + +import java.time.format.DateTimeFormatter; + +/** Abstract class for time related {@link IndexGenerator}. 
*/ +@Internal +abstract class AbstractTimeIndexGenerator extends IndexGeneratorBase { + + private final String dateTimeFormat; + protected transient DateTimeFormatter dateTimeFormatter; + + public AbstractTimeIndexGenerator(String index, String dateTimeFormat) { + super(index); + this.dateTimeFormat = dateTimeFormat; + } + + @Override + public void open() { + this.dateTimeFormatter = DateTimeFormatter.ofPattern(dateTimeFormat); + } +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/IndexGenerator.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/IndexGenerator.java new file mode 100644 index 0000000..cd821fc --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/IndexGenerator.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.connector.opensearch.table; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.table.data.RowData; +import org.apache.flink.types.Row; + +import java.io.Serializable; + +/** This interface is responsible to generate index name from given {@link Row} record. */ +@Internal +interface IndexGenerator extends Serializable { + + /** + * Initialize the index generator, this will be called only once before {@link + * #generate(RowData)} is called. + */ + default void open() {} + + /** Generate index name according to the given row. */ + String generate(RowData row); +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/IndexGeneratorBase.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/IndexGeneratorBase.java new file mode 100644 index 0000000..e5298c1 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/IndexGeneratorBase.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.connector.opensearch.table; + +import org.apache.flink.annotation.Internal; + +import java.util.Objects; + +/** Base class for {@link IndexGenerator}. */ +@Internal +public abstract class IndexGeneratorBase implements IndexGenerator { + + private static final long serialVersionUID = 1L; + protected final String index; + + public IndexGeneratorBase(String index) { + this.index = index; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof IndexGeneratorBase)) { + return false; + } + IndexGeneratorBase that = (IndexGeneratorBase) o; + return index.equals(that.index); + } + + @Override + public int hashCode() { + return Objects.hash(index); + } +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/IndexGeneratorFactory.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/IndexGeneratorFactory.java new file mode 100644 index 0000000..e87c41f --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/IndexGeneratorFactory.java @@ -0,0 +1,278 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.connector.opensearch.table; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.table.api.TableException; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.LogicalTypeRoot; + +import javax.annotation.Nonnull; + +import java.io.Serializable; +import java.time.LocalDate; +import java.time.LocalTime; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Factory of {@link IndexGenerator}. + * + *

<p>Flink supports both static index and dynamic index.
+ *

<p>If you want to have a static index, this option value should be a plain string, e.g.
+ * 'myusers'; all records will be consistently written into the "myusers" index.
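+ *
+ * <p>A sketch of how this factory resolves an index option value at runtime (field names are
+ * illustrative; the dynamic '{field_name|date_format_string}' form is described below):
+ *
+ * <pre>{@code
+ * IndexGenerator generator =
+ *     IndexGeneratorFactory.createIndexGenerator(
+ *         "myusers_{log_ts|yyyy-MM-dd}",
+ *         Arrays.asList("user_id", "log_ts"),
+ *         Arrays.asList(DataTypes.STRING(), DataTypes.TIMESTAMP(3)));
+ * generator.open();
+ * // generate(row) then returns, e.g., "myusers_2020-03-27" for a row whose log_ts falls on that day
+ * }</pre>
+ *
+ * <p>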

If you want to have a dynamic index, you can use '{field_name}' to reference a field value in + * the record to dynamically generate a target index. You can also use + * '{field_name|date_format_string}' to convert a field value of TIMESTAMP/DATE/TIME type into the + * format specified by date_format_string. The date_format_string is compatible with {@link + * java.text.SimpleDateFormat}. For example, if the option value is 'myusers_{log_ts|yyyy-MM-dd}', + * then a record with log_ts field value 2020-03-27 12:25:55 will be written into + * "myusers_2020-03-27" index. + */ +@Internal +final class IndexGeneratorFactory { + + private IndexGeneratorFactory() {} + + public static IndexGenerator createIndexGenerator( + String index, List fieldNames, List dataTypes) { + final IndexHelper indexHelper = new IndexHelper(); + if (indexHelper.checkIsDynamicIndex(index)) { + return createRuntimeIndexGenerator( + index, + fieldNames.toArray(new String[0]), + dataTypes.toArray(new DataType[0]), + indexHelper); + } else { + return new StaticIndexGenerator(index); + } + } + + interface DynamicFormatter extends Serializable { + String format(@Nonnull Object fieldValue, DateTimeFormatter formatter); + } + + private static IndexGenerator createRuntimeIndexGenerator( + String index, String[] fieldNames, DataType[] fieldTypes, IndexHelper indexHelper) { + final String dynamicIndexPatternStr = indexHelper.extractDynamicIndexPatternStr(index); + final String indexPrefix = index.substring(0, index.indexOf(dynamicIndexPatternStr)); + final String indexSuffix = + index.substring(indexPrefix.length() + dynamicIndexPatternStr.length()); + + final boolean isDynamicIndexWithFormat = indexHelper.checkIsDynamicIndexWithFormat(index); + final int indexFieldPos = + indexHelper.extractIndexFieldPos(index, fieldNames, isDynamicIndexWithFormat); + final LogicalType indexFieldType = fieldTypes[indexFieldPos].getLogicalType(); + final LogicalTypeRoot indexFieldLogicalTypeRoot = indexFieldType.getTypeRoot(); + + // validate index field type + indexHelper.validateIndexFieldType(indexFieldLogicalTypeRoot); + + // time extract dynamic index pattern + final RowData.FieldGetter fieldGetter = + RowData.createFieldGetter(indexFieldType, indexFieldPos); + + if (isDynamicIndexWithFormat) { + final String dateTimeFormat = + indexHelper.extractDateFormat(index, indexFieldLogicalTypeRoot); + DynamicFormatter formatFunction = + createFormatFunction(indexFieldType, indexFieldLogicalTypeRoot); + + return new AbstractTimeIndexGenerator(index, dateTimeFormat) { + @Override + public String generate(RowData row) { + Object fieldOrNull = fieldGetter.getFieldOrNull(row); + final String formattedField; + // TODO we can possibly optimize it to use the nullability of the field + if (fieldOrNull != null) { + formattedField = formatFunction.format(fieldOrNull, dateTimeFormatter); + } else { + formattedField = "null"; + } + return indexPrefix.concat(formattedField).concat(indexSuffix); + } + }; + } + // general dynamic index pattern + return new IndexGeneratorBase(index) { + @Override + public String generate(RowData row) { + Object indexField = fieldGetter.getFieldOrNull(row); + return indexPrefix + .concat(indexField == null ? 
"null" : indexField.toString()) + .concat(indexSuffix); + } + }; + } + + private static DynamicFormatter createFormatFunction( + LogicalType indexFieldType, LogicalTypeRoot indexFieldLogicalTypeRoot) { + switch (indexFieldLogicalTypeRoot) { + case DATE: + return (value, dateTimeFormatter) -> { + Integer indexField = (Integer) value; + return LocalDate.ofEpochDay(indexField).format(dateTimeFormatter); + }; + case TIME_WITHOUT_TIME_ZONE: + return (value, dateTimeFormatter) -> { + Integer indexField = (Integer) value; + return LocalTime.ofNanoOfDay(indexField * 1_000_000L).format(dateTimeFormatter); + }; + case TIMESTAMP_WITHOUT_TIME_ZONE: + return (value, dateTimeFormatter) -> { + TimestampData indexField = (TimestampData) value; + return indexField.toLocalDateTime().format(dateTimeFormatter); + }; + case TIMESTAMP_WITH_TIME_ZONE: + throw new UnsupportedOperationException( + "TIMESTAMP_WITH_TIME_ZONE is not supported yet"); + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + return (value, dateTimeFormatter) -> { + TimestampData indexField = (TimestampData) value; + return indexField.toInstant().atZone(ZoneOffset.UTC).format(dateTimeFormatter); + }; + default: + throw new TableException( + String.format( + "Unsupported type '%s' found in Opensearch dynamic index field, " + + "time-related pattern only support types are: DATE,TIME,TIMESTAMP.", + indexFieldType)); + } + } + + /** + * Helper class for {@link IndexGeneratorFactory}, this helper can use to validate index field + * type ans parse index format from pattern. + */ + private static class IndexHelper { + private static final Pattern dynamicIndexPattern = Pattern.compile("\\{[^\\{\\}]+\\}?"); + private static final Pattern dynamicIndexTimeExtractPattern = + Pattern.compile(".*\\{.+\\|.*\\}.*"); + private static final List supportedTypes = new ArrayList<>(); + private static final Map defaultFormats = new HashMap<>(); + + static { + // time related types + supportedTypes.add(LogicalTypeRoot.DATE); + supportedTypes.add(LogicalTypeRoot.TIME_WITHOUT_TIME_ZONE); + supportedTypes.add(LogicalTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE); + supportedTypes.add(LogicalTypeRoot.TIMESTAMP_WITH_TIME_ZONE); + supportedTypes.add(LogicalTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE); + // general types + supportedTypes.add(LogicalTypeRoot.VARCHAR); + supportedTypes.add(LogicalTypeRoot.CHAR); + supportedTypes.add(LogicalTypeRoot.TINYINT); + supportedTypes.add(LogicalTypeRoot.INTEGER); + supportedTypes.add(LogicalTypeRoot.BIGINT); + } + + static { + defaultFormats.put(LogicalTypeRoot.DATE, "yyyy_MM_dd"); + defaultFormats.put(LogicalTypeRoot.TIME_WITHOUT_TIME_ZONE, "HH_mm_ss"); + defaultFormats.put(LogicalTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE, "yyyy_MM_dd_HH_mm_ss"); + defaultFormats.put(LogicalTypeRoot.TIMESTAMP_WITH_TIME_ZONE, "yyyy_MM_dd_HH_mm_ss"); + defaultFormats.put( + LogicalTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE, "yyyy_MM_dd_HH_mm_ssX"); + } + + /** Validate the index field Type. */ + void validateIndexFieldType(LogicalTypeRoot logicalType) { + if (!supportedTypes.contains(logicalType)) { + throw new IllegalArgumentException( + String.format( + "Unsupported type %s of index field, " + "Supported types are: %s", + logicalType, supportedTypes)); + } + } + + /** Get the default date format. */ + String getDefaultFormat(LogicalTypeRoot logicalType) { + return defaultFormats.get(logicalType); + } + + /** Check general dynamic index is enabled or not by index pattern. 
*/ + boolean checkIsDynamicIndex(String index) { + final Matcher matcher = dynamicIndexPattern.matcher(index); + int count = 0; + while (matcher.find()) { + count++; + } + if (count > 1) { + throw new TableException( + String.format( + "Chaining dynamic index pattern %s is not supported," + + " only support single dynamic index pattern.", + index)); + } + return count == 1; + } + + /** Check time extract dynamic index is enabled or not by index pattern. */ + boolean checkIsDynamicIndexWithFormat(String index) { + return dynamicIndexTimeExtractPattern.matcher(index).matches(); + } + + /** Extract dynamic index pattern string from index pattern string. */ + String extractDynamicIndexPatternStr(String index) { + int start = index.indexOf("{"); + int end = index.lastIndexOf("}"); + return index.substring(start, end + 1); + } + + /** Extract index field position in a fieldNames, return the field position. */ + int extractIndexFieldPos( + String index, String[] fieldNames, boolean isDynamicIndexWithFormat) { + List fieldList = Arrays.asList(fieldNames); + String indexFieldName; + if (isDynamicIndexWithFormat) { + indexFieldName = index.substring(index.indexOf("{") + 1, index.indexOf("|")); + } else { + indexFieldName = index.substring(index.indexOf("{") + 1, index.indexOf("}")); + } + if (!fieldList.contains(indexFieldName)) { + throw new TableException( + String.format( + "Unknown field '%s' in index pattern '%s', please check the field name.", + indexFieldName, index)); + } + return fieldList.indexOf(indexFieldName); + } + + /** Extract dateTime format by the date format that extracted from index pattern string. */ + private String extractDateFormat(String index, LogicalTypeRoot logicalType) { + String format = index.substring(index.indexOf("|") + 1, index.indexOf("}")); + if ("".equals(format)) { + format = getDefaultFormat(logicalType); + } + return format; + } + } +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/KeyExtractor.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/KeyExtractor.java new file mode 100644 index 0000000..20e1013 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/KeyExtractor.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.connector.opensearch.table; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.DistinctType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.util.function.SerializableFunction; + +import java.io.Serializable; +import java.time.Duration; +import java.time.LocalDate; +import java.time.LocalTime; +import java.time.Period; +import java.util.List; + +/** An extractor for a Opensearch key from a {@link RowData}. */ +@Internal +class KeyExtractor implements SerializableFunction { + private final FieldFormatter[] fieldFormatters; + private final String keyDelimiter; + + private interface FieldFormatter extends Serializable { + String format(RowData rowData); + } + + private KeyExtractor(FieldFormatter[] fieldFormatters, String keyDelimiter) { + this.fieldFormatters = fieldFormatters; + this.keyDelimiter = keyDelimiter; + } + + @Override + public String apply(RowData rowData) { + final StringBuilder builder = new StringBuilder(); + for (int i = 0; i < fieldFormatters.length; i++) { + if (i > 0) { + builder.append(keyDelimiter); + } + final String value = fieldFormatters[i].format(rowData); + builder.append(value); + } + return builder.toString(); + } + + public static SerializableFunction createKeyExtractor( + List primaryKeyTypesWithIndex, String keyDelimiter) { + if (!primaryKeyTypesWithIndex.isEmpty()) { + FieldFormatter[] formatters = + primaryKeyTypesWithIndex.stream() + .map( + logicalTypeWithIndex -> + toFormatter( + logicalTypeWithIndex.index, + logicalTypeWithIndex.logicalType)) + .toArray(FieldFormatter[]::new); + return new KeyExtractor(formatters, keyDelimiter); + } else { + return (row) -> null; + } + } + + private static FieldFormatter toFormatter(int index, LogicalType type) { + switch (type.getTypeRoot()) { + case DATE: + return (row) -> LocalDate.ofEpochDay(row.getInt(index)).toString(); + case TIME_WITHOUT_TIME_ZONE: + return (row) -> + LocalTime.ofNanoOfDay((long) row.getInt(index) * 1_000_000L).toString(); + case INTERVAL_YEAR_MONTH: + return (row) -> Period.ofDays(row.getInt(index)).toString(); + case INTERVAL_DAY_TIME: + return (row) -> Duration.ofMillis(row.getLong(index)).toString(); + case DISTINCT_TYPE: + return toFormatter(index, ((DistinctType) type).getSourceType()); + default: + RowData.FieldGetter fieldGetter = RowData.createFieldGetter(type, index); + return (row) -> fieldGetter.getFieldOrNull(row).toString(); + } + } +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/LogicalTypeWithIndex.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/LogicalTypeWithIndex.java new file mode 100644 index 0000000..e5fe9a6 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/LogicalTypeWithIndex.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.connector.opensearch.table; + +import org.apache.flink.table.types.logical.LogicalType; + +class LogicalTypeWithIndex { + public final int index; + public final LogicalType logicalType; + + LogicalTypeWithIndex(int index, LogicalType logicalType) { + this.index = index; + this.logicalType = logicalType; + } +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchConfiguration.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchConfiguration.java new file mode 100644 index 0000000..8bf52d1 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchConfiguration.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
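[Editorial note] To see the two classes above working together, here is a minimal sketch (schema, positions, and values are invented; package-private access from the same package is assumed) of how a composite document id is assembled:

    import java.util.Arrays;
    import java.util.List;

    import org.apache.flink.table.api.DataTypes;
    import org.apache.flink.table.data.GenericRowData;
    import org.apache.flink.table.data.RowData;
    import org.apache.flink.table.data.StringData;
    import org.apache.flink.util.function.SerializableFunction;

    class KeyExtractorSketch {
        static void sketch() {
            // Primary key (user_id STRING, order_id BIGINT) at row positions 0 and 1.
            List<LogicalTypeWithIndex> primaryKey =
                    Arrays.asList(
                            new LogicalTypeWithIndex(0, DataTypes.STRING().getLogicalType()),
                            new LogicalTypeWithIndex(1, DataTypes.BIGINT().getLogicalType()));

            SerializableFunction<RowData, String> keyOf =
                    KeyExtractor.createKeyExtractor(primaryKey, "_");

            RowData row = GenericRowData.of(StringData.fromString("alice"), 1001L);
            String documentId = keyOf.apply(row); // -> "alice_1001"
        }
    }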
+ */ + +package org.apache.flink.connector.opensearch.table; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.configuration.MemorySize; +import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.connector.base.DeliveryGuarantee; +import org.apache.flink.connector.opensearch.sink.FlushBackoffType; +import org.apache.flink.table.api.ValidationException; + +import org.apache.http.HttpHost; + +import java.time.Duration; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.ALLOW_INSECURE; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.BULK_FLUSH_BACKOFF_DELAY_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.BULK_FLUSH_BACKOFF_MAX_RETRIES_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.BULK_FLUSH_BACKOFF_TYPE_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.BULK_FLUSH_INTERVAL_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.BULK_FLUSH_MAX_ACTIONS_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.BULK_FLUSH_MAX_SIZE_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.CONNECTION_PATH_PREFIX_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.CONNECTION_REQUEST_TIMEOUT; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.CONNECTION_TIMEOUT; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.DELIVERY_GUARANTEE_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.FLUSH_ON_CHECKPOINT_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.HOSTS_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.INDEX_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.KEY_DELIMITER_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.PASSWORD_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.SOCKET_TIMEOUT; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.USERNAME_OPTION; +import static org.apache.flink.table.factories.FactoryUtil.SINK_PARALLELISM; +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** Opensearch base configuration. 
 */
+@Internal
+class OpensearchConfiguration {
+    protected final ReadableConfig config;
+
+    OpensearchConfiguration(ReadableConfig config) {
+        this.config = checkNotNull(config);
+    }
+
+    public int getBulkFlushMaxActions() {
+        return config.get(BULK_FLUSH_MAX_ACTIONS_OPTION);
+    }
+
+    public MemorySize getBulkFlushMaxByteSize() {
+        return config.get(BULK_FLUSH_MAX_SIZE_OPTION);
+    }
+
+    public long getBulkFlushInterval() {
+        return config.get(BULK_FLUSH_INTERVAL_OPTION).toMillis();
+    }
+
+    public DeliveryGuarantee getDeliveryGuarantee() {
+        return config.get(DELIVERY_GUARANTEE_OPTION);
+    }
+
+    public Optional<String> getUsername() {
+        return config.getOptional(USERNAME_OPTION);
+    }
+
+    public Optional<String> getPassword() {
+        return config.getOptional(PASSWORD_OPTION);
+    }
+
+    public Optional<FlushBackoffType> getBulkFlushBackoffType() {
+        return config.getOptional(BULK_FLUSH_BACKOFF_TYPE_OPTION);
+    }
+
+    public Optional<Integer> getBulkFlushBackoffRetries() {
+        return config.getOptional(BULK_FLUSH_BACKOFF_MAX_RETRIES_OPTION);
+    }
+
+    public Optional<Long> getBulkFlushBackoffDelay() {
+        return config.getOptional(BULK_FLUSH_BACKOFF_DELAY_OPTION).map(Duration::toMillis);
+    }
+
+    public boolean isDisableFlushOnCheckpoint() {
+        return !config.get(FLUSH_ON_CHECKPOINT_OPTION);
+    }
+
+    public String getIndex() {
+        return config.get(INDEX_OPTION);
+    }
+
+    public String getKeyDelimiter() {
+        return config.get(KEY_DELIMITER_OPTION);
+    }
+
+    public Optional<String> getPathPrefix() {
+        return config.getOptional(CONNECTION_PATH_PREFIX_OPTION);
+    }
+
+    public Optional<Duration> getConnectionRequestTimeout() {
+        return config.getOptional(CONNECTION_REQUEST_TIMEOUT);
+    }
+
+    public Optional<Duration> getConnectionTimeout() {
+        return config.getOptional(CONNECTION_TIMEOUT);
+    }
+
+    public Optional<Duration> getSocketTimeout() {
+        return config.getOptional(SOCKET_TIMEOUT);
+    }
+
+    public List<HttpHost> getHosts() {
+        return config.get(HOSTS_OPTION).stream()
+                .map(OpensearchConfiguration::validateAndParseHostsString)
+                .collect(Collectors.toList());
+    }
+
+    public Optional<Integer> getParallelism() {
+        return config.getOptional(SINK_PARALLELISM);
+    }
+
+    public Optional<Boolean> isAllowInsecure() {
+        return config.getOptional(ALLOW_INSECURE);
+    }
+
+    private static HttpHost validateAndParseHostsString(String host) {
+        try {
+            HttpHost httpHost = HttpHost.create(host);
+            if (httpHost.getPort() < 0) {
+                throw new ValidationException(
+                        String.format(
+                                "Could not parse host '%s' in option '%s'. It should follow the format 'http://host_name:port'. Missing port.",
+                                host, HOSTS_OPTION.key()));
+            }
+
+            if (httpHost.getSchemeName() == null) {
+                throw new ValidationException(
+                        String.format(
+                                "Could not parse host '%s' in option '%s'. It should follow the format 'http://host_name:port'. Missing scheme.",
+                                host, HOSTS_OPTION.key()));
+            }
+            return httpHost;
+        } catch (Exception e) {
+            throw new ValidationException(
+                    String.format(
+                            "Could not parse host '%s' in option '%s'. It should follow the format 'http://host_name:port'.",
+                            host, HOSTS_OPTION.key()),
+                    e);
+        }
+    }
+}
diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchConnectorOptions.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchConnectorOptions.java
new file mode 100644
index 0000000..5fca777
--- /dev/null
+++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchConnectorOptions.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.connector.opensearch.table;
+
+import org.apache.flink.annotation.PublicEvolving;
+import org.apache.flink.configuration.ConfigOption;
+import org.apache.flink.configuration.ConfigOptions;
+import org.apache.flink.configuration.MemorySize;
+import org.apache.flink.connector.base.DeliveryGuarantee;
+import org.apache.flink.connector.opensearch.sink.FlushBackoffType;
+
+import java.time.Duration;
+import java.util.List;
+
+/**
+ * Base options for the Opensearch connector. Needs to be public so that the {@link
+ * org.apache.flink.table.api.TableDescriptor} can access it.
+ */
+@PublicEvolving
+public class OpensearchConnectorOptions {
+
+    OpensearchConnectorOptions() {}
+
+    public static final ConfigOption<List<String>> HOSTS_OPTION =
+            ConfigOptions.key("hosts")
+                    .stringType()
+                    .asList()
+                    .noDefaultValue()
+                    .withDescription("Opensearch hosts to connect to.");
+
+    public static final ConfigOption<String> INDEX_OPTION =
+            ConfigOptions.key("index")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("Opensearch index for every record.");
+
+    public static final ConfigOption<String> PASSWORD_OPTION =
+            ConfigOptions.key("password")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("Password used to connect to Opensearch instance.");
+
+    public static final ConfigOption<String> USERNAME_OPTION =
+            ConfigOptions.key("username")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("Username used to connect to Opensearch instance.");
+
+    public static final ConfigOption<String> KEY_DELIMITER_OPTION =
+            ConfigOptions.key("document-id.key-delimiter")
+                    .stringType()
+                    .defaultValue("_")
+                    .withDescription(
+                            "Delimiter for composite keys e.g., \"$\" would result in IDs \"KEY1$KEY2$KEY3\".");
+
+    public static final ConfigOption<Integer> BULK_FLUSH_MAX_ACTIONS_OPTION =
+            ConfigOptions.key("sink.bulk-flush.max-actions")
+                    .intType()
+                    .defaultValue(1000)
+                    .withDescription("Maximum number of actions to buffer for each bulk request.");
+
+    public static final ConfigOption<MemorySize> BULK_FLUSH_MAX_SIZE_OPTION =
+            ConfigOptions.key("sink.bulk-flush.max-size")
+                    .memoryType()
+                    .defaultValue(MemorySize.parse("2mb"))
+                    .withDescription("Maximum size of buffered actions per bulk request.");
+
+    public static final ConfigOption<Duration> BULK_FLUSH_INTERVAL_OPTION =
+            ConfigOptions.key("sink.bulk-flush.interval")
+                    .durationType()
+                    .defaultValue(Duration.ofSeconds(1))
+                    .withDescription("Bulk flush interval.");
+
+    public static final ConfigOption<FlushBackoffType> BULK_FLUSH_BACKOFF_TYPE_OPTION =
+            ConfigOptions.key("sink.bulk-flush.backoff.strategy")
+                    .enumType(FlushBackoffType.class)
+                    .noDefaultValue()
+                    .withDescription("Backoff strategy.");
+
+    public static final ConfigOption<Integer> BULK_FLUSH_BACKOFF_MAX_RETRIES_OPTION =
+            ConfigOptions.key("sink.bulk-flush.backoff.max-retries")
+                    .intType()
+                    .noDefaultValue()
+                    .withDescription("Maximum number of retries.");
+
+    public static final ConfigOption<Duration> BULK_FLUSH_BACKOFF_DELAY_OPTION =
+            ConfigOptions.key("sink.bulk-flush.backoff.delay")
+                    .durationType()
+                    .noDefaultValue()
+                    .withDescription("Delay between each backoff attempt.");
+
+    public static final ConfigOption<Boolean> FLUSH_ON_CHECKPOINT_OPTION =
+            ConfigOptions.key("sink.flush-on-checkpoint")
+                    .booleanType()
+                    .defaultValue(true)
+                    .withDescription("Disables flushing on checkpoint when set to false.");
+
+    public static final ConfigOption<String> CONNECTION_PATH_PREFIX_OPTION =
+            ConfigOptions.key("connection.path-prefix")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("Prefix string to be added to every REST communication.");
+
+    public static final ConfigOption<Duration> CONNECTION_REQUEST_TIMEOUT =
+            ConfigOptions.key("connection.request-timeout")
+                    .durationType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The timeout for requesting a connection from the connection manager.");
+
+    public static final ConfigOption<Duration> CONNECTION_TIMEOUT =
+            ConfigOptions.key("connection.timeout")
+                    .durationType()
+                    .noDefaultValue()
+                    .withDescription("The timeout for establishing a connection.");
+
+    public static final ConfigOption<Duration> SOCKET_TIMEOUT =
+            ConfigOptions.key("socket.timeout")
+                    .durationType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The socket timeout (SO_TIMEOUT) for waiting for data or, put differently, "
+                                    + "a maximum period of inactivity between two consecutive data packets.");
+
+    public static final ConfigOption<String> FORMAT_OPTION =
+            ConfigOptions.key("format")
+                    .stringType()
+                    .defaultValue("json")
+                    .withDescription(
+                            "The format must produce a valid JSON document. "
+                                    + "Please refer to the documentation on formats for more details.");
+
+    public static final ConfigOption<DeliveryGuarantee> DELIVERY_GUARANTEE_OPTION =
+            ConfigOptions.key("sink.delivery-guarantee")
+                    .enumType(DeliveryGuarantee.class)
+                    .defaultValue(DeliveryGuarantee.NONE)
+                    .withDescription("Optional delivery guarantee when committing.");
+
+    public static final ConfigOption<Boolean> ALLOW_INSECURE =
+            ConfigOptions.key("allow-insecure")
+                    .booleanType()
+                    .defaultValue(false)
+                    .withDescription(
+                            "Allow insecure connections to HTTPS endpoints (disables certificate validation).");
+}
diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSink.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSink.java
new file mode 100644
index 0000000..ec1ec28
--- /dev/null
+++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSink.java
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
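[Editorial note] Taken together, the options above make up the connector's WITH-clause surface. A hedged end-to-end sketch (host, schema, and option values are placeholders; unset options keep the defaults above):

    import org.apache.flink.table.api.EnvironmentSettings;
    import org.apache.flink.table.api.TableEnvironment;

    public class OpensearchDdlSketch {
        public static void main(String[] args) {
            TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
            // The connector identifier "opensearch" and the option keys come from this patch.
            tEnv.executeSql(
                    "CREATE TABLE orders_sink (\n"
                            + "  order_id BIGINT,\n"
                            + "  user_id STRING,\n"
                            + "  log_ts TIMESTAMP(3),\n"
                            + "  PRIMARY KEY (user_id, order_id) NOT ENFORCED\n"
                            + ") WITH (\n"
                            + "  'connector' = 'opensearch',\n"
                            + "  'hosts' = 'http://localhost:9200',\n"
                            + "  'index' = 'orders-{log_ts|yyyy-MM-dd}',\n"
                            + "  'document-id.key-delimiter' = '_',\n"
                            + "  'sink.bulk-flush.max-actions' = '1000',\n"
                            + "  'sink.bulk-flush.interval' = '1s'\n"
                            + ")");
        }
    }

Note that several combinations are rejected later by the factory's validation (see OpensearchDynamicSinkFactory below): an empty 'index', a 'sink.bulk-flush.max-size' that is not MB-aligned, 'sink.bulk-flush.backoff.max-retries' below 1, a host without scheme or port, or a 'username' without a matching 'password'.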
+ */ + +package org.apache.flink.connector.opensearch.table; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.connector.opensearch.sink.FlushBackoffType; +import org.apache.flink.connector.opensearch.sink.OpensearchSink; +import org.apache.flink.connector.opensearch.sink.OpensearchSinkBuilder; +import org.apache.flink.table.connector.ChangelogMode; +import org.apache.flink.table.connector.format.EncodingFormat; +import org.apache.flink.table.connector.sink.DynamicTableSink; +import org.apache.flink.table.connector.sink.SinkV2Provider; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.DataType; +import org.apache.flink.types.RowKind; +import org.apache.flink.util.StringUtils; + +import org.apache.http.HttpHost; +import org.opensearch.common.xcontent.XContentType; + +import java.util.List; +import java.util.Objects; +import java.util.function.Function; + +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** + * A {@link DynamicTableSink} that describes how to create a {@link OpensearchSink} from a logical + * description. + */ +@Internal +class OpensearchDynamicSink implements DynamicTableSink { + + final EncodingFormat> format; + final DataType physicalRowDataType; + final List primaryKeyLogicalTypesWithIndex; + final OpensearchConfiguration config; + + final String summaryString; + final OpensearchSinkBuilderSupplier builderSupplier; + + OpensearchDynamicSink( + EncodingFormat> format, + OpensearchConfiguration config, + List primaryKeyLogicalTypesWithIndex, + DataType physicalRowDataType, + String summaryString, + OpensearchSinkBuilderSupplier builderSupplier) { + this.format = checkNotNull(format); + this.physicalRowDataType = checkNotNull(physicalRowDataType); + this.primaryKeyLogicalTypesWithIndex = checkNotNull(primaryKeyLogicalTypesWithIndex); + this.config = checkNotNull(config); + this.summaryString = checkNotNull(summaryString); + this.builderSupplier = checkNotNull(builderSupplier); + } + + Function createKeyExtractor() { + return KeyExtractor.createKeyExtractor( + primaryKeyLogicalTypesWithIndex, config.getKeyDelimiter()); + } + + IndexGenerator createIndexGenerator() { + return IndexGeneratorFactory.createIndexGenerator( + config.getIndex(), + DataType.getFieldNames(physicalRowDataType), + DataType.getFieldDataTypes(physicalRowDataType)); + } + + @Override + public ChangelogMode getChangelogMode(ChangelogMode requestedMode) { + ChangelogMode.Builder builder = ChangelogMode.newBuilder(); + for (RowKind kind : requestedMode.getContainedKinds()) { + if (kind != RowKind.UPDATE_BEFORE) { + builder.addContainedKind(kind); + } + } + return builder.build(); + } + + @Override + public SinkRuntimeProvider getSinkRuntimeProvider(Context context) { + SerializationSchema format = + this.format.createRuntimeEncoder(context, physicalRowDataType); + + final RowOpensearchEmitter rowOpensearchEmitter = + new RowOpensearchEmitter( + createIndexGenerator(), format, XContentType.JSON, createKeyExtractor()); + + OpensearchSinkBuilder builder = builderSupplier.get(); + builder.setEmitter(rowOpensearchEmitter); + builder.setHosts(config.getHosts().toArray(new HttpHost[0])); + builder.setDeliveryGuarantee(config.getDeliveryGuarantee()); + builder.setBulkFlushMaxActions(config.getBulkFlushMaxActions()); + builder.setBulkFlushMaxSizeMb(config.getBulkFlushMaxByteSize().getMebiBytes()); + builder.setBulkFlushInterval(config.getBulkFlushInterval()); + + if 
(config.getBulkFlushBackoffType().isPresent()) { + FlushBackoffType backoffType = config.getBulkFlushBackoffType().get(); + int backoffMaxRetries = config.getBulkFlushBackoffRetries().get(); + long backoffDelayMs = config.getBulkFlushBackoffDelay().get(); + + builder.setBulkFlushBackoffStrategy(backoffType, backoffMaxRetries, backoffDelayMs); + } + + if (config.getUsername().isPresent() + && !StringUtils.isNullOrWhitespaceOnly(config.getUsername().get())) { + builder.setConnectionUsername(config.getUsername().get()); + } + + if (config.getPassword().isPresent() + && !StringUtils.isNullOrWhitespaceOnly(config.getPassword().get())) { + builder.setConnectionPassword(config.getPassword().get()); + } + + if (config.getPathPrefix().isPresent() + && !StringUtils.isNullOrWhitespaceOnly(config.getPathPrefix().get())) { + builder.setConnectionPathPrefix(config.getPathPrefix().get()); + } + + if (config.getConnectionRequestTimeout().isPresent()) { + builder.setConnectionRequestTimeout( + (int) config.getConnectionRequestTimeout().get().getSeconds()); + } + + if (config.getConnectionTimeout().isPresent()) { + builder.setConnectionTimeout((int) config.getConnectionTimeout().get().getSeconds()); + } + + if (config.getSocketTimeout().isPresent()) { + builder.setSocketTimeout((int) config.getSocketTimeout().get().getSeconds()); + } + + if (config.isAllowInsecure().isPresent()) { + builder.setAllowInsecure(config.isAllowInsecure().get()); + } + + return SinkV2Provider.of(builder.build(), config.getParallelism().orElse(null)); + } + + @Override + public DynamicTableSink copy() { + return new OpensearchDynamicSink( + format, + config, + primaryKeyLogicalTypesWithIndex, + physicalRowDataType, + summaryString, + builderSupplier); + } + + @Override + public String asSummaryString() { + return summaryString; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + OpensearchDynamicSink that = (OpensearchDynamicSink) o; + return Objects.equals(format, that.format) + && Objects.equals(physicalRowDataType, that.physicalRowDataType) + && Objects.equals( + primaryKeyLogicalTypesWithIndex, that.primaryKeyLogicalTypesWithIndex) + && Objects.equals(config, that.config) + && Objects.equals(summaryString, that.summaryString) + && Objects.equals(builderSupplier, that.builderSupplier); + } + + @Override + public int hashCode() { + return Objects.hash( + format, + physicalRowDataType, + primaryKeyLogicalTypesWithIndex, + config, + summaryString, + builderSupplier); + } +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactory.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactory.java new file mode 100644 index 0000000..d02dce9 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactory.java @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.connector.opensearch.table; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.connector.opensearch.sink.OpensearchSinkBuilder; +import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.catalog.Column; +import org.apache.flink.table.catalog.ResolvedSchema; +import org.apache.flink.table.connector.Projection; +import org.apache.flink.table.connector.format.EncodingFormat; +import org.apache.flink.table.connector.sink.DynamicTableSink; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.factories.DynamicTableFactory; +import org.apache.flink.table.factories.DynamicTableSinkFactory; +import org.apache.flink.table.factories.FactoryUtil; +import org.apache.flink.table.factories.SerializationFormatFactory; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.util.StringUtils; + +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.ALLOW_INSECURE; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.BULK_FLUSH_BACKOFF_DELAY_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.BULK_FLUSH_BACKOFF_MAX_RETRIES_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.BULK_FLUSH_BACKOFF_TYPE_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.BULK_FLUSH_INTERVAL_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.BULK_FLUSH_MAX_ACTIONS_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.BULK_FLUSH_MAX_SIZE_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.CONNECTION_PATH_PREFIX_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.CONNECTION_REQUEST_TIMEOUT; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.CONNECTION_TIMEOUT; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.DELIVERY_GUARANTEE_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.FORMAT_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.HOSTS_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.INDEX_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.KEY_DELIMITER_OPTION; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.PASSWORD_OPTION; 
+import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.SOCKET_TIMEOUT; +import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.USERNAME_OPTION; +import static org.apache.flink.table.factories.FactoryUtil.SINK_PARALLELISM; +import static org.opensearch.common.Strings.capitalize; + +/** A {@link DynamicTableSinkFactory} for discovering OpensearchDynamicSink. */ +@Internal +public class OpensearchDynamicSinkFactory implements DynamicTableSinkFactory { + private static final String FACTORY_IDENTIFIER = "opensearch"; + + private final OpensearchSinkBuilderSupplier sinkBuilderSupplier; + + public OpensearchDynamicSinkFactory() { + this.sinkBuilderSupplier = OpensearchSinkBuilder::new; + } + + @Override + public DynamicTableSink createDynamicTableSink(Context context) { + List primaryKeyLogicalTypesWithIndex = + getPrimaryKeyLogicalTypesWithIndex(context); + EncodingFormat> format = + getValidatedEncodingFormat(this, context); + + OpensearchConfiguration config = getConfiguration(context); + validateConfiguration(config); + + return new OpensearchDynamicSink( + format, + config, + primaryKeyLogicalTypesWithIndex, + context.getPhysicalRowDataType(), + capitalize(FACTORY_IDENTIFIER), + sinkBuilderSupplier); + } + + OpensearchConfiguration getConfiguration(Context context) { + return new OpensearchConfiguration( + Configuration.fromMap(context.getCatalogTable().getOptions())); + } + + void validateConfiguration(OpensearchConfiguration config) { + config.getHosts(); // validate hosts + validate( + config.getIndex().length() >= 1, + () -> String.format("'%s' must not be empty", INDEX_OPTION.key())); + int maxActions = config.getBulkFlushMaxActions(); + validate( + maxActions == -1 || maxActions >= 1, + () -> + String.format( + "'%s' must be at least 1. Got: %s", + BULK_FLUSH_MAX_ACTIONS_OPTION.key(), maxActions)); + long maxSize = config.getBulkFlushMaxByteSize().getBytes(); + long mb1 = 1024 * 1024; + validate( + maxSize == -1 || (maxSize >= mb1 && maxSize % mb1 == 0), + () -> + String.format( + "'%s' must be in MB granularity. Got: %s", + BULK_FLUSH_MAX_SIZE_OPTION.key(), + config.getBulkFlushMaxByteSize().toHumanReadableString())); + validate( + config.getBulkFlushBackoffRetries().map(retries -> retries >= 1).orElse(true), + () -> + String.format( + "'%s' must be at least 1. Got: %s", + BULK_FLUSH_BACKOFF_MAX_RETRIES_OPTION.key(), + config.getBulkFlushBackoffRetries().get())); + if (config.getUsername().isPresent() + && !StringUtils.isNullOrWhitespaceOnly(config.getUsername().get())) { + validate( + config.getPassword().isPresent() + && !StringUtils.isNullOrWhitespaceOnly(config.getPassword().get()), + () -> + String.format( + "'%s' and '%s' must be set at the same time. 
Got: username '%s' and password '%s'", + USERNAME_OPTION.key(), + PASSWORD_OPTION.key(), + config.getUsername().get(), + config.getPassword().orElse(""))); + } + } + + static void validate(boolean condition, Supplier message) { + if (!condition) { + throw new ValidationException(message.get()); + } + } + + EncodingFormat> getValidatedEncodingFormat( + DynamicTableFactory factory, DynamicTableFactory.Context context) { + final FactoryUtil.TableFactoryHelper helper = + FactoryUtil.createTableFactoryHelper(factory, context); + final EncodingFormat> format = + helper.discoverEncodingFormat(SerializationFormatFactory.class, FORMAT_OPTION); + helper.validate(); + return format; + } + + List getPrimaryKeyLogicalTypesWithIndex(Context context) { + DataType physicalRowDataType = context.getPhysicalRowDataType(); + int[] primaryKeyIndexes = context.getPrimaryKeyIndexes(); + if (primaryKeyIndexes.length != 0) { + DataType pkDataType = Projection.of(primaryKeyIndexes).project(physicalRowDataType); + + OpensearchValidationUtils.validatePrimaryKey(pkDataType); + } + + ResolvedSchema resolvedSchema = context.getCatalogTable().getResolvedSchema(); + return Arrays.stream(primaryKeyIndexes) + .mapToObj( + index -> { + Optional column = resolvedSchema.getColumn(index); + if (!column.isPresent()) { + throw new IllegalStateException( + String.format( + "No primary key column found with index '%s'.", + index)); + } + LogicalType logicalType = column.get().getDataType().getLogicalType(); + return new LogicalTypeWithIndex(index, logicalType); + }) + .collect(Collectors.toList()); + } + + @Override + public Set> requiredOptions() { + return Stream.of(HOSTS_OPTION, INDEX_OPTION).collect(Collectors.toSet()); + } + + @Override + public Set> optionalOptions() { + return Stream.of( + KEY_DELIMITER_OPTION, + BULK_FLUSH_MAX_SIZE_OPTION, + BULK_FLUSH_MAX_ACTIONS_OPTION, + BULK_FLUSH_INTERVAL_OPTION, + BULK_FLUSH_BACKOFF_TYPE_OPTION, + BULK_FLUSH_BACKOFF_MAX_RETRIES_OPTION, + BULK_FLUSH_BACKOFF_DELAY_OPTION, + CONNECTION_PATH_PREFIX_OPTION, + CONNECTION_REQUEST_TIMEOUT, + CONNECTION_TIMEOUT, + SOCKET_TIMEOUT, + FORMAT_OPTION, + DELIVERY_GUARANTEE_OPTION, + PASSWORD_OPTION, + USERNAME_OPTION, + SINK_PARALLELISM, + ALLOW_INSECURE) + .collect(Collectors.toSet()); + } + + @Override + public String factoryIdentifier() { + return FACTORY_IDENTIFIER; + } +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchSinkBuilderSupplier.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchSinkBuilderSupplier.java new file mode 100644 index 0000000..80fc30f --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchSinkBuilderSupplier.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.connector.opensearch.table;
+
+import org.apache.flink.connector.opensearch.sink.OpensearchSinkBuilder;
+
+import java.util.function.Supplier;
+
+interface OpensearchSinkBuilderSupplier<T> extends Supplier<OpensearchSinkBuilder<T>> {}
diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchValidationUtils.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchValidationUtils.java
new file mode 100644
index 0000000..e9ea7e2
--- /dev/null
+++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchValidationUtils.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.connector.opensearch.table;
+
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.table.api.ValidationException;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.logical.DistinctType;
+import org.apache.flink.table.types.logical.LogicalTypeFamily;
+import org.apache.flink.table.types.logical.LogicalTypeRoot;
+
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/** Utility methods for validating Opensearch properties. */
+@Internal
+class OpensearchValidationUtils {
+    private static final Set<LogicalTypeRoot> ALLOWED_PRIMARY_KEY_TYPES = new LinkedHashSet<>();
+
+    static {
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.CHAR);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.VARCHAR);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.BOOLEAN);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.DECIMAL);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.TINYINT);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.SMALLINT);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.INTEGER);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.BIGINT);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.FLOAT);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.DOUBLE);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.DATE);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.TIME_WITHOUT_TIME_ZONE);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.TIMESTAMP_WITH_TIME_ZONE);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.INTERVAL_YEAR_MONTH);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.INTERVAL_DAY_TIME);
+    }
+
+    /**
+     * Checks that the table does not have a primary key defined on illegal types.
In Opensearch the + * primary key is used to calculate the Opensearch document id, which is a string of up to 512 + * bytes. It cannot have whitespaces. As of now it is calculated by concatenating the fields. + * Certain types do not have a good string representation to be used in this scenario. The + * illegal types are mostly {@link LogicalTypeFamily#COLLECTION} types and {@link + * LogicalTypeRoot#RAW} type. + */ + public static void validatePrimaryKey(DataType primaryKeyDataType) { + List fieldDataTypes = DataType.getFieldDataTypes(primaryKeyDataType); + List illegalTypes = + fieldDataTypes.stream() + .map(DataType::getLogicalType) + .map( + logicalType -> { + if (logicalType.is(LogicalTypeRoot.DISTINCT_TYPE)) { + return ((DistinctType) logicalType) + .getSourceType() + .getTypeRoot(); + } else { + return logicalType.getTypeRoot(); + } + }) + .filter(t -> !ALLOWED_PRIMARY_KEY_TYPES.contains(t)) + .collect(Collectors.toList()); + if (!illegalTypes.isEmpty()) { + throw new ValidationException( + String.format( + "The table has a primary key on columns of illegal types: %s.", + illegalTypes)); + } + } + + private OpensearchValidationUtils() {} +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/RowOpensearchEmitter.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/RowOpensearchEmitter.java new file mode 100644 index 0000000..0f400b9 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/RowOpensearchEmitter.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.connector.opensearch.table; + +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.api.connector.sink2.SinkWriter; +import org.apache.flink.connector.opensearch.sink.OpensearchEmitter; +import org.apache.flink.connector.opensearch.sink.RequestIndexer; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; +import org.apache.flink.table.api.TableException; +import org.apache.flink.table.data.RowData; +import org.apache.flink.util.FlinkRuntimeException; +import org.apache.flink.util.SimpleUserCodeClassLoader; +import org.apache.flink.util.UserCodeClassLoader; + +import org.opensearch.action.ActionRequest; +import org.opensearch.action.delete.DeleteRequest; +import org.opensearch.action.index.IndexRequest; +import org.opensearch.action.update.UpdateRequest; +import org.opensearch.common.xcontent.XContentType; + +import java.util.function.Function; + +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** Sink function for converting upserts into Opensearch {@link ActionRequest}s. 
 */
+class RowOpensearchEmitter implements OpensearchEmitter<RowData> {
+
+    private final IndexGenerator indexGenerator;
+    private final SerializationSchema<RowData> serializationSchema;
+    private final XContentType contentType;
+    private final Function<RowData, String> createKey;
+
+    public RowOpensearchEmitter(
+            IndexGenerator indexGenerator,
+            SerializationSchema<RowData> serializationSchema,
+            XContentType contentType,
+            Function<RowData, String> createKey) {
+        this.indexGenerator = checkNotNull(indexGenerator);
+        this.serializationSchema = checkNotNull(serializationSchema);
+        this.contentType = checkNotNull(contentType);
+        this.createKey = checkNotNull(createKey);
+    }
+
+    @Override
+    public void open() throws Exception {
+        try {
+            serializationSchema.open(
+                    new SerializationSchema.InitializationContext() {
+                        @Override
+                        public MetricGroup getMetricGroup() {
+                            return new UnregisteredMetricsGroup();
+                        }
+
+                        @Override
+                        public UserCodeClassLoader getUserCodeClassLoader() {
+                            return SimpleUserCodeClassLoader.create(
+                                    RowOpensearchEmitter.class.getClassLoader());
+                        }
+                    });
+        } catch (Exception e) {
+            throw new FlinkRuntimeException("Failed to initialize serialization schema.", e);
+        }
+        indexGenerator.open();
+    }
+
+    @Override
+    public void emit(RowData element, SinkWriter.Context context, RequestIndexer indexer) {
+        switch (element.getRowKind()) {
+            case INSERT:
+            case UPDATE_AFTER:
+                processUpsert(element, indexer);
+                break;
+            case UPDATE_BEFORE:
+            case DELETE:
+                processDelete(element, indexer);
+                break;
+            default:
+                throw new TableException("Unsupported message kind: " + element.getRowKind());
+        }
+    }
+
+    private void processUpsert(RowData row, RequestIndexer indexer) {
+        final byte[] document = serializationSchema.serialize(row);
+        final String key = createKey.apply(row);
+        if (key != null) {
+            final UpdateRequest updateRequest =
+                    new UpdateRequest(indexGenerator.generate(row), key)
+                            .doc(document, contentType)
+                            .upsert(document, contentType);
+            indexer.add(updateRequest);
+        } else {
+            final IndexRequest indexRequest =
+                    new IndexRequest(indexGenerator.generate(row))
+                            .id(key)
+                            .source(document, contentType);
+            indexer.add(indexRequest);
+        }
+    }
+
+    private void processDelete(RowData row, RequestIndexer indexer) {
+        final String key = createKey.apply(row);
+        final DeleteRequest deleteRequest = new DeleteRequest(indexGenerator.generate(row), key);
+        indexer.add(deleteRequest);
+    }
+}
diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/StaticIndexGenerator.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/StaticIndexGenerator.java
new file mode 100644
index 0000000..768dd84
--- /dev/null
+++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/StaticIndexGenerator.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
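[Editorial note] As a quick reference for the emitter above, this is how each changelog row kind maps onto requests (the index name and document id below are invented; the behavior follows processUpsert/processDelete):

    // Assuming the index generator resolves to "orders-2022-09-23" and the key
    // extractor yields "alice_1001" for the row:
    //
    //   RowKind.INSERT, RowKind.UPDATE_AFTER
    //       -> UpdateRequest("orders-2022-09-23", "alice_1001").doc(json).upsert(json)
    //   RowKind.UPDATE_BEFORE, RowKind.DELETE
    //       -> DeleteRequest("orders-2022-09-23", "alice_1001")
    //   Without a primary key (key == null)
    //       -> IndexRequest("orders-2022-09-23").source(json)  // id assigned by Opensearch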
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.connector.opensearch.table; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.table.data.RowData; + +/** A static {@link IndexGenerator} which generate fixed index name. */ +@Internal +final class StaticIndexGenerator extends IndexGeneratorBase { + + public StaticIndexGenerator(String index) { + super(index); + } + + public String generate(RowData row) { + return index; + } +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/ActionRequestFailureHandler.java b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/ActionRequestFailureHandler.java new file mode 100644 index 0000000..2473bf9 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/ActionRequestFailureHandler.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.connectors.opensearch; + +import org.apache.flink.annotation.PublicEvolving; + +import org.opensearch.action.ActionRequest; + +import java.io.Serializable; + +/** + * An implementation of {@link ActionRequestFailureHandler} is provided by the user to define how + * failed {@link ActionRequest ActionRequests} should be handled, e.g. dropping them, reprocessing + * malformed documents, or simply requesting them to be sent to Opensearch again if the failure is + * only temporary. + * + *
+ * <p>Example:
+ *
+ * <pre>{@code
+ * private static class ExampleActionRequestFailureHandler implements ActionRequestFailureHandler {
+ *
+ * 	@Override
+ * 	void onFailure(ActionRequest action, Throwable failure, int restStatusCode, RequestIndexer indexer) throws Throwable {
+ * 		if (ExceptionUtils.findThrowable(failure, OpenSearchRejectedExecutionException.class).isPresent()) {
+ * 			// full queue; re-add document for indexing
+ * 			indexer.add(action);
+ * 		} else if (ExceptionUtils.findThrowable(failure, OpensearchParseException.class).isPresent()) {
+ * 			// malformed document; simply drop request without failing sink
+ * 		} else {
+ * 			// for all other failures, fail the sink;
+ * 			// here the failure is simply rethrown, but users can also choose to throw custom exceptions
+ * 			throw failure;
+ * 		}
+ * 	}
+ * }
+ *
+ * }</pre>
+ * + *
+ * <p>
The above example will let the sink re-add requests that failed due to queue capacity + * saturation and drop requests with malformed documents, without failing the sink. For all other + * failures, the sink will fail. + * + * @deprecated This has been deprecated and will be removed in the future. + */ +@Deprecated +@PublicEvolving +public interface ActionRequestFailureHandler extends Serializable { + + /** + * Handle a failed {@link ActionRequest}. + * + * @param action the {@link ActionRequest} that failed due to the failure + * @param failure the cause of failure + * @param restStatusCode the REST status code of the failure (-1 if none can be retrieved) + * @param indexer request indexer to re-add the failed action, if intended to do so + * @throws Throwable if the sink should fail on this failure, the implementation should rethrow + * the exception or a custom one + */ + void onFailure( + ActionRequest action, Throwable failure, int restStatusCode, RequestIndexer indexer) + throws Throwable; +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/BufferingNoOpRequestIndexer.java b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/BufferingNoOpRequestIndexer.java new file mode 100644 index 0000000..c0d93c1 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/BufferingNoOpRequestIndexer.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.connectors.opensearch; + +import org.apache.flink.annotation.Internal; + +import org.opensearch.action.ActionRequest; +import org.opensearch.action.delete.DeleteRequest; +import org.opensearch.action.index.IndexRequest; +import org.opensearch.action.update.UpdateRequest; + +import javax.annotation.concurrent.NotThreadSafe; + +import java.util.Collections; +import java.util.concurrent.ConcurrentLinkedQueue; + +/** + * Implementation of a {@link RequestIndexer} that buffers {@link ActionRequest ActionRequests} + * before re-sending them to the Opensearch cluster upon request. + */ +@Internal +@NotThreadSafe +class BufferingNoOpRequestIndexer implements RequestIndexer { + + private ConcurrentLinkedQueue bufferedRequests; + + BufferingNoOpRequestIndexer() { + this.bufferedRequests = new ConcurrentLinkedQueue(); + } + + @Override + public void add(DeleteRequest... deleteRequests) { + Collections.addAll(bufferedRequests, deleteRequests); + } + + @Override + public void add(IndexRequest... indexRequests) { + Collections.addAll(bufferedRequests, indexRequests); + } + + @Override + public void add(UpdateRequest... 
updateRequests) { + Collections.addAll(bufferedRequests, updateRequests); + } + + void processBufferedRequests(RequestIndexer actualIndexer) { + for (ActionRequest request : bufferedRequests) { + if (request instanceof IndexRequest) { + actualIndexer.add((IndexRequest) request); + } else if (request instanceof DeleteRequest) { + actualIndexer.add((DeleteRequest) request); + } else if (request instanceof UpdateRequest) { + actualIndexer.add((UpdateRequest) request); + } + } + + bufferedRequests.clear(); + } +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchBulkProcessorIndexer.java b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchBulkProcessorIndexer.java new file mode 100644 index 0000000..f60eb59 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchBulkProcessorIndexer.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.connectors.opensearch; + +import org.apache.flink.annotation.Internal; + +import org.opensearch.action.ActionRequest; +import org.opensearch.action.bulk.BulkProcessor; +import org.opensearch.action.delete.DeleteRequest; +import org.opensearch.action.index.IndexRequest; +import org.opensearch.action.update.UpdateRequest; + +import java.util.concurrent.atomic.AtomicLong; + +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** + * Implementation of a {@link RequestIndexer}, using a {@link BulkProcessor}. {@link ActionRequest + * ActionRequests} will be buffered before sending a bulk request to the Opensearch cluster. + * + *
+ * <p>
Note: This class is binary compatible to Opensearch 1.x. + */ +@Internal +class OpensearchBulkProcessorIndexer implements RequestIndexer { + + private final BulkProcessor bulkProcessor; + private final boolean flushOnCheckpoint; + private final AtomicLong numPendingRequestsRef; + + OpensearchBulkProcessorIndexer( + BulkProcessor bulkProcessor, + boolean flushOnCheckpoint, + AtomicLong numPendingRequestsRef) { + this.bulkProcessor = checkNotNull(bulkProcessor); + this.flushOnCheckpoint = flushOnCheckpoint; + this.numPendingRequestsRef = checkNotNull(numPendingRequestsRef); + } + + @Override + public void add(DeleteRequest... deleteRequests) { + for (DeleteRequest deleteRequest : deleteRequests) { + if (flushOnCheckpoint) { + numPendingRequestsRef.getAndIncrement(); + } + this.bulkProcessor.add(deleteRequest); + } + } + + @Override + public void add(IndexRequest... indexRequests) { + for (IndexRequest indexRequest : indexRequests) { + if (flushOnCheckpoint) { + numPendingRequestsRef.getAndIncrement(); + } + this.bulkProcessor.add(indexRequest); + } + } + + @Override + public void add(UpdateRequest... updateRequests) { + for (UpdateRequest updateRequest : updateRequests) { + if (flushOnCheckpoint) { + numPendingRequestsRef.getAndIncrement(); + } + this.bulkProcessor.add(updateRequest); + } + } +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSink.java b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSink.java new file mode 100644 index 0000000..aae3623 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSink.java @@ -0,0 +1,807 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
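[Editorial note] To illustrate how the two indexers above cooperate on the legacy sink's failure path, a minimal sketch (the handler, request, and status arguments are assumptions; only the two indexer classes come from this patch, and same-package access is assumed):

    import org.opensearch.action.ActionRequest;

    class FailurePathSketch {
        static void sketch(
                ActionRequestFailureHandler failureHandler,
                ActionRequest failedAction,
                Throwable failureCause,
                int restStatusCode,
                RequestIndexer bulkProcessorIndexer)
                throws Throwable {
            // Requests the handler re-adds are only buffered, not sent.
            BufferingNoOpRequestIndexer failureIndexer = new BufferingNoOpRequestIndexer();
            failureHandler.onFailure(failedAction, failureCause, restStatusCode, failureIndexer);

            // The sink later drains the buffer into the live indexer, e.g. an
            // OpensearchBulkProcessorIndexer wrapping the BulkProcessor.
            failureIndexer.processBufferedRequests(bulkProcessorIndexer);
        }
    }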
+ */ + +package org.apache.flink.streaming.connectors.opensearch; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.annotation.PublicEvolving; +import org.apache.flink.api.java.utils.ParameterTool; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.runtime.state.FunctionInitializationContext; +import org.apache.flink.runtime.state.FunctionSnapshotContext; +import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; +import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; +import org.apache.flink.streaming.connectors.opensearch.util.NoOpFailureHandler; +import org.apache.flink.util.InstantiationUtil; +import org.apache.flink.util.Preconditions; + +import org.apache.http.HttpHost; +import org.opensearch.action.ActionRequest; +import org.opensearch.action.DocWriteRequest; +import org.opensearch.action.bulk.BackoffPolicy; +import org.opensearch.action.bulk.BulkItemResponse; +import org.opensearch.action.bulk.BulkProcessor; +import org.opensearch.action.bulk.BulkRequest; +import org.opensearch.action.bulk.BulkResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestClient; +import org.opensearch.client.RestClientBuilder; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.common.unit.ByteSizeUnit; +import org.opensearch.common.unit.ByteSizeValue; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.rest.RestStatus; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; + +import java.io.IOException; +import java.io.Serializable; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; + +import static org.apache.flink.util.Preconditions.checkArgument; +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** + * Base class for all Flink Opensearch Sinks. + * + *
+ * <p>
This class implements the common behaviour across Opensearch versions, such as the use of an + * internal {@link BulkProcessor} to buffer multiple {@link ActionRequest}s before sending the + * requests to the cluster, as well as passing input records to the user provided {@link + * OpensearchSinkFunction} for processing. + * + * @param Type of the elements handled by this sink + */ +@Internal +public class OpensearchSink extends RichSinkFunction implements CheckpointedFunction { + private static final long serialVersionUID = -1007596293618451942L; + private static final Logger LOG = LoggerFactory.getLogger(OpensearchSink.class); + + // ------------------------------------------------------------------------ + // Internal bulk processor configuration + // ------------------------------------------------------------------------ + + public static final String CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS = "bulk.flush.max.actions"; + public static final String CONFIG_KEY_BULK_FLUSH_MAX_SIZE_MB = "bulk.flush.max.size.mb"; + public static final String CONFIG_KEY_BULK_FLUSH_INTERVAL_MS = "bulk.flush.interval.ms"; + public static final String CONFIG_KEY_BULK_FLUSH_BACKOFF_ENABLE = "bulk.flush.backoff.enable"; + public static final String CONFIG_KEY_BULK_FLUSH_BACKOFF_TYPE = "bulk.flush.backoff.type"; + public static final String CONFIG_KEY_BULK_FLUSH_BACKOFF_RETRIES = "bulk.flush.backoff.retries"; + public static final String CONFIG_KEY_BULK_FLUSH_BACKOFF_DELAY = "bulk.flush.backoff.delay"; + + /** Used to control whether the retry delay should increase exponentially or remain constant. */ + @PublicEvolving + public enum FlushBackoffType { + CONSTANT, + EXPONENTIAL + } + + /** + * Provides a backoff policy for bulk requests. Whenever a bulk request is rejected due to + * resource constraints (i.e. the client's internal thread pool is full), the backoff policy + * decides how long the bulk processor will wait before the operation is retried internally. + * + *

This is a proxy for version specific backoff policies. + */ + public static class BulkFlushBackoffPolicy implements Serializable { + + private static final long serialVersionUID = -6022851996101826049L; + + // the default values follow the Opensearch default settings for BulkProcessor + private FlushBackoffType backoffType = FlushBackoffType.EXPONENTIAL; + private int maxRetryCount = 8; + private long delayMillis = 50; + + public FlushBackoffType getBackoffType() { + return backoffType; + } + + public int getMaxRetryCount() { + return maxRetryCount; + } + + public long getDelayMillis() { + return delayMillis; + } + + public void setBackoffType(FlushBackoffType backoffType) { + this.backoffType = checkNotNull(backoffType); + } + + public void setMaxRetryCount(int maxRetryCount) { + checkArgument(maxRetryCount >= 0); + this.maxRetryCount = maxRetryCount; + } + + public void setDelayMillis(long delayMillis) { + checkArgument(delayMillis >= 0); + this.delayMillis = delayMillis; + } + } + + private final Integer bulkProcessorFlushMaxActions; + private final Integer bulkProcessorFlushMaxSizeMb; + private final Long bulkProcessorFlushIntervalMillis; + private final BulkFlushBackoffPolicy bulkProcessorFlushBackoffPolicy; + + // ------------------------------------------------------------------------ + // User-facing API and configuration + // ------------------------------------------------------------------------ + + /** + * The config map that contains configuration for the bulk flushing behaviours. + * + *
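+     * <p>As an illustrative sketch only (keys are the {@code CONFIG_KEY_*} constants above,
+     * values are hypothetical), such a config map might look like:
+     *
+     * <pre>{@code
+     * Map<String, String> userConfig = new HashMap<>();
+     * userConfig.put(OpensearchSink.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "1000");
+     * userConfig.put(OpensearchSink.CONFIG_KEY_BULK_FLUSH_INTERVAL_MS, "5000");
+     * }</pre>
+     *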

+     * <p>For {@link org.opensearch.client.transport.TransportClient} based implementations, this
+     * config map would also contain Opensearch-shipped configuration, and therefore this config
+     * map would also be forwarded when creating the Opensearch client.
+     */
+    private final Map<String, String> userConfig;
+
+    /**
+     * The function that is used to construct multiple {@link ActionRequest ActionRequests} from
+     * each incoming element.
+     */
+    private final OpensearchSinkFunction<T> opensearchSinkFunction;
+
+    /** User-provided handler for failed {@link ActionRequest ActionRequests}. */
+    private final ActionRequestFailureHandler failureHandler;
+
+    /**
+     * If true, the producer will wait until all outstanding action requests have been sent to
+     * Opensearch.
+     */
+    private boolean flushOnCheckpoint = true;
+
+    /**
+     * Provided to the user via the {@link OpensearchSinkFunction} to add {@link ActionRequest
+     * ActionRequests}.
+     */
+    private transient RequestIndexer requestIndexer;
+
+    /**
+     * Provided to the {@link ActionRequestFailureHandler} to allow users to re-index failed
+     * requests.
+     */
+    private transient BufferingNoOpRequestIndexer failureRequestIndexer;
+
+    // ------------------------------------------------------------------------
+    //  Internals for the Flink Opensearch Sink
+    // ------------------------------------------------------------------------
+
+    /** Opensearch client instance. */
+    private transient RestHighLevelClient client;
+
+    /**
+     * Number of pending action requests not yet acknowledged by Opensearch. This value is
+     * maintained only if {@link OpensearchSink#flushOnCheckpoint} is {@code true}.
+     *

+     * <p>This is incremented whenever the user adds (or re-adds through the {@link
+     * ActionRequestFailureHandler}) requests to the {@link RequestIndexer}. It is decremented for
+     * each completed request of a bulk request, in {@link BulkProcessor.Listener#afterBulk(long,
+     * BulkRequest, BulkResponse)} and {@link BulkProcessor.Listener#afterBulk(long, BulkRequest,
+     * Throwable)}.
+     */
+    private AtomicLong numPendingRequests = new AtomicLong(0);
+
+    /** User-provided HTTP hosts. */
+    private final List<HttpHost> httpHosts;
+
+    /** The factory to configure the rest client. */
+    private final RestClientFactory restClientFactory;
+
+    /** Bulk processor to buffer and send requests to Opensearch, created using the client. */
+    private transient BulkProcessor bulkProcessor;
+
+    /**
+     * This is set from inside the {@link BulkProcessor.Listener} if a {@link Throwable} was thrown
+     * in callbacks and the user considered it should fail the sink via the {@link
+     * ActionRequestFailureHandler#onFailure(ActionRequest, Throwable, int, RequestIndexer)}
+     * method.
+     *

+     * <p>Errors will be checked and rethrown before processing each input element, and when the
+     * sink is closed.
+     */
+    private final AtomicReference<Throwable> failureThrowable = new AtomicReference<>();
+
+    private OpensearchSink(
+            Map<String, String> userConfig,
+            List<HttpHost> httpHosts,
+            OpensearchSinkFunction<T> opensearchSinkFunction,
+            ActionRequestFailureHandler failureHandler,
+            RestClientFactory restClientFactory) {
+        checkArgument(httpHosts != null && !httpHosts.isEmpty());
+        this.httpHosts = httpHosts;
+        this.restClientFactory = checkNotNull(restClientFactory);
+        this.opensearchSinkFunction = checkNotNull(opensearchSinkFunction);
+        this.failureHandler = checkNotNull(failureHandler);
+        // we eagerly check if the user-provided sink function and failure handler are
+        // serializable; otherwise, users will merely get a non-informative error message
+        // "OpensearchSink is not serializable"
+
+        checkArgument(
+                InstantiationUtil.isSerializable(opensearchSinkFunction),
+                "The implementation of the provided OpensearchSinkFunction is not serializable. "
+                        + "The object probably contains or references non-serializable fields.");
+
+        checkArgument(
+                InstantiationUtil.isSerializable(failureHandler),
+                "The implementation of the provided ActionRequestFailureHandler is not serializable. "
+                        + "The object probably contains or references non-serializable fields.");
+
+        // extract and remove bulk processor related configuration from the user-provided config,
+        // so that the resulting user config only contains configuration related to the
+        // Opensearch client.
+
+        checkNotNull(userConfig);
+
+        // copy config so we can remove entries without side-effects
+        userConfig = new HashMap<>(userConfig);
+
+        ParameterTool params = ParameterTool.fromMap(userConfig);
+
+        if (params.has(CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS)) {
+            bulkProcessorFlushMaxActions = params.getInt(CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS);
+            userConfig.remove(CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS);
+        } else {
+            bulkProcessorFlushMaxActions = null;
+        }
+
+        if (params.has(CONFIG_KEY_BULK_FLUSH_MAX_SIZE_MB)) {
+            bulkProcessorFlushMaxSizeMb = params.getInt(CONFIG_KEY_BULK_FLUSH_MAX_SIZE_MB);
+            userConfig.remove(CONFIG_KEY_BULK_FLUSH_MAX_SIZE_MB);
+        } else {
+            bulkProcessorFlushMaxSizeMb = null;
+        }
+
+        if (params.has(CONFIG_KEY_BULK_FLUSH_INTERVAL_MS)) {
+            bulkProcessorFlushIntervalMillis = params.getLong(CONFIG_KEY_BULK_FLUSH_INTERVAL_MS);
+            userConfig.remove(CONFIG_KEY_BULK_FLUSH_INTERVAL_MS);
+        } else {
+            bulkProcessorFlushIntervalMillis = null;
+        }
+
+        boolean bulkProcessorFlushBackoffEnable =
+                params.getBoolean(CONFIG_KEY_BULK_FLUSH_BACKOFF_ENABLE, true);
+        userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_ENABLE);
+
+        if (bulkProcessorFlushBackoffEnable) {
+            this.bulkProcessorFlushBackoffPolicy = new BulkFlushBackoffPolicy();
+
+            if (params.has(CONFIG_KEY_BULK_FLUSH_BACKOFF_TYPE)) {
+                bulkProcessorFlushBackoffPolicy.setBackoffType(
+                        FlushBackoffType.valueOf(params.get(CONFIG_KEY_BULK_FLUSH_BACKOFF_TYPE)));
+                userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_TYPE);
+            }
+
+            if (params.has(CONFIG_KEY_BULK_FLUSH_BACKOFF_RETRIES)) {
+                bulkProcessorFlushBackoffPolicy.setMaxRetryCount(
+                        params.getInt(CONFIG_KEY_BULK_FLUSH_BACKOFF_RETRIES));
+                userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_RETRIES);
+            }
+
+            if (params.has(CONFIG_KEY_BULK_FLUSH_BACKOFF_DELAY)) {
+                bulkProcessorFlushBackoffPolicy.setDelayMillis(
+                        params.getLong(CONFIG_KEY_BULK_FLUSH_BACKOFF_DELAY));
+                userConfig.remove(CONFIG_KEY_BULK_FLUSH_BACKOFF_DELAY);
+            }
+
+        } else {
bulkProcessorFlushBackoffPolicy = null; + } + + this.userConfig = userConfig; + } + + /** + * Disable flushing on checkpoint. When disabled, the sink will not wait for all pending action + * requests to be acknowledged by Opensearch on checkpoints. + * + *
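+     * <p>A sketch for illustration (the {@code sink} variable is hypothetical):
+     *
+     * <pre>{@code
+     * OpensearchSink<String> sink = builder.build();
+     * sink.disableFlushOnCheckpoint(); // trades delivery guarantees for lower checkpoint latency
+     * }</pre>
+     *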

+     * <p>NOTE: If flushing on checkpoint is disabled, the Flink Opensearch Sink does NOT provide
+     * any strong guarantees for at-least-once delivery of action requests.
+     */
+    public void disableFlushOnCheckpoint() {
+        this.flushOnCheckpoint = false;
+    }
+
+    @Override
+    public void open(Configuration parameters) throws Exception {
+        client = createClient(userConfig);
+        bulkProcessor = buildBulkProcessor(new BulkProcessorListener());
+        requestIndexer =
+                new OpensearchBulkProcessorIndexer(
+                        bulkProcessor, flushOnCheckpoint, numPendingRequests);
+        failureRequestIndexer = new BufferingNoOpRequestIndexer();
+        opensearchSinkFunction.open();
+    }
+
+    @Override
+    public void invoke(T value, Context context) throws Exception {
+        checkAsyncErrorsAndRequests();
+        opensearchSinkFunction.process(value, getRuntimeContext(), requestIndexer);
+    }
+
+    @Override
+    public void initializeState(FunctionInitializationContext context) throws Exception {
+        // no initialization needed
+    }
+
+    @Override
+    public void snapshotState(FunctionSnapshotContext context) throws Exception {
+        checkAsyncErrorsAndRequests();
+
+        if (flushOnCheckpoint) {
+            while (numPendingRequests.get() != 0) {
+                bulkProcessor.flush();
+                checkAsyncErrorsAndRequests();
+            }
+        }
+    }
+
+    @Override
+    public void close() throws Exception {
+        opensearchSinkFunction.close();
+        if (bulkProcessor != null) {
+            bulkProcessor.close();
+            bulkProcessor = null;
+        }
+
+        if (client != null) {
+            client.close();
+            client = null;
+        }
+
+        // make sure any errors from callbacks are rethrown
+        checkErrorAndRethrow();
+    }
+
+    /** Build the {@link BulkProcessor}. */
+    protected BulkProcessor buildBulkProcessor(BulkProcessor.Listener listener) {
+        checkNotNull(listener);
+
+        BulkProcessor.Builder bulkProcessorBuilder =
+                BulkProcessor.builder(
+                        (request, bulkListener) ->
+                                client.bulkAsync(request, RequestOptions.DEFAULT, bulkListener),
+                        listener);
+
+        // This makes flush() blocking
+        bulkProcessorBuilder.setConcurrentRequests(0);
+
+        if (bulkProcessorFlushMaxActions != null) {
+            bulkProcessorBuilder.setBulkActions(bulkProcessorFlushMaxActions);
+        }
+
+        if (bulkProcessorFlushMaxSizeMb != null) {
+            configureBulkSize(bulkProcessorBuilder);
+        }
+
+        if (bulkProcessorFlushIntervalMillis != null) {
+            configureFlushInterval(bulkProcessorBuilder);
+        }
+
+        // if backoff retrying is disabled, bulkProcessorFlushBackoffPolicy will be null
+        configureBulkProcessorBackoff(bulkProcessorBuilder, bulkProcessorFlushBackoffPolicy);
+
+        return bulkProcessorBuilder.build();
+    }
+
+    /**
+     * Creates an Opensearch client implementing {@link AutoCloseable}.
+     *
+     * @param clientConfig The configuration to use when constructing the client.
+     * @return The created client.
+     * @throws IOException if the connection to the Opensearch cluster cannot be verified.
+     */
+    private RestHighLevelClient createClient(Map<String, String> clientConfig) throws IOException {
+        RestClientBuilder builder =
+                RestClient.builder(httpHosts.toArray(new HttpHost[httpHosts.size()]));
+        restClientFactory.configureRestClientBuilder(builder);
+
+        RestHighLevelClient rhlClient = new RestHighLevelClient(builder);
+        verifyClientConnection(rhlClient);
+
+        return rhlClient;
+    }
+
+    /**
+     * Verify the client connection by making a test request/ping to the Opensearch cluster.
+     *

+     * <p>Called by {@link OpensearchSink#open(org.apache.flink.configuration.Configuration)}
+     * after creating the client. This makes sure the underlying client is closed if the
+     * connection is not successful, preventing thread leaks.
+     *
+     * @param client the Opensearch client.
+     */
+    private void verifyClientConnection(RestHighLevelClient client) throws IOException {
+        if (LOG.isInfoEnabled()) {
+            LOG.info("Pinging Opensearch cluster via hosts {} ...", httpHosts);
+        }
+
+        if (!client.ping(RequestOptions.DEFAULT)) {
+            throw new RuntimeException("There are no reachable Opensearch nodes!");
+        }
+
+        if (LOG.isInfoEnabled()) {
+            LOG.info("Opensearch RestHighLevelClient is connected to {}", httpHosts.toString());
+        }
+    }
+
+    /**
+     * Set backoff-related configurations on the provided {@link BulkProcessor.Builder}. The
+     * builder will be later on used to instantiate the actual {@link BulkProcessor}.
+     *
+     * @param builder the {@link BulkProcessor.Builder} to configure.
+     * @param flushBackoffPolicy user-provided backoff retry settings ({@code null} if the user
+     *     disabled backoff retries).
+     */
+    private static void configureBulkProcessorBackoff(
+            BulkProcessor.Builder builder,
+            @Nullable OpensearchSink.BulkFlushBackoffPolicy flushBackoffPolicy) {
+
+        BackoffPolicy backoffPolicy;
+        if (flushBackoffPolicy != null) {
+            switch (flushBackoffPolicy.getBackoffType()) {
+                case CONSTANT:
+                    backoffPolicy =
+                            BackoffPolicy.constantBackoff(
+                                    new TimeValue(flushBackoffPolicy.getDelayMillis()),
+                                    flushBackoffPolicy.getMaxRetryCount());
+                    break;
+                case EXPONENTIAL:
+                default:
+                    backoffPolicy =
+                            BackoffPolicy.exponentialBackoff(
+                                    new TimeValue(flushBackoffPolicy.getDelayMillis()),
+                                    flushBackoffPolicy.getMaxRetryCount());
+            }
+        } else {
+            backoffPolicy = BackoffPolicy.noBackoff();
+        }
+
+        builder.setBackoffPolicy(backoffPolicy);
+    }
+
+    private void configureBulkSize(BulkProcessor.Builder bulkProcessorBuilder) {
+        final ByteSizeUnit sizeUnit;
+        if (bulkProcessorFlushMaxSizeMb == -1) {
+            // bulk size can be disabled with -1, however the ByteSizeValue constructor accepts -1
+            // only with BYTES as the size unit
+            sizeUnit = ByteSizeUnit.BYTES;
+        } else {
+            sizeUnit = ByteSizeUnit.MB;
+        }
+        bulkProcessorBuilder.setBulkSize(new ByteSizeValue(bulkProcessorFlushMaxSizeMb, sizeUnit));
+    }
+
+    private void configureFlushInterval(BulkProcessor.Builder bulkProcessorBuilder) {
+        if (bulkProcessorFlushIntervalMillis == -1) {
+            bulkProcessorBuilder.setFlushInterval(null);
+        } else {
+            bulkProcessorBuilder.setFlushInterval(
+                    TimeValue.timeValueMillis(bulkProcessorFlushIntervalMillis));
+        }
+    }
+
+    private void checkErrorAndRethrow() {
+        Throwable cause = failureThrowable.get();
+        if (cause != null) {
+            throw new RuntimeException("An error occurred in OpensearchSink.", cause);
+        }
+    }
+
+    private void checkAsyncErrorsAndRequests() {
+        checkErrorAndRethrow();
+        failureRequestIndexer.processBufferedRequests(requestIndexer);
+    }
+
+    private class BulkProcessorListener implements BulkProcessor.Listener {
+
+        @Override
+        public void beforeBulk(long executionId, BulkRequest request) {}
+
+        @Override
+        public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
+            if (response.hasFailures()) {
+                BulkItemResponse itemResponse;
+                Throwable failure;
+                RestStatus restStatus;
+                DocWriteRequest<?> actionRequest;
+
+                try {
+                    for (int i = 0; i < response.getItems().length; i++) {
+                        itemResponse = response.getItems()[i];
+                        failure = extractFailureCauseFromBulkItemResponse(itemResponse);
+                        if (failure != null) {
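+                            // The item failed: look up its REST status (may be null when the
+                            // failure happened client-side) and hand the original request to the
+                            // user-provided failure handler, which may re-add it via the
+                            // buffering indexer.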
+                            restStatus = itemResponse.getFailure().getStatus();
+                            actionRequest = request.requests().get(i);
+                            if (restStatus == null) {
+                                if (actionRequest instanceof ActionRequest) {
+                                    failureHandler.onFailure(
+                                            (ActionRequest) actionRequest,
+                                            failure,
+                                            -1,
+                                            failureRequestIndexer);
+                                } else {
+                                    throw new UnsupportedOperationException(
+                                            "The sink currently only supports ActionRequests");
+                                }
+                            } else {
+                                if (actionRequest instanceof ActionRequest) {
+                                    failureHandler.onFailure(
+                                            (ActionRequest) actionRequest,
+                                            failure,
+                                            restStatus.getStatus(),
+                                            failureRequestIndexer);
+                                } else {
+                                    throw new UnsupportedOperationException(
+                                            "The sink currently only supports ActionRequests");
+                                }
+                            }
+                        }
+                    }
+                } catch (Throwable t) {
+                    // fail the sink and skip the rest of the items
+                    // if the failure handler decides to throw an exception
+                    failureThrowable.compareAndSet(null, t);
+                }
+            }
+
+            if (flushOnCheckpoint) {
+                numPendingRequests.getAndAdd(-request.numberOfActions());
+            }
+        }
+
+        @Override
+        public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
+            try {
+                for (DocWriteRequest<?> writeRequest : request.requests()) {
+                    if (writeRequest instanceof ActionRequest) {
+                        failureHandler.onFailure(
+                                (ActionRequest) writeRequest, failure, -1, failureRequestIndexer);
+                    } else {
+                        throw new UnsupportedOperationException(
+                                "The sink currently only supports ActionRequests");
+                    }
+                }
+            } catch (Throwable t) {
+                // fail the sink and skip the rest of the items
+                // if the failure handler decides to throw an exception
+                failureThrowable.compareAndSet(null, t);
+            }
+
+            if (flushOnCheckpoint) {
+                numPendingRequests.getAndAdd(-request.numberOfActions());
+            }
+        }
+    }
+
+    /**
+     * Extracts the cause of failure of a bulk item action.
+     *
+     * @param bulkItemResponse the bulk item response to extract cause of failure
+     * @return the extracted {@link Throwable} from the response ({@code null} if the response is
+     *     successful).
+     */
+    private static Throwable extractFailureCauseFromBulkItemResponse(
+            BulkItemResponse bulkItemResponse) {
+        if (!bulkItemResponse.isFailed()) {
+            return null;
+        } else {
+            return bulkItemResponse.getFailure().getCause();
+        }
+    }
+
+    long getNumPendingRequests() {
+        if (flushOnCheckpoint) {
+            return numPendingRequests.get();
+        } else {
+            throw new UnsupportedOperationException(
+                    "The number of pending requests is not maintained when flushing on checkpoint is disabled.");
+        }
+    }
+
+    /**
+     * A builder for creating an {@link OpensearchSink}.
+     *
+     * @param <T> Type of the elements handled by the sink this builder creates.
+     * @deprecated This has been deprecated, please use {@link
+     *     org.apache.flink.connector.opensearch.sink.OpensearchSinkBuilder}.
+     */
+    @Deprecated
+    @PublicEvolving
+    public static class Builder<T> {
+
+        private final List<HttpHost> httpHosts;
+        private final OpensearchSinkFunction<T> opensearchSinkFunction;
+
+        private Map<String, String> bulkRequestsConfig = new HashMap<>();
+        private ActionRequestFailureHandler failureHandler = new NoOpFailureHandler();
+        private RestClientFactory restClientFactory = restClientBuilder -> {};
+
+        /**
+         * Creates a new {@code OpensearchSink} that connects to the cluster using a {@link
+         * RestHighLevelClient}.
+         *
+         * @param httpHosts The list of {@link HttpHost} to which the {@link RestHighLevelClient}
+         *     connects.
+         * @param opensearchSinkFunction This is used to generate multiple {@link ActionRequest}
+         *     from the incoming element.
+         */
+        public Builder(List<HttpHost> httpHosts, OpensearchSinkFunction<T> opensearchSinkFunction) {
+            this.httpHosts = Preconditions.checkNotNull(httpHosts);
+            this.opensearchSinkFunction = Preconditions.checkNotNull(opensearchSinkFunction);
+        }
+
+        /**
+         * Sets the maximum number of actions to buffer for each bulk request. You can pass -1 to
+         * disable it.
+         *
+         * @param numMaxActions the maximum number of actions to buffer per bulk request.
+         */
+        public void setBulkFlushMaxActions(int numMaxActions) {
+            Preconditions.checkArgument(
+                    numMaxActions == -1 || numMaxActions > 0,
+                    "Max number of buffered actions must be larger than 0.");
+
+            this.bulkRequestsConfig.put(
+                    CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, String.valueOf(numMaxActions));
+        }
+
+        /**
+         * Sets the maximum size of buffered actions, in MB, per bulk request. You can pass -1 to
+         * disable it.
+         *
+         * @param maxSizeMb the maximum size of buffered actions, in MB.
+         */
+        public void setBulkFlushMaxSizeMb(int maxSizeMb) {
+            Preconditions.checkArgument(
+                    maxSizeMb == -1 || maxSizeMb > 0,
+                    "Max size of buffered actions must be larger than 0.");
+
+            this.bulkRequestsConfig.put(
+                    CONFIG_KEY_BULK_FLUSH_MAX_SIZE_MB, String.valueOf(maxSizeMb));
+        }
+
+        /**
+         * Sets the bulk flush interval, in milliseconds. You can pass -1 to disable it.
+         *
+         * @param intervalMillis the bulk flush interval, in milliseconds.
+         */
+        public void setBulkFlushInterval(long intervalMillis) {
+            Preconditions.checkArgument(
+                    intervalMillis == -1 || intervalMillis >= 0,
+                    "Interval (in milliseconds) between each flush must be larger than or equal to 0.");
+
+            this.bulkRequestsConfig.put(
+                    CONFIG_KEY_BULK_FLUSH_INTERVAL_MS, String.valueOf(intervalMillis));
+        }
+
+        /**
+         * Sets whether or not to enable bulk flush backoff behaviour.
+         *
+         * @param enabled whether or not to enable backoffs.
+         */
+        public void setBulkFlushBackoff(boolean enabled) {
+            this.bulkRequestsConfig.put(
+                    CONFIG_KEY_BULK_FLUSH_BACKOFF_ENABLE, String.valueOf(enabled));
+        }
+
+        /**
+         * Sets the type of backoff to use when flushing bulk requests.
+         *
+         * @param flushBackoffType the backoff type to use.
+         */
+        public void setBulkFlushBackoffType(FlushBackoffType flushBackoffType) {
+            this.bulkRequestsConfig.put(
+                    CONFIG_KEY_BULK_FLUSH_BACKOFF_TYPE,
+                    Preconditions.checkNotNull(flushBackoffType).toString());
+        }
+
+        /**
+         * Sets the maximum number of retries for a backoff attempt when flushing bulk requests.
+         *
+         * @param maxRetries the maximum number of retries for a backoff attempt when flushing bulk
+         *     requests
+         */
+        public void setBulkFlushBackoffRetries(int maxRetries) {
+            Preconditions.checkArgument(
+                    maxRetries > 0, "Max number of backoff attempts must be larger than 0.");
+
+            this.bulkRequestsConfig.put(
+                    CONFIG_KEY_BULK_FLUSH_BACKOFF_RETRIES, String.valueOf(maxRetries));
+        }
+
+        /**
+         * Sets the amount of delay between each backoff attempt when flushing bulk requests, in
+         * milliseconds.
+         *
+         * @param delayMillis the amount of delay between each backoff attempt when flushing bulk
+         *     requests, in milliseconds.
+         */
+        public void setBulkFlushBackoffDelay(long delayMillis) {
+            Preconditions.checkArgument(
+                    delayMillis >= 0,
+                    "Delay (in milliseconds) between each backoff attempt must be larger than or equal to 0.");
+            this.bulkRequestsConfig.put(
+                    CONFIG_KEY_BULK_FLUSH_BACKOFF_DELAY, String.valueOf(delayMillis));
+        }
+
+        /**
+         * Sets a failure handler for action requests.
+         *
+         * @param failureHandler This is used to handle failed {@link ActionRequest}.
+         */
+        public void setFailureHandler(ActionRequestFailureHandler failureHandler) {
+            this.failureHandler = Preconditions.checkNotNull(failureHandler);
+        }
+
+        /**
+         * Sets a REST client factory for custom client configuration.
+         *
+         * @param restClientFactory the factory that configures the rest client.
+         */
+        public void setRestClientFactory(RestClientFactory restClientFactory) {
+            this.restClientFactory = Preconditions.checkNotNull(restClientFactory);
+        }
+
+        /**
+         * Creates the Opensearch sink.
+         *
+         * @return the created Opensearch sink.
+         */
+        public OpensearchSink<T> build() {
+            return new OpensearchSink<>(
+                    bulkRequestsConfig,
+                    httpHosts,
+                    opensearchSinkFunction,
+                    failureHandler,
+                    restClientFactory);
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) {
+                return true;
+            }
+            if (o == null || getClass() != o.getClass()) {
+                return false;
+            }
+            Builder<?> builder = (Builder<?>) o;
+            return Objects.equals(httpHosts, builder.httpHosts)
+                    && Objects.equals(opensearchSinkFunction, builder.opensearchSinkFunction)
+                    && Objects.equals(bulkRequestsConfig, builder.bulkRequestsConfig)
+                    && Objects.equals(failureHandler, builder.failureHandler)
+                    && Objects.equals(restClientFactory, builder.restClientFactory);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(
+                    httpHosts,
+                    opensearchSinkFunction,
+                    bulkRequestsConfig,
+                    failureHandler,
+                    restClientFactory);
+        }
+    }
+}
diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkFunction.java b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkFunction.java
new file mode 100644
index 0000000..8c49c8a
--- /dev/null
+++ b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkFunction.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.streaming.connectors.opensearch;
+
+import org.apache.flink.annotation.PublicEvolving;
+import org.apache.flink.api.common.functions.Function;
+import org.apache.flink.api.common.functions.RuntimeContext;
+
+import org.opensearch.action.ActionRequest;
+
+import java.io.Serializable;
+
+/**
+ * Creates multiple {@link ActionRequest ActionRequests} from an element in a stream.
+ *

+ * <p>This is used by sinks to prepare elements for sending them to Opensearch.
+ *

+ * <p>Example:
+ *

+ * <pre>{@code
+ *     private static class TestOpensearchSinkFunction implements
+ *             OpensearchSinkFunction<Tuple2<Integer, String>> {
+ *
+ *         public IndexRequest createIndexRequest(Tuple2<Integer, String> element) {
+ *             Map<String, Object> json = new HashMap<>();
+ *             json.put("data", element.f1);
+ *
+ *             return Requests.indexRequest()
+ *                     .index("my-index")
+ *                     .id(element.f0.toString())
+ *                     .source(json);
+ *         }
+ *
+ *         public void process(Tuple2<Integer, String> element, RuntimeContext ctx, RequestIndexer indexer) {
+ *             indexer.add(createIndexRequest(element));
+ *         }
+ *     }
+ *
+ * }</pre>
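+ *
+ * <p>For illustration only, such a function would then typically be wired into the (deprecated)
+ * {@link OpensearchSink.Builder}; the host, list, and stream names below are hypothetical:
+ *
+ * <pre>{@code
+ *     List<HttpHost> hosts = Collections.singletonList(new HttpHost("localhost", 9200, "http"));
+ *     OpensearchSink.Builder<Tuple2<Integer, String>> builder =
+ *             new OpensearchSink.Builder<>(hosts, new TestOpensearchSinkFunction());
+ *     stream.addSink(builder.build());
+ * }</pre>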
+ *
+ * @param <T> The type of the element handled by this {@code OpensearchSinkFunction}
+ * @deprecated This has been deprecated and will be removed in the future.
+ */
+@Deprecated
+@PublicEvolving
+public interface OpensearchSinkFunction<T> extends Serializable, Function {
+
+    /**
+     * Initialization method for the function. It is called once before the actual working process
+     * methods.
+     */
+    default void open() throws Exception {}
+
+    /**
+     * Initialization method for the function. It is called once before the actual working process
+     * methods.
+     */
+    default void open(RuntimeContext ctx) throws Exception {
+        open();
+    }
+
+    /** Tear-down method for the function. It is called when the sink closes. */
+    default void close() throws Exception {}
+
+    /**
+     * Process the incoming element to produce multiple {@link ActionRequest ActionRequests}. The
+     * produced requests should be added to the provided {@link RequestIndexer}.
+     *
+     * @param element incoming element to process
+     * @param ctx runtime context containing information about the sink instance
+     * @param indexer request indexer that {@code ActionRequest} should be added to
+     */
+    void process(T element, RuntimeContext ctx, RequestIndexer indexer);
+}
diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/RequestIndexer.java b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/RequestIndexer.java
new file mode 100644
index 0000000..78fefed
--- /dev/null
+++ b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/RequestIndexer.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.streaming.connectors.opensearch;
+
+import org.apache.flink.annotation.PublicEvolving;
+
+import org.opensearch.action.ActionRequest;
+import org.opensearch.action.delete.DeleteRequest;
+import org.opensearch.action.index.IndexRequest;
+import org.opensearch.action.update.UpdateRequest;
+
+/**
+ * Users add multiple delete, index or update requests to a {@link RequestIndexer} to prepare them
+ * for sending to an Opensearch cluster.
+ *
+ * @deprecated This has been deprecated and will be removed in the future.
+ */
+@Deprecated
+@PublicEvolving
+public interface RequestIndexer {
+
+    /**
+     * Add multiple {@link ActionRequest} to the indexer to prepare for sending requests to
+     * Opensearch.
+     *
+     * @param actionRequests The multiple {@link ActionRequest} to add.
+     * @deprecated use the {@link DeleteRequest}, {@link IndexRequest} or {@link UpdateRequest}
+     *     variants instead
+     */
+    @Deprecated
+    default void add(ActionRequest...
actionRequests) { + for (ActionRequest actionRequest : actionRequests) { + if (actionRequest instanceof IndexRequest) { + add((IndexRequest) actionRequest); + } else if (actionRequest instanceof DeleteRequest) { + add((DeleteRequest) actionRequest); + } else if (actionRequest instanceof UpdateRequest) { + add((UpdateRequest) actionRequest); + } else { + throw new IllegalArgumentException( + "RequestIndexer only supports Index, Delete and Update requests"); + } + } + } + + /** + * Add multiple {@link DeleteRequest} to the indexer to prepare for sending requests to + * Opensearch. + * + * @param deleteRequests The multiple {@link DeleteRequest} to add. + */ + void add(DeleteRequest... deleteRequests); + + /** + * Add multiple {@link IndexRequest} to the indexer to prepare for sending requests to + * Opensearch. + * + * @param indexRequests The multiple {@link IndexRequest} to add. + */ + void add(IndexRequest... indexRequests); + + /** + * Add multiple {@link UpdateRequest} to the indexer to prepare for sending requests to + * Opensearch. + * + * @param updateRequests The multiple {@link UpdateRequest} to add. + */ + void add(UpdateRequest... updateRequests); +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/RestClientFactory.java b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/RestClientFactory.java new file mode 100644 index 0000000..7287f60 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/RestClientFactory.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.connectors.opensearch; + +import org.apache.flink.annotation.PublicEvolving; + +import org.opensearch.client.RestClientBuilder; + +import java.io.Serializable; + +/** + * A factory that is used to configure the {@link org.opensearch.client.RestHighLevelClient} + * internally used in the {@link OpensearchSink}. + */ +@PublicEvolving +public interface RestClientFactory extends Serializable { + + /** + * Configures the rest client builder. + * + * @param restClientBuilder the configured rest client builder. 
+ */ + void configureRestClientBuilder(RestClientBuilder restClientBuilder); +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/IgnoringFailureHandler.java b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/IgnoringFailureHandler.java new file mode 100644 index 0000000..a04cd55 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/IgnoringFailureHandler.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.connectors.opensearch.util; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.streaming.connectors.opensearch.ActionRequestFailureHandler; +import org.apache.flink.streaming.connectors.opensearch.RequestIndexer; + +import org.opensearch.action.ActionRequest; + +/** Ignores all kinds of failures and drops the affected {@link ActionRequest}. */ +@Internal +public class IgnoringFailureHandler implements ActionRequestFailureHandler { + + private static final long serialVersionUID = 1662846593501L; + + @Override + public void onFailure( + ActionRequest action, Throwable failure, int restStatusCode, RequestIndexer indexer) { + // ignore failure + } +} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/NoOpFailureHandler.java b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/NoOpFailureHandler.java new file mode 100644 index 0000000..a2c6073 --- /dev/null +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/NoOpFailureHandler.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.flink.streaming.connectors.opensearch.util;
+
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.streaming.connectors.opensearch.ActionRequestFailureHandler;
+import org.apache.flink.streaming.connectors.opensearch.RequestIndexer;
+
+import org.opensearch.action.ActionRequest;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** An {@link ActionRequestFailureHandler} that simply fails the sink on any failures. */
+@Internal
+public class NoOpFailureHandler implements ActionRequestFailureHandler {
+
+    private static final long serialVersionUID = 737941343410827885L;
+
+    private static final Logger LOG = LoggerFactory.getLogger(NoOpFailureHandler.class);
+
+    @Override
+    public void onFailure(
+            ActionRequest action, Throwable failure, int restStatusCode, RequestIndexer indexer)
+            throws Throwable {
+        LOG.error("Failed Opensearch item request: {}", failure.getMessage(), failure);
+        // simply fail the sink
+        throw failure;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        return o instanceof NoOpFailureHandler;
+    }
+
+    @Override
+    public int hashCode() {
+        return NoOpFailureHandler.class.hashCode();
+    }
+}
diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/RetryRejectedExecutionFailureHandler.java b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/RetryRejectedExecutionFailureHandler.java
new file mode 100644
index 0000000..922004e
--- /dev/null
+++ b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/RetryRejectedExecutionFailureHandler.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.streaming.connectors.opensearch.util;
+
+import org.apache.flink.annotation.PublicEvolving;
+import org.apache.flink.streaming.connectors.opensearch.ActionRequestFailureHandler;
+import org.apache.flink.streaming.connectors.opensearch.RequestIndexer;
+import org.apache.flink.util.ExceptionUtils;
+
+import org.opensearch.action.ActionRequest;
+import org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * An {@link ActionRequestFailureHandler} that re-adds requests that failed due to temporary {@link
+ * OpenSearchRejectedExecutionException}s (which means that Opensearch node queues are currently
+ * full), and fails for all other failures.
+ *
+ * @deprecated This has been deprecated and will be removed in the future.
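+ *     For illustration, it would be registered on the deprecated sink builder like this sketch
+ *     (the {@code builder} variable is hypothetical):
+ *     <pre>{@code
+ *     builder.setFailureHandler(new RetryRejectedExecutionFailureHandler());
+ *     }</pre>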
+ */ +@Deprecated +@PublicEvolving +public class RetryRejectedExecutionFailureHandler implements ActionRequestFailureHandler { + + private static final long serialVersionUID = -7423562912824511906L; + + private static final Logger LOG = + LoggerFactory.getLogger(RetryRejectedExecutionFailureHandler.class); + + @Override + public void onFailure( + ActionRequest action, Throwable failure, int restStatusCode, RequestIndexer indexer) + throws Throwable { + LOG.error("Failed Opensearch item request: {}", failure.getMessage(), failure); + if (ExceptionUtils.findThrowable(failure, OpenSearchRejectedExecutionException.class) + .isPresent()) { + indexer.add(action); + } else { + // rethrow all other failures + throw failure; + } + } +} diff --git a/flink-connector-opensearch/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory b/flink-connector-opensearch/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory new file mode 100644 index 0000000..2cbd625 --- /dev/null +++ b/flink-connector-opensearch/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.flink.connector.opensearch.table.OpensearchDynamicSinkFactory diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/OpensearchUtil.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/OpensearchUtil.java new file mode 100644 index 0000000..b1403b9 --- /dev/null +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/OpensearchUtil.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.connector.opensearch; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.streaming.connectors.opensearch.RestClientFactory; +import org.apache.flink.table.connector.sink.DynamicTableSink; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.LogicalType; + +import org.apache.http.HttpHost; +import org.apache.http.auth.AuthScope; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.client.CredentialsProvider; +import org.apache.http.conn.ssl.TrustAllStrategy; +import org.apache.http.impl.client.BasicCredentialsProvider; +import org.apache.http.ssl.SSLContexts; +import org.opensearch.client.RestClient; +import org.opensearch.client.RestClientBuilder.HttpClientConfigCallback; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.testcontainers.OpensearchContainer; +import org.slf4j.Logger; +import org.testcontainers.containers.output.Slf4jLogConsumer; +import org.testcontainers.utility.DockerImageName; + +import java.security.KeyManagementException; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; + +/** Collection of utility methods for Opensearch tests. */ +@Internal +public class OpensearchUtil { + + private OpensearchUtil() {} + + /** + * Creates a preconfigured {@link OpensearchContainer} with limited memory allocation and aligns + * the internal Opensearch log levels with the ones used by the capturing logger. + * + * @param dockerImageVersion describing the Opensearch image + * @param log to derive the log level from + * @return configured Opensearch container + */ + public static OpensearchContainer createOpensearchContainer( + String dockerImageVersion, Logger log) { + String logLevel; + if (log.isTraceEnabled()) { + logLevel = "TRACE"; + } else if (log.isDebugEnabled()) { + logLevel = "DEBUG"; + } else if (log.isInfoEnabled()) { + logLevel = "INFO"; + } else if (log.isWarnEnabled()) { + logLevel = "WARN"; + } else if (log.isErrorEnabled()) { + logLevel = "ERROR"; + } else { + logLevel = "OFF"; + } + + return new OpensearchContainer(DockerImageName.parse(dockerImageVersion)) + .withEnv("OPENSEARCH_JAVA_OPTS", "-Xms2g -Xmx2g") + .withEnv("logger.org.opensearch", logLevel) + .withLogConsumer(new Slf4jLogConsumer(log)); + } + + /** + * Creates a preconfigured {@link RestHighLevelClient} instance for specific {@link + * OpensearchContainer} instance. + * + * @return preconfigured {@link RestHighLevelClient} instance + */ + public static RestHighLevelClient createClient(OpensearchContainer container) { + final String username = container.getUsername(); + final String password = container.getPassword(); + + return new RestHighLevelClient( + RestClient.builder(HttpHost.create(container.getHttpHostAddress())) + .setHttpClientConfigCallback( + createClientConfigCallback(username, password))); + } + + /** + * Creates a preconfigured {@link RestClientFactory} instance for specific {@link + * OpensearchContainer} instance. 
+ * + * @return preconfigured {@link RestClientFactory} instance + */ + public static RestClientFactory createClientFactory(OpensearchContainer container) { + final String username = container.getUsername(); + final String password = container.getPassword(); + + return factory -> + factory.setHttpClientConfigCallback(createClientConfigCallback(username, password)); + } + + /** + * Creates a dedicated {@link HttpClientConfigCallback} instance for specific {@link + * OpensearchContainer} instance. + * + * @return dedicated {@link HttpClientConfigCallback} instance + */ + private static HttpClientConfigCallback createClientConfigCallback( + final String username, final String password) { + + return (httpClientBuilder) -> { + try { + final CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); + credentialsProvider.setCredentials( + AuthScope.ANY, new UsernamePasswordCredentials(username, password)); + + return httpClientBuilder + .setDefaultCredentialsProvider(credentialsProvider) + .setSSLContext( + SSLContexts.custom() + .loadTrustMaterial(new TrustAllStrategy()) + .build()); + } catch (final NoSuchAlgorithmException + | KeyStoreException + | KeyManagementException ex) { + throw new RuntimeException(ex); + } + }; + } + + /** A mock {@link DynamicTableSink.Context} for Opensearch tests. */ + public static class MockContext implements DynamicTableSink.Context { + @Override + public boolean isBounded() { + return false; + } + + @Override + public TypeInformation createTypeInformation(DataType consumedDataType) { + return null; + } + + @Override + public TypeInformation createTypeInformation(LogicalType consumedLogicalType) { + return null; + } + + @Override + public DynamicTableSink.DataStructureConverter createDataStructureConverter( + DataType consumedDataType) { + return null; + } + } +} diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilderTest.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilderTest.java new file mode 100644 index 0000000..d7fb231 --- /dev/null +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilderTest.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.flink.connector.opensearch.sink;
+
+import org.apache.flink.connector.base.DeliveryGuarantee;
+import org.apache.flink.util.TestLoggerExtension;
+
+import org.apache.http.HttpHost;
+import org.junit.jupiter.api.DynamicTest;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestFactory;
+import org.junit.jupiter.api.TestInstance;
+import org.junit.jupiter.api.extension.ExtendWith;
+
+import java.util.stream.Stream;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+/** Tests for {@link OpensearchSinkBuilder}. */
+@ExtendWith(TestLoggerExtension.class)
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+class OpensearchSinkBuilderTest {
+
+    @TestFactory
+    Stream<DynamicTest> testValidBuilders() {
+        Stream<OpensearchSinkBuilder<Object>> validBuilders =
+                Stream.of(
+                        createMinimalBuilder(),
+                        createMinimalBuilder()
+                                .setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE),
+                        createMinimalBuilder()
+                                .setBulkFlushBackoffStrategy(FlushBackoffType.CONSTANT, 1, 1),
+                        createMinimalBuilder()
+                                .setConnectionUsername("username")
+                                .setConnectionPassword("password"));
+
+        return DynamicTest.stream(
+                validBuilders,
+                OpensearchSinkBuilder::toString,
+                builder -> assertDoesNotThrow(builder::build));
+    }
+
+    @Test
+    void testThrowIfExactlyOnceConfigured() {
+        assertThrows(
+                IllegalStateException.class,
+                () -> createMinimalBuilder().setDeliveryGuarantee(DeliveryGuarantee.EXACTLY_ONCE));
+    }
+
+    @Test
+    void testThrowIfHostsNotSet() {
+        assertThrows(
+                NullPointerException.class,
+                () -> createEmptyBuilder().setEmitter((element, indexer, context) -> {}).build());
+    }
+
+    @Test
+    void testThrowIfEmitterNotSet() {
+        assertThrows(
+                NullPointerException.class,
+                () -> createEmptyBuilder().setHosts(new HttpHost("localhost:3000")).build());
+    }
+
+    @Test
+    void testThrowIfSetInvalidTimeouts() {
+        assertThrows(
+                IllegalStateException.class,
+                () -> createEmptyBuilder().setConnectionRequestTimeout(-1).build());
+        assertThrows(
+                IllegalStateException.class,
+                () -> createEmptyBuilder().setConnectionTimeout(-1).build());
+        assertThrows(
+                IllegalStateException.class,
+                () -> createEmptyBuilder().setSocketTimeout(-1).build());
+    }
+
+    private OpensearchSinkBuilder<Object> createEmptyBuilder() {
+        return new OpensearchSinkBuilder<>();
+    }
+
+    private OpensearchSinkBuilder<Object> createMinimalBuilder() {
+        return new OpensearchSinkBuilder<>()
+                .setEmitter((element, indexer, context) -> {})
+                .setHosts(new HttpHost("localhost:3000"));
+    }
+}
diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkITCase.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkITCase.java
new file mode 100644
index 0000000..084485b
--- /dev/null
+++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkITCase.java
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.connector.opensearch.sink; + +import org.apache.flink.api.common.functions.MapFunction; +import org.apache.flink.api.common.restartstrategy.RestartStrategies; +import org.apache.flink.api.common.state.CheckpointListener; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.connector.base.DeliveryGuarantee; +import org.apache.flink.connector.opensearch.OpensearchUtil; +import org.apache.flink.connector.opensearch.test.DockerImageVersions; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.environment.LocalStreamEnvironment; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.util.TestLoggerExtension; + +import org.apache.http.HttpHost; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; +import org.junit.jupiter.params.provider.MethodSource; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.testcontainers.OpensearchContainer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; + +import javax.annotation.Nullable; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.UUID; +import java.util.function.BiFunction; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** Tests for {@link OpensearchSink}. 
+ */
+@Testcontainers
+@ExtendWith(TestLoggerExtension.class)
+class OpensearchSinkITCase {
+    protected static final Logger LOG = LoggerFactory.getLogger(OpensearchSinkITCase.class);
+    private static boolean failed;
+
+    private RestHighLevelClient client;
+    private OpensearchTestClient context;
+
+    @Container
+    private static final OpensearchContainer OS_CONTAINER =
+            OpensearchUtil.createOpensearchContainer(DockerImageVersions.OPENSEARCH_1, LOG);
+
+    @BeforeEach
+    void setUp() {
+        failed = false;
+        client = OpensearchUtil.createClient(OS_CONTAINER);
+        context = new OpensearchTestClient(client);
+    }
+
+    @AfterEach
+    void tearDown() throws IOException {
+        if (client != null) {
+            client.close();
+        }
+    }
+
+    @ParameterizedTest
+    @EnumSource(DeliveryGuarantee.class)
+    void testWriteToOpensearchWithDeliveryGuarantee(DeliveryGuarantee deliveryGuarantee)
+            throws Exception {
+        final String index = "test-opensearch-with-delivery-" + deliveryGuarantee;
+        boolean failure = false;
+        try {
+            runTest(index, false, TestEmitter::jsonEmitter, deliveryGuarantee, null);
+        } catch (IllegalStateException e) {
+            failure = true;
+            assertSame(deliveryGuarantee, DeliveryGuarantee.EXACTLY_ONCE);
+        } finally {
+            assertEquals(failure, deliveryGuarantee == DeliveryGuarantee.EXACTLY_ONCE);
+        }
+    }
+
+    @ParameterizedTest
+    @MethodSource("opensearchEmitters")
+    void testWriteJsonToOpensearch(
+            BiFunction<String, String, OpensearchEmitter<Tuple2<Integer, String>>> emitterProvider)
+            throws Exception {
+        final String index = "test-opensearch-sink-" + UUID.randomUUID();
+        runTest(index, false, emitterProvider, null);
+    }
+
+    @Test
+    void testRecovery() throws Exception {
+        final String index = "test-recovery-opensearch-sink";
+        runTest(index, true, TestEmitter::jsonEmitter, new FailingMapper());
+        assertTrue(failed);
+    }
+
+    private void runTest(
+            String index,
+            boolean allowRestarts,
+            BiFunction<String, String, OpensearchEmitter<Tuple2<Integer, String>>> emitterProvider,
+            @Nullable MapFunction<Long, Long> additionalMapper)
+            throws Exception {
+        runTest(
+                index,
+                allowRestarts,
+                emitterProvider,
+                DeliveryGuarantee.AT_LEAST_ONCE,
+                additionalMapper);
+    }
+
+    private void runTest(
+            String index,
+            boolean allowRestarts,
+            BiFunction<String, String, OpensearchEmitter<Tuple2<Integer, String>>> emitterProvider,
+            DeliveryGuarantee deliveryGuarantee,
+            @Nullable MapFunction<Long, Long> additionalMapper)
+            throws Exception {
+        final OpensearchSink<Tuple2<Integer, String>> sink =
+                new OpensearchSinkBuilder<>()
+                        .setHosts(HttpHost.create(OS_CONTAINER.getHttpHostAddress()))
+                        .setEmitter(emitterProvider.apply(index, context.getDataFieldName()))
+                        .setBulkFlushMaxActions(5)
+                        .setConnectionUsername(OS_CONTAINER.getUsername())
+                        .setConnectionPassword(OS_CONTAINER.getPassword())
+                        .setDeliveryGuarantee(deliveryGuarantee)
+                        .setAllowInsecure(true)
+                        .build();
+
+        final StreamExecutionEnvironment env = new LocalStreamEnvironment();
+        env.enableCheckpointing(100L);
+        if (!allowRestarts) {
+            env.setRestartStrategy(RestartStrategies.noRestart());
+        }
+        DataStream<Long> stream = env.fromSequence(1, 5);
+
+        if (additionalMapper != null) {
+            stream = stream.map(additionalMapper);
+        }
+
+        stream.map(
+                        new MapFunction<Long, Tuple2<Integer, String>>() {
+                            @Override
+                            public Tuple2<Integer, String> map(Long value) throws Exception {
+                                return Tuple2.of(
+                                        value.intValue(),
+                                        OpensearchTestClient.buildMessage(value.intValue()));
+                            }
+                        })
+                .sinkTo(sink);
+        env.execute();
+        context.assertThatIdsAreWritten(index, 1, 2, 3, 4, 5);
+    }
+
+    private static List<BiFunction<String, String, OpensearchEmitter<Tuple2<Integer, String>>>>
+            opensearchEmitters() {
+        return Arrays.asList(TestEmitter::jsonEmitter, TestEmitter::smileEmitter);
+    }
+
+    private static class FailingMapper implements MapFunction<Long, Long>, CheckpointListener {
+
+        private int emittedRecords = 0;
+
+        @Override
+        public Long
+        public Long map(Long value) throws Exception {
+            Thread.sleep(50);
+            emittedRecords++;
+            return value;
+        }
+
+        @Override
+        public void notifyCheckpointComplete(long checkpointId) throws Exception {
+            if (failed || emittedRecords == 0) {
+                return;
+            }
+            failed = true;
+            throw new Exception("Expected failure");
+        }
+    }
+}
diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchTestClient.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchTestClient.java
new file mode 100644
index 0000000..021ad37
--- /dev/null
+++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchTestClient.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.connector.opensearch.sink;
+
+import org.opensearch.OpenSearchStatusException;
+import org.opensearch.action.get.GetRequest;
+import org.opensearch.action.get.GetResponse;
+import org.opensearch.client.RequestOptions;
+import org.opensearch.client.RestHighLevelClient;
+
+import java.io.IOException;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+
+class OpensearchTestClient {
+    private static final String DATA_FIELD_NAME = "data";
+    private final RestHighLevelClient client;
+
+    OpensearchTestClient(RestHighLevelClient client) {
+        this.client = client;
+    }
+
+    GetResponse getResponse(String index, int id) throws IOException {
+        return client.get(new GetRequest(index, Integer.toString(id)), RequestOptions.DEFAULT);
+    }
+
+    void assertThatIdsAreNotWritten(String index, int... ids) throws IOException {
+        for (final int id : ids) {
+            try {
+                final GetResponse response = getResponse(index, id);
+                assertFalse(
+                        response.isExists(), String.format("Id %s is unexpectedly present.", id));
+            } catch (OpenSearchStatusException e) {
+                assertEquals(404, e.status().getStatus());
+            }
+        }
+    }
+
+    void assertThatIdsAreWritten(String index, int... ids)
+            throws IOException, InterruptedException {
+        for (final int id : ids) {
+            GetResponse response;
+            do {
+                response = getResponse(index, id);
+                Thread.sleep(10);
+            } while (response.isSourceEmpty());
+            assertEquals(buildMessage(id), response.getSource().get(DATA_FIELD_NAME));
+        }
+    }
+
+    String getDataFieldName() {
+        return DATA_FIELD_NAME;
+    }
+
+    static String buildMessage(int id) {
+        return "test-" + id;
+    }
+}
diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchWriterITCase.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchWriterITCase.java
new file mode 100644
index 0000000..ab8c06d
--- /dev/null
+++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchWriterITCase.java
@@ -0,0 +1,337 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.connector.opensearch.sink;
+
+import org.apache.flink.api.common.operators.MailboxExecutor;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.connector.opensearch.OpensearchUtil;
+import org.apache.flink.connector.opensearch.test.DockerImageVersions;
+import org.apache.flink.metrics.Counter;
+import org.apache.flink.metrics.Gauge;
+import org.apache.flink.metrics.groups.OperatorIOMetricGroup;
+import org.apache.flink.metrics.groups.SinkWriterMetricGroup;
+import org.apache.flink.metrics.testutils.MetricListener;
+import org.apache.flink.runtime.metrics.groups.InternalSinkWriterMetricGroup;
+import org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups;
+import org.apache.flink.util.FlinkRuntimeException;
+import org.apache.flink.util.TestLoggerExtension;
+import org.apache.flink.util.function.ThrowingRunnable;
+
+import org.apache.http.HttpHost;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.opensearch.action.ActionListener;
+import org.opensearch.action.bulk.BackoffPolicy;
+import org.opensearch.action.bulk.BulkProcessor;
+import org.opensearch.action.bulk.BulkRequest;
+import org.opensearch.action.bulk.BulkResponse;
+import org.opensearch.client.RequestOptions;
+import org.opensearch.client.RestHighLevelClient;
+import org.opensearch.common.unit.ByteSizeUnit;
+import org.opensearch.common.unit.ByteSizeValue;
+import org.opensearch.common.unit.TimeValue;
+import org.opensearch.testcontainers.OpensearchContainer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Optional;
+
+import static org.apache.flink.connector.opensearch.sink.OpensearchTestClient.buildMessage;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/** Tests for {@link OpensearchWriter}. */
+@Testcontainers
+@ExtendWith(TestLoggerExtension.class)
+class OpensearchWriterITCase {
+
+    private static final Logger LOG = LoggerFactory.getLogger(OpensearchWriterITCase.class);
+
+    @Container
+    private static final OpensearchContainer OS_CONTAINER =
+            OpensearchUtil.createOpensearchContainer(DockerImageVersions.OPENSEARCH_1, LOG);
+
+    private RestHighLevelClient client;
+    private OpensearchTestClient context;
+    private MetricListener metricListener;
+
+    @BeforeEach
+    void setUp() {
+        metricListener = new MetricListener();
+        client = OpensearchUtil.createClient(OS_CONTAINER);
+        context = new OpensearchTestClient(client);
+    }
+
+    @AfterEach
+    void tearDown() throws IOException {
+        if (client != null) {
+            client.close();
+        }
+    }
+
+    @Test
+    void testWriteOnBulkFlush() throws Exception {
+        final String index = "test-bulk-flush-without-checkpoint";
+        final int flushAfterNActions = 5;
+        final BulkProcessorConfig bulkProcessorConfig =
+                new BulkProcessorConfig(flushAfterNActions, -1, -1, FlushBackoffType.NONE, 0, 0);
+
+        try (final OpensearchWriter<Tuple2<Integer, String>> writer =
+                createWriter(index, false, bulkProcessorConfig)) {
+            writer.write(Tuple2.of(1, buildMessage(1)), null);
+            writer.write(Tuple2.of(2, buildMessage(2)), null);
+            writer.write(Tuple2.of(3, buildMessage(3)), null);
+            writer.write(Tuple2.of(4, buildMessage(4)), null);
+
+            // Ignore flush on checkpoint
+            writer.flush(false);
+
+            context.assertThatIdsAreNotWritten(index, 1, 2, 3, 4);
+
+            // Trigger flush
+            writer.write(Tuple2.of(5, "test-5"), null);
+            context.assertThatIdsAreWritten(index, 1, 2, 3, 4, 5);
+
+            writer.write(Tuple2.of(6, "test-6"), null);
+            context.assertThatIdsAreNotWritten(index, 6);
+
+            // Force flush
+            writer.blockingFlushAllActions();
+            context.assertThatIdsAreWritten(index, 1, 2, 3, 4, 5, 6);
+        }
+    }
+
+    @Test
+    void testWriteOnBulkIntervalFlush() throws Exception {
+        final String index = "test-bulk-flush-with-interval";
+
+        // Configure bulk processor to flush every 1s
+        final BulkProcessorConfig bulkProcessorConfig =
+                new BulkProcessorConfig(-1, -1, 1000, FlushBackoffType.NONE, 0, 0);
+
+        try (final OpensearchWriter<Tuple2<Integer, String>> writer =
+                createWriter(index, false, bulkProcessorConfig)) {
+            writer.write(Tuple2.of(1, buildMessage(1)), null);
+            writer.write(Tuple2.of(2, buildMessage(2)), null);
+            writer.write(Tuple2.of(3, buildMessage(3)), null);
+            writer.write(Tuple2.of(4, buildMessage(4)), null);
+            writer.blockingFlushAllActions();
+        }
+
+        context.assertThatIdsAreWritten(index, 1, 2, 3, 4);
+    }
+
+    @Test
+    void testWriteOnCheckpoint() throws Exception {
+        final String index = "test-bulk-flush-with-checkpoint";
+        final BulkProcessorConfig bulkProcessorConfig =
+                new BulkProcessorConfig(-1, -1, -1, FlushBackoffType.NONE, 0, 0);
+
+        // Enable flush on checkpoint
+        try (final OpensearchWriter<Tuple2<Integer, String>> writer =
+                createWriter(index, true, bulkProcessorConfig)) {
+            writer.write(Tuple2.of(1, buildMessage(1)), null);
+            writer.write(Tuple2.of(2, buildMessage(2)), null);
+            writer.write(Tuple2.of(3, buildMessage(3)), null);
+
+            context.assertThatIdsAreNotWritten(index, 1, 2, 3);
+
+            // Trigger flush
+            writer.flush(false);
+
+            context.assertThatIdsAreWritten(index, 1, 2, 3);
+        }
+    }
+
+    @Test
+    void testIncrementByteOutMetric() throws Exception {
+        final String index = "test-inc-byte-out";
+        final OperatorIOMetricGroup operatorIOMetricGroup =
+                UnregisteredMetricGroups.createUnregisteredOperatorMetricGroup().getIOMetricGroup();
+        final InternalSinkWriterMetricGroup metricGroup =
+                InternalSinkWriterMetricGroup.mock(
+                        metricListener.getMetricGroup(), operatorIOMetricGroup);
+        final int flushAfterNActions = 2;
+        final BulkProcessorConfig bulkProcessorConfig =
+                new BulkProcessorConfig(flushAfterNActions, -1, -1, FlushBackoffType.NONE, 0, 0);
+
+        try (final OpensearchWriter<Tuple2<Integer, String>> writer =
+                createWriter(index, false, bulkProcessorConfig, metricGroup)) {
+            final Counter numBytesOut = operatorIOMetricGroup.getNumBytesOutCounter();
+            assertEquals(0, numBytesOut.getCount());
+            writer.write(Tuple2.of(1, buildMessage(1)), null);
+            writer.write(Tuple2.of(2, buildMessage(2)), null);
+
+            writer.blockingFlushAllActions();
+            long first = numBytesOut.getCount();
+
+            assertTrue(first > 0);
+
+            writer.write(Tuple2.of(1, buildMessage(1)), null);
+            writer.write(Tuple2.of(2, buildMessage(2)), null);
+
+            writer.blockingFlushAllActions();
+            assertTrue(numBytesOut.getCount() > first);
+        }
+    }
+
+    @Test
+    void testCurrentSendTime() throws Exception {
+        final String index = "test-current-send-time";
+        final int flushAfterNActions = 2;
+        final BulkProcessorConfig bulkProcessorConfig =
+                new BulkProcessorConfig(flushAfterNActions, -1, -1, FlushBackoffType.NONE, 0, 0);
+
+        try (final OpensearchWriter<Tuple2<Integer, String>> writer =
+                createWriter(index, false, bulkProcessorConfig)) {
+            final Optional<Gauge<Long>> currentSendTime =
+                    metricListener.getGauge("currentSendTime");
+            writer.write(Tuple2.of(1, buildMessage(1)), null);
+            writer.write(Tuple2.of(2, buildMessage(2)), null);
+
+            writer.blockingFlushAllActions();
+
+            assertTrue(currentSendTime.isPresent());
+            assertThat(currentSendTime.get().getValue()).isGreaterThan(0L);
+        }
+    }
+
+    private OpensearchWriter<Tuple2<Integer, String>> createWriter(
+            String index, boolean flushOnCheckpoint, BulkProcessorConfig bulkProcessorConfig) {
+        return createWriter(
+                index,
+                flushOnCheckpoint,
+                bulkProcessorConfig,
+                InternalSinkWriterMetricGroup.mock(metricListener.getMetricGroup()));
+    }
+
+    private OpensearchWriter<Tuple2<Integer, String>> createWriter(
+            String index,
+            boolean flushOnCheckpoint,
+            BulkProcessorConfig bulkProcessorConfig,
+            SinkWriterMetricGroup metricGroup) {
+        return new OpensearchWriter<Tuple2<Integer, String>>(
+                Collections.singletonList(HttpHost.create(OS_CONTAINER.getHttpHostAddress())),
+                TestEmitter.jsonEmitter(index, context.getDataFieldName()),
+                flushOnCheckpoint,
+                bulkProcessorConfig,
+                new TestBulkProcessorBuilderFactory(),
+                new NetworkClientConfig(
+                        OS_CONTAINER.getUsername(),
+                        OS_CONTAINER.getPassword(),
+                        null,
+                        null,
+                        null,
+                        null,
+                        true),
+                metricGroup,
+                new TestMailbox());
+    }
+
+    private static class TestBulkProcessorBuilderFactory implements BulkProcessorBuilderFactory {
+        @Override
+        public BulkProcessor.Builder apply(
+                RestHighLevelClient client,
+                BulkProcessorConfig bulkProcessorConfig,
+                BulkProcessor.Listener listener) {
+            BulkProcessor.Builder builder =
+                    BulkProcessor.builder(
+                            new BulkRequestConsumerFactory() { // This cannot be inlined as a lambda
+                                // because then deserialization fails
+                                @Override
+                                public void accept(
+                                        BulkRequest bulkRequest,
+                                        ActionListener<BulkResponse> bulkResponseActionListener) {
+                                    client.bulkAsync(
+                                            bulkRequest,
+                                            RequestOptions.DEFAULT,
+                                            bulkResponseActionListener);
+                                }
+                            },
+                            listener);
+
+            if (bulkProcessorConfig.getBulkFlushMaxActions() != -1) {
+                builder.setBulkActions(bulkProcessorConfig.getBulkFlushMaxActions());
+            }
+
+            if (bulkProcessorConfig.getBulkFlushMaxMb() != -1) {
+                builder.setBulkSize(
+                        new ByteSizeValue(
+                                bulkProcessorConfig.getBulkFlushMaxMb(), ByteSizeUnit.MB));
+            }
+
+            if (bulkProcessorConfig.getBulkFlushInterval() != -1) {
+                builder.setFlushInterval(new TimeValue(bulkProcessorConfig.getBulkFlushInterval()));
+            }
+
+            BackoffPolicy backoffPolicy;
+            final TimeValue backoffDelay =
+                    new TimeValue(bulkProcessorConfig.getBulkFlushBackOffDelay());
+            final int maxRetryCount = bulkProcessorConfig.getBulkFlushBackoffRetries();
+            switch (bulkProcessorConfig.getFlushBackoffType()) {
+                case CONSTANT:
+                    backoffPolicy = BackoffPolicy.constantBackoff(backoffDelay, maxRetryCount);
+                    break;
+                case EXPONENTIAL:
+                    backoffPolicy = BackoffPolicy.exponentialBackoff(backoffDelay, maxRetryCount);
+                    break;
+                case NONE:
+                    backoffPolicy = BackoffPolicy.noBackoff();
+                    break;
+                default:
+                    throw new IllegalArgumentException(
+                            "Received unknown backoff policy type "
+                                    + bulkProcessorConfig.getFlushBackoffType());
+            }
+            builder.setBackoffPolicy(backoffPolicy);
+            return builder;
+        }
+    }
+
+    private static class TestMailbox implements MailboxExecutor {
+
+        @Override
+        public void execute(
+                ThrowingRunnable<? extends Exception> command,
+                String descriptionFormat,
+                Object... descriptionArgs) {
+            try {
+                command.run();
+            } catch (Exception e) {
+                throw new RuntimeException("Unexpected error", e);
+            }
+        }
+
+        @Override
+        public void yield() throws InterruptedException, FlinkRuntimeException {
+            Thread.sleep(100);
+        }
+
+        @Override
+        public boolean tryYield() throws FlinkRuntimeException {
+            return false;
+        }
+    }
+}
diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/TestEmitter.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/TestEmitter.java
new file mode 100644
index 0000000..7ce28ad
--- /dev/null
+++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/TestEmitter.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.connector.opensearch.sink;
+
+import org.apache.flink.api.connector.sink2.SinkWriter;
+import org.apache.flink.api.java.tuple.Tuple2;
+
+import org.opensearch.action.index.IndexRequest;
+import org.opensearch.common.xcontent.XContentBuilder;
+import org.opensearch.common.xcontent.XContentFactory;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.Map;
+
+class TestEmitter implements OpensearchEmitter<Tuple2<Integer, String>> {
+
+    private final String index;
+    private final XContentBuilderProvider xContentBuilderProvider;
+    private final String dataFieldName;
+
+    public static TestEmitter jsonEmitter(String index, String dataFieldName) {
+        return new TestEmitter(index, dataFieldName, XContentFactory::jsonBuilder);
+    }
+
+    public static TestEmitter smileEmitter(String index, String dataFieldName) {
+        return new TestEmitter(index, dataFieldName, XContentFactory::smileBuilder);
+    }
+
+    private TestEmitter(
+            String index, String dataFieldName, XContentBuilderProvider xContentBuilderProvider) {
+        this.dataFieldName = dataFieldName;
+        this.index = index;
+        this.xContentBuilderProvider = xContentBuilderProvider;
+    }
+
+    @Override
+    public void emit(
+            Tuple2<Integer, String> element, SinkWriter.Context context, RequestIndexer indexer) {
+        indexer.add(createIndexRequest(element));
+    }
+
+    public IndexRequest createIndexRequest(Tuple2<Integer, String> element) {
+        Map<String, Object> document = new HashMap<>();
+        document.put(dataFieldName, element.f1);
+        try {
+            return new IndexRequest(index)
+                    .id(element.f0.toString())
+                    .source(xContentBuilderProvider.getBuilder().map(document));
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    @FunctionalInterface
+    private interface XContentBuilderProvider extends Serializable {
+        XContentBuilder getBuilder() throws IOException;
+    }
+}
diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/IndexGeneratorTest.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/IndexGeneratorTest.java
new file mode 100644
index 0000000..fe61398
--- /dev/null
+++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/IndexGeneratorTest.java
@@ -0,0 +1,264 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.connector.opensearch.table;
+
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.api.TableException;
+import org.apache.flink.table.data.GenericRowData;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.StringData;
+import org.apache.flink.table.data.TimestampData;
+import org.apache.flink.table.types.DataType;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.sql.Date;
+import java.sql.Time;
+import java.sql.Timestamp;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.time.temporal.UnsupportedTemporalTypeException;
+import java.util.Arrays;
+import java.util.List;
+
+/** Suite tests for {@link IndexGenerator}. */
+class IndexGeneratorTest {
+
+    private static final List<String> fieldNames =
+            Arrays.asList(
+                    "id",
+                    "item",
+                    "log_ts",
+                    "log_date",
+                    "order_timestamp",
+                    "log_time",
+                    "local_datetime",
+                    "local_date",
+                    "local_time",
+                    "note",
+                    "status");
+
+    private static final List<DataType> dataTypes =
+            Arrays.asList(
+                    DataTypes.INT(),
+                    DataTypes.STRING(),
+                    DataTypes.BIGINT(),
+                    DataTypes.DATE().bridgedTo(Date.class),
+                    DataTypes.TIMESTAMP().bridgedTo(Timestamp.class),
+                    DataTypes.TIME().bridgedTo(Time.class),
+                    DataTypes.TIMESTAMP().bridgedTo(LocalDateTime.class),
+                    DataTypes.DATE().bridgedTo(LocalDate.class),
+                    DataTypes.TIME().bridgedTo(LocalTime.class),
+                    DataTypes.STRING(),
+                    DataTypes.BOOLEAN());
+
+    private static final List<RowData> rows =
+            Arrays.asList(
+                    GenericRowData.of(
+                            1,
+                            StringData.fromString("apple"),
+                            Timestamp.valueOf("2020-03-18 12:12:14").getTime(),
+                            (int) Date.valueOf("2020-03-18").toLocalDate().toEpochDay(),
+                            TimestampData.fromTimestamp(Timestamp.valueOf("2020-03-18 12:12:14")),
+                            (int)
+                                    (Time.valueOf("12:12:14").toLocalTime().toNanoOfDay()
+                                            / 1_000_000L),
+                            TimestampData.fromLocalDateTime(
+                                    LocalDateTime.of(2020, 3, 18, 12, 12, 14, 1000)),
+                            (int) LocalDate.of(2020, 3, 18).toEpochDay(),
+                            (int) (LocalTime.of(12, 13, 14, 2000).toNanoOfDay() / 1_000_000L),
+                            "test1",
+                            true),
+                    GenericRowData.of(
+                            2,
+                            StringData.fromString("peanut"),
+                            Timestamp.valueOf("2020-03-19 12:22:14").getTime(),
+                            (int) Date.valueOf("2020-03-19").toLocalDate().toEpochDay(),
+                            TimestampData.fromTimestamp(Timestamp.valueOf("2020-03-19 12:22:21")),
+                            (int)
+                                    (Time.valueOf("12:22:21").toLocalTime().toNanoOfDay()
+                                            / 1_000_000L),
+                            TimestampData.fromLocalDateTime(
+                                    LocalDateTime.of(2020, 3, 19, 12, 22, 14, 1000)),
+                            (int) LocalDate.of(2020, 3, 19).toEpochDay(),
+                            (int) (LocalTime.of(12, 13, 14, 2000).toNanoOfDay() / 1_000_000L),
+                            "test2",
+                            false));
+
+    @Test
+    public void testDynamicIndexFromTimestamp() {
+        IndexGenerator indexGenerator =
+                IndexGeneratorFactory.createIndexGenerator(
+                        "{order_timestamp|yyyy_MM_dd_HH-ss}_index", fieldNames, dataTypes);
+        indexGenerator.open();
+        Assertions.assertEquals("2020_03_18_12-14_index", indexGenerator.generate(rows.get(0)));
+        IndexGenerator indexGenerator1 =
+                IndexGeneratorFactory.createIndexGenerator(
+                        "{order_timestamp|yyyy_MM_dd_HH_mm}_index", fieldNames, dataTypes);
+        indexGenerator1.open();
+        Assertions.assertEquals("2020_03_19_12_22_index", indexGenerator1.generate(rows.get(1)));
+    }
+
+    @Test
+    public void testDynamicIndexFromLocalDateTime() {
+        IndexGenerator indexGenerator =
+                IndexGeneratorFactory.createIndexGenerator(
+                        "{local_datetime|yyyy_MM_dd_HH-ss}_index", fieldNames, dataTypes);
+        indexGenerator.open();
+        Assertions.assertEquals("2020_03_18_12-14_index", indexGenerator.generate(rows.get(0)));
+        IndexGenerator indexGenerator1 =
+                IndexGeneratorFactory.createIndexGenerator(
+                        "{local_datetime|yyyy_MM_dd_HH_mm}_index", fieldNames, dataTypes);
+        indexGenerator1.open();
+        Assertions.assertEquals("2020_03_19_12_22_index", indexGenerator1.generate(rows.get(1)));
+    }
+
+    @Test
+    public void testDynamicIndexFromDate() {
+        IndexGenerator indexGenerator =
+                IndexGeneratorFactory.createIndexGenerator(
+                        "my-index-{log_date|yyyy/MM/dd}", fieldNames, dataTypes);
+        indexGenerator.open();
+        Assertions.assertEquals("my-index-2020/03/18", indexGenerator.generate(rows.get(0)));
+        Assertions.assertEquals("my-index-2020/03/19", indexGenerator.generate(rows.get(1)));
+    }
+
+    @Test
+    public void testDynamicIndexFromLocalDate() {
+        IndexGenerator indexGenerator =
+                IndexGeneratorFactory.createIndexGenerator(
+                        "my-index-{local_date|yyyy/MM/dd}", fieldNames, dataTypes);
+        indexGenerator.open();
+        Assertions.assertEquals("my-index-2020/03/18", indexGenerator.generate(rows.get(0)));
+        Assertions.assertEquals("my-index-2020/03/19", indexGenerator.generate(rows.get(1)));
+    }
+
+    @Test
+    public void testDynamicIndexFromTime() {
+        IndexGenerator indexGenerator =
+                IndexGeneratorFactory.createIndexGenerator(
+                        "my-index-{log_time|HH-mm}", fieldNames, dataTypes);
+        indexGenerator.open();
+        Assertions.assertEquals("my-index-12-12", indexGenerator.generate(rows.get(0)));
+        Assertions.assertEquals("my-index-12-22", indexGenerator.generate(rows.get(1)));
+    }
+
+    @Test
+    public void testDynamicIndexFromLocalTime() {
+        IndexGenerator indexGenerator =
+                IndexGeneratorFactory.createIndexGenerator(
+                        "my-index-{local_time|HH-mm}", fieldNames, dataTypes);
+        indexGenerator.open();
+        Assertions.assertEquals("my-index-12-13", indexGenerator.generate(rows.get(0)));
+        Assertions.assertEquals("my-index-12-13", indexGenerator.generate(rows.get(1)));
+    }
+
+    @Test
+    public void testDynamicIndexDefaultFormat() {
+        IndexGenerator indexGenerator =
+                IndexGeneratorFactory.createIndexGenerator(
+                        "my-index-{local_time|}", fieldNames, dataTypes);
+        indexGenerator.open();
+        Assertions.assertEquals("my-index-12_13_14", indexGenerator.generate(rows.get(0)));
+        Assertions.assertEquals("my-index-12_13_14", indexGenerator.generate(rows.get(1)));
+    }
+
+    @Test
+    public void testGeneralDynamicIndex() {
+        IndexGenerator indexGenerator =
+                IndexGeneratorFactory.createIndexGenerator("index_{item}", fieldNames, dataTypes);
+        indexGenerator.open();
+        Assertions.assertEquals("index_apple", indexGenerator.generate(rows.get(0)));
+        Assertions.assertEquals("index_peanut", indexGenerator.generate(rows.get(1)));
+    }
+
+    @Test
+    public void testStaticIndex() {
+        IndexGenerator indexGenerator =
+                IndexGeneratorFactory.createIndexGenerator("my-index", fieldNames, dataTypes);
+        indexGenerator.open();
+        Assertions.assertEquals("my-index", indexGenerator.generate(rows.get(0)));
+        Assertions.assertEquals("my-index", indexGenerator.generate(rows.get(1)));
+    }
+
+    @Test
+    public void testUnknownField() {
+        String expectedExceptionMsg =
+                "Unknown field 'unknown_ts' in index pattern 'my-index-{unknown_ts|yyyy-MM-dd}',"
+                        + " please check the field name.";
+        try {
+            IndexGeneratorFactory.createIndexGenerator(
+                    "my-index-{unknown_ts|yyyy-MM-dd}", fieldNames, dataTypes);
+        } catch (TableException e) {
+            Assertions.assertEquals(expectedExceptionMsg, e.getMessage());
+        }
+    }
+
+    @Test
+    public void testUnsupportedTimeType() {
+        String expectedExceptionMsg =
+                "Unsupported type 'INT' found in Opensearch dynamic index field, "
+                        + "time-related pattern only support types are: DATE,TIME,TIMESTAMP.";
+        try {
+            IndexGeneratorFactory.createIndexGenerator(
+                    "my-index-{id|yyyy-MM-dd}", fieldNames, dataTypes);
+        } catch (TableException e) {
+            Assertions.assertEquals(expectedExceptionMsg, e.getMessage());
+        }
+    }
+
+    @Test
+    public void testUnsupportedMultiParametersType() {
+        String expectedExceptionMsg =
+                "Chaining dynamic index pattern my-index-{local_date}-{local_time} is not supported,"
+                        + " only support single dynamic index pattern.";
+        try {
+            IndexGeneratorFactory.createIndexGenerator(
+                    "my-index-{local_date}-{local_time}", fieldNames, dataTypes);
+        } catch (TableException e) {
+            Assertions.assertEquals(expectedExceptionMsg, e.getMessage());
+        }
+    }
+
+    @Test
+    public void testDynamicIndexUnsupportedFormat() {
+        String expectedExceptionMsg = "Unsupported field: HourOfDay";
+        try {
+            IndexGeneratorFactory.createIndexGenerator(
+                    "my-index-{local_date|yyyy/MM/dd HH:mm}", fieldNames, dataTypes);
+        } catch (UnsupportedTemporalTypeException e) {
+            Assertions.assertEquals(expectedExceptionMsg, e.getMessage());
+        }
+    }
+
+    @Test
+    public void testUnsupportedIndexFieldType() {
+        String expectedExceptionMsg =
+                "Unsupported type BOOLEAN of index field, Supported types are:"
+                        + " [DATE, TIME_WITHOUT_TIME_ZONE, TIMESTAMP_WITHOUT_TIME_ZONE, TIMESTAMP_WITH_TIME_ZONE,"
+                        + " TIMESTAMP_WITH_LOCAL_TIME_ZONE, VARCHAR, CHAR, TINYINT, INTEGER, BIGINT]";
+        try {
+            IndexGeneratorFactory.createIndexGenerator("index_{status}", fieldNames, dataTypes);
+        } catch (IllegalArgumentException e) {
+            Assertions.assertEquals(expectedExceptionMsg, e.getMessage());
+        }
+    }
+}
diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/KeyExtractorTest.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/KeyExtractorTest.java
new file mode 100644
index 0000000..537fc72
--- /dev/null
+++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/KeyExtractorTest.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.connector.opensearch.table;
+
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.data.GenericRowData;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.StringData;
+import org.apache.flink.table.data.TimestampData;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.util.Collections;
+import java.util.List;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/** Tests for {@link KeyExtractor}. */
+class KeyExtractorTest {
+    @Test
+    public void testSimpleKey() {
+        List<LogicalTypeWithIndex> logicalTypesWithIndex =
+                Stream.of(
+                                new LogicalTypeWithIndex(
+                                        0, DataTypes.BIGINT().notNull().getLogicalType()))
+                        .collect(Collectors.toList());
+
+        Function<RowData, String> keyExtractor =
+                KeyExtractor.createKeyExtractor(logicalTypesWithIndex, "_");
+
+        String key = keyExtractor.apply(GenericRowData.of(12L, StringData.fromString("ABCD")));
+        Assertions.assertEquals("12", key);
+    }
+
+    @Test
+    public void testNoPrimaryKey() {
+        List<LogicalTypeWithIndex> logicalTypesWithIndex = Collections.emptyList();
+
+        Function<RowData, String> keyExtractor =
+                KeyExtractor.createKeyExtractor(logicalTypesWithIndex, "_");
+
+        String key = keyExtractor.apply(GenericRowData.of(12L, StringData.fromString("ABCD")));
+        Assertions.assertNull(key);
+    }
+
+    @Test
+    public void testTwoFieldsKey() {
+        List<LogicalTypeWithIndex> logicalTypesWithIndex =
+                Stream.of(
+                                new LogicalTypeWithIndex(
+                                        0, DataTypes.BIGINT().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        2, DataTypes.TIMESTAMP().notNull().getLogicalType()))
+                        .collect(Collectors.toList());
+
+        Function<RowData, String> keyExtractor =
+                KeyExtractor.createKeyExtractor(logicalTypesWithIndex, "_");
+
+        String key =
+                keyExtractor.apply(
+                        GenericRowData.of(
+                                12L,
+                                StringData.fromString("ABCD"),
+                                TimestampData.fromLocalDateTime(
+                                        LocalDateTime.parse("2012-12-12T12:12:12"))));
+        Assertions.assertEquals("12_2012-12-12T12:12:12", key);
+    }
+
+    @Test
+    public void testAllTypesKey() {
+        List<LogicalTypeWithIndex> logicalTypesWithIndex =
+                Stream.of(
+                                new LogicalTypeWithIndex(
+                                        0, DataTypes.TINYINT().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        1, DataTypes.SMALLINT().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        2, DataTypes.INT().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        3, DataTypes.BIGINT().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        4, DataTypes.BOOLEAN().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        5, DataTypes.FLOAT().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        6, DataTypes.DOUBLE().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        7, DataTypes.STRING().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        8, DataTypes.TIMESTAMP().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        9,
+                                        DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE()
+                                                .notNull()
+                                                .getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        10, DataTypes.TIME().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        11, DataTypes.DATE().notNull().getLogicalType()))
+                        .collect(Collectors.toList());
+
+        Function<RowData, String> keyExtractor =
+                KeyExtractor.createKeyExtractor(logicalTypesWithIndex, "_");
+
+        String key =
+                keyExtractor.apply(
+                        GenericRowData.of(
+                                (byte) 1,
+                                (short) 2,
+                                3,
+                                (long) 4,
+                                true,
+                                1.0f,
+                                2.0d,
+                                StringData.fromString("ABCD"),
+                                TimestampData.fromLocalDateTime(
+                                        LocalDateTime.parse("2012-12-12T12:12:12")),
+                                TimestampData.fromInstant(Instant.parse("2013-01-13T13:13:13Z")),
+                                (int) (LocalTime.parse("14:14:14").toNanoOfDay() / 1_000_000),
+                                (int) LocalDate.parse("2015-05-15").toEpochDay()));
+        Assertions.assertEquals(
+                "1_2_3_4_true_1.0_2.0_ABCD_2012-12-12T12:12:12_2013-01-13T13:13:13_14:14:14_2015-05-15",
+                key);
+    }
+}
diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactoryTest.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactoryTest.java
new file mode 100644
index 0000000..f7f1430
--- /dev/null
+++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactoryTest.java
@@ -0,0 +1,267 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.connector.opensearch.table;
+
+import org.apache.flink.api.common.typeutils.base.VoidSerializer;
+import org.apache.flink.connector.opensearch.OpensearchUtil;
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.api.ValidationException;
+import org.apache.flink.table.catalog.Column;
+import org.apache.flink.table.catalog.ResolvedSchema;
+import org.apache.flink.table.catalog.UniqueConstraint;
+import org.apache.flink.table.connector.sink.DynamicTableSink;
+import org.apache.flink.table.connector.sink.SinkV2Provider;
+import org.apache.flink.util.TestLoggerExtension;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.api.function.Executable;
+
+import java.util.Arrays;
+import java.util.Collections;
+
+import static org.apache.flink.table.factories.FactoryUtil.SINK_PARALLELISM;
+import static org.assertj.core.api.Assertions.assertThat;
+
+/** Tests for validation in {@link OpensearchDynamicSinkFactory}. */
+@ExtendWith(TestLoggerExtension.class)
+class OpensearchDynamicSinkFactoryTest {
+    private TestContext createPrefilledTestContext() {
+        return TestContext.context()
+                .withOption(OpensearchConnectorOptions.INDEX_OPTION.key(), "MyIndex")
+                .withOption(
+                        OpensearchConnectorOptions.HOSTS_OPTION.key(), "http://localhost:12345");
+    }
+
+    @Test
+    public void validateEmptyConfiguration() {
+        OpensearchDynamicSinkFactory sinkFactory = new OpensearchDynamicSinkFactory();
+
+        assertValidationException(
+                "One or more required options are missing.\n"
+                        + "\n"
+                        + "Missing required options are:\n"
+                        + "\n"
+                        + "hosts\n"
+                        + "index",
+                () -> sinkFactory.createDynamicTableSink(TestContext.context().build()));
+    }
+
+    void assertValidationException(String expectedMessage, Executable executable) {
+        ValidationException thrown = Assertions.assertThrows(ValidationException.class, executable);
+        Assertions.assertEquals(expectedMessage, thrown.getMessage());
+    }
+
+    @Test
+    public void validateWrongIndex() {
+        OpensearchDynamicSinkFactory sinkFactory = new OpensearchDynamicSinkFactory();
+        assertValidationException(
+                "'index' must not be empty",
+                () ->
+                        sinkFactory.createDynamicTableSink(
+                                createPrefilledTestContext()
+                                        .withOption(
+                                                OpensearchConnectorOptions.INDEX_OPTION.key(), "")
+                                        .build()));
+    }
+
+    @Test
+    public void validateWrongHosts() {
+        OpensearchDynamicSinkFactory sinkFactory = new OpensearchDynamicSinkFactory();
+        assertValidationException(
+                "Could not parse host 'wrong-host' in option 'hosts'. It should follow the format 'http://host_name:port'.",
+                () ->
+                        sinkFactory.createDynamicTableSink(
+                                createPrefilledTestContext()
+                                        .withOption(
+                                                OpensearchConnectorOptions.HOSTS_OPTION.key(),
+                                                "wrong-host")
+                                        .build()));
+    }
+
+    @Test
+    public void validateWrongFlushSize() {
+        OpensearchDynamicSinkFactory sinkFactory = new OpensearchDynamicSinkFactory();
+        assertValidationException(
+                "'sink.bulk-flush.max-size' must be in MB granularity. Got: 1024 bytes",
+                () ->
+                        sinkFactory.createDynamicTableSink(
+                                createPrefilledTestContext()
+                                        .withOption(
+                                                OpensearchConnectorOptions
+                                                        .BULK_FLUSH_MAX_SIZE_OPTION
+                                                        .key(),
+                                                "1kb")
+                                        .build()));
+    }
+
+    @Test
+    public void validateWrongRetries() {
+        OpensearchDynamicSinkFactory sinkFactory = new OpensearchDynamicSinkFactory();
+
+        assertValidationException(
+                "'sink.bulk-flush.backoff.max-retries' must be at least 1. Got: 0",
+                () ->
+                        sinkFactory.createDynamicTableSink(
+                                createPrefilledTestContext()
+                                        .withOption(
+                                                OpensearchConnectorOptions
+                                                        .BULK_FLUSH_BACKOFF_MAX_RETRIES_OPTION
+                                                        .key(),
+                                                "0")
+                                        .build()));
+    }
+
+    @Test
+    public void validateWrongMaxActions() {
+        OpensearchDynamicSinkFactory sinkFactory = new OpensearchDynamicSinkFactory();
+
+        assertValidationException(
+                "'sink.bulk-flush.max-actions' must be at least 1. Got: -2",
+                () ->
+                        sinkFactory.createDynamicTableSink(
+                                createPrefilledTestContext()
+                                        .withOption(
+                                                OpensearchConnectorOptions
+                                                        .BULK_FLUSH_MAX_ACTIONS_OPTION
+                                                        .key(),
+                                                "-2")
+                                        .build()));
+    }
+
+    @Test
+    public void validateWrongBackoffDelay() {
+        OpensearchDynamicSinkFactory sinkFactory = new OpensearchDynamicSinkFactory();
+
+        assertValidationException(
+                "Invalid value for option 'sink.bulk-flush.backoff.delay'.",
+                () ->
+                        sinkFactory.createDynamicTableSink(
+                                createPrefilledTestContext()
+                                        .withOption(
+                                                OpensearchConnectorOptions
+                                                        .BULK_FLUSH_BACKOFF_DELAY_OPTION
+                                                        .key(),
+                                                "-1s")
+                                        .build()));
+    }
+
+    @Test
+    public void validatePrimaryKeyOnIllegalColumn() {
+        OpensearchDynamicSinkFactory sinkFactory = new OpensearchDynamicSinkFactory();
+
+        assertValidationException(
+                "The table has a primary key on columns of illegal types: "
+                        + "[ARRAY, MAP, MULTISET, ROW, RAW, VARBINARY].",
+                () ->
+                        sinkFactory.createDynamicTableSink(
+                                createPrefilledTestContext()
+                                        .withSchema(
+                                                new ResolvedSchema(
+                                                        Arrays.asList(
+                                                                Column.physical(
+                                                                        "a",
+                                                                        DataTypes.BIGINT()
+                                                                                .notNull()),
+                                                                Column.physical(
+                                                                        "b",
+                                                                        DataTypes.ARRAY(
+                                                                                        DataTypes.BIGINT()
+                                                                                                .notNull())
+                                                                                .notNull()),
+                                                                Column.physical(
+                                                                        "c",
+                                                                        DataTypes.MAP(
+                                                                                        DataTypes.BIGINT(),
+                                                                                        DataTypes.STRING())
+                                                                                .notNull()),
+                                                                Column.physical(
+                                                                        "d",
+                                                                        DataTypes.MULTISET(
+                                                                                        DataTypes.BIGINT()
+                                                                                                .notNull())
+                                                                                .notNull()),
+                                                                Column.physical(
+                                                                        "e",
+                                                                        DataTypes.ROW(
+                                                                                        DataTypes.FIELD(
+                                                                                                "a",
+                                                                                                DataTypes.BIGINT()))
+                                                                                .notNull()),
+                                                                Column.physical(
+                                                                        "f",
+                                                                        DataTypes.RAW(
+                                                                                        Void.class,
+                                                                                        VoidSerializer.INSTANCE)
+                                                                                .notNull()),
+                                                                Column.physical(
+                                                                        "g",
+                                                                        DataTypes.BYTES()
+                                                                                .notNull())),
+                                                        Collections.emptyList(),
+                                                        UniqueConstraint.primaryKey(
+                                                                "name",
+                                                                Arrays.asList(
+                                                                        "a", "b", "c", "d", "e",
+                                                                        "f", "g"))))
+                                        .build()));
+    }
+
+    @Test
+    public void validateWrongCredential() {
+        OpensearchDynamicSinkFactory sinkFactory = new OpensearchDynamicSinkFactory();
+
+        assertValidationException(
+                "'username' and 'password' must be set at the same time. Got: username 'username' and password ''",
+                () ->
+                        sinkFactory.createDynamicTableSink(
+                                createPrefilledTestContext()
+                                        .withOption(
+                                                OpensearchConnectorOptions.USERNAME_OPTION.key(),
+                                                "username")
+                                        .withOption(
+                                                OpensearchConnectorOptions.PASSWORD_OPTION.key(),
+                                                "")
+                                        .build()));
+    }
+
+    @Test
+    public void testSinkParallelism() {
+        OpensearchDynamicSinkFactory sinkFactory = new OpensearchDynamicSinkFactory();
+        DynamicTableSink sink =
+                sinkFactory.createDynamicTableSink(
+                        createPrefilledTestContext()
+                                .withOption(SINK_PARALLELISM.key(), "2")
+                                .build());
+        assertThat(sink).isInstanceOf(OpensearchDynamicSink.class);
+        OpensearchDynamicSink opensearchSink = (OpensearchDynamicSink) sink;
+        SinkV2Provider provider =
+                (SinkV2Provider)
+                        opensearchSink.getSinkRuntimeProvider(new OpensearchUtil.MockContext());
+        assertThat(provider.getParallelism()).hasValue(2);
+    }
+}
diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkITCase.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkITCase.java
new file mode 100644
index 0000000..56bec56
--- /dev/null
+++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkITCase.java
@@ -0,0 +1,334 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.connector.opensearch.table;
+
+import org.apache.flink.api.common.time.Deadline;
+import org.apache.flink.api.connector.sink2.Sink;
+import org.apache.flink.connector.opensearch.OpensearchUtil;
+import org.apache.flink.connector.opensearch.test.DockerImageVersions;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.api.EnvironmentSettings;
+import org.apache.flink.table.api.TableEnvironment;
+import org.apache.flink.table.catalog.Column;
+import org.apache.flink.table.catalog.ResolvedSchema;
+import org.apache.flink.table.catalog.UniqueConstraint;
+import org.apache.flink.table.connector.sink.DynamicTableSink;
+import org.apache.flink.table.connector.sink.SinkV2Provider;
+import org.apache.flink.table.data.GenericRowData;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.StringData;
+import org.apache.flink.table.data.TimestampData;
+import org.apache.flink.types.RowKind;
+import org.apache.flink.util.TestLoggerExtension;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.opensearch.action.get.GetRequest;
+import org.opensearch.action.search.SearchRequest;
+import org.opensearch.client.RequestOptions;
+import org.opensearch.client.RestHighLevelClient;
+import org.opensearch.search.SearchHits;
+import org.opensearch.testcontainers.OpensearchContainer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import java.time.Duration;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+import static org.apache.flink.table.api.Expressions.row;
+
+/** IT tests for {@link OpensearchDynamicSink}. */
+@ExtendWith(TestLoggerExtension.class)
+@Testcontainers
+class OpensearchDynamicSinkITCase {
+    private static final Logger LOG = LoggerFactory.getLogger(OpensearchDynamicSinkITCase.class);
+
+    @Container
+    private static final OpensearchContainer OS_CONTAINER =
+            OpensearchUtil.createOpensearchContainer(DockerImageVersions.OPENSEARCH_1, LOG);
+
+    private TestContext getPrefilledTestContext(String index) {
+        return TestContext.context()
+                .withOption(OpensearchConnectorOptions.INDEX_OPTION.key(), index)
+                .withOption(
+                        OpensearchConnectorOptions.HOSTS_OPTION.key(),
+                        OS_CONTAINER.getHttpHostAddress())
+                .withOption(OpensearchConnectorOptions.ALLOW_INSECURE.key(), "true")
+                .withOption(
+                        OpensearchConnectorOptions.USERNAME_OPTION.key(),
+                        OS_CONTAINER.getUsername())
+                .withOption(
+                        OpensearchConnectorOptions.PASSWORD_OPTION.key(),
+                        OS_CONTAINER.getPassword());
+    }
+
+    private String getConnectorSql(String index) {
+        return String.format("'%s'='%s',\n", "connector", "opensearch")
+                + String.format(
+                        "'%s'='%s',\n", OpensearchConnectorOptions.INDEX_OPTION.key(), index)
+                + String.format(
+                        "'%s'='%s', \n",
+                        OpensearchConnectorOptions.HOSTS_OPTION.key(),
+                        OS_CONTAINER.getHttpHostAddress())
+                + String.format(
+                        "'%s'='%s', \n",
+                        OpensearchConnectorOptions.USERNAME_OPTION.key(),
+                        OS_CONTAINER.getUsername())
+                + String.format(
+                        "'%s'='%s', \n",
+                        OpensearchConnectorOptions.PASSWORD_OPTION.key(),
+                        OS_CONTAINER.getPassword())
+                + String.format(
+                        "'%s'='%s'\n", OpensearchConnectorOptions.ALLOW_INSECURE.key(), true);
+    }
+
+    @Test
+    public void testWritingDocuments() throws Exception {
+        ResolvedSchema schema =
+                new ResolvedSchema(
+                        Arrays.asList(
+                                Column.physical("a", DataTypes.BIGINT().notNull()),
+                                Column.physical("b", DataTypes.TIME()),
+                                Column.physical("c", DataTypes.STRING().notNull()),
+                                Column.physical("d", DataTypes.FLOAT()),
+                                Column.physical("e", DataTypes.TINYINT().notNull()),
+                                Column.physical("f", DataTypes.DATE()),
+                                Column.physical("g", DataTypes.TIMESTAMP().notNull())),
+                        Collections.emptyList(),
+                        UniqueConstraint.primaryKey("name", Arrays.asList("a", "g")));
+        GenericRowData rowData =
+                GenericRowData.of(
+                        1L,
+                        12345,
+                        StringData.fromString("ABCDE"),
+                        12.12f,
+                        (byte) 2,
+                        12345,
+                        TimestampData.fromLocalDateTime(
+                                LocalDateTime.parse("2012-12-12T12:12:12")));
+
+        String index = "writing-documents";
+        OpensearchDynamicSinkFactory sinkFactory = new OpensearchDynamicSinkFactory();
+
+        DynamicTableSink.SinkRuntimeProvider runtimeProvider =
+                sinkFactory
+                        .createDynamicTableSink(
+                                getPrefilledTestContext(index).withSchema(schema).build())
+                        .getSinkRuntimeProvider(new OpensearchUtil.MockContext());
+
+        final SinkV2Provider sinkProvider = (SinkV2Provider) runtimeProvider;
+        final Sink<RowData> sink = sinkProvider.createSink();
+        StreamExecutionEnvironment environment =
+                StreamExecutionEnvironment.getExecutionEnvironment();
+        environment.setParallelism(4);
+
+        rowData.setRowKind(RowKind.UPDATE_AFTER);
+        environment.<RowData>fromElements(rowData).sinkTo(sink);
+        environment.execute();
+
+        RestHighLevelClient client = OpensearchUtil.createClient(OS_CONTAINER);
+        Map<String, Object> response =
+                client.get(new GetRequest(index, "1_2012-12-12T12:12:12"), RequestOptions.DEFAULT)
+                        .getSource();
+        Map<String, Object> expectedMap = new HashMap<>();
+        expectedMap.put("a", 1);
+        expectedMap.put("b", "00:00:12");
+        expectedMap.put("c", "ABCDE");
+        expectedMap.put("d", 12.12d);
+        expectedMap.put("e", 2);
+        expectedMap.put("f", "2003-10-20");
+        expectedMap.put("g", "2012-12-12 12:12:12");
+        Assertions.assertEquals(expectedMap, response);
+    }
+
+    @Test
+    public void testWritingDocumentsFromTableApi() throws Exception {
+        TableEnvironment tableEnvironment =
+                TableEnvironment.create(EnvironmentSettings.inStreamingMode());
+
+        String index = "table-api";
+        tableEnvironment.executeSql(
+                "CREATE TABLE osTable ("
+                        + "a BIGINT NOT NULL,\n"
+                        + "b TIME,\n"
+                        + "c STRING NOT NULL,\n"
+                        + "d FLOAT,\n"
+                        + "e TINYINT NOT NULL,\n"
+                        + "f DATE,\n"
+                        + "g TIMESTAMP NOT NULL,\n"
+                        + "h as a + 2,\n"
+                        + "PRIMARY KEY (a, g) NOT ENFORCED\n"
+                        + ")\n"
+                        + "WITH (\n"
+                        + getConnectorSql(index)
+                        + ")");
+
+        tableEnvironment
+                .fromValues(
+                        row(
+                                1L,
+                                LocalTime.ofNanoOfDay(12345L * 1_000_000L),
+                                "ABCDE",
+                                12.12f,
+                                (byte) 2,
+                                LocalDate.ofEpochDay(12345),
+                                LocalDateTime.parse("2012-12-12T12:12:12")))
+                .executeInsert("osTable")
+                .await();
+
+        RestHighLevelClient client = OpensearchUtil.createClient(OS_CONTAINER);
+        Map<String, Object> response =
+                client.get(new GetRequest(index, "1_2012-12-12T12:12:12"), RequestOptions.DEFAULT)
+                        .getSource();
+        Map<String, Object> expectedMap = new HashMap<>();
+        expectedMap.put("a", 1);
+        expectedMap.put("b", "00:00:12");
+        expectedMap.put("c", "ABCDE");
+        expectedMap.put("d", 12.12d);
+        expectedMap.put("e", 2);
+        expectedMap.put("f", "2003-10-20");
+        expectedMap.put("g", "2012-12-12 12:12:12");
+        Assertions.assertEquals(expectedMap, response);
+    }
+
+    @Test
+    public void testWritingDocumentsNoPrimaryKey() throws Exception {
+        TableEnvironment tableEnvironment =
+                TableEnvironment.create(EnvironmentSettings.inStreamingMode());
+
+        String index = "no-primary-key";
+        tableEnvironment.executeSql(
+                "CREATE TABLE osTable ("
+                        + "a BIGINT NOT NULL,\n"
+                        + "b TIME,\n"
+                        + "c STRING NOT NULL,\n"
+                        + "d FLOAT,\n"
+                        + "e TINYINT NOT NULL,\n"
+                        + "f DATE,\n"
+                        + "g TIMESTAMP NOT NULL\n"
+                        + ")\n"
+                        + "WITH (\n"
+                        + getConnectorSql(index)
+                        + ")");
+
+        tableEnvironment
+                .fromValues(
+                        row(
+                                1L,
+                                LocalTime.ofNanoOfDay(12345L * 1_000_000L),
+                                "ABCDE",
+                                12.12f,
+                                (byte) 2,
+                                LocalDate.ofEpochDay(12345),
+                                LocalDateTime.parse("2012-12-12T12:12:12")),
+                        row(
+                                2L,
+                                LocalTime.ofNanoOfDay(12345L * 1_000_000L),
+                                "FGHIJK",
+                                13.13f,
+                                (byte) 4,
+                                LocalDate.ofEpochDay(12345),
+                                LocalDateTime.parse("2013-12-12T13:13:13")))
+                .executeInsert("osTable")
+                .await();
+
+        RestHighLevelClient client = OpensearchUtil.createClient(OS_CONTAINER);
+
+        // search API does not return documents that were not indexed, we might need to query
+        // the index a few times
+        Deadline deadline = Deadline.fromNow(Duration.ofSeconds(30));
+        SearchHits hits;
+        do {
+            hits = client.search(new SearchRequest(index), RequestOptions.DEFAULT).getHits();
+            if (hits.getTotalHits().value < 2) {
+                Thread.sleep(200);
+            }
+        } while (hits.getTotalHits().value < 2 && deadline.hasTimeLeft());
+
+        if (hits.getTotalHits().value < 2) {
+            throw new AssertionError("Could not retrieve results from Opensearch.");
+        }
+
+        HashSet<Map<String, Object>> resultSet = new HashSet<>();
+        resultSet.add(hits.getAt(0).getSourceAsMap());
+        resultSet.add(hits.getAt(1).getSourceAsMap());
+        Map<String, Object> expectedMap1 = new HashMap<>();
+        expectedMap1.put("a", 1);
+        expectedMap1.put("b", "00:00:12");
+        expectedMap1.put("c", "ABCDE");
+        expectedMap1.put("d", 12.12d);
+        expectedMap1.put("e", 2);
+        expectedMap1.put("f", "2003-10-20");
+        expectedMap1.put("g", "2012-12-12 12:12:12");
+        Map<String, Object> expectedMap2 = new HashMap<>();
+        expectedMap2.put("a", 2);
+        expectedMap2.put("b", "00:00:12");
+        expectedMap2.put("c", "FGHIJK");
+        expectedMap2.put("d", 13.13d);
+        expectedMap2.put("e", 4);
+        expectedMap2.put("f", "2003-10-20");
+        expectedMap2.put("g", "2013-12-12 13:13:13");
+        HashSet<Map<String, Object>> expectedSet = new HashSet<>();
+        expectedSet.add(expectedMap1);
+        expectedSet.add(expectedMap2);
+        Assertions.assertEquals(expectedSet, resultSet);
+    }
+
+    @Test
+    public void testWritingDocumentsWithDynamicIndex() throws Exception {
+        TableEnvironment tableEnvironment =
+                TableEnvironment.create(EnvironmentSettings.inStreamingMode());
+
+        String index = "dynamic-index-{b|yyyy-MM-dd}";
+        tableEnvironment.executeSql(
+                "CREATE TABLE osTable ("
+                        + "a BIGINT NOT NULL,\n"
+                        + "b TIMESTAMP NOT NULL,\n"
+                        + "PRIMARY KEY (a) NOT ENFORCED\n"
+                        + ")\n"
+                        + "WITH (\n"
+                        + getConnectorSql(index)
+                        + ")");
+
+        tableEnvironment
                .fromValues(row(1L, LocalDateTime.parse("2012-12-12T12:12:12")))
+                .executeInsert("osTable")
+                .await();
+
+        RestHighLevelClient client = OpensearchUtil.createClient(OS_CONTAINER);
+        Map<String, Object> response =
+                client.get(new GetRequest("dynamic-index-2012-12-12", "1"), RequestOptions.DEFAULT)
+                        .getSource();
+        Map<String, Object> expectedMap = new HashMap<>();
+        expectedMap.put("a", 1);
+        expectedMap.put("b", "2012-12-12 12:12:12");
+        Assertions.assertEquals(expectedMap, response);
+    }
+}
diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/TestContext.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/TestContext.java
new file mode 100644
index 0000000..50da696
--- /dev/null
+++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/TestContext.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.connector.opensearch.table;
+
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.api.Schema;
+import org.apache.flink.table.catalog.CatalogTable;
+import org.apache.flink.table.catalog.Column;
+import org.apache.flink.table.catalog.ObjectIdentifier;
+import org.apache.flink.table.catalog.ResolvedCatalogTable;
+import org.apache.flink.table.catalog.ResolvedSchema;
+import org.apache.flink.table.factories.DynamicTableFactory;
+import org.apache.flink.table.factories.FactoryUtil;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/** A utility class for mocking {@link DynamicTableFactory.Context}. */
*/ +class TestContext { + + private ResolvedSchema schema = ResolvedSchema.of(Column.physical("a", DataTypes.TIME())); + + private final Map options = new HashMap<>(); + + public static TestContext context() { + return new TestContext(); + } + + public TestContext withSchema(ResolvedSchema schema) { + this.schema = schema; + return this; + } + + DynamicTableFactory.Context build() { + return new FactoryUtil.DefaultDynamicTableContext( + ObjectIdentifier.of("default", "default", "t1"), + new ResolvedCatalogTable( + CatalogTable.of( + Schema.newBuilder().fromResolvedSchema(schema).build(), + "mock context", + Collections.emptyList(), + options), + schema), + Collections.emptyMap(), + new Configuration(), + TestContext.class.getClassLoader(), + false); + } + + public TestContext withOption(String key, String value) { + options.put(key, value); + return this; + } +} diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/test/DockerImageVersions.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/test/DockerImageVersions.java new file mode 100644 index 0000000..2087367 --- /dev/null +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/test/DockerImageVersions.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.connector.opensearch.test; + +/** + * Utility class for defining the image names and versions of Docker containers used during the + * integration tests. + */ +public class DockerImageVersions { + public static final String OPENSEARCH_1 = "opensearchproject/opensearch:1.2.4"; +} diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkITCase.java b/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkITCase.java new file mode 100644 index 0000000..e7378c3 --- /dev/null +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkITCase.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.connectors.opensearch; + +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.connector.opensearch.OpensearchUtil; +import org.apache.flink.connector.opensearch.test.DockerImageVersions; +import org.apache.flink.runtime.JobException; +import org.apache.flink.runtime.client.JobExecutionException; +import org.apache.flink.streaming.api.datastream.DataStreamSource; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.streaming.connectors.opensearch.testutils.SourceSinkDataTestKit; +import org.apache.flink.test.util.AbstractTestBase; + +import org.apache.http.HttpHost; +import org.junit.ClassRule; +import org.junit.Test; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.testcontainers.OpensearchContainer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.function.Function; + +import static org.hamcrest.CoreMatchers.instanceOf; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.Assert.fail; + +/** IT cases for the {@link OpensearchSink}. */ +public class OpensearchSinkITCase extends AbstractTestBase { + + private static final Logger LOG = LoggerFactory.getLogger(OpensearchSinkITCase.class); + + @ClassRule + public static final OpensearchContainer OS_CONTAINER = + OpensearchUtil.createOpensearchContainer(DockerImageVersions.OPENSEARCH_1, LOG); + + @Test + public void testOpensearchSink() throws Exception { + runOpensearchSinkTest( + "opensearch-sink-test-json-index", SourceSinkDataTestKit::getJsonSinkFunction); + } + + @Test + public void testOpensearchSinkWithSmile() throws Exception { + runOpensearchSinkTest( + "opensearch-sink-test-smile-index", SourceSinkDataTestKit::getSmileSinkFunction); + } + + @Test + public void testNullAddresses() { + try { + createOpensearchSink( + 1, getClusterName(), null, SourceSinkDataTestKit.getJsonSinkFunction("test")); + } catch (IllegalArgumentException | NullPointerException expectedException) { + // test passes + return; + } + + fail(); + } + + @Test + public void testEmptyAddresses() { + try { + createOpensearchSink( + 1, + getClusterName(), + Collections.emptyList(), + SourceSinkDataTestKit.getJsonSinkFunction("test")); + } catch (IllegalArgumentException expectedException) { + // test passes + return; + } + + fail(); + } + + @Test + public void testInvalidOpensearchCluster() throws Exception { + final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + + DataStreamSource> source = + env.addSource(new SourceSinkDataTestKit.TestDataSourceFunction()); + + source.addSink( + createOpensearchSinkForNode( + 1, + "invalid-cluster-name", + SourceSinkDataTestKit.getJsonSinkFunction("test"), + "123.123.123.123")); // incorrect ip address + + try { + env.execute("Opensearch Sink Test"); + } catch (JobExecutionException expectedException) { + assertThat(expectedException.getCause(), instanceOf(JobException.class)); + return; + } + + fail(); + } 
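// Reviewer note: the try/catch/fail pattern above follows the JUnit 4 style of the
// surrounding class. With JUnit 4.13+ (which this build declares) the same intent can
// be expressed more compactly; sketch only, not part of the patch:
//
//     org.junit.Assert.assertThrows(
//             IllegalArgumentException.class,
//             () ->
//                     createOpensearchSink(
//                             1,
//                             getClusterName(),
//                             Collections.emptyList(),
//                             SourceSinkDataTestKit.getJsonSinkFunction("test")));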
+ + private String getClusterName() { + return "docker-cluster"; + } + + private OpensearchSink> createOpensearchSink( + int bulkFlushMaxActions, + String clusterName, + List httpHosts, + OpensearchSinkFunction> opensearchSinkFunction) { + + OpensearchSink.Builder> builder = + new OpensearchSink.Builder<>(httpHosts, opensearchSinkFunction); + builder.setBulkFlushMaxActions(bulkFlushMaxActions); + + return builder.build(); + } + + private OpensearchSink> createOpensearchSinkForNode( + int bulkFlushMaxActions, + String clusterName, + OpensearchSinkFunction> opensearchSinkFunction, + String hostAddress) { + + ArrayList httpHosts = new ArrayList<>(); + httpHosts.add(HttpHost.create(hostAddress)); + + OpensearchSink.Builder> builder = + new OpensearchSink.Builder<>(httpHosts, opensearchSinkFunction); + builder.setBulkFlushMaxActions(bulkFlushMaxActions); + builder.setRestClientFactory(OpensearchUtil.createClientFactory(OS_CONTAINER)); + + return builder.build(); + } + + private void runOpensearchSinkTest( + String index, + Function>> functionFactory) + throws Exception { + final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + + DataStreamSource> source = + env.addSource(new SourceSinkDataTestKit.TestDataSourceFunction()); + + source.addSink( + createOpensearchSinkForNode( + 1, + getClusterName(), + functionFactory.apply(index), + OS_CONTAINER.getHttpHostAddress())); + + env.execute("Opensearch Sink Test"); + + // verify the results + final RestHighLevelClient client = OpensearchUtil.createClient(OS_CONTAINER); + + SourceSinkDataTestKit.verifyProducedSinkData(client, index); + + client.close(); + } +} diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/testutils/SourceSinkDataTestKit.java b/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/testutils/SourceSinkDataTestKit.java new file mode 100644 index 0000000..fe97d57 --- /dev/null +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/testutils/SourceSinkDataTestKit.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
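Reviewer note: the setRestClientFactory call in createOpensearchSinkForNode is the general extension point for customizing the low-level REST client. Outside of tests it can, for example, attach default headers; a sketch, assuming RestClientFactory's single configureRestClientBuilder method as defined in this patch:

    // Sketch: a RestClientFactory that adds a default header to every request.
    builder.setRestClientFactory(
            restClientBuilder ->
                    restClientBuilder.setDefaultHeaders(
                            new org.apache.http.Header[] {
                                new org.apache.http.message.BasicHeader("X-Source", "flink")
                            }));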
+ */ + +package org.apache.flink.streaming.connectors.opensearch.testutils; + +import org.apache.flink.api.common.functions.RuntimeContext; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.streaming.api.functions.source.SourceFunction; +import org.apache.flink.streaming.connectors.opensearch.OpensearchSinkFunction; +import org.apache.flink.streaming.connectors.opensearch.RequestIndexer; + +import org.junit.Assert; +import org.opensearch.action.get.GetRequest; +import org.opensearch.action.get.GetResponse; +import org.opensearch.action.index.IndexRequest; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.common.xcontent.XContentFactory; + +import java.io.IOException; +import java.io.Serializable; +import java.util.HashMap; +import java.util.Map; + +/** + * This class contains utilities, a predefined source function, and an Opensearch sink function + * used to generate and verify the data used in tests. + */ +public class SourceSinkDataTestKit { + + private static final int NUM_ELEMENTS = 20; + + private static final String DATA_PREFIX = "message #"; + private static final String DATA_FIELD_NAME = "data"; + + /** + * A {@link SourceFunction} that generates the elements (id, "message #" + id) with ids ranging + * from 0 to 19. + */ + public static class TestDataSourceFunction implements SourceFunction<Tuple2<Integer, String>> { + private static final long serialVersionUID = 1L; + + private volatile boolean running = true; + + @Override + public void run(SourceFunction.SourceContext<Tuple2<Integer, String>> ctx) + throws Exception { + for (int i = 0; i < NUM_ELEMENTS && running; i++) { + ctx.collect(Tuple2.of(i, DATA_PREFIX + i)); + } + } + + @Override + public void cancel() { + running = false; + } + } + + public static OpensearchSinkFunction<Tuple2<Integer, String>> getJsonSinkFunction( + String index) { + return new TestOpensearchSinkFunction(index, XContentFactory::jsonBuilder); + } + + public static OpensearchSinkFunction<Tuple2<Integer, String>> getSmileSinkFunction( + String index) { + return new TestOpensearchSinkFunction(index, XContentFactory::smileBuilder); + } + + private static class TestOpensearchSinkFunction + implements OpensearchSinkFunction<Tuple2<Integer, String>> { + private static final long serialVersionUID = 1L; + + private final String index; + private final XContentBuilderProvider contentBuilderProvider; + + /** + * Create the sink function, specifying a target Opensearch index. + * + * @param index Name of the target Opensearch index. + * @param contentBuilderProvider Provider of the {@link XContentBuilder} used to serialize + * documents. + */ + public TestOpensearchSinkFunction( + String index, XContentBuilderProvider contentBuilderProvider) { + this.index = index; + this.contentBuilderProvider = contentBuilderProvider; + } + + public IndexRequest createIndexRequest(Tuple2<Integer, String> element) { + Map<String, Object> document = new HashMap<>(); + document.put(DATA_FIELD_NAME, element.f1); + + try { + return new IndexRequest(index) + .id(element.f0.toString()) + .source(contentBuilderProvider.getBuilder().map(document)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void process( + Tuple2<Integer, String> element, RuntimeContext ctx, RequestIndexer indexer) { + indexer.add(createIndexRequest(element)); + } + } + + /** + * Verify the results in an Opensearch index.
The results must first be produced into the index + * using a {@link TestOpensearchSinkFunction}; + * + * @param client The client to use to connect to Opensearch + * @param index The index to check + * @throws IOException IOException + */ + public static void verifyProducedSinkData(RestHighLevelClient client, String index) + throws IOException { + for (int i = 0; i < NUM_ELEMENTS; i++) { + GetResponse response = + client.get(new GetRequest(index, Integer.toString(i)), RequestOptions.DEFAULT); + Assert.assertEquals(DATA_PREFIX + i, response.getSource().get(DATA_FIELD_NAME)); + } + } + + @FunctionalInterface + private interface XContentBuilderProvider extends Serializable { + XContentBuilder getBuilder() throws IOException; + } +} diff --git a/flink-connector-opensearch/src/test/resources/log4j2-test.properties b/flink-connector-opensearch/src/test/resources/log4j2-test.properties new file mode 100644 index 0000000..835c2ec --- /dev/null +++ b/flink-connector-opensearch/src/test/resources/log4j2-test.properties @@ -0,0 +1,28 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +# Set root logger level to OFF to not flood build logs +# set manually to INFO for debugging purposes +rootLogger.level = OFF +rootLogger.appenderRef.test.ref = TestLogger + +appender.testlogger.name = TestLogger +appender.testlogger.type = CONSOLE +appender.testlogger.target = SYSTEM_ERR +appender.testlogger.layout.type = PatternLayout +appender.testlogger.layout.pattern = %-4r [%t] %-5p %c %x - %m%n diff --git a/flink-sql-connector-opensearch/pom.xml b/flink-sql-connector-opensearch/pom.xml new file mode 100644 index 0000000..9327977 --- /dev/null +++ b/flink-sql-connector-opensearch/pom.xml @@ -0,0 +1,159 @@ + + + + + 4.0.0 + + + org.apache.flink + flink-connector-opensearch-parent + 1.0.0-SNAPSHOT + .. 
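Reviewer note: the XContentBuilderProvider indirection in the test kit above is what lets the same sink function emit either JSON or SMILE documents. Building such a request by hand works the same way; a minimal sketch using the OpenSearch client API:

    // Sketch: the same test document serialized as JSON; swapping in
    // XContentFactory.smileBuilder() produces the binary SMILE variant.
    static IndexRequest sampleRequest() throws IOException {
        Map<String, Object> document = new HashMap<>();
        document.put("data", "message #0");
        return new IndexRequest("opensearch-sink-test-json-index")
                .id("0")
                .source(XContentFactory.jsonBuilder().map(document));
    }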
+ + + flink-sql-connector-opensearch + Flink : Connectors : SQL : Opensearch + + jar + + + + org.apache.flink + flink-connector-opensearch + ${project.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + shade-flink + package + + shade + + + + + *:* + + + + com.tdunning:t-digest + joda-time:joda-time + net.sf.jopt-simple:jopt-simple + net.java.dev.jna:jna + org.hdrhistogram:HdrHistogram + org.yaml:snakeyaml + + + + + + org.opensearch:opensearch + + config/** + modules.txt + plugins.txt + org/joda/** + + org/opensearch/bootstrap/** + + + + org.opensearch.client:opensearch-rest-high-level-client + + forbidden/** + + + + org.apache.httpcomponents:httpclient + + mozilla/** + + + + org.apache.lucene:lucene-analyzers-common + + org/tartarus/** + + + + *:* + + + META-INF/versions/** + META-INF/services/com.fasterxml.** + META-INF/services/org.apache.lucene.** + META-INF/services/org.opensearch.** + META-INF/LICENSE.txt + + + + + + + org.apache.commons + org.apache.flink.opensearch.shaded.org.apache.commons + + + org.apache.http + org.apache.flink.opensearch.shaded.org.apache.http + + + org.apache.lucene + org.apache.flink.opensearch.shaded.org.apache.lucene + + + org.opensearch + org.apache.flink.opensearch.shaded.org.opensearch + + + com.fasterxml.jackson + org.apache.flink.opensearch.shaded.com.fasterxml.jackson + + + com.carrotsearch.hppc + org.apache.flink.opensearch.shaded.com.carrotsearch.hppc + + + com.github.mustachejava + org.apache.flink.opensearch.shaded.com.github.mustachejava + + + net.jpountz + org.apache.flink.opensearch.shaded.net.jpountz + + + + + + + + + + diff --git a/flink-sql-connector-opensearch/src/main/resources/META-INF/NOTICE b/flink-sql-connector-opensearch/src/main/resources/META-INF/NOTICE new file mode 100644 index 0000000..261cde6 --- /dev/null +++ b/flink-sql-connector-opensearch/src/main/resources/META-INF/NOTICE @@ -0,0 +1,50 @@ +flink-sql-connector-opensearch +Copyright 2014-2021 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +This project bundles the following dependencies under the Apache Software License 2.0. 
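Reviewer note: the relocations above rewrite the bundled packages under org.apache.flink.opensearch.shaded. A quick sanity check of the shaded sql-connector jar is to resolve a relocated class by name; sketch only:

    // Sketch: returns true when run against the shaded jar, false otherwise.
    static boolean isShaded() {
        try {
            Class.forName(
                    "org.apache.flink.opensearch.shaded.org.opensearch.client.RestHighLevelClient");
            return true;
        } catch (ClassNotFoundException e) {
            return false;
        }
    }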
(http://www.apache.org/licenses/LICENSE-2.0.txt) + +- com.carrotsearch:hppc:0.8.1 +- com.fasterxml.jackson.core:jackson-annotations:2.13.4 +- com.fasterxml.jackson.core:jackson-core:2.13.4 +- com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:2.13.4 +- com.fasterxml.jackson.dataformat:jackson-dataformat-smile:2.13.4 +- com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.13.4 +- com.github.spullara.mustache.java:compiler:0.9.10 +- commons-codec:commons-codec:1.15 +- commons-logging:commons-logging:1.1.3 +- org.apache.commons:commons-compress:1.21 +- org.apache.httpcomponents:httpasyncclient:4.1.4 +- org.apache.httpcomponents:httpclient:4.5.13 +- org.apache.httpcomponents:httpcore:4.4.14 +- org.apache.httpcomponents:httpcore-nio:4.4.12 +- org.apache.lucene:lucene-analyzers-common:8.10.1 +- org.apache.lucene:lucene-backward-codecs:8.10.1 +- org.apache.lucene:lucene-core:8.10.1 +- org.apache.lucene:lucene-grouping:8.10.1 +- org.apache.lucene:lucene-highlighter:8.10.1 +- org.apache.lucene:lucene-join:8.10.1 +- org.apache.lucene:lucene-memory:8.10.1 +- org.apache.lucene:lucene-misc:8.10.1 +- org.apache.lucene:lucene-queries:8.10.1 +- org.apache.lucene:lucene-queryparser:8.10.1 +- org.apache.lucene:lucene-sandbox:8.10.1 +- org.apache.lucene:lucene-spatial3d:8.10.1 +- org.apache.lucene:lucene-spatial-extras:8.10.1 +- org.apache.lucene:lucene-suggest:8.10.1 +- org.opensearch.client:opensearch-rest-client:1.3.0 +- org.opensearch.client:opensearch-rest-high-level-client:1.3.0 +- org.opensearch:opensearch-cli:1.3.0 +- org.opensearch:opensearch-core:1.3.0 +- org.opensearch:opensearch-geo:1.3.0 +- org.opensearch:opensearch:1.3.0 +- org.opensearch:opensearch-secure-sm:1.3.0 +- org.opensearch:opensearch-x-content:1.3.0 +- org.opensearch.plugin:aggs-matrix-stats-client:1.3.0 +- org.opensearch.plugin:lang-mustache-client:1.3.0 +- org.opensearch.plugin:mapper-extras-client:1.3.0 +- org.opensearch.plugin:parent-join-client:1.3.0 +- org.opensearch.plugin:rank-eval-client:1.3.0 +- net.java.dev.jna:jna:5.5.0 diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..8fb20aa --- /dev/null +++ b/pom.xml @@ -0,0 +1,451 @@ + + + + + + io.github.zentol.flink + flink-connector-parent + 1.0 + + + 4.0.0 + + org.apache.flink + flink-connector-opensearch-parent + 1.0.0-SNAPSHOT + Flink : Connectors : Opensearch : Parent + pom + https://flink.apache.org + 2022 + + + + The Apache Software License, Version 2.0 + https://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + + https://github.com/apache/flink-connector-opensearch + git@github.com:apache/flink-connector-opensearch.git + + scm:git:https://gitbox.apache.org/repos/asf/flink-connector-opensearch.git + + + + + flink-connector-opensearch + flink-connector-opensearch-e2e-tests + + + + UTF-8 + UTF-8 + + 1.16.0 + 15.0 + + 2.13.4.20221013 + 4.13.2 + 5.8.1 + 3.21.0 + 0.22.0 + 1.17.2 + 2.21.0 + + false + 1.15.0 + + 1.7.36 + 2.17.2 + + + + flink-connector-opensearch-parent + + + + + org.apache.flink + flink-shaded-force-shading + ${flink.shaded.version} + + + + + + + org.slf4j + slf4j-api + provided + + + + + com.google.code.findbugs + jsr305 + provided + + + + + org.junit.jupiter + junit-jupiter + test + + + + org.junit.vintage + junit-vintage-engine + test + + + + org.assertj + assertj-core + test + + + + org.mockito + mockito-core + ${mockito.version} + jar + test + + + + org.testcontainers + junit-jupiter + test + + + + + + org.apache.logging.log4j + log4j-slf4j-impl + test + + + + org.apache.logging.log4j + log4j-api + test + + + + 
org.apache.logging.log4j + log4j-core + test + + + + + org.apache.logging.log4j + log4j-1.2-api + test + + + + org.apache.flink + flink-test-utils-junit + test + + + + + org.apache.flink + flink-architecture-tests-test + test + + + org.apache.flink + flink-architecture-tests-production + test + + + + + + + + + + + + + org.apache.flink + flink-test-utils + ${flink.version} + test + + + log4j + log4j + + + org.slf4j + slf4j-log4j12 + + + + + + org.apache.flink + flink-test-utils-junit + ${flink.version} + test + + + + + + org.apache.flink + flink-architecture-tests-base + ${flink.version} + test + + + + org.apache.flink + flink-architecture-tests-test + ${flink.version} + test + + + + org.apache.flink + flink-architecture-tests-production + ${flink.version} + test + + + + + com.google.code.findbugs + jsr305 + 1.3.9 + + + + commons-codec + commons-codec + 1.15 + + + + org.apache.httpcomponents + httpcore + 4.4.14 + + + + org.apache.httpcomponents + httpclient + 4.5.13 + + + + org.slf4j + slf4j-api + ${slf4j.version} + + + + org.apache.logging.log4j + log4j-slf4j-impl + ${log4j.version} + + + + org.apache.logging.log4j + log4j-api + ${log4j.version} + + + + org.apache.logging.log4j + log4j-core + ${log4j.version} + + + + + org.apache.logging.log4j + log4j-1.2-api + ${log4j.version} + + + + + com.fasterxml.jackson + jackson-bom + pom + import + ${jackson-bom.version} + + + + + org.junit + junit-bom + ${junit5.version} + pom + import + + + + junit + junit + ${junit4.version} + + + + org.assertj + assertj-core + ${assertj.version} + test + + + + + com.esotericsoftware.kryo + kryo + 2.24.0 + + + + + org.objenesis + objenesis + 2.1 + + + + org.testcontainers + testcontainers-bom + ${testcontainers.version} + pom + import + + + + com.tngtech.archunit + archunit + ${archunit.version} + test + + + + com.tngtech.archunit + archunit-junit5 + ${archunit.version} + test + + + + + + + + + + + sql-jars + + + !skipSqlJars + + + + flink-sql-connector-opensearch + + + + + + + + + org.codehaus.mojo + exec-maven-plugin + false + + + org.apache.flink + flink-ci-tools + ${flink.version} + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + + io.github.zentol.japicmp + japicmp-maven-plugin + + + + org.apache.rat + apache-rat-plugin + false + + + org.apache.maven.plugins + maven-checkstyle-plugin + + + com.diffplug.spotless + spotless-maven-plugin + + + org.apache.maven.plugins + maven-compiler-plugin + + + + org.apache.maven.plugins + maven-surefire-plugin + + + + org.apache.maven.plugins + maven-enforcer-plugin + + + + org.apache.maven.plugins + maven-shade-plugin + + + + org.commonjava.maven.plugins + directory-maven-plugin + + + + diff --git a/tools/ci/log4j.properties b/tools/ci/log4j.properties new file mode 100644 index 0000000..7daf1c3 --- /dev/null +++ b/tools/ci/log4j.properties @@ -0,0 +1,43 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +rootLogger.level = INFO +rootLogger.appenderRef.out.ref = ConsoleAppender + +# ----------------------------------------------------------------------------- +# Console (use 'console') +# ----------------------------------------------------------------------------- + +appender.console.name = ConsoleAppender +appender.console.type = CONSOLE +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{HH:mm:ss,SSS} [%20t] %-5p %-60c %x - %m%n + +# ----------------------------------------------------------------------------- +# File (use 'file') +# ----------------------------------------------------------------------------- +appender.file.name = FileAppender +appender.file.type = FILE +appender.file.fileName = ${sys:log.dir}/mvn-${sys:mvn.forkNumber:-output}.log +appender.file.layout.type = PatternLayout +appender.file.layout.pattern = %d{HH:mm:ss,SSS} [%20t] %-5p %-60c %x - %m%n +appender.file.createOnDemand = true + +# suppress the irrelevant (wrong) warnings from the netty channel handler +logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline +logger.netty.level = ERROR diff --git a/tools/maven/checkstyle.xml b/tools/maven/checkstyle.xml new file mode 100644 index 0000000..2048fd1 --- /dev/null +++ b/tools/maven/checkstyle.xml @@ -0,0 +1,562 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/maven/suppressions.xml b/tools/maven/suppressions.xml new file mode 100644 index 0000000..4f80ec2 --- /dev/null +++ b/tools/maven/suppressions.xml @@ -0,0 +1,26 @@ + + + + + + + From e3c5e27ec0fb81df7f95b8c76d1ee9285ee127af Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Tue, 15 Nov 2022 09:48:07 -0500 Subject: [PATCH 02/18] Addressing code review comments Signed-off-by: Andriy Redko --- flink-connector-opensearch-e2e-tests/pom.xml | 26 +++++++++---------- flink-connector-opensearch/pom.xml | 16 +----------- flink-sql-connector-opensearch/pom.xml | 2 -- .../src/main/resources/META-INF/NOTICE | 2 +- 4 files changed, 14 insertions(+), 32 deletions(-) diff --git a/flink-connector-opensearch-e2e-tests/pom.xml b/flink-connector-opensearch-e2e-tests/pom.xml index 242b579..af6c4a5 100644 --- a/flink-connector-opensearch-e2e-tests/pom.xml +++ 
b/flink-connector-opensearch-e2e-tests/pom.xml @@ -27,7 +27,6 @@ under the License. org.apache.flink flink-connector-opensearch-parent 1.0.0-SNAPSHOT - .. flink-connector-opensearch-e2e-tests @@ -82,21 +81,20 @@ under the License. copy + + + + org.apache.flink + flink-connector-test-utils + ${flink.version} + flink-connector-test-utils.jar + jar + ${project.build.directory}/dependencies + + + - - - - org.apache.flink - flink-connector-test-utils - ${flink.version} - flink-connector-test-utils.jar - jar - ${project.build.directory}/dependencies - - - - diff --git a/flink-connector-opensearch/pom.xml b/flink-connector-opensearch/pom.xml index 662365b..392b112 100644 --- a/flink-connector-opensearch/pom.xml +++ b/flink-connector-opensearch/pom.xml @@ -48,6 +48,7 @@ under the License. org.apache.flink flink-connector-base ${flink.version} + provided org.apache.flink @@ -167,19 +168,4 @@ under the License. test - - - - - org.apache.maven.plugins - maven-surefire-plugin - - - 1 - - - - - diff --git a/flink-sql-connector-opensearch/pom.xml b/flink-sql-connector-opensearch/pom.xml index 9327977..9f3fd72 100644 --- a/flink-sql-connector-opensearch/pom.xml +++ b/flink-sql-connector-opensearch/pom.xml @@ -27,7 +27,6 @@ under the License. org.apache.flink flink-connector-opensearch-parent 1.0.0-SNAPSHOT - .. flink-sql-connector-opensearch @@ -115,7 +114,6 @@ under the License. - org.apache.commons org.apache.flink.opensearch.shaded.org.apache.commons diff --git a/flink-sql-connector-opensearch/src/main/resources/META-INF/NOTICE b/flink-sql-connector-opensearch/src/main/resources/META-INF/NOTICE index 261cde6..d9fe3de 100644 --- a/flink-sql-connector-opensearch/src/main/resources/META-INF/NOTICE +++ b/flink-sql-connector-opensearch/src/main/resources/META-INF/NOTICE @@ -1,5 +1,5 @@ flink-sql-connector-opensearch -Copyright 2014-2021 The Apache Software Foundation +Copyright 2014-2022 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). From 1d9eb6c0079b54f484e027263bc4e4da95afe6b2 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Tue, 15 Nov 2022 10:18:46 -0500 Subject: [PATCH 03/18] Fix build and dependency convergence issues Signed-off-by: Andriy Redko --- flink-connector-opensearch-e2e-tests/pom.xml | 55 +++++++++++++++++++ .../src/main/resources/META-INF/NOTICE | 3 - pom.xml | 6 ++ 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/flink-connector-opensearch-e2e-tests/pom.xml b/flink-connector-opensearch-e2e-tests/pom.xml index af6c4a5..89e11ff 100644 --- a/flink-connector-opensearch-e2e-tests/pom.xml +++ b/flink-connector-opensearch-e2e-tests/pom.xml @@ -55,6 +55,13 @@ under the License. + + org.apache.maven.plugins + maven-deploy-plugin + + true + + org.apache.maven.plugins maven-shade-plugin @@ -67,6 +74,22 @@ under the License. opensearch-end-to-end-test dependencies + + + com.google.code.findbugs:jsr305 + org.slf4j:slf4j-api + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + @@ -99,4 +122,36 @@ under the License. 
+ + + run-end-to-end-tests + + + + org.apache.maven.plugins + maven-surefire-plugin + + + end-to-end-tests + integration-test + + test + + + + **/*.* + + + 1 + + ${project.basedir} + + + + + + + + + diff --git a/flink-sql-connector-opensearch/src/main/resources/META-INF/NOTICE b/flink-sql-connector-opensearch/src/main/resources/META-INF/NOTICE index d9fe3de..81d8918 100644 --- a/flink-sql-connector-opensearch/src/main/resources/META-INF/NOTICE +++ b/flink-sql-connector-opensearch/src/main/resources/META-INF/NOTICE @@ -7,7 +7,6 @@ The Apache Software Foundation (http://www.apache.org/). This project bundles the following dependencies under the Apache Software License 2.0. (http://www.apache.org/licenses/LICENSE-2.0.txt) - com.carrotsearch:hppc:0.8.1 -- com.fasterxml.jackson.core:jackson-annotations:2.13.4 - com.fasterxml.jackson.core:jackson-core:2.13.4 - com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:2.13.4 - com.fasterxml.jackson.dataformat:jackson-dataformat-smile:2.13.4 @@ -15,7 +14,6 @@ This project bundles the following dependencies under the Apache Software Licens - com.github.spullara.mustache.java:compiler:0.9.10 - commons-codec:commons-codec:1.15 - commons-logging:commons-logging:1.1.3 -- org.apache.commons:commons-compress:1.21 - org.apache.httpcomponents:httpasyncclient:4.1.4 - org.apache.httpcomponents:httpclient:4.5.13 - org.apache.httpcomponents:httpcore:4.4.14 @@ -47,4 +45,3 @@ This project bundles the following dependencies under the Apache Software Licens - org.opensearch.plugin:mapper-extras-client:1.3.0 - org.opensearch.plugin:parent-join-client:1.3.0 - org.opensearch.plugin:rank-eval-client:1.3.0 -- net.java.dev.jna:jna:5.5.0 diff --git a/pom.xml b/pom.xml index 8fb20aa..d2f24a7 100644 --- a/pom.xml +++ b/pom.xml @@ -327,6 +327,12 @@ under the License. 
+ + net.java.dev.jna + jna + 5.8.0 + + com.esotericsoftware.kryo kryo From cc2148f0a2581a022223f857f12d4aa675f9386f Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Tue, 15 Nov 2022 14:56:01 -0500 Subject: [PATCH 04/18] Added Openseach Connector e2e tests Signed-off-by: Andriy Redko --- flink-connector-opensearch-e2e-tests/pom.xml | 35 ++++- .../streaming/tests/ComparableTuple2.java | 46 ++++++ .../tests/OpensearchTestEmitter.java | 51 +++++++ .../streaming/tests/OpensearchDataReader.java | 51 +++++++ .../tests/OpensearchSinkE2ECase.java | 117 +++++++++++++++ .../tests/OpensearchSinkExternalContext.java | 138 +++++++++++++++++ .../OpensearchSinkExternalContextFactory.java | 60 ++++++++ .../streaming/tests/OpensearchTestClient.java | 141 ++++++++++++++++++ .../src/test/resources/log4j2-test.properties | 35 +++++ flink-connector-opensearch/pom.xml | 17 ++- .../sink/OpensearchSinkBuilder.java | 2 +- .../opensearch/sink/OpensearchWriter.java | 15 +- .../sink/OpensearchWriterITCase.java | 74 ++++++++- .../opensearch/test/DockerImageVersions.java | 2 +- pom.xml | 6 + 15 files changed, 782 insertions(+), 8 deletions(-) create mode 100644 flink-connector-opensearch-e2e-tests/src/main/java/org/apache/flink/streaming/tests/ComparableTuple2.java create mode 100644 flink-connector-opensearch-e2e-tests/src/main/java/org/apache/flink/streaming/tests/OpensearchTestEmitter.java create mode 100644 flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchDataReader.java create mode 100644 flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchSinkE2ECase.java create mode 100644 flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchSinkExternalContext.java create mode 100644 flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchSinkExternalContextFactory.java create mode 100644 flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchTestClient.java create mode 100644 flink-connector-opensearch-e2e-tests/src/test/resources/log4j2-test.properties diff --git a/flink-connector-opensearch-e2e-tests/pom.xml b/flink-connector-opensearch-e2e-tests/pom.xml index 89e11ff..97b9797 100644 --- a/flink-connector-opensearch-e2e-tests/pom.xml +++ b/flink-connector-opensearch-e2e-tests/pom.xml @@ -34,6 +34,11 @@ under the License. jar + + org.apache.flink + flink-connector-base + ${flink.version} + org.apache.flink flink-streaming-java @@ -45,12 +50,40 @@ under the License. flink-connector-opensearch ${project.version} + + org.apache.flink + flink-connector-opensearch + ${project.version} + test-jar + + + + * + * + + + org.apache.logging.log4j log4j-api - ${log4j.version} provided + + org.apache.flink + flink-test-utils + test + + + org.apache.flink + flink-connector-test-utils + ${flink.version} + test + + + org.opensearch + opensearch-testcontainers + test + diff --git a/flink-connector-opensearch-e2e-tests/src/main/java/org/apache/flink/streaming/tests/ComparableTuple2.java b/flink-connector-opensearch-e2e-tests/src/main/java/org/apache/flink/streaming/tests/ComparableTuple2.java new file mode 100644 index 0000000..5b0d1c0 --- /dev/null +++ b/flink-connector-opensearch-e2e-tests/src/main/java/org/apache/flink/streaming/tests/ComparableTuple2.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.tests; + +import org.apache.flink.api.java.tuple.Tuple2; + +/** Variation of the {@link Tuple2} which implements {@link Comparable}. */ +public class ComparableTuple2, T1 extends Comparable> + extends Tuple2 implements Comparable> { + private static final long serialVersionUID = 1L; + + public ComparableTuple2(T0 f0, T1 f1) { + super(f0, f1); + } + + @Override + public int compareTo(ComparableTuple2 other) { + int d = this.f0.compareTo(other.f0); + if (d == 0) { + return this.f1.compareTo(other.f1); + } + return d; + } + + /** Creates a new key-value pair. */ + public static , T1 extends Comparable> + ComparableTuple2 of(K key, T1 value) { + return new ComparableTuple2<>(key, value); + } +} diff --git a/flink-connector-opensearch-e2e-tests/src/main/java/org/apache/flink/streaming/tests/OpensearchTestEmitter.java b/flink-connector-opensearch-e2e-tests/src/main/java/org/apache/flink/streaming/tests/OpensearchTestEmitter.java new file mode 100644 index 0000000..be1048a --- /dev/null +++ b/flink-connector-opensearch-e2e-tests/src/main/java/org/apache/flink/streaming/tests/OpensearchTestEmitter.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.tests; + +import org.apache.flink.api.connector.sink2.SinkWriter; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.connector.opensearch.sink.OpensearchEmitter; +import org.apache.flink.connector.opensearch.sink.RequestIndexer; + +import org.opensearch.action.update.UpdateRequest; + +import java.util.HashMap; +import java.util.Map; + +/** Test emitter for performing Opensearch indexing requests. 
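Reviewer note: ComparableTuple2 exists so test records can be sorted deterministically before comparison (see the sort helper in OpensearchSinkE2ECase below). A small usage sketch:

    // Sketch: records sort by key (f0) first, then by value (f1).
    static List<ComparableTuple2<Integer, String>> sortedSample() {
        List<ComparableTuple2<Integer, String>> records =
                new ArrayList<>(
                        Arrays.asList(
                                ComparableTuple2.of(2, "b"),
                                ComparableTuple2.of(1, "z"),
                                ComparableTuple2.of(1, "a")));
        Collections.sort(records); // -> (1,"a"), (1,"z"), (2,"b")
        return records;
    }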
*/ +public class OpensearchTestEmitter implements OpensearchEmitter> { + private static final long serialVersionUID = 1L; + private final String indexName; + + OpensearchTestEmitter(String indexName) { + this.indexName = indexName; + } + + @Override + public void emit( + Tuple2 element, SinkWriter.Context context, RequestIndexer indexer) { + final Map json = new HashMap<>(); + json.put("key", element.f0); + json.put("value", element.f1); + + final UpdateRequest updateRequest = + new UpdateRequest(indexName, String.valueOf(element.f0)).doc(json).upsert(json); + indexer.add(updateRequest); + } +} diff --git a/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchDataReader.java b/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchDataReader.java new file mode 100644 index 0000000..da353d1 --- /dev/null +++ b/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchDataReader.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.tests; + +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.connector.testframe.external.ExternalSystemDataReader; + +import java.time.Duration; +import java.util.List; + +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** Opensearch data reader. */ +public class OpensearchDataReader implements ExternalSystemDataReader> { + private final OpensearchTestClient client; + private final String indexName; + private final int pageLength; + + public OpensearchDataReader(OpensearchTestClient client, String indexName, int pageLength) { + this.client = checkNotNull(client); + this.indexName = checkNotNull(indexName); + this.pageLength = pageLength; + } + + @Override + public List> poll(Duration timeout) { + client.refreshIndex(indexName); + return client.fetchAll(indexName, "key", 0, pageLength, true); + } + + @Override + public void close() throws Exception { + client.close(); + } +} diff --git a/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchSinkE2ECase.java b/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchSinkE2ECase.java new file mode 100644 index 0000000..9103d85 --- /dev/null +++ b/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchSinkE2ECase.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
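Reviewer note: because the emitter above issues an UpdateRequest carrying both a doc and an upsert, the write is idempotent: replaying an element after a restart overwrites the document keyed by f0 instead of duplicating it, which is what lets the e2e suite verify AT_LEAST_ONCE runs by exact comparison. Standalone, the request looks like this sketch ("my-index" and the values are placeholders):

    // Sketch: insert-or-update semantics keyed by the element's f0 field.
    static UpdateRequest upsertSketch() {
        Map<String, Object> json = new HashMap<>();
        json.put("key", 42);
        json.put("value", "hello");
        return new UpdateRequest("my-index", "42").doc(json).upsert(json);
    }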
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.tests; + +import org.apache.flink.connector.opensearch.test.DockerImageVersions; +import org.apache.flink.connector.testframe.container.FlinkContainerTestEnvironment; +import org.apache.flink.connector.testframe.external.DefaultContainerizedExternalSystem; +import org.apache.flink.connector.testframe.external.ExternalSystemDataReader; +import org.apache.flink.connector.testframe.junit.annotations.TestContext; +import org.apache.flink.connector.testframe.junit.annotations.TestEnv; +import org.apache.flink.connector.testframe.junit.annotations.TestExternalSystem; +import org.apache.flink.connector.testframe.junit.annotations.TestSemantics; +import org.apache.flink.connector.testframe.testsuites.SinkTestSuiteBase; +import org.apache.flink.streaming.api.CheckpointingMode; +import org.apache.flink.test.resources.ResourceTestUtils; + +import org.opensearch.testcontainers.OpensearchContainer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.utility.DockerImageName; + +import java.time.Duration; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import static org.apache.flink.connector.testframe.utils.CollectIteratorAssertions.assertThat; +import static org.apache.flink.runtime.testutils.CommonTestUtils.waitUntilCondition; + +/** End to end test for OpensearchSink based on connector testing framework. 
*/ +@SuppressWarnings("unused") +public class OpensearchSinkE2ECase extends SinkTestSuiteBase> { + private static final Logger LOG = LoggerFactory.getLogger(OpensearchSinkE2ECase.class); + private static final int READER_RETRY_ATTEMPTS = 10; + private static final int READER_TIMEOUT = -1; // Not used + + @TestSemantics + CheckpointingMode[] semantics = new CheckpointingMode[] {CheckpointingMode.EXACTLY_ONCE}; + + @TestEnv + protected FlinkContainerTestEnvironment flink = new FlinkContainerTestEnvironment(1, 6); + + public OpensearchSinkE2ECase() throws Exception {} + + @TestExternalSystem + DefaultContainerizedExternalSystem opensearch = + DefaultContainerizedExternalSystem.builder() + .fromContainer( + new OpensearchContainer( + DockerImageName.parse(DockerImageVersions.OPENSEARCH_1)) + .withEnv( + "cluster.routing.allocation.disk.threshold_enabled", + "false") + .withNetworkAliases("opensearch")) + .bindWithFlinkContainer(flink.getFlinkContainers().getJobManager()) + .build(); + + @TestContext + OpensearchSinkExternalContextFactory contextFactory = + new OpensearchSinkExternalContextFactory( + opensearch.getContainer(), + Arrays.asList( + ResourceTestUtils.getResource( + "dependencies/opensearch-end-to-end-test.jar") + .toAbsolutePath() + .toUri() + .toURL(), + ResourceTestUtils.getResource( + "dependencies/flink-connector-test-utils.jar") + .toAbsolutePath() + .toUri() + .toURL())); + + @Override + protected void checkResultWithSemantic( + ExternalSystemDataReader> reader, + List> testData, + CheckpointingMode semantic) + throws Exception { + waitUntilCondition( + () -> { + try { + List> result = + reader.poll(Duration.ofMillis(READER_TIMEOUT)); + assertThat(sort(result).iterator()) + .matchesRecordsFromSource( + Collections.singletonList(sort(testData)), semantic); + return true; + } catch (Throwable t) { + LOG.warn("Polled results not as expected", t); + return false; + } + }, + 5000, + READER_RETRY_ATTEMPTS); + } + + private static > List sort(List list) { + Collections.sort(list); + return list; + } +} diff --git a/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchSinkExternalContext.java b/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchSinkExternalContext.java new file mode 100644 index 0000000..f9537da --- /dev/null +++ b/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchSinkExternalContext.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.streaming.tests; + +import org.apache.flink.api.common.typeinfo.TypeHint; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.connector.sink2.Sink; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.connector.opensearch.sink.OpensearchSinkBuilder; +import org.apache.flink.connector.testframe.external.ExternalSystemDataReader; +import org.apache.flink.connector.testframe.external.sink.DataStreamSinkV2ExternalContext; +import org.apache.flink.connector.testframe.external.sink.TestingSinkSettings; + +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.http.HttpHost; + +import java.net.URL; +import java.util.List; +import java.util.Random; +import java.util.concurrent.ThreadLocalRandom; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static org.apache.flink.util.Preconditions.checkNotNull; + +class OpensearchSinkExternalContext + implements DataStreamSinkV2ExternalContext> { + /** The constant INDEX_NAME_PREFIX. */ + private static final String INDEX_NAME_PREFIX = "os-index"; + + private static final int RANDOM_STRING_MAX_LENGTH = 50; + private static final int NUM_RECORDS_UPPER_BOUND = 500; + private static final int NUM_RECORDS_LOWER_BOUND = 100; + private static final int BULK_BUFFER = 100; + private static final int PAGE_LENGTH = NUM_RECORDS_UPPER_BOUND + 1; + /** The index name. */ + private final String indexName; + + /** The address reachable from Flink (internal to the testing environment). */ + private final String addressInternal; + + /** The connector jar paths. */ + private final List connectorJarPaths; + + /** The client. */ + private final OpensearchTestClient client; + + /** + * Instantiates a new Opensearch sink context base. + * + * @param addressExternal The address to access Opensearch from the host machine (outside of the + * containerized environment). + * @param addressInternal The address to access Opensearch from Flink. When running in a + * containerized environment, should correspond to the network alias that resolves within + * the environment's network together with the exposed port. + * @param connectorJarPaths The connector jar paths. 
+ */ + OpensearchSinkExternalContext( + String addressExternal, String addressInternal, List connectorJarPaths) { + this.addressInternal = checkNotNull(addressInternal); + this.connectorJarPaths = checkNotNull(connectorJarPaths); + this.client = new OpensearchTestClient(addressExternal); + this.indexName = + INDEX_NAME_PREFIX + "-" + ThreadLocalRandom.current().nextLong(Long.MAX_VALUE); + } + + @Override + public Sink> createSink(TestingSinkSettings sinkSettings) + throws UnsupportedOperationException { + client.createIndexIfDoesNotExist(indexName, 1, 0); + return new OpensearchSinkBuilder>() + .setHosts(HttpHost.create(addressInternal)) + .setEmitter(new OpensearchTestEmitter(indexName)) + .setBulkFlushMaxActions(BULK_BUFFER) + .build(); + } + + @Override + public ExternalSystemDataReader> createSinkDataReader( + TestingSinkSettings sinkSettings) { + return new OpensearchDataReader(client, indexName, PAGE_LENGTH); + } + + @Override + public List> generateTestData( + TestingSinkSettings sinkSettings, long seed) { + Random random = new Random(seed); + int recordNum = + random.nextInt(NUM_RECORDS_UPPER_BOUND - NUM_RECORDS_LOWER_BOUND) + + NUM_RECORDS_LOWER_BOUND; + + return IntStream.range(0, recordNum) + .boxed() + .map( + i -> { + int valueLength = random.nextInt(RANDOM_STRING_MAX_LENGTH) + 1; + String value = RandomStringUtils.random(valueLength, true, true); + return ComparableTuple2.of(i, value); + }) + .collect(Collectors.toList()); + } + + @Override + public void close() throws Exception { + client.deleteIndex(indexName); + } + + @Override + public List getConnectorJarPaths() { + return connectorJarPaths; + } + + @Override + public TypeInformation> getProducedType() { + return TypeInformation.of(new TypeHint>() {}); + } + + @Override + public String toString() { + return "Opensearch sink context."; + } +} diff --git a/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchSinkExternalContextFactory.java b/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchSinkExternalContextFactory.java new file mode 100644 index 0000000..9e5efc0 --- /dev/null +++ b/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchSinkExternalContextFactory.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.tests; + +import org.apache.flink.connector.testframe.external.ExternalContextFactory; + +import org.opensearch.testcontainers.OpensearchContainer; + +import java.net.URL; +import java.util.List; + +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** Opensearch sink external context factory. 
*/ +class OpensearchSinkExternalContextFactory + implements ExternalContextFactory { + /** The OpensearchContainer container. */ + private final OpensearchContainer opensearchContainer; + + /** The connector jars. */ + private final List connectorJars; + + /** + * Instantiates a new Opensearch sink external context factory. + * + * @param opensearchContainer The Opensearch container. + * @param connectorJars The connector jars. + */ + OpensearchSinkExternalContextFactory( + OpensearchContainer opensearchContainer, List connectorJars) { + this.opensearchContainer = checkNotNull(opensearchContainer); + this.connectorJars = checkNotNull(connectorJars); + } + + @Override + public OpensearchSinkExternalContext createExternalContext(String testName) { + return new OpensearchSinkExternalContext( + opensearchContainer.getHttpHostAddress(), + opensearchContainer.getNetworkAliases().get(0) + + ":" + + opensearchContainer.getExposedPorts().get(0), + connectorJars); + } +} diff --git a/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchTestClient.java b/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchTestClient.java new file mode 100644 index 0000000..ee04a36 --- /dev/null +++ b/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchTestClient.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.tests; + +import org.apache.flink.api.java.tuple.Tuple2; + +import org.apache.http.HttpHost; +import org.opensearch.OpenSearchException; +import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; +import org.opensearch.action.admin.indices.refresh.RefreshRequest; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestClient; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.client.indices.CreateIndexRequest; +import org.opensearch.client.indices.GetIndexRequest; +import org.opensearch.common.settings.Settings; +import org.opensearch.rest.RestStatus; +import org.opensearch.search.SearchHit; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.sort.SortOrder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** The type Opensearch test client. 
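Reviewer note: the factory above deliberately hands out two different addresses: the externally mapped address is used by the test client running on the host, while the network alias plus the container's first exposed port is what the Flink containers resolve inside the Docker network. With the usual Testcontainers API the two differ roughly as follows (sketch; the mapped port is assigned at runtime, and 9200 is the container's default HTTP port):

    // Sketch: external vs. in-network address of the Opensearch container.
    String external = opensearchContainer.getHttpHostAddress();        // e.g. "http://localhost:49153"
    String internal =
            opensearchContainer.getNetworkAliases().get(0)             // "opensearch"
                    + ":"
                    + opensearchContainer.getExposedPorts().get(0);    // ":9200"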
*/ +public class OpensearchTestClient implements AutoCloseable { + private static final Logger LOG = LoggerFactory.getLogger(OpensearchTestClient.class); + + private final RestHighLevelClient restClient; + + /** + * Instantiates a new Opensearch client. + * + * @param address The address to access Opensearch from the host machine (outside of the + * containerized environment). + */ + public OpensearchTestClient(String address) { + checkNotNull(address); + this.restClient = new RestHighLevelClient(RestClient.builder(HttpHost.create(address))); + } + + public void deleteIndex(String indexName) { + DeleteIndexRequest request = new DeleteIndexRequest(indexName); + try { + restClient.indices().delete(request, RequestOptions.DEFAULT); + } catch (IOException e) { + LOG.error("Cannot delete index {}", indexName, e); + } + // This is needed to avoid race conditions between tests that reuse the same index + refreshIndex(indexName); + } + + public void refreshIndex(String indexName) { + RefreshRequest refresh = new RefreshRequest(indexName); + try { + restClient.indices().refresh(refresh, RequestOptions.DEFAULT); + } catch (IOException e) { + LOG.error("Cannot refresh index {}", indexName, e); + } catch (OpenSearchException e) { + if (e.status() == RestStatus.NOT_FOUND) { + LOG.info("Index {} not found", indexName); + } + } + } + + public void createIndexIfDoesNotExist(String indexName, int shards, int replicas) { + GetIndexRequest request = new GetIndexRequest(indexName); + CreateIndexRequest createIndexRequest = new CreateIndexRequest(indexName); + createIndexRequest.settings( + Settings.builder() + .put("index.number_of_shards", shards) + .put("index.number_of_replicas", replicas)); + try { + boolean exists = restClient.indices().exists(request, RequestOptions.DEFAULT); + if (!exists) { + restClient.indices().create(createIndexRequest, RequestOptions.DEFAULT); + } else { + LOG.info("Index {} already exists", indexName); + } + } catch (IOException e) { + LOG.error("Cannot create index {}", indexName, e); + } + } + + @Override + public void close() throws Exception { + restClient.close(); + } + + public List<ComparableTuple2<Integer, String>> fetchAll( + String indexName, String sortField, int from, int pageLength, boolean trackTotalHits) { + try { + SearchResponse response = + restClient.search( + new SearchRequest(indexName) + .source( + new SearchSourceBuilder() + .sort(sortField, SortOrder.ASC) + .from(from) + .size(pageLength) + .trackTotalHits(trackTotalHits)), + RequestOptions.DEFAULT); + SearchHit[] searchHits = response.getHits().getHits(); + return Arrays.stream(searchHits) + .map( + searchHit -> + ComparableTuple2.of( + Integer.valueOf(searchHit.getId()), + searchHit.getSourceAsMap().get("value").toString())) + .collect(Collectors.toList()); + } catch (IOException e) { + LOG.error("Fetching records failed", e); + return Collections.emptyList(); + } + } +} diff --git a/flink-connector-opensearch-e2e-tests/src/test/resources/log4j2-test.properties b/flink-connector-opensearch-e2e-tests/src/test/resources/log4j2-test.properties new file mode 100644 index 0000000..e48d6c0 --- /dev/null +++ b/flink-connector-opensearch-e2e-tests/src/test/resources/log4j2-test.properties @@ -0,0 +1,35 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ +# Set root logger level to OFF to not flood build logs +# set manually to INFO for debugging purposes +rootLogger.level=OFF +rootLogger.appenderRef.test.ref=TestLogger +appender.testlogger.name=TestLogger +appender.testlogger.type=CONSOLE +appender.testlogger.target=SYSTEM_ERR +appender.testlogger.layout.type=PatternLayout +appender.testlogger.layout.pattern=DOCKER> %m%n +# It is recommended to uncomment these lines when enabling the logger. The below package used +# by testcontainers is quite verbose +logger.yarn.name=org.testcontainers.shaded.com.github.dockerjava.core +logger.yarn.level=WARN +logger.yarn.appenderRef.console.ref=TestLogger +logger.testutils.name=org.apache.flink.runtime.testutils.CommonTestUtils +logger.testutils.level=WARN +logger.testutils.appenderRef.console.ref=TestLogger + diff --git a/flink-connector-opensearch/pom.xml b/flink-connector-opensearch/pom.xml index 392b112..072d6c5 100644 --- a/flink-connector-opensearch/pom.xml +++ b/flink-connector-opensearch/pom.xml @@ -164,8 +164,23 @@ under the License. org.opensearch opensearch-testcontainers - 1.0.0 test + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilder.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilder.java index 04ffbf7..0a34ab0 100644 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilder.java +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilder.java @@ -74,7 +74,7 @@ public class OpensearchSinkBuilder { private FlushBackoffType bulkFlushBackoffType = FlushBackoffType.NONE; private int bulkFlushBackoffRetries = -1; private long bulkFlushBackOffDelay = -1; - private DeliveryGuarantee deliveryGuarantee = DeliveryGuarantee.NONE; + private DeliveryGuarantee deliveryGuarantee = DeliveryGuarantee.AT_LEAST_ONCE; private List hosts; protected OpensearchEmitter emitter; private String username; diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchWriter.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchWriter.java index 17d1d42..a1bdded 100644 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchWriter.java +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchWriter.java @@ -108,7 +108,7 @@ class OpensearchWriter implements SinkWriter { RestClient.builder(hosts.toArray(new HttpHost[0])), networkClientConfig)); this.bulkProcessor = createBulkProcessor(bulkProcessorBuilderFactory, bulkProcessorConfig); - this.requestIndexer = new DefaultRequestIndexer(); 
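+ // Hand the writer's numRecordsSend counter to the indexer so that every Index, Update
+ // and Delete request added to the BulkProcessor is counted as an outgoing record.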
+ this.requestIndexer = new DefaultRequestIndexer(metricGroup.getNumRecordsSendCounter()); checkNotNull(metricGroup); metricGroup.setCurrentSendTimeGauge(() -> ackTime - lastSendTime); this.numBytesOutCounter = metricGroup.getIOMetricGroup().getNumBytesOutCounter(); @@ -129,9 +129,9 @@ public void write(IN element, Context context) throws IOException, InterruptedEx } @Override - public void flush(boolean flush) throws IOException, InterruptedException { + public void flush(boolean endOfInput) throws IOException, InterruptedException { checkpointInProgress = true; - while (pendingActions != 0 && (flushOnCheckpoint || flush)) { + while (pendingActions != 0 && (flushOnCheckpoint || endOfInput)) { bulkProcessor.flush(); LOG.info("Waiting for the response of {} pending actions.", pendingActions); mailboxExecutor.yield(); @@ -321,9 +321,16 @@ private boolean isClosed() { private class DefaultRequestIndexer implements RequestIndexer { + private final Counter numRecordsSendCounter; + + public DefaultRequestIndexer(Counter numRecordsSendCounter) { + this.numRecordsSendCounter = checkNotNull(numRecordsSendCounter); + } + @Override public void add(DeleteRequest... deleteRequests) { for (final DeleteRequest deleteRequest : deleteRequests) { + numRecordsSendCounter.inc(); pendingActions++; bulkProcessor.add(deleteRequest); } @@ -332,6 +339,7 @@ public void add(DeleteRequest... deleteRequests) { @Override public void add(IndexRequest... indexRequests) { for (final IndexRequest indexRequest : indexRequests) { + numRecordsSendCounter.inc(); pendingActions++; bulkProcessor.add(indexRequest); } @@ -340,6 +348,7 @@ public void add(IndexRequest... indexRequests) { @Override public void add(UpdateRequest... updateRequests) { for (final UpdateRequest updateRequest : updateRequests) { + numRecordsSendCounter.inc(); pendingActions++; bulkProcessor.add(updateRequest); } diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchWriterITCase.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchWriterITCase.java index ab8c06d..16ebf84 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchWriterITCase.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchWriterITCase.java @@ -18,6 +18,7 @@ package org.apache.flink.connector.opensearch.sink; import org.apache.flink.api.common.operators.MailboxExecutor; +import org.apache.flink.api.connector.sink2.SinkWriter; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.connector.opensearch.OpensearchUtil; import org.apache.flink.connector.opensearch.test.DockerImageVersions; @@ -26,6 +27,7 @@ import org.apache.flink.metrics.groups.OperatorIOMetricGroup; import org.apache.flink.metrics.groups.SinkWriterMetricGroup; import org.apache.flink.metrics.testutils.MetricListener; +import org.apache.flink.runtime.metrics.MetricNames; import org.apache.flink.runtime.metrics.groups.InternalSinkWriterMetricGroup; import org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups; import org.apache.flink.util.FlinkRuntimeException; @@ -42,6 +44,9 @@ import org.opensearch.action.bulk.BulkProcessor; import org.opensearch.action.bulk.BulkRequest; import org.opensearch.action.bulk.BulkResponse; +import org.opensearch.action.delete.DeleteRequest; +import org.opensearch.action.index.IndexRequest; +import org.opensearch.action.update.UpdateRequest; import 
org.opensearch.client.RequestOptions; import org.opensearch.client.RestHighLevelClient; import org.opensearch.common.unit.ByteSizeUnit; @@ -55,6 +60,8 @@ import java.io.IOException; import java.util.Collections; +import java.util.HashMap; +import java.util.Map; import java.util.Optional; import static org.apache.flink.connector.opensearch.sink.OpensearchTestClient.buildMessage; @@ -197,6 +204,30 @@ void testIncrementByteOutMetric() throws Exception { } } + @Test + void testIncrementRecordsSendMetric() throws Exception { + final String index = "test-inc-records-send"; + final int flushAfterNActions = 2; + final BulkProcessorConfig bulkProcessorConfig = + new BulkProcessorConfig(flushAfterNActions, -1, -1, FlushBackoffType.NONE, 0, 0); + + try (final OpensearchWriter> writer = + createWriter(index, false, bulkProcessorConfig)) { + final Optional recordsSend = + metricListener.getCounter(MetricNames.NUM_RECORDS_SEND); + writer.write(Tuple2.of(1, buildMessage(1)), null); + // Update existing index + writer.write(Tuple2.of(1, "u" + buildMessage(2)), null); + // Delete index + writer.write(Tuple2.of(1, "d" + buildMessage(3)), null); + + writer.blockingFlushAllActions(); + + assertThat(recordsSend).isPresent(); + assertThat(recordsSend.get().getCount()).isEqualTo(3L); + } + } + @Test void testCurrentSendTime() throws Exception { final String index = "test-current-send-time"; @@ -234,7 +265,7 @@ private OpensearchWriter> createWriter( SinkWriterMetricGroup metricGroup) { return new OpensearchWriter>( Collections.singletonList(HttpHost.create(OS_CONTAINER.getHttpHostAddress())), - TestEmitter.jsonEmitter(index, context.getDataFieldName()), + new UpdatingEmitter(index, context.getDataFieldName()), flushOnCheckpoint, bulkProcessorConfig, new TestBulkProcessorBuilderFactory(), @@ -310,6 +341,47 @@ public void accept( } } + private static class UpdatingEmitter implements OpensearchEmitter> { + private static final long serialVersionUID = 1L; + + private final String dataFieldName; + private final String index; + + UpdatingEmitter(String index, String dataFieldName) { + this.index = index; + this.dataFieldName = dataFieldName; + } + + @Override + public void emit( + Tuple2 element, + SinkWriter.Context context, + RequestIndexer indexer) { + + Map document = new HashMap<>(); + document.put(dataFieldName, element.f1); + + final char action = element.f1.charAt(0); + final String id = element.f0.toString(); + switch (action) { + case 'd': + { + indexer.add(new DeleteRequest(index).id(id)); + break; + } + case 'u': + { + indexer.add(new UpdateRequest().index(index).id(id).doc(document)); + break; + } + default: + { + indexer.add(new IndexRequest(index).id(id).source(document)); + } + } + } + } + private static class TestMailbox implements MailboxExecutor { @Override diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/test/DockerImageVersions.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/test/DockerImageVersions.java index 2087367..00b4b13 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/test/DockerImageVersions.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/test/DockerImageVersions.java @@ -22,5 +22,5 @@ * integration tests. 
*/ public class DockerImageVersions { - public static final String OPENSEARCH_1 = "opensearchproject/opensearch:1.2.4"; + public static final String OPENSEARCH_1 = "opensearchproject/opensearch:1.3.6"; } diff --git a/pom.xml b/pom.xml index d2f24a7..7f70545 100644 --- a/pom.xml +++ b/pom.xml @@ -368,6 +368,12 @@ under the License. test + + org.opensearch + opensearch-testcontainers + 1.0.0 + test + From 4011fdfee9bd0698f4b8071fb77dbe5192a0b363 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Thu, 17 Nov 2022 08:57:54 -0500 Subject: [PATCH 05/18] Addressing code review comments Signed-off-by: Andriy Redko --- flink-connector-opensearch-e2e-tests/pom.xml | 39 +++++++------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/flink-connector-opensearch-e2e-tests/pom.xml b/flink-connector-opensearch-e2e-tests/pom.xml index 97b9797..2510ccc 100644 --- a/flink-connector-opensearch-e2e-tests/pom.xml +++ b/flink-connector-opensearch-e2e-tests/pom.xml @@ -55,13 +55,6 @@ under the License. flink-connector-opensearch ${project.version} test-jar - - - - * - * - - org.apache.logging.log4j @@ -107,22 +100,6 @@ under the License. opensearch-end-to-end-test dependencies - - - com.google.code.findbugs:jsr305 - org.slf4j:slf4j-api - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - @@ -152,6 +129,20 @@ under the License. + + org.apache.maven.plugins + maven-surefire-plugin + + + default-test + none + + + integration-tests + none + + + @@ -174,8 +165,6 @@ under the License. **/*.* - - 1 ${project.basedir} From 41cc7bb681aede94c543c28cc5629802874aac70 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Thu, 17 Nov 2022 10:16:11 -0500 Subject: [PATCH 06/18] Added packaging test to flink-sql-connector-opensearch Signed-off-by: Andriy Redko --- .../connector/opensearch/PackagingITCase.java | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 flink-sql-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/PackagingITCase.java diff --git a/flink-sql-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/PackagingITCase.java b/flink-sql-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/PackagingITCase.java new file mode 100644 index 0000000..2a2d110 --- /dev/null +++ b/flink-sql-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/PackagingITCase.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.connector.opensearch; + +import org.apache.flink.packaging.PackagingTestUtils; +import org.apache.flink.table.factories.Factory; +import org.apache.flink.test.resources.ResourceTestUtils; + +import org.junit.jupiter.api.Test; + +import java.nio.file.Path; +import java.util.Arrays; + +class PackagingITCase { + + @Test + void testPackaging() throws Exception { + final Path jar = + ResourceTestUtils.getResource(".*/flink-sql-connector-opensearch-[^/]*\\.jar"); + + PackagingTestUtils.assertJarContainsOnlyFilesMatching( + jar, + Arrays.asList( + "META-INF/", + "org/apache/flink/connector/base/", + "org/apache/flink/connector/opensearch/", + "org/apache/flink/opensearch/", + "org/apache/flink/streaming/connectors/opensearch/")); + PackagingTestUtils.assertJarContainsServiceEntry(jar, Factory.class); + } +} From b86f080942109416410e5ca7de368ea78701a5f5 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Tue, 22 Nov 2022 09:59:13 -0500 Subject: [PATCH 07/18] Addressing code review comments Signed-off-by: Andriy Redko --- .../tests/OpensearchSinkE2ECase.java | 3 +- .../opensearch/sink/NetworkClientConfig.java | 6 +- .../opensearch/sink/OpensearchSink.java | 4 ++ .../opensearch/sink/OpensearchWriter.java | 3 +- .../table/IndexGeneratorFactory.java | 45 ++++++++++-- .../table/OpensearchConfiguration.java | 5 -- .../table/OpensearchConnectorOptions.java | 6 -- .../table/OpensearchDynamicSink.java | 24 ++++++- .../table/OpensearchDynamicSinkFactory.java | 71 +++++++++++++------ .../connectors/opensearch/OpensearchSink.java | 16 ++--- .../sink/OpensearchSinkBuilderTest.java | 7 ++ .../opensearch/table/IndexGeneratorTest.java | 69 ++++++++++++++++++ .../OpensearchDynamicSinkFactoryTest.java | 23 ++++++ .../table/OpensearchDynamicSinkITCase.java | 62 ++++++++++++++++ .../opensearch/OpensearchSinkITCase.java | 20 +----- pom.xml | 18 +---- 16 files changed, 293 insertions(+), 89 deletions(-) diff --git a/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchSinkE2ECase.java b/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchSinkE2ECase.java index 9103d85..6281e08 100644 --- a/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchSinkE2ECase.java +++ b/flink-connector-opensearch-e2e-tests/src/test/java/org/apache/flink/streaming/tests/OpensearchSinkE2ECase.java @@ -52,8 +52,7 @@ public class OpensearchSinkE2ECase extends SinkTestSuiteBase isAllowInsecure() { + return Optional.ofNullable(allowInsecure); } } diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSink.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSink.java index fd0733a..d80c00a 100644 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSink.java +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSink.java @@ -88,4 +88,8 @@ public SinkWriter createWriter(InitContext context) throws IOException { context.metricGroup(), context.getMailboxExecutor()); } + + DeliveryGuarantee getDeliveryGuarantee() { + return deliveryGuarantee; + } } diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchWriter.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchWriter.java index a1bdded..0a88db3 100644 --- 
a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchWriter.java +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchWriter.java @@ -177,8 +177,7 @@ private static RestClientBuilder configureRestClientBuilder( httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider); } - if (networkClientConfig.isAllowInsecure() != null - && networkClientConfig.isAllowInsecure()) { + if (networkClientConfig.isAllowInsecure().orElse(false)) { try { httpClientBuilder.setSSLContext( SSLContexts.custom() diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/IndexGeneratorFactory.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/IndexGeneratorFactory.java index e87c41f..619599a 100644 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/IndexGeneratorFactory.java +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/IndexGeneratorFactory.java @@ -30,7 +30,9 @@ import java.io.Serializable; import java.time.LocalDate; +import java.time.LocalDateTime; import java.time.LocalTime; +import java.time.ZoneId; import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; import java.util.ArrayList; @@ -63,30 +65,57 @@ final class IndexGeneratorFactory { private IndexGeneratorFactory() {} public static IndexGenerator createIndexGenerator( - String index, List fieldNames, List dataTypes) { + String index, + List fieldNames, + List dataTypes, + ZoneId localTimeZoneId) { final IndexHelper indexHelper = new IndexHelper(); if (indexHelper.checkIsDynamicIndex(index)) { return createRuntimeIndexGenerator( index, fieldNames.toArray(new String[0]), dataTypes.toArray(new DataType[0]), - indexHelper); + indexHelper, + localTimeZoneId); } else { return new StaticIndexGenerator(index); } } + public static IndexGenerator createIndexGenerator( + String index, List fieldNames, List dataTypes) { + return createIndexGenerator(index, fieldNames, dataTypes, ZoneId.systemDefault()); + } + interface DynamicFormatter extends Serializable { String format(@Nonnull Object fieldValue, DateTimeFormatter formatter); } private static IndexGenerator createRuntimeIndexGenerator( - String index, String[] fieldNames, DataType[] fieldTypes, IndexHelper indexHelper) { + String index, + String[] fieldNames, + DataType[] fieldTypes, + IndexHelper indexHelper, + ZoneId localTimeZoneId) { final String dynamicIndexPatternStr = indexHelper.extractDynamicIndexPatternStr(index); final String indexPrefix = index.substring(0, index.indexOf(dynamicIndexPatternStr)); final String indexSuffix = index.substring(indexPrefix.length() + dynamicIndexPatternStr.length()); + if (indexHelper.checkIsDynamicIndexWithSystemTimeFormat(index)) { + final String dateTimeFormat = + indexHelper.extractDateFormat( + index, LogicalTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE); + return new AbstractTimeIndexGenerator(index, dateTimeFormat) { + @Override + public String generate(RowData row) { + return indexPrefix + .concat(LocalDateTime.now(localTimeZoneId).format(dateTimeFormatter)) + .concat(indexSuffix); + } + }; + } + final boolean isDynamicIndexWithFormat = indexHelper.checkIsDynamicIndexWithFormat(index); final int indexFieldPos = indexHelper.extractIndexFieldPos(index, fieldNames, isDynamicIndexWithFormat); @@ -172,10 +201,13 @@ private static DynamicFormatter createFormatFunction( * Helper class for {@link 
IndexGeneratorFactory}, this helper can use to validate index field * type ans parse index format from pattern. */ - private static class IndexHelper { + static class IndexHelper { private static final Pattern dynamicIndexPattern = Pattern.compile("\\{[^\\{\\}]+\\}?"); private static final Pattern dynamicIndexTimeExtractPattern = Pattern.compile(".*\\{.+\\|.*\\}.*"); + private static final Pattern dynamicIndexSystemTimeExtractPattern = + Pattern.compile( + ".*\\{\\s*(now\\(\\s*\\)|NOW\\(\\s*\\)|current_timestamp|CURRENT_TIMESTAMP)\\s*\\|.*\\}.*"); private static final List supportedTypes = new ArrayList<>(); private static final Map defaultFormats = new HashMap<>(); @@ -247,6 +279,11 @@ String extractDynamicIndexPatternStr(String index) { return index.substring(start, end + 1); } + /** Check generate dynamic index is from system time or not. */ + boolean checkIsDynamicIndexWithSystemTimeFormat(String index) { + return dynamicIndexSystemTimeExtractPattern.matcher(index).matches(); + } + /** Extract index field position in a fieldNames, return the field position. */ int extractIndexFieldPos( String index, String[] fieldNames, boolean isDynamicIndexWithFormat) { diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchConfiguration.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchConfiguration.java index 8bf52d1..a6f6dc6 100644 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchConfiguration.java +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchConfiguration.java @@ -43,7 +43,6 @@ import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.CONNECTION_REQUEST_TIMEOUT; import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.CONNECTION_TIMEOUT; import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.DELIVERY_GUARANTEE_OPTION; -import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.FLUSH_ON_CHECKPOINT_OPTION; import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.HOSTS_OPTION; import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.INDEX_OPTION; import static org.apache.flink.connector.opensearch.table.OpensearchConnectorOptions.KEY_DELIMITER_OPTION; @@ -98,10 +97,6 @@ public Optional getBulkFlushBackoffDelay() { return config.getOptional(BULK_FLUSH_BACKOFF_DELAY_OPTION).map(Duration::toMillis); } - public boolean isDisableFlushOnCheckpoint() { - return !config.get(FLUSH_ON_CHECKPOINT_OPTION); - } - public String getIndex() { return config.get(INDEX_OPTION); } diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchConnectorOptions.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchConnectorOptions.java index 5fca777..dcd87cb 100644 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchConnectorOptions.java +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchConnectorOptions.java @@ -105,12 +105,6 @@ public class OpensearchConnectorOptions { .noDefaultValue() .withDescription("Delay between each backoff attempt."); - public static final ConfigOption FLUSH_ON_CHECKPOINT_OPTION = - ConfigOptions.key("sink.flush-on-checkpoint") - 
.booleanType() - .defaultValue(true) - .withDescription("Disables flushing on checkpoint"); - public static final ConfigOption CONNECTION_PATH_PREFIX_OPTION = ConfigOptions.key("connection.path-prefix") .stringType() diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSink.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSink.java index ec1ec28..9550ac4 100644 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSink.java +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSink.java @@ -23,6 +23,7 @@ import org.apache.flink.connector.opensearch.sink.FlushBackoffType; import org.apache.flink.connector.opensearch.sink.OpensearchSink; import org.apache.flink.connector.opensearch.sink.OpensearchSinkBuilder; +import org.apache.flink.table.api.ValidationException; import org.apache.flink.table.connector.ChangelogMode; import org.apache.flink.table.connector.format.EncodingFormat; import org.apache.flink.table.connector.sink.DynamicTableSink; @@ -35,6 +36,7 @@ import org.apache.http.HttpHost; import org.opensearch.common.xcontent.XContentType; +import java.time.ZoneId; import java.util.List; import java.util.Objects; import java.util.function.Function; @@ -52,9 +54,11 @@ class OpensearchDynamicSink implements DynamicTableSink { final DataType physicalRowDataType; final List primaryKeyLogicalTypesWithIndex; final OpensearchConfiguration config; + final ZoneId localTimeZoneId; final String summaryString; final OpensearchSinkBuilderSupplier builderSupplier; + final boolean isDynamicIndexWithSystemTime; OpensearchDynamicSink( EncodingFormat> format, @@ -62,13 +66,21 @@ class OpensearchDynamicSink implements DynamicTableSink { List primaryKeyLogicalTypesWithIndex, DataType physicalRowDataType, String summaryString, - OpensearchSinkBuilderSupplier builderSupplier) { + OpensearchSinkBuilderSupplier builderSupplier, + ZoneId localTimeZoneId) { this.format = checkNotNull(format); this.physicalRowDataType = checkNotNull(physicalRowDataType); this.primaryKeyLogicalTypesWithIndex = checkNotNull(primaryKeyLogicalTypesWithIndex); this.config = checkNotNull(config); this.summaryString = checkNotNull(summaryString); this.builderSupplier = checkNotNull(builderSupplier); + this.localTimeZoneId = localTimeZoneId; + this.isDynamicIndexWithSystemTime = isDynamicIndexWithSystemTime(); + } + + public boolean isDynamicIndexWithSystemTime() { + IndexGeneratorFactory.IndexHelper indexHelper = new IndexGeneratorFactory.IndexHelper(); + return indexHelper.checkIsDynamicIndexWithSystemTimeFormat(config.getIndex()); } Function createKeyExtractor() { @@ -80,7 +92,8 @@ IndexGenerator createIndexGenerator() { return IndexGeneratorFactory.createIndexGenerator( config.getIndex(), DataType.getFieldNames(physicalRowDataType), - DataType.getFieldDataTypes(physicalRowDataType)); + DataType.getFieldDataTypes(physicalRowDataType), + localTimeZoneId); } @Override @@ -91,6 +104,10 @@ public ChangelogMode getChangelogMode(ChangelogMode requestedMode) { builder.addContainedKind(kind); } } + if (isDynamicIndexWithSystemTime && !requestedMode.containsOnly(RowKind.INSERT)) { + throw new ValidationException( + "Dynamic indexing based on system time only works on append only stream."); + } return builder.build(); } @@ -162,7 +179,8 @@ public DynamicTableSink copy() { primaryKeyLogicalTypesWithIndex, 
physicalRowDataType, summaryString, - builderSupplier); + builderSupplier, + localTimeZoneId); } @Override diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactory.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactory.java index d02dce9..68f30de 100644 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactory.java +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactory.java @@ -21,16 +21,16 @@ import org.apache.flink.annotation.Internal; import org.apache.flink.api.common.serialization.SerializationSchema; import org.apache.flink.configuration.ConfigOption; -import org.apache.flink.configuration.Configuration; +import org.apache.flink.configuration.ReadableConfig; import org.apache.flink.connector.opensearch.sink.OpensearchSinkBuilder; import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.api.config.TableConfigOptions; import org.apache.flink.table.catalog.Column; import org.apache.flink.table.catalog.ResolvedSchema; import org.apache.flink.table.connector.Projection; import org.apache.flink.table.connector.format.EncodingFormat; import org.apache.flink.table.connector.sink.DynamicTableSink; import org.apache.flink.table.data.RowData; -import org.apache.flink.table.factories.DynamicTableFactory; import org.apache.flink.table.factories.DynamicTableSinkFactory; import org.apache.flink.table.factories.FactoryUtil; import org.apache.flink.table.factories.SerializationFormatFactory; @@ -38,6 +38,7 @@ import org.apache.flink.table.types.logical.LogicalType; import org.apache.flink.util.StringUtils; +import java.time.ZoneId; import java.util.Arrays; import java.util.List; import java.util.Optional; @@ -75,17 +76,21 @@ public class OpensearchDynamicSinkFactory implements DynamicTableSinkFactory { private final OpensearchSinkBuilderSupplier sinkBuilderSupplier; public OpensearchDynamicSinkFactory() { - this.sinkBuilderSupplier = OpensearchSinkBuilder::new; + this.sinkBuilderSupplier = OpensearchSinkBuilder::new; } @Override public DynamicTableSink createDynamicTableSink(Context context) { List primaryKeyLogicalTypesWithIndex = getPrimaryKeyLogicalTypesWithIndex(context); + + final FactoryUtil.TableFactoryHelper helper = + FactoryUtil.createTableFactoryHelper(this, context); EncodingFormat> format = - getValidatedEncodingFormat(this, context); + helper.discoverEncodingFormat(SerializationFormatFactory.class, FORMAT_OPTION); - OpensearchConfiguration config = getConfiguration(context); + OpensearchConfiguration config = getConfiguration(helper); + helper.validate(); validateConfiguration(config); return new OpensearchDynamicSink( @@ -94,15 +99,25 @@ public DynamicTableSink createDynamicTableSink(Context context) { primaryKeyLogicalTypesWithIndex, context.getPhysicalRowDataType(), capitalize(FACTORY_IDENTIFIER), - sinkBuilderSupplier); + sinkBuilderSupplier, + getLocalTimeZoneId(context.getConfiguration())); } - OpensearchConfiguration getConfiguration(Context context) { - return new OpensearchConfiguration( - Configuration.fromMap(context.getCatalogTable().getOptions())); + private static ZoneId getLocalTimeZoneId(ReadableConfig readableConfig) { + final String zone = readableConfig.get(TableConfigOptions.LOCAL_TIME_ZONE); + final ZoneId zoneId = + TableConfigOptions.LOCAL_TIME_ZONE.defaultValue().equals(zone) 
+ ? ZoneId.systemDefault() + : ZoneId.of(zone); + + return zoneId; + } + + private static OpensearchConfiguration getConfiguration(FactoryUtil.TableFactoryHelper helper) { + return new OpensearchConfiguration(helper.getOptions()); } - void validateConfiguration(OpensearchConfiguration config) { + private static void validateConfiguration(OpensearchConfiguration config) { config.getHosts(); // validate hosts validate( config.getIndex().length() >= 1, @@ -145,23 +160,13 @@ void validateConfiguration(OpensearchConfiguration config) { } } - static void validate(boolean condition, Supplier message) { + private static void validate(boolean condition, Supplier message) { if (!condition) { throw new ValidationException(message.get()); } } - EncodingFormat> getValidatedEncodingFormat( - DynamicTableFactory factory, DynamicTableFactory.Context context) { - final FactoryUtil.TableFactoryHelper helper = - FactoryUtil.createTableFactoryHelper(factory, context); - final EncodingFormat> format = - helper.discoverEncodingFormat(SerializationFormatFactory.class, FORMAT_OPTION); - helper.validate(); - return format; - } - - List getPrimaryKeyLogicalTypesWithIndex(Context context) { + private static List getPrimaryKeyLogicalTypesWithIndex(Context context) { DataType physicalRowDataType = context.getPhysicalRowDataType(); int[] primaryKeyIndexes = context.getPrimaryKeyIndexes(); if (primaryKeyIndexes.length != 0) { @@ -192,6 +197,28 @@ public Set> requiredOptions() { return Stream.of(HOSTS_OPTION, INDEX_OPTION).collect(Collectors.toSet()); } + @Override + public Set> forwardOptions() { + return Stream.of( + HOSTS_OPTION, + INDEX_OPTION, + PASSWORD_OPTION, + USERNAME_OPTION, + KEY_DELIMITER_OPTION, + BULK_FLUSH_MAX_ACTIONS_OPTION, + BULK_FLUSH_MAX_SIZE_OPTION, + BULK_FLUSH_INTERVAL_OPTION, + BULK_FLUSH_BACKOFF_TYPE_OPTION, + BULK_FLUSH_BACKOFF_MAX_RETRIES_OPTION, + BULK_FLUSH_BACKOFF_DELAY_OPTION, + CONNECTION_PATH_PREFIX_OPTION, + CONNECTION_REQUEST_TIMEOUT, + CONNECTION_TIMEOUT, + SOCKET_TIMEOUT, + ALLOW_INSECURE) + .collect(Collectors.toSet()); + } + @Override public Set> optionalOptions() { return Stream.of( diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSink.java b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSink.java index aae3623..4f64fdf 100644 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSink.java +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSink.java @@ -193,7 +193,7 @@ public void setDelayMillis(long delayMillis) { /** * Number of pending action requests not yet acknowledged by Opensearch. This value is - * maintained only if {@link OpensearchSinkBase#flushOnCheckpoint} is {@code true}. + * maintained only if {@link OpensearchSink#flushOnCheckpoint} is {@code true}. * *
<p>
This is incremented whenever the user adds (or re-adds through the {@link * ActionRequestFailureHandler}) requests to the {@link RequestIndexer}. It is decremented for @@ -236,7 +236,7 @@ private OpensearchSink( // we eagerly check if the user-provided sink function and failure handler is serializable; // otherwise, if they aren't serializable, users will merely get a non-informative error // message - // "OpensearchSinkBase is not serializable" + // "OpensearchSink is not serializable" checkArgument( InstantiationUtil.isSerializable(opensearchSinkFunction), @@ -325,7 +325,7 @@ public void disableFlushOnCheckpoint() { @Override public void open(Configuration parameters) throws Exception { - client = createClient(userConfig); + client = createClient(); bulkProcessor = buildBulkProcessor(new BulkProcessorListener()); requestIndexer = new OpensearchBulkProcessorIndexer( @@ -375,7 +375,7 @@ public void close() throws Exception { } /** Build the {@link BulkProcessor}. */ - protected BulkProcessor buildBulkProcessor(BulkProcessor.Listener listener) { + private BulkProcessor buildBulkProcessor(BulkProcessor.Listener listener) { checkNotNull(listener); BulkProcessor.Builder bulkProcessorBuilder = @@ -412,7 +412,7 @@ protected BulkProcessor buildBulkProcessor(BulkProcessor.Listener listener) { * @return The created client. * @throws IOException IOException */ - private RestHighLevelClient createClient(Map clientConfig) throws IOException { + private RestHighLevelClient createClient() throws IOException { RestClientBuilder builder = RestClient.builder(httpHosts.toArray(new HttpHost[httpHosts.size()])); restClientFactory.configureRestClientBuilder(builder); @@ -426,9 +426,9 @@ private RestHighLevelClient createClient(Map clientConfig) throw /** * Verify the client connection by making a test request/ping to the Opensearch cluster. * - *
<p>
Called by {@link OpensearchSinkBase#open(org.apache.flink.configuration.Configuration)} - * after creating the client. This makes sure the underlying client is closed if the connection - * is not successful and preventing thread leak. + *
<p>
Called by {@link OpensearchSink#open(org.apache.flink.configuration.Configuration)} after + * creating the client. This makes sure the underlying client is closed if the connection is not + * successful and preventing thread leak. * * @param client the Opensearch client. */ diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilderTest.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilderTest.java index d7fb231..3657145 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilderTest.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilderTest.java @@ -29,6 +29,7 @@ import java.util.stream.Stream; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -56,6 +57,12 @@ Stream testValidBuilders() { builder -> assertDoesNotThrow(builder::build)); } + @Test + void testDefaultDeliveryGuarantee() { + assertThat(createMinimalBuilder().build().getDeliveryGuarantee()) + .isEqualTo(DeliveryGuarantee.AT_LEAST_ONCE); + } + @Test void testThrowIfExactlyOnceConfigured() { assertThrows( diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/IndexGeneratorTest.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/IndexGeneratorTest.java index fe61398..35c3d6d 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/IndexGeneratorTest.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/IndexGeneratorTest.java @@ -35,6 +35,7 @@ import java.time.LocalDate; import java.time.LocalDateTime; import java.time.LocalTime; +import java.time.format.DateTimeFormatter; import java.time.temporal.UnsupportedTemporalTypeException; import java.util.Arrays; import java.util.List; @@ -261,4 +262,72 @@ public void testUnsupportedIndexFieldType() { Assertions.assertEquals(expectedExceptionMsg, e.getMessage()); } } + + @Test + public void testDynamicIndexFromSystemTime() { + List supportedUseCases = + Arrays.asList( + "now()", + "NOW()", + "now( )", + "NOW(\t)", + "\t NOW( ) \t", + "current_timestamp", + "CURRENT_TIMESTAMP", + "\tcurrent_timestamp\t", + " current_timestamp "); + + supportedUseCases.stream() + .forEach( + f -> { + DateTimeFormatter dateTimeFormatter = + DateTimeFormatter.ofPattern("yyyy_MM_dd"); + IndexGenerator indexGenerator = + IndexGeneratorFactory.createIndexGenerator( + String.format("my-index-{%s|yyyy_MM_dd}", f), + fieldNames, + dataTypes); + indexGenerator.open(); + // The date may change during the running of the unit test. + // Generate expected index-name based on the current time + // before and after calling the generate method. 
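+ // A date rollover between the two snapshots is therefore tolerated: the generated
+ // index only needs to match one of the two expected names.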
+ String expectedIndex1 = + "my-index-" + LocalDateTime.now().format(dateTimeFormatter); + String actualIndex = indexGenerator.generate(rows.get(1)); + String expectedIndex2 = + "my-index-" + LocalDateTime.now().format(dateTimeFormatter); + Assertions.assertTrue( + actualIndex.equals(expectedIndex1) + || actualIndex.equals(expectedIndex2)); + }); + + List invalidUseCases = + Arrays.asList( + "now", + "now(", + "NOW", + "NOW)", + "current_timestamp()", + "CURRENT_TIMESTAMP()", + "CURRENT_timestamp"); + invalidUseCases.stream() + .forEach( + f -> { + String expectedExceptionMsg = + String.format( + "Unknown field '%s' in index pattern 'my-index-{%s|yyyy_MM_dd}'," + + " please check the field name.", + f, f); + try { + IndexGenerator indexGenerator = + IndexGeneratorFactory.createIndexGenerator( + String.format("my-index-{%s|yyyy_MM_dd}", f), + fieldNames, + dataTypes); + indexGenerator.open(); + } catch (TableException e) { + Assertions.assertEquals(expectedExceptionMsg, e.getMessage()); + } + }); + } } diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactoryTest.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactoryTest.java index f7f1430..5130c33 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactoryTest.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactoryTest.java @@ -25,8 +25,10 @@ import org.apache.flink.table.catalog.Column; import org.apache.flink.table.catalog.ResolvedSchema; import org.apache.flink.table.catalog.UniqueConstraint; +import org.apache.flink.table.connector.ChangelogMode; import org.apache.flink.table.connector.sink.DynamicTableSink; import org.apache.flink.table.connector.sink.SinkV2Provider; +import org.apache.flink.types.RowKind; import org.apache.flink.util.TestLoggerExtension; import org.junit.jupiter.api.Assertions; @@ -264,4 +266,25 @@ public void testSinkParallelism() { opensearchSink.getSinkRuntimeProvider(new OpensearchUtil.MockContext()); assertThat(2).isEqualTo(provider.getParallelism().get()); } + + @Test + public void validateDynamicIndexOnChangelogStream() { + OpensearchDynamicSinkFactory sinkFactory = new OpensearchDynamicSinkFactory(); + DynamicTableSink sink = + sinkFactory.createDynamicTableSink( + createPrefilledTestContext() + .withOption( + OpensearchConnectorOptions.INDEX_OPTION.key(), + "dynamic-index-{now()|yyyy-MM-dd}_index") + .build()); + + ChangelogMode changelogMode = + ChangelogMode.newBuilder() + .addContainedKind(RowKind.DELETE) + .addContainedKind(RowKind.INSERT) + .build(); + assertValidationException( + "Dynamic indexing based on system time only works on append only stream.", + () -> sink.getChangelogMode(changelogMode)); + } } diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkITCase.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkITCase.java index 56bec56..e81eafb 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkITCase.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkITCase.java @@ -41,10 +41,12 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import 
org.junit.jupiter.api.extension.ExtendWith; +import org.opensearch.OpenSearchStatusException; import org.opensearch.action.get.GetRequest; import org.opensearch.action.search.SearchRequest; import org.opensearch.client.RequestOptions; import org.opensearch.client.RestHighLevelClient; +import org.opensearch.rest.RestStatus; import org.opensearch.search.SearchHits; import org.opensearch.testcontainers.OpensearchContainer; import org.slf4j.Logger; @@ -56,6 +58,8 @@ import java.time.LocalDate; import java.time.LocalDateTime; import java.time.LocalTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -331,4 +335,62 @@ public void testWritingDocumentsWithDynamicIndex() throws Exception { expectedMap.put("b", "2012-12-12 12:12:12"); Assertions.assertEquals(response, expectedMap); } + + @Test + public void testWritingDocumentsWithDynamicIndexFromSystemTime() throws Exception { + TableEnvironment tableEnvironment = + TableEnvironment.create(EnvironmentSettings.inStreamingMode()); + + DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); + tableEnvironment + .getConfig() + .getConfiguration() + .setString("table.local-time-zone", "Asia/Shanghai"); + + String dynamicIndex1 = + "dynamic-index-" + + dateTimeFormatter.format(LocalDateTime.now(ZoneId.of("Asia/Shanghai"))) + + "_index"; + String index = "dynamic-index-{now()|yyyy-MM-dd}_index"; + tableEnvironment.executeSql( + "CREATE TABLE esTable (" + + "a BIGINT NOT NULL,\n" + + "b TIMESTAMP NOT NULL,\n" + + "PRIMARY KEY (a) NOT ENFORCED\n" + + ")\n" + + "WITH (\n" + + getConnectorSql(index) + + ")"); + String dynamicIndex2 = + "dynamic-index-" + + dateTimeFormatter.format(LocalDateTime.now(ZoneId.of("Asia/Shanghai"))) + + "_index"; + + tableEnvironment + .fromValues(row(1L, LocalDateTime.parse("2012-12-12T12:12:12"))) + .executeInsert("esTable") + .await(); + + RestHighLevelClient client = OpensearchUtil.createClient(OS_CONTAINER); + + Map response; + try { + response = + client.get(new GetRequest(dynamicIndex1, "1"), RequestOptions.DEFAULT) + .getSource(); + } catch (OpenSearchStatusException e) { + if (e.status() == RestStatus.NOT_FOUND) { + response = + client.get(new GetRequest(dynamicIndex2, "1"), RequestOptions.DEFAULT) + .getSource(); + } else { + throw e; + } + } + + Map expectedMap = new HashMap<>(); + expectedMap.put("a", 1); + expectedMap.put("b", "2012-12-12 12:12:12"); + Assertions.assertEquals(response, expectedMap); + } } diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkITCase.java b/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkITCase.java index e7378c3..38b9354 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkITCase.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkITCase.java @@ -69,8 +69,7 @@ public void testOpensearchSinkWithSmile() throws Exception { @Test public void testNullAddresses() { try { - createOpensearchSink( - 1, getClusterName(), null, SourceSinkDataTestKit.getJsonSinkFunction("test")); + createOpensearchSink(1, null, SourceSinkDataTestKit.getJsonSinkFunction("test")); } catch (IllegalArgumentException | NullPointerException expectedException) { // test passes return; @@ -83,10 +82,7 @@ public void testNullAddresses() { public void 
testEmptyAddresses() { try { createOpensearchSink( - 1, - getClusterName(), - Collections.emptyList(), - SourceSinkDataTestKit.getJsonSinkFunction("test")); + 1, Collections.emptyList(), SourceSinkDataTestKit.getJsonSinkFunction("test")); } catch (IllegalArgumentException expectedException) { // test passes return; @@ -105,7 +101,6 @@ public void testInvalidOpensearchCluster() throws Exception { source.addSink( createOpensearchSinkForNode( 1, - "invalid-cluster-name", SourceSinkDataTestKit.getJsonSinkFunction("test"), "123.123.123.123")); // incorrect ip address @@ -119,13 +114,8 @@ public void testInvalidOpensearchCluster() throws Exception { fail(); } - private String getClusterName() { - return "docker-cluster"; - } - private OpensearchSink> createOpensearchSink( int bulkFlushMaxActions, - String clusterName, List httpHosts, OpensearchSinkFunction> opensearchSinkFunction) { @@ -138,7 +128,6 @@ private OpensearchSink> createOpensearchSink( private OpensearchSink> createOpensearchSinkForNode( int bulkFlushMaxActions, - String clusterName, OpensearchSinkFunction> opensearchSinkFunction, String hostAddress) { @@ -164,10 +153,7 @@ private void runOpensearchSinkTest( source.addSink( createOpensearchSinkForNode( - 1, - getClusterName(), - functionFactory.apply(index), - OS_CONTAINER.getHttpHostAddress())); + 1, functionFactory.apply(index), OS_CONTAINER.getHttpHostAddress())); env.execute("Opensearch Sink Test"); diff --git a/pom.xml b/pom.xml index 7f70545..116eb63 100644 --- a/pom.xml +++ b/pom.xml @@ -54,6 +54,7 @@ under the License. flink-connector-opensearch flink-connector-opensearch-e2e-tests + flink-sql-connector-opensearch @@ -377,23 +378,6 @@ under the License. - - - - - - sql-jars - - - !skipSqlJars - - - - flink-sql-connector-opensearch - - - - From 84b0ed8893314f5b6267d3dc83031edcc6a23e6f Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Tue, 22 Nov 2022 10:28:53 -0500 Subject: [PATCH 08/18] Addressing code review comments Signed-off-by: Andriy Redko --- .../sink/BulkProcessorBuilderFactory.java | 36 -------- .../opensearch/sink/OpensearchSink.java | 4 - .../sink/OpensearchSinkBuilder.java | 89 +------------------ .../opensearch/sink/OpensearchWriter.java | 64 +++++++++++-- .../util/IgnoringFailureHandler.java | 38 -------- .../RetryRejectedExecutionFailureHandler.java | 60 ------------- .../sink/OpensearchWriterITCase.java | 70 --------------- 7 files changed, 58 insertions(+), 303 deletions(-) delete mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/BulkProcessorBuilderFactory.java delete mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/IgnoringFailureHandler.java delete mode 100644 flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/RetryRejectedExecutionFailureHandler.java diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/BulkProcessorBuilderFactory.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/BulkProcessorBuilderFactory.java deleted file mode 100644 index ff284a6..0000000 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/BulkProcessorBuilderFactory.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.connector.opensearch.sink; - -import org.apache.flink.annotation.Internal; -import org.apache.flink.util.function.TriFunction; - -import org.opensearch.action.bulk.BulkProcessor; -import org.opensearch.client.RestHighLevelClient; - -import java.io.Serializable; - -@Internal -interface BulkProcessorBuilderFactory - extends Serializable, - TriFunction< - RestHighLevelClient, - BulkProcessorConfig, - BulkProcessor.Listener, - BulkProcessor.Builder> {} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSink.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSink.java index d80c00a..1f2b21d 100644 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSink.java +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSink.java @@ -56,7 +56,6 @@ public class OpensearchSink implements Sink { private final List hosts; private final OpensearchEmitter emitter; private final BulkProcessorConfig buildBulkProcessorConfig; - private final BulkProcessorBuilderFactory bulkProcessorBuilderFactory; private final NetworkClientConfig networkClientConfig; private final DeliveryGuarantee deliveryGuarantee; @@ -64,11 +63,9 @@ public class OpensearchSink implements Sink { List hosts, OpensearchEmitter emitter, DeliveryGuarantee deliveryGuarantee, - BulkProcessorBuilderFactory bulkProcessorBuilderFactory, BulkProcessorConfig buildBulkProcessorConfig, NetworkClientConfig networkClientConfig) { this.hosts = checkNotNull(hosts); - this.bulkProcessorBuilderFactory = checkNotNull(bulkProcessorBuilderFactory); checkArgument(!hosts.isEmpty(), "Hosts cannot be empty."); this.emitter = checkNotNull(emitter); this.deliveryGuarantee = checkNotNull(deliveryGuarantee); @@ -83,7 +80,6 @@ public SinkWriter createWriter(InitContext context) throws IOException { emitter, deliveryGuarantee == DeliveryGuarantee.AT_LEAST_ONCE, buildBulkProcessorConfig, - bulkProcessorBuilderFactory, networkClientConfig, context.metricGroup(), context.getMailboxExecutor()); diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilder.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilder.java index 0a34ab0..895ca03 100644 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilder.java +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilder.java @@ -19,22 +19,10 @@ package org.apache.flink.connector.opensearch.sink; import org.apache.flink.annotation.PublicEvolving; -import org.apache.flink.api.common.ExecutionConfig; 
-import org.apache.flink.api.java.ClosureCleaner; import org.apache.flink.connector.base.DeliveryGuarantee; import org.apache.flink.util.InstantiationUtil; import org.apache.http.HttpHost; -import org.opensearch.action.ActionListener; -import org.opensearch.action.bulk.BackoffPolicy; -import org.opensearch.action.bulk.BulkProcessor; -import org.opensearch.action.bulk.BulkRequest; -import org.opensearch.action.bulk.BulkResponse; -import org.opensearch.client.RequestOptions; -import org.opensearch.client.RestHighLevelClient; -import org.opensearch.common.unit.ByteSizeUnit; -import org.opensearch.common.unit.ByteSizeValue; -import org.opensearch.common.unit.TimeValue; import java.util.Arrays; import java.util.List; @@ -297,72 +285,6 @@ public OpensearchSinkBuilder setAllowInsecure(boolean allowInsecure) { return self(); } - protected BulkProcessorBuilderFactory getBulkProcessorBuilderFactory() { - return new BulkProcessorBuilderFactory() { - @Override - public BulkProcessor.Builder apply( - RestHighLevelClient client, - BulkProcessorConfig bulkProcessorConfig, - BulkProcessor.Listener listener) { - BulkProcessor.Builder builder = - BulkProcessor.builder( - new BulkRequestConsumerFactory() { // This cannot be inlined as a - // lambda because then - // deserialization fails - @Override - public void accept( - BulkRequest bulkRequest, - ActionListener - bulkResponseActionListener) { - client.bulkAsync( - bulkRequest, - RequestOptions.DEFAULT, - bulkResponseActionListener); - } - }, - listener); - - if (bulkProcessorConfig.getBulkFlushMaxActions() != -1) { - builder.setBulkActions(bulkProcessorConfig.getBulkFlushMaxActions()); - } - - if (bulkProcessorConfig.getBulkFlushMaxMb() != -1) { - builder.setBulkSize( - new ByteSizeValue( - bulkProcessorConfig.getBulkFlushMaxMb(), ByteSizeUnit.MB)); - } - - if (bulkProcessorConfig.getBulkFlushInterval() != -1) { - builder.setFlushInterval( - new TimeValue(bulkProcessorConfig.getBulkFlushInterval())); - } - - BackoffPolicy backoffPolicy; - final TimeValue backoffDelay = - new TimeValue(bulkProcessorConfig.getBulkFlushBackOffDelay()); - final int maxRetryCount = bulkProcessorConfig.getBulkFlushBackoffRetries(); - switch (bulkProcessorConfig.getFlushBackoffType()) { - case CONSTANT: - backoffPolicy = BackoffPolicy.constantBackoff(backoffDelay, maxRetryCount); - break; - case EXPONENTIAL: - backoffPolicy = - BackoffPolicy.exponentialBackoff(backoffDelay, maxRetryCount); - break; - case NONE: - backoffPolicy = BackoffPolicy.noBackoff(); - break; - default: - throw new IllegalArgumentException( - "Received unknown backoff policy type " - + bulkProcessorConfig.getFlushBackoffType()); - } - builder.setBackoffPolicy(backoffPolicy); - return builder; - } - }; - } - /** * Constructs the {@link OpensearchSink} with the properties configured this builder. 
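 * <p>The settings recorded by this builder are captured in an immutable {@code
 * BulkProcessorConfig}; the {@code BulkProcessor} itself is assembled from that
 * configuration inside the writer when the sink creates it.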
* @@ -375,17 +297,8 @@ public OpensearchSink build() { NetworkClientConfig networkClientConfig = buildNetworkClientConfig(); BulkProcessorConfig bulkProcessorConfig = buildBulkProcessorConfig(); - BulkProcessorBuilderFactory bulkProcessorBuilderFactory = getBulkProcessorBuilderFactory(); - ClosureCleaner.clean( - bulkProcessorBuilderFactory, ExecutionConfig.ClosureCleanerLevel.RECURSIVE, true); - return new OpensearchSink<>( - hosts, - emitter, - deliveryGuarantee, - bulkProcessorBuilderFactory, - bulkProcessorConfig, - networkClientConfig); + hosts, emitter, deliveryGuarantee, bulkProcessorConfig, networkClientConfig); } private NetworkClientConfig buildNetworkClientConfig() { diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchWriter.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchWriter.java index 0a88db3..3231b28 100644 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchWriter.java +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchWriter.java @@ -33,7 +33,9 @@ import org.apache.http.conn.ssl.TrustAllStrategy; import org.apache.http.impl.client.BasicCredentialsProvider; import org.apache.http.ssl.SSLContexts; +import org.opensearch.action.ActionListener; import org.opensearch.action.DocWriteRequest; +import org.opensearch.action.bulk.BackoffPolicy; import org.opensearch.action.bulk.BulkItemResponse; import org.opensearch.action.bulk.BulkProcessor; import org.opensearch.action.bulk.BulkRequest; @@ -41,9 +43,13 @@ import org.opensearch.action.delete.DeleteRequest; import org.opensearch.action.index.IndexRequest; import org.opensearch.action.update.UpdateRequest; +import org.opensearch.client.RequestOptions; import org.opensearch.client.RestClient; import org.opensearch.client.RestClientBuilder; import org.opensearch.client.RestHighLevelClient; +import org.opensearch.common.unit.ByteSizeUnit; +import org.opensearch.common.unit.ByteSizeValue; +import org.opensearch.common.unit.TimeValue; import org.opensearch.rest.RestStatus; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -95,7 +101,6 @@ class OpensearchWriter implements SinkWriter { OpensearchEmitter emitter, boolean flushOnCheckpoint, BulkProcessorConfig bulkProcessorConfig, - BulkProcessorBuilderFactory bulkProcessorBuilderFactory, NetworkClientConfig networkClientConfig, SinkWriterMetricGroup metricGroup, MailboxExecutor mailboxExecutor) { @@ -107,7 +112,7 @@ class OpensearchWriter implements SinkWriter { configureRestClientBuilder( RestClient.builder(hosts.toArray(new HttpHost[0])), networkClientConfig)); - this.bulkProcessor = createBulkProcessor(bulkProcessorBuilderFactory, bulkProcessorConfig); + this.bulkProcessor = createBulkProcessor(bulkProcessorConfig); this.requestIndexer = new DefaultRequestIndexer(metricGroup.getNumRecordsSendCounter()); checkNotNull(metricGroup); metricGroup.setCurrentSendTimeGauge(() -> ackTime - lastSendTime); @@ -216,13 +221,58 @@ private static RestClientBuilder configureRestClientBuilder( return builder; } - private BulkProcessor createBulkProcessor( - BulkProcessorBuilderFactory bulkProcessorBuilderFactory, - BulkProcessorConfig bulkProcessorConfig) { + private BulkProcessor createBulkProcessor(BulkProcessorConfig bulkProcessorConfig) { + + final BulkProcessor.Builder builder = + BulkProcessor.builder( + new BulkRequestConsumerFactory() { // This cannot be inlined as a + // lambda 
because then + // deserialization fails + @Override + public void accept( + BulkRequest bulkRequest, + ActionListener bulkResponseActionListener) { + client.bulkAsync( + bulkRequest, + RequestOptions.DEFAULT, + bulkResponseActionListener); + } + }, + new BulkListener()); + + if (bulkProcessorConfig.getBulkFlushMaxActions() != -1) { + builder.setBulkActions(bulkProcessorConfig.getBulkFlushMaxActions()); + } - BulkProcessor.Builder builder = - bulkProcessorBuilderFactory.apply(client, bulkProcessorConfig, new BulkListener()); + if (bulkProcessorConfig.getBulkFlushMaxMb() != -1) { + builder.setBulkSize( + new ByteSizeValue(bulkProcessorConfig.getBulkFlushMaxMb(), ByteSizeUnit.MB)); + } + if (bulkProcessorConfig.getBulkFlushInterval() != -1) { + builder.setFlushInterval(new TimeValue(bulkProcessorConfig.getBulkFlushInterval())); + } + + BackoffPolicy backoffPolicy; + final TimeValue backoffDelay = + new TimeValue(bulkProcessorConfig.getBulkFlushBackOffDelay()); + final int maxRetryCount = bulkProcessorConfig.getBulkFlushBackoffRetries(); + switch (bulkProcessorConfig.getFlushBackoffType()) { + case CONSTANT: + backoffPolicy = BackoffPolicy.constantBackoff(backoffDelay, maxRetryCount); + break; + case EXPONENTIAL: + backoffPolicy = BackoffPolicy.exponentialBackoff(backoffDelay, maxRetryCount); + break; + case NONE: + backoffPolicy = BackoffPolicy.noBackoff(); + break; + default: + throw new IllegalArgumentException( + "Received unknown backoff policy type " + + bulkProcessorConfig.getFlushBackoffType()); + } + builder.setBackoffPolicy(backoffPolicy); // This makes flush() blocking builder.setConcurrentRequests(0); diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/IgnoringFailureHandler.java b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/IgnoringFailureHandler.java deleted file mode 100644 index a04cd55..0000000 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/IgnoringFailureHandler.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.opensearch.util; - -import org.apache.flink.annotation.Internal; -import org.apache.flink.streaming.connectors.opensearch.ActionRequestFailureHandler; -import org.apache.flink.streaming.connectors.opensearch.RequestIndexer; - -import org.opensearch.action.ActionRequest; - -/** Ignores all kinds of failures and drops the affected {@link ActionRequest}. 
*/ -@Internal -public class IgnoringFailureHandler implements ActionRequestFailureHandler { - - private static final long serialVersionUID = 1662846593501L; - - @Override - public void onFailure( - ActionRequest action, Throwable failure, int restStatusCode, RequestIndexer indexer) { - // ignore failure - } -} diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/RetryRejectedExecutionFailureHandler.java b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/RetryRejectedExecutionFailureHandler.java deleted file mode 100644 index 922004e..0000000 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/util/RetryRejectedExecutionFailureHandler.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.connectors.opensearch.util; - -import org.apache.flink.annotation.PublicEvolving; -import org.apache.flink.streaming.connectors.opensearch.ActionRequestFailureHandler; -import org.apache.flink.streaming.connectors.opensearch.RequestIndexer; -import org.apache.flink.util.ExceptionUtils; - -import org.opensearch.action.ActionRequest; -import org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * An {@link ActionRequestFailureHandler} that re-adds requests that failed due to temporary {@link - * OpenSearchRejectedExecutionException}s (which means that Opensearch node queues are currently - * full), and fails for all other failures. - * - * @deprecated This hase been deprecated and will be removed in the future. 
- */ -@Deprecated -@PublicEvolving -public class RetryRejectedExecutionFailureHandler implements ActionRequestFailureHandler { - - private static final long serialVersionUID = -7423562912824511906L; - - private static final Logger LOG = - LoggerFactory.getLogger(RetryRejectedExecutionFailureHandler.class); - - @Override - public void onFailure( - ActionRequest action, Throwable failure, int restStatusCode, RequestIndexer indexer) - throws Throwable { - LOG.error("Failed Opensearch item request: {}", failure.getMessage(), failure); - if (ExceptionUtils.findThrowable(failure, OpenSearchRejectedExecutionException.class) - .isPresent()) { - indexer.add(action); - } else { - // rethrow all other failures - throw failure; - } - } -} diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchWriterITCase.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchWriterITCase.java index 16ebf84..523dff7 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchWriterITCase.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchWriterITCase.java @@ -39,19 +39,10 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; -import org.opensearch.action.ActionListener; -import org.opensearch.action.bulk.BackoffPolicy; -import org.opensearch.action.bulk.BulkProcessor; -import org.opensearch.action.bulk.BulkRequest; -import org.opensearch.action.bulk.BulkResponse; import org.opensearch.action.delete.DeleteRequest; import org.opensearch.action.index.IndexRequest; import org.opensearch.action.update.UpdateRequest; -import org.opensearch.client.RequestOptions; import org.opensearch.client.RestHighLevelClient; -import org.opensearch.common.unit.ByteSizeUnit; -import org.opensearch.common.unit.ByteSizeValue; -import org.opensearch.common.unit.TimeValue; import org.opensearch.testcontainers.OpensearchContainer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -268,7 +259,6 @@ private OpensearchWriter> createWriter( new UpdatingEmitter(index, context.getDataFieldName()), flushOnCheckpoint, bulkProcessorConfig, - new TestBulkProcessorBuilderFactory(), new NetworkClientConfig( OS_CONTAINER.getUsername(), OS_CONTAINER.getPassword(), @@ -281,66 +271,6 @@ private OpensearchWriter> createWriter( new TestMailbox()); } - private static class TestBulkProcessorBuilderFactory implements BulkProcessorBuilderFactory { - @Override - public BulkProcessor.Builder apply( - RestHighLevelClient client, - BulkProcessorConfig bulkProcessorConfig, - BulkProcessor.Listener listener) { - BulkProcessor.Builder builder = - BulkProcessor.builder( - new BulkRequestConsumerFactory() { // This cannot be inlined as a lambda - // because then deserialization fails - @Override - public void accept( - BulkRequest bulkRequest, - ActionListener bulkResponseActionListener) { - client.bulkAsync( - bulkRequest, - RequestOptions.DEFAULT, - bulkResponseActionListener); - } - }, - listener); - - if (bulkProcessorConfig.getBulkFlushMaxActions() != -1) { - builder.setBulkActions(bulkProcessorConfig.getBulkFlushMaxActions()); - } - - if (bulkProcessorConfig.getBulkFlushMaxMb() != -1) { - builder.setBulkSize( - new ByteSizeValue( - bulkProcessorConfig.getBulkFlushMaxMb(), ByteSizeUnit.MB)); - } - - if (bulkProcessorConfig.getBulkFlushInterval() != -1) { - builder.setFlushInterval(new 
TimeValue(bulkProcessorConfig.getBulkFlushInterval())); - } - - BackoffPolicy backoffPolicy; - final TimeValue backoffDelay = - new TimeValue(bulkProcessorConfig.getBulkFlushBackOffDelay()); - final int maxRetryCount = bulkProcessorConfig.getBulkFlushBackoffRetries(); - switch (bulkProcessorConfig.getFlushBackoffType()) { - case CONSTANT: - backoffPolicy = BackoffPolicy.constantBackoff(backoffDelay, maxRetryCount); - break; - case EXPONENTIAL: - backoffPolicy = BackoffPolicy.exponentialBackoff(backoffDelay, maxRetryCount); - break; - case NONE: - backoffPolicy = BackoffPolicy.noBackoff(); - break; - default: - throw new IllegalArgumentException( - "Received unknown backoff policy type " - + bulkProcessorConfig.getFlushBackoffType()); - } - builder.setBackoffPolicy(backoffPolicy); - return builder; - } - } - private static class UpdatingEmitter implements OpensearchEmitter> { private static final long serialVersionUID = 1L; From 38f9783dc64c3c812f33625eafd0ac97c6bf3339 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Tue, 22 Nov 2022 10:42:56 -0500 Subject: [PATCH 09/18] Addressing code review comments Signed-off-by: Andriy Redko --- .../opensearch/table/OpensearchDynamicSink.java | 12 +++--------- .../table/OpensearchDynamicSinkFactory.java | 8 -------- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSink.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSink.java index 9550ac4..5c880d3 100644 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSink.java +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSink.java @@ -57,7 +57,6 @@ class OpensearchDynamicSink implements DynamicTableSink { final ZoneId localTimeZoneId; final String summaryString; - final OpensearchSinkBuilderSupplier builderSupplier; final boolean isDynamicIndexWithSystemTime; OpensearchDynamicSink( @@ -66,14 +65,12 @@ class OpensearchDynamicSink implements DynamicTableSink { List primaryKeyLogicalTypesWithIndex, DataType physicalRowDataType, String summaryString, - OpensearchSinkBuilderSupplier builderSupplier, ZoneId localTimeZoneId) { this.format = checkNotNull(format); this.physicalRowDataType = checkNotNull(physicalRowDataType); this.primaryKeyLogicalTypesWithIndex = checkNotNull(primaryKeyLogicalTypesWithIndex); this.config = checkNotNull(config); this.summaryString = checkNotNull(summaryString); - this.builderSupplier = checkNotNull(builderSupplier); this.localTimeZoneId = localTimeZoneId; this.isDynamicIndexWithSystemTime = isDynamicIndexWithSystemTime(); } @@ -120,7 +117,7 @@ public SinkRuntimeProvider getSinkRuntimeProvider(Context context) { new RowOpensearchEmitter( createIndexGenerator(), format, XContentType.JSON, createKeyExtractor()); - OpensearchSinkBuilder builder = builderSupplier.get(); + final OpensearchSinkBuilder builder = new OpensearchSinkBuilder<>(); builder.setEmitter(rowOpensearchEmitter); builder.setHosts(config.getHosts().toArray(new HttpHost[0])); builder.setDeliveryGuarantee(config.getDeliveryGuarantee()); @@ -179,7 +176,6 @@ public DynamicTableSink copy() { primaryKeyLogicalTypesWithIndex, physicalRowDataType, summaryString, - builderSupplier, localTimeZoneId); } @@ -202,8 +198,7 @@ public boolean equals(Object o) { && Objects.equals( primaryKeyLogicalTypesWithIndex, that.primaryKeyLogicalTypesWithIndex) && 
Objects.equals(config, that.config) - && Objects.equals(summaryString, that.summaryString) - && Objects.equals(builderSupplier, that.builderSupplier); + && Objects.equals(summaryString, that.summaryString); } @Override @@ -213,7 +208,6 @@ public int hashCode() { physicalRowDataType, primaryKeyLogicalTypesWithIndex, config, - summaryString, - builderSupplier); + summaryString); } } diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactory.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactory.java index 68f30de..f4cb45c 100644 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactory.java +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactory.java @@ -22,7 +22,6 @@ import org.apache.flink.api.common.serialization.SerializationSchema; import org.apache.flink.configuration.ConfigOption; import org.apache.flink.configuration.ReadableConfig; -import org.apache.flink.connector.opensearch.sink.OpensearchSinkBuilder; import org.apache.flink.table.api.ValidationException; import org.apache.flink.table.api.config.TableConfigOptions; import org.apache.flink.table.catalog.Column; @@ -73,12 +72,6 @@ public class OpensearchDynamicSinkFactory implements DynamicTableSinkFactory { private static final String FACTORY_IDENTIFIER = "opensearch"; - private final OpensearchSinkBuilderSupplier sinkBuilderSupplier; - - public OpensearchDynamicSinkFactory() { - this.sinkBuilderSupplier = OpensearchSinkBuilder::new; - } - @Override public DynamicTableSink createDynamicTableSink(Context context) { List primaryKeyLogicalTypesWithIndex = @@ -99,7 +92,6 @@ public DynamicTableSink createDynamicTableSink(Context context) { primaryKeyLogicalTypesWithIndex, context.getPhysicalRowDataType(), capitalize(FACTORY_IDENTIFIER), - sinkBuilderSupplier, getLocalTimeZoneId(context.getConfiguration())); } From c23b7000ba7fe00821dd74617ba2dd28dfc64fe0 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Tue, 22 Nov 2022 13:00:13 -0500 Subject: [PATCH 10/18] Added architecture tests Signed-off-by: Andriy Redko --- .../0c16f106-1632-4ba5-aa26-eb3ab7c7d43e | 1 + .../0c6c8466-9ce6-41a7-b6dd-947cc5702975 | 0 .../4382f1f0-807a-45ff-97d8-42f72b6e9484 | 18 ++++++ .../73099dba-7f06-4637-b2ad-b3c906aaaf1c | 0 .../7c5588a1-e67a-4f85-a1c2-6be9dfe44c09 | 0 .../85d671bb-83fe-41ff-bc35-bd171908a156 | 0 .../879defc0-4d7e-43cb-ace6-1eb5aec9bac5 | 0 .../c921f20b-c7ee-4fd5-a8e9-c19ac7a59f20 | 0 .../archunit-violations/stored.rules | 10 ++++ flink-connector-opensearch/pom.xml | 8 +++ .../ProductionCodeArchitectureTest.java | 41 +++++++++++++ .../TestCodeArchitectureTest.java | 57 +++++++++++++++++++ .../src/test/resources/archunit.properties | 31 ++++++++++ 13 files changed, 166 insertions(+) create mode 100644 flink-connector-opensearch/archunit-violations/0c16f106-1632-4ba5-aa26-eb3ab7c7d43e create mode 100644 flink-connector-opensearch/archunit-violations/0c6c8466-9ce6-41a7-b6dd-947cc5702975 create mode 100644 flink-connector-opensearch/archunit-violations/4382f1f0-807a-45ff-97d8-42f72b6e9484 create mode 100644 flink-connector-opensearch/archunit-violations/73099dba-7f06-4637-b2ad-b3c906aaaf1c create mode 100644 flink-connector-opensearch/archunit-violations/7c5588a1-e67a-4f85-a1c2-6be9dfe44c09 create mode 100644 flink-connector-opensearch/archunit-violations/85d671bb-83fe-41ff-bc35-bd171908a156 
create mode 100644 flink-connector-opensearch/archunit-violations/879defc0-4d7e-43cb-ace6-1eb5aec9bac5 create mode 100644 flink-connector-opensearch/archunit-violations/c921f20b-c7ee-4fd5-a8e9-c19ac7a59f20 create mode 100644 flink-connector-opensearch/archunit-violations/stored.rules create mode 100644 flink-connector-opensearch/src/test/java/org/apache/flink/architecture/ProductionCodeArchitectureTest.java create mode 100644 flink-connector-opensearch/src/test/java/org/apache/flink/architecture/TestCodeArchitectureTest.java create mode 100644 flink-connector-opensearch/src/test/resources/archunit.properties diff --git a/flink-connector-opensearch/archunit-violations/0c16f106-1632-4ba5-aa26-eb3ab7c7d43e b/flink-connector-opensearch/archunit-violations/0c16f106-1632-4ba5-aa26-eb3ab7c7d43e new file mode 100644 index 0000000..338d495 --- /dev/null +++ b/flink-connector-opensearch/archunit-violations/0c16f106-1632-4ba5-aa26-eb3ab7c7d43e @@ -0,0 +1 @@ +org.apache.flink.connector.opensearch.sink.OpensearchEmitter.emit(java.lang.Object, org.apache.flink.api.connector.sink2.SinkWriter$Context, org.apache.flink.connector.opensearch.sink.RequestIndexer): Argument leaf type org.apache.flink.connector.opensearch.sink.RequestIndexer does not satisfy: reside outside of package 'org.apache.flink..' or reside in any package ['..shaded..'] or annotated with @Public or annotated with @PublicEvolving or annotated with @Deprecated \ No newline at end of file diff --git a/flink-connector-opensearch/archunit-violations/0c6c8466-9ce6-41a7-b6dd-947cc5702975 b/flink-connector-opensearch/archunit-violations/0c6c8466-9ce6-41a7-b6dd-947cc5702975 new file mode 100644 index 0000000..e69de29 diff --git a/flink-connector-opensearch/archunit-violations/4382f1f0-807a-45ff-97d8-42f72b6e9484 b/flink-connector-opensearch/archunit-violations/4382f1f0-807a-45ff-97d8-42f72b6e9484 new file mode 100644 index 0000000..02ea2fd --- /dev/null +++ b/flink-connector-opensearch/archunit-violations/4382f1f0-807a-45ff-97d8-42f72b6e9484 @@ -0,0 +1,18 @@ +org.apache.flink.connector.opensearch.sink.OpensearchSinkITCase does not satisfy: only one of the following predicates match:\ +* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ +* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension\ +* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ +* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ + or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule +org.apache.flink.connector.opensearch.sink.OpensearchWriterITCase does not satisfy: only one of the following predicates match:\ +* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ +* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension\ +* reside in a package 
'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ +* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ + or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule +org.apache.flink.connector.opensearch.table.OpensearchDynamicSinkITCase does not satisfy: only one of the following predicates match:\ +* reside in a package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type InternalMiniClusterExtension and annotated with @RegisterExtension\ +* reside outside of package 'org.apache.flink.runtime.*' and contain any fields that are static, final, and of type MiniClusterExtension and annotated with @RegisterExtension\ +* reside in a package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class InternalMiniClusterExtension\ +* reside outside of package 'org.apache.flink.runtime.*' and is annotated with @ExtendWith with class MiniClusterExtension\ + or contain any fields that are public, static, and of type MiniClusterWithClientResource and final and annotated with @ClassRule or contain any fields that is of type MiniClusterWithClientResource and public and final and not static and annotated with @Rule \ No newline at end of file diff --git a/flink-connector-opensearch/archunit-violations/73099dba-7f06-4637-b2ad-b3c906aaaf1c b/flink-connector-opensearch/archunit-violations/73099dba-7f06-4637-b2ad-b3c906aaaf1c new file mode 100644 index 0000000..e69de29 diff --git a/flink-connector-opensearch/archunit-violations/7c5588a1-e67a-4f85-a1c2-6be9dfe44c09 b/flink-connector-opensearch/archunit-violations/7c5588a1-e67a-4f85-a1c2-6be9dfe44c09 new file mode 100644 index 0000000..e69de29 diff --git a/flink-connector-opensearch/archunit-violations/85d671bb-83fe-41ff-bc35-bd171908a156 b/flink-connector-opensearch/archunit-violations/85d671bb-83fe-41ff-bc35-bd171908a156 new file mode 100644 index 0000000..e69de29 diff --git a/flink-connector-opensearch/archunit-violations/879defc0-4d7e-43cb-ace6-1eb5aec9bac5 b/flink-connector-opensearch/archunit-violations/879defc0-4d7e-43cb-ace6-1eb5aec9bac5 new file mode 100644 index 0000000..e69de29 diff --git a/flink-connector-opensearch/archunit-violations/c921f20b-c7ee-4fd5-a8e9-c19ac7a59f20 b/flink-connector-opensearch/archunit-violations/c921f20b-c7ee-4fd5-a8e9-c19ac7a59f20 new file mode 100644 index 0000000..e69de29 diff --git a/flink-connector-opensearch/archunit-violations/stored.rules b/flink-connector-opensearch/archunit-violations/stored.rules new file mode 100644 index 0000000..ce6d53d --- /dev/null +++ b/flink-connector-opensearch/archunit-violations/stored.rules @@ -0,0 +1,10 @@ +# +#Tue Nov 22 12:58:19 EST 2022 +Return\ and\ argument\ types\ of\ methods\ annotated\ with\ @Public\ must\ be\ annotated\ with\ @Public.=879defc0-4d7e-43cb-ace6-1eb5aec9bac5 +Connector\ production\ code\ must\ not\ depend\ on\ non-public\ API\ outside\ of\ connector\ packages=85d671bb-83fe-41ff-bc35-bd171908a156 +ITCASE\ tests\ should\ use\ a\ MiniCluster\ resource\ or\ extension=4382f1f0-807a-45ff-97d8-42f72b6e9484 +Production\ code\ must\ not\ call\ methods\ annotated\ with\ @VisibleForTesting=73099dba-7f06-4637-b2ad-b3c906aaaf1c +Options\ for\ connectors\ and\ formats\ should\ reside\ in\ a\ consistent\ 
package\ and\ be\ public\ API.=7c5588a1-e67a-4f85-a1c2-6be9dfe44c09 +Tests\ inheriting\ from\ AbstractTestBase\ should\ have\ name\ ending\ with\ ITCase=0c6c8466-9ce6-41a7-b6dd-947cc5702975 +Return\ and\ argument\ types\ of\ methods\ annotated\ with\ @PublicEvolving\ must\ be\ annotated\ with\ @Public(Evolving).=0c16f106-1632-4ba5-aa26-eb3ab7c7d43e +Classes\ in\ API\ packages\ should\ have\ at\ least\ one\ API\ visibility\ annotation.=c921f20b-c7ee-4fd5-a8e9-c19ac7a59f20 diff --git a/flink-connector-opensearch/pom.xml b/flink-connector-opensearch/pom.xml index 072d6c5..ab4f718 100644 --- a/flink-connector-opensearch/pom.xml +++ b/flink-connector-opensearch/pom.xml @@ -166,6 +166,14 @@ under the License. <artifactId>opensearch-testcontainers</artifactId> <scope>test</scope> </dependency> + + <!-- ArchUnit test dependencies --> + + <dependency> + <groupId>org.apache.flink</groupId> + <artifactId>flink-architecture-tests-test</artifactId> + <scope>test</scope> + </dependency> diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/architecture/ProductionCodeArchitectureTest.java b/flink-connector-opensearch/src/test/java/org/apache/flink/architecture/ProductionCodeArchitectureTest.java new file mode 100644 index 0000000..5b9811c --- /dev/null +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/architecture/ProductionCodeArchitectureTest.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.architecture; + +import org.apache.flink.architecture.common.ImportOptions; + +import com.tngtech.archunit.core.importer.ImportOption; +import com.tngtech.archunit.junit.AnalyzeClasses; +import com.tngtech.archunit.junit.ArchTest; +import com.tngtech.archunit.junit.ArchTests; + +/** Architecture tests for production code. */ +@AnalyzeClasses( + packages = "org.apache.flink.connector", + importOptions = { + ImportOption.DoNotIncludeTests.class, + ImportOption.DoNotIncludeArchives.class, + ImportOptions.ExcludeScalaImportOption.class, + ImportOptions.ExcludeShadedImportOption.class + }) +public class ProductionCodeArchitectureTest { + + @ArchTest + public static final ArchTests COMMON_TESTS = ArchTests.in(ProductionCodeArchitectureBase.class); +} diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/architecture/TestCodeArchitectureTest.java b/flink-connector-opensearch/src/test/java/org/apache/flink/architecture/TestCodeArchitectureTest.java new file mode 100644 index 0000000..9692731 --- /dev/null +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/architecture/TestCodeArchitectureTest.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.architecture; + +import org.apache.flink.architecture.common.ImportOptions; + +import com.tngtech.archunit.core.importer.ImportOption; +import com.tngtech.archunit.core.importer.Location; +import com.tngtech.archunit.junit.AnalyzeClasses; +import com.tngtech.archunit.junit.ArchTest; +import com.tngtech.archunit.junit.ArchTests; + +import java.util.regex.Pattern; + +/** Architecture tests for test code. */ +@AnalyzeClasses( + packages = { + "org.apache.flink.connector.opensearch", + "org.apache.flink.streaming.connectors.opensearch" + }, + importOptions = { + ImportOption.OnlyIncludeTests.class, + TestCodeArchitectureTest.IncludeES7ImportOption.class, + ImportOptions.ExcludeScalaImportOption.class, + ImportOptions.ExcludeShadedImportOption.class + }) +public class TestCodeArchitectureTest { + + @ArchTest + public static final ArchTests COMMON_TESTS = ArchTests.in(TestCodeArchitectureTestBase.class); + + /** Only include ES7 related locations. */ + public static final class IncludeES7ImportOption implements ImportOption { + private static final Pattern OPENSEARCH = Pattern.compile(".*opensearch.*"); + + @Override + public boolean includes(Location location) { + return location.matches(OPENSEARCH); + } + } +} diff --git a/flink-connector-opensearch/src/test/resources/archunit.properties b/flink-connector-opensearch/src/test/resources/archunit.properties new file mode 100644 index 0000000..15be88c --- /dev/null +++ b/flink-connector-opensearch/src/test/resources/archunit.properties @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# By default we allow removing existing violations, but fail when new violations are added. +freeze.store.default.allowStoreUpdate=true + +# Enable this if a new (frozen) rule has been added in order to create the initial store and record the existing violations. +#freeze.store.default.allowStoreCreation=true + +# Enable this to allow new violations to be recorded. +# NOTE: Adding new violations should be avoided when possible. If the rule was correct to flag a new +# violation, please try to avoid creating the violation.
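For context on the store configured above: the frozen rules listed in stored.rules work by wrapping an ArchRule in FreezingArchRule, which reads known violations from freeze.store.default.path and fails only on newly introduced ones. An illustrative sketch with a hypothetical rule, not one of the rules actually recorded here:

    import com.tngtech.archunit.lang.ArchRule;
    import com.tngtech.archunit.library.freeze.FreezingArchRule;

    import static com.tngtech.archunit.lang.syntax.ArchRuleDefinition.classes;

    class FrozenRuleSketch {
        // Violations present when the store was first recorded are tolerated;
        // any newly introduced violation still fails the build.
        static final ArchRule FROZEN_EXAMPLE =
                FreezingArchRule.freeze(
                        classes()
                                .that().resideInAPackage("org.apache.flink.connector..")
                                .should().bePublic());
    }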
If the violation was created due to a +# shortcoming of the rule, file a JIRA issue so the rule can be improved. +#freeze.refreeze=true + +freeze.store.default.path=archunit-violations From 2d1456b44ad1e5eb211068fbe07e2386e15a06ca Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Tue, 22 Nov 2022 14:39:30 -0500 Subject: [PATCH 11/18] Replaces hamcrest / junit assertions with assertj ones Signed-off-by: Andriy Redko --- .../sink/OpensearchSinkBuilderTest.java | 44 +++++++------- .../opensearch/sink/OpensearchSinkITCase.java | 10 ++-- .../opensearch/sink/OpensearchTestClient.java | 12 ++-- .../sink/OpensearchWriterITCase.java | 10 ++-- .../opensearch/table/IndexGeneratorTest.java | 58 ++++++++++--------- .../opensearch/table/KeyExtractorTest.java | 15 ++--- .../OpensearchDynamicSinkFactoryTest.java | 12 ++-- .../table/OpensearchDynamicSinkITCase.java | 12 ++-- .../opensearch/OpensearchSinkITCase.java | 54 +++++++---------- .../testutils/SourceSinkDataTestKit.java | 5 +- 10 files changed, 113 insertions(+), 119 deletions(-) diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilderTest.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilderTest.java index 3657145..cba2ddc 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilderTest.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkBuilderTest.java @@ -30,8 +30,8 @@ import java.util.stream.Stream; import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; -import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.assertj.core.api.Assertions.assertThatNoException; +import static org.assertj.core.api.Assertions.assertThatThrownBy; /** Tests for {@link OpensearchSinkBuilder}. 
*/ @ExtendWith(TestLoggerExtension.class) @@ -54,7 +54,7 @@ Stream testValidBuilders() { return DynamicTest.stream( validBuilders, OpensearchSinkBuilder::toString, - builder -> assertDoesNotThrow(builder::build)); + builder -> assertThatNoException().isThrownBy(builder::build)); } @Test @@ -65,36 +65,38 @@ void testDefaultDeliveryGuarantee() { @Test void testThrowIfExactlyOnceConfigured() { - assertThrows( - IllegalStateException.class, - () -> createMinimalBuilder().setDeliveryGuarantee(DeliveryGuarantee.EXACTLY_ONCE)); + assertThatThrownBy( + () -> + createMinimalBuilder() + .setDeliveryGuarantee(DeliveryGuarantee.EXACTLY_ONCE)) + .isInstanceOf(IllegalStateException.class); } @Test void testThrowIfHostsNotSet() { - assertThrows( - NullPointerException.class, - () -> createEmptyBuilder().setEmitter((element, indexer, context) -> {}).build()); + assertThatThrownBy( + () -> + createEmptyBuilder() + .setEmitter((element, indexer, context) -> {}) + .build()) + .isInstanceOf(NullPointerException.class); } @Test void testThrowIfEmitterNotSet() { - assertThrows( - NullPointerException.class, - () -> createEmptyBuilder().setHosts(new HttpHost("localhost:3000")).build()); + assertThatThrownBy( + () -> createEmptyBuilder().setHosts(new HttpHost("localhost:3000")).build()) + .isInstanceOf(NullPointerException.class); } @Test void testThrowIfSetInvalidTimeouts() { - assertThrows( - IllegalStateException.class, - () -> createEmptyBuilder().setConnectionRequestTimeout(-1).build()); - assertThrows( - IllegalStateException.class, - () -> createEmptyBuilder().setConnectionTimeout(-1).build()); - assertThrows( - IllegalStateException.class, - () -> createEmptyBuilder().setSocketTimeout(-1).build()); + assertThatThrownBy(() -> createEmptyBuilder().setConnectionRequestTimeout(-1).build()) + .isInstanceOf(IllegalStateException.class); + assertThatThrownBy(() -> createEmptyBuilder().setConnectionTimeout(-1).build()) + .isInstanceOf(IllegalStateException.class); + assertThatThrownBy(() -> createEmptyBuilder().setSocketTimeout(-1).build()) + .isInstanceOf(IllegalStateException.class); } private OpensearchSinkBuilder createEmptyBuilder() { diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkITCase.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkITCase.java index 084485b..c85e42b 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkITCase.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchSinkITCase.java @@ -52,9 +52,7 @@ import java.util.UUID; import java.util.function.BiFunction; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.assertj.core.api.Assertions.assertThat; /** Tests for {@link OpensearchSink}. 
*/ @Testcontainers @@ -94,9 +92,9 @@ void testWriteToOpensearchWithDeliveryGuarantee(DeliveryGuarantee deliveryGuaran runTest(index, false, TestEmitter::jsonEmitter, deliveryGuarantee, null); } catch (IllegalStateException e) { failure = true; - assertSame(deliveryGuarantee, DeliveryGuarantee.EXACTLY_ONCE); + assertThat(deliveryGuarantee).isSameAs(DeliveryGuarantee.EXACTLY_ONCE); } finally { - assertEquals(failure, deliveryGuarantee == DeliveryGuarantee.EXACTLY_ONCE); + assertThat(failure).isEqualTo(deliveryGuarantee == DeliveryGuarantee.EXACTLY_ONCE); } } @@ -113,7 +111,7 @@ void testWriteJsonToOpensearch( void testRecovery() throws Exception { final String index = "test-recovery-opensearch-sink"; runTest(index, true, TestEmitter::jsonEmitter, new FailingMapper()); - assertTrue(failed); + assertThat(failed).isTrue(); } private void runTest( diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchTestClient.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchTestClient.java index 021ad37..322ffc1 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchTestClient.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchTestClient.java @@ -25,8 +25,7 @@ import java.io.IOException; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.assertj.core.api.Assertions.assertThat; class OpensearchTestClient { private static final String DATA_FIELD_NAME = "data"; @@ -44,10 +43,11 @@ void assertThatIdsAreNotWritten(String index, int... ids) throws IOException { for (final int id : ids) { try { final GetResponse response = getResponse(index, id); - assertFalse( - response.isExists(), String.format("Id %s is unexpectedly present.", id)); + assertThat(response.isExists()) + .as(String.format("Id %s is unexpectedly present.", id)) + .isFalse(); } catch (OpenSearchStatusException e) { - assertEquals(404, e.status().getStatus()); + assertThat(e.status().getStatus()).isEqualTo(404); } } } @@ -60,7 +60,7 @@ void assertThatIdsAreWritten(String index, int... ids) response = getResponse(index, id); Thread.sleep(10); } while (response.isSourceEmpty()); - assertEquals(buildMessage(id), response.getSource().get(DATA_FIELD_NAME)); + assertThat(response.getSource().get(DATA_FIELD_NAME)).isEqualTo(buildMessage(id)); } } diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchWriterITCase.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchWriterITCase.java index 523dff7..11a17fc 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchWriterITCase.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/sink/OpensearchWriterITCase.java @@ -57,8 +57,6 @@ import static org.apache.flink.connector.opensearch.sink.OpensearchTestClient.buildMessage; import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; /** Tests for {@link OpensearchWriter}.
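A pitfall worth spelling out for this migration: AssertJ's as()/describedAs() configures the assertion that follows it in the chain, so the description must be attached before the verifying call; chained after isFalse() it would be silently ignored. Hence the ordering used in assertThatIdsAreNotWritten above, which can also pass format arguments directly:

    // as() must come before the verifying call it describes
    assertThat(response.isExists())
            .as("Id %s is unexpectedly present.", id)
            .isFalse();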
*/ @Testcontainers @@ -178,20 +176,20 @@ void testIncrementByteOutMetric() throws Exception { try (final OpensearchWriter> writer = createWriter(index, false, bulkProcessorConfig, metricGroup)) { final Counter numBytesOut = operatorIOMetricGroup.getNumBytesOutCounter(); - assertEquals(numBytesOut.getCount(), 0); + assertThat(numBytesOut.getCount()).isEqualTo(0); writer.write(Tuple2.of(1, buildMessage(1)), null); writer.write(Tuple2.of(2, buildMessage(2)), null); writer.blockingFlushAllActions(); long first = numBytesOut.getCount(); - assertTrue(first > 0); + assertThat(first).isGreaterThan(0); writer.write(Tuple2.of(1, buildMessage(1)), null); writer.write(Tuple2.of(2, buildMessage(2)), null); writer.blockingFlushAllActions(); - assertTrue(numBytesOut.getCount() > first); + assertThat(numBytesOut.getCount()).isGreaterThan(first); } } @@ -235,7 +233,7 @@ void testCurrentSendTime() throws Exception { writer.blockingFlushAllActions(); - assertTrue(currentSendTime.isPresent()); + assertThat(currentSendTime).isPresent(); assertThat(currentSendTime.get().getValue()).isGreaterThan(0L); } } diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/IndexGeneratorTest.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/IndexGeneratorTest.java index 35c3d6d..7e88328 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/IndexGeneratorTest.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/IndexGeneratorTest.java @@ -26,7 +26,6 @@ import org.apache.flink.table.data.TimestampData; import org.apache.flink.table.types.DataType; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import java.sql.Date; @@ -40,6 +39,8 @@ import java.util.Arrays; import java.util.List; +import static org.assertj.core.api.Assertions.assertThat; + /** Suite tests for {@link IndexGenerator}. 
*/ class IndexGeneratorTest { @@ -110,12 +111,12 @@ public void testDynamicIndexFromTimestamp() { IndexGeneratorFactory.createIndexGenerator( "{order_timestamp|yyyy_MM_dd_HH-ss}_index", fieldNames, dataTypes); indexGenerator.open(); - Assertions.assertEquals("2020_03_18_12-14_index", indexGenerator.generate(rows.get(0))); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("2020_03_18_12-14_index"); IndexGenerator indexGenerator1 = IndexGeneratorFactory.createIndexGenerator( "{order_timestamp|yyyy_MM_dd_HH_mm}_index", fieldNames, dataTypes); indexGenerator1.open(); - Assertions.assertEquals("2020_03_19_12_22_index", indexGenerator1.generate(rows.get(1))); + assertThat(indexGenerator1.generate(rows.get(1))).isEqualTo("2020_03_19_12_22_index"); } @Test @@ -124,12 +125,12 @@ public void testDynamicIndexFromLocalDateTime() { IndexGeneratorFactory.createIndexGenerator( "{local_datetime|yyyy_MM_dd_HH-ss}_index", fieldNames, dataTypes); indexGenerator.open(); - Assertions.assertEquals("2020_03_18_12-14_index", indexGenerator.generate(rows.get(0))); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("2020_03_18_12-14_index"); IndexGenerator indexGenerator1 = IndexGeneratorFactory.createIndexGenerator( "{local_datetime|yyyy_MM_dd_HH_mm}_index", fieldNames, dataTypes); indexGenerator1.open(); - Assertions.assertEquals("2020_03_19_12_22_index", indexGenerator1.generate(rows.get(1))); + assertThat(indexGenerator1.generate(rows.get(1))).isEqualTo("2020_03_19_12_22_index"); } @Test @@ -138,8 +139,8 @@ public void testDynamicIndexFromDate() { IndexGeneratorFactory.createIndexGenerator( "my-index-{log_date|yyyy/MM/dd}", fieldNames, dataTypes); indexGenerator.open(); - Assertions.assertEquals("my-index-2020/03/18", indexGenerator.generate(rows.get(0))); - Assertions.assertEquals("my-index-2020/03/19", indexGenerator.generate(rows.get(1))); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("my-index-2020/03/18"); + assertThat(indexGenerator.generate(rows.get(1))).isEqualTo("my-index-2020/03/19"); } @Test @@ -148,8 +149,8 @@ public void testDynamicIndexFromLocalDate() { IndexGeneratorFactory.createIndexGenerator( "my-index-{local_date|yyyy/MM/dd}", fieldNames, dataTypes); indexGenerator.open(); - Assertions.assertEquals("my-index-2020/03/18", indexGenerator.generate(rows.get(0))); - Assertions.assertEquals("my-index-2020/03/19", indexGenerator.generate(rows.get(1))); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("my-index-2020/03/18"); + assertThat(indexGenerator.generate(rows.get(1))).isEqualTo("my-index-2020/03/19"); } @Test @@ -158,8 +159,8 @@ public void testDynamicIndexFromTime() { IndexGeneratorFactory.createIndexGenerator( "my-index-{log_time|HH-mm}", fieldNames, dataTypes); indexGenerator.open(); - Assertions.assertEquals("my-index-12-12", indexGenerator.generate(rows.get(0))); - Assertions.assertEquals("my-index-12-22", indexGenerator.generate(rows.get(1))); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("my-index-12-12"); + assertThat(indexGenerator.generate(rows.get(1))).isEqualTo("my-index-12-22"); } @Test @@ -168,8 +169,8 @@ public void testDynamicIndexFromLocalTime() { IndexGeneratorFactory.createIndexGenerator( "my-index-{local_time|HH-mm}", fieldNames, dataTypes); indexGenerator.open(); - Assertions.assertEquals("my-index-12-13", indexGenerator.generate(rows.get(0))); - Assertions.assertEquals("my-index-12-13", indexGenerator.generate(rows.get(1))); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("my-index-12-13"); + 
assertThat(indexGenerator.generate(rows.get(1))).isEqualTo("my-index-12-13"); } @Test @@ -178,8 +179,8 @@ public void testDynamicIndexDefaultFormat() { IndexGeneratorFactory.createIndexGenerator( "my-index-{local_time|}", fieldNames, dataTypes); indexGenerator.open(); - Assertions.assertEquals("my-index-12_13_14", indexGenerator.generate(rows.get(0))); - Assertions.assertEquals("my-index-12_13_14", indexGenerator.generate(rows.get(1))); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("my-index-12_13_14"); + assertThat(indexGenerator.generate(rows.get(1))).isEqualTo("my-index-12_13_14"); } @Test @@ -187,8 +188,8 @@ public void testGeneralDynamicIndex() { IndexGenerator indexGenerator = IndexGeneratorFactory.createIndexGenerator("index_{item}", fieldNames, dataTypes); indexGenerator.open(); - Assertions.assertEquals("index_apple", indexGenerator.generate(rows.get(0))); - Assertions.assertEquals("index_peanut", indexGenerator.generate(rows.get(1))); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("index_apple"); + assertThat(indexGenerator.generate(rows.get(1))).isEqualTo("index_peanut"); } @Test @@ -196,8 +197,8 @@ public void testStaticIndex() { IndexGenerator indexGenerator = IndexGeneratorFactory.createIndexGenerator("my-index", fieldNames, dataTypes); indexGenerator.open(); - Assertions.assertEquals("my-index", indexGenerator.generate(rows.get(0))); - Assertions.assertEquals("my-index", indexGenerator.generate(rows.get(1))); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("my-index"); + assertThat(indexGenerator.generate(rows.get(1))).isEqualTo("my-index"); } @Test @@ -209,7 +210,7 @@ public void testUnknownField() { IndexGeneratorFactory.createIndexGenerator( "my-index-{unknown_ts|yyyy-MM-dd}", fieldNames, dataTypes); } catch (TableException e) { - Assertions.assertEquals(e.getMessage(), expectedExceptionMsg); + assertThat(e.getMessage()).isEqualTo(expectedExceptionMsg); } } @@ -222,7 +223,7 @@ public void testUnsupportedTimeType() { IndexGeneratorFactory.createIndexGenerator( "my-index-{id|yyyy-MM-dd}", fieldNames, dataTypes); } catch (TableException e) { - Assertions.assertEquals(expectedExceptionMsg, e.getMessage()); + assertThat(e.getMessage()).isEqualTo(expectedExceptionMsg); } } @@ -235,7 +236,7 @@ public void testUnsupportedMultiParametersType() { IndexGeneratorFactory.createIndexGenerator( "my-index-{local_date}-{local_time}", fieldNames, dataTypes); } catch (TableException e) { - Assertions.assertEquals(expectedExceptionMsg, e.getMessage()); + assertThat(e.getMessage()).isEqualTo(expectedExceptionMsg); } } @@ -246,7 +247,7 @@ public void testDynamicIndexUnsupportedFormat() { IndexGeneratorFactory.createIndexGenerator( "my-index-{local_date|yyyy/MM/dd HH:mm}", fieldNames, dataTypes); } catch (UnsupportedTemporalTypeException e) { - Assertions.assertEquals(expectedExceptionMsg, e.getMessage()); + assertThat(e.getMessage()).isEqualTo(expectedExceptionMsg); } } @@ -259,7 +260,7 @@ public void testUnsupportedIndexFieldType() { try { IndexGeneratorFactory.createIndexGenerator("index_{status}", fieldNames, dataTypes); } catch (IllegalArgumentException e) { - Assertions.assertEquals(expectedExceptionMsg, e.getMessage()); + assertThat(e.getMessage()).isEqualTo(expectedExceptionMsg); } } @@ -296,9 +297,10 @@ public void testDynamicIndexFromSystemTime() { String actualIndex = indexGenerator.generate(rows.get(1)); String expectedIndex2 = "my-index-" + LocalDateTime.now().format(dateTimeFormatter); - Assertions.assertTrue( - 
actualIndex.equals(expectedIndex1) - || actualIndex.equals(expectedIndex2)); + assertThat( + actualIndex.equals(expectedIndex1) + || actualIndex.equals(expectedIndex2)) + .isTrue(); }); List invalidUseCases = @@ -326,7 +328,7 @@ public void testDynamicIndexFromSystemTime() { dataTypes); indexGenerator.open(); } catch (TableException e) { - Assertions.assertEquals(expectedExceptionMsg, e.getMessage()); + assertThat(e.getMessage()).isEqualTo(expectedExceptionMsg); } }); } diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/KeyExtractorTest.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/KeyExtractorTest.java index 537fc72..31ef457 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/KeyExtractorTest.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/KeyExtractorTest.java @@ -24,7 +24,6 @@ import org.apache.flink.table.data.StringData; import org.apache.flink.table.data.TimestampData; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import java.time.Instant; @@ -37,6 +36,8 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.assertj.core.api.Assertions.assertThat; + /** Tests for {@link KeyExtractor}. */ class KeyExtractorTest { @Test @@ -51,7 +52,7 @@ public void testSimpleKey() { KeyExtractor.createKeyExtractor(logicalTypesWithIndex, "_"); String key = keyExtractor.apply(GenericRowData.of(12L, StringData.fromString("ABCD"))); - Assertions.assertEquals(key, "12"); + assertThat(key).isEqualTo("12"); } @Test @@ -62,7 +63,7 @@ public void testNoPrimaryKey() { KeyExtractor.createKeyExtractor(logicalTypesWithIndex, "_"); String key = keyExtractor.apply(GenericRowData.of(12L, StringData.fromString("ABCD"))); - Assertions.assertEquals(key, null); + assertThat(key).isNull(); } @Test @@ -85,7 +86,7 @@ public void testTwoFieldsKey() { StringData.fromString("ABCD"), TimestampData.fromLocalDateTime( LocalDateTime.parse("2012-12-12T12:12:12")))); - Assertions.assertEquals(key, "12_2012-12-12T12:12:12"); + assertThat(key).isEqualTo("12_2012-12-12T12:12:12"); } @Test @@ -140,8 +141,8 @@ public void testAllTypesKey() { TimestampData.fromInstant(Instant.parse("2013-01-13T13:13:13Z")), (int) (LocalTime.parse("14:14:14").toNanoOfDay() / 1_000_000), (int) LocalDate.parse("2015-05-15").toEpochDay())); - Assertions.assertEquals( - key, - "1_2_3_4_true_1.0_2.0_ABCD_2012-12-12T12:12:12_2013-01-13T13:13:13_14:14:14_2015-05-15"); + assertThat(key) + .isEqualTo( + "1_2_3_4_true_1.0_2.0_ABCD_2012-12-12T12:12:12_2013-01-13T13:13:13_14:14:14_2015-05-15"); } } diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactoryTest.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactoryTest.java index 5130c33..6da390f 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactoryTest.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkFactoryTest.java @@ -31,16 +31,16 @@ import org.apache.flink.types.RowKind; import org.apache.flink.util.TestLoggerExtension; -import org.junit.jupiter.api.Assertions; +import org.assertj.core.api.ThrowableAssert; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; 
-import org.junit.jupiter.api.function.Executable; import java.util.Arrays; import java.util.Collections; import static org.apache.flink.table.factories.FactoryUtil.SINK_PARALLELISM; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; /** Tests for validation in {@link OpensearchDynamicSinkFactory}. */ @ExtendWith(TestLoggerExtension.class) @@ -66,9 +66,11 @@ public void validateEmptyConfiguration() { () -> sinkFactory.createDynamicTableSink(TestContext.context().build())); } - void assertValidationException(String expectedMessage, Executable executable) { - ValidationException thrown = Assertions.assertThrows(ValidationException.class, executable); - Assertions.assertEquals(expectedMessage, thrown.getMessage()); + void assertValidationException( + String expectedMessage, ThrowableAssert.ThrowingCallable executable) { + assertThatThrownBy(executable) + .isInstanceOf(ValidationException.class) + .hasMessage(expectedMessage); } @Test diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkITCase.java b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkITCase.java index e81eafb..7125211 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkITCase.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/connector/opensearch/table/OpensearchDynamicSinkITCase.java @@ -38,7 +38,6 @@ import org.apache.flink.types.RowKind; import org.apache.flink.util.TestLoggerExtension; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.opensearch.OpenSearchStatusException; @@ -67,6 +66,7 @@ import java.util.Map; import static org.apache.flink.table.api.Expressions.row; +import static org.assertj.core.api.Assertions.assertThat; /** IT tests for {@link OpensearchDynamicSink}. 
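Before the IT cases, a compact sketch of how the factory under test is reached from the Table API; the option values are placeholders, and the dynamic index pattern uses the {field|format} grammar exercised by IndexGeneratorTest above:

    import org.apache.flink.table.api.EnvironmentSettings;
    import org.apache.flink.table.api.TableEnvironment;

    class TableConnectorSketch {
        public static void main(String[] args) {
            TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
            tEnv.executeSql(
                    "CREATE TABLE os_sink (a INT, b TIMESTAMP(3)) WITH ("
                            + " 'connector' = 'opensearch',"
                            + " 'hosts' = 'http://localhost:9200',"
                            // one index per day, resolved per record from column b
                            + " 'index' = 'my-index-{b|yyyy-MM-dd}'"
                            + ")");
            tEnv.executeSql("INSERT INTO os_sink VALUES (1, TIMESTAMP '2012-12-12 12:12:12')");
        }
    }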
*/ @ExtendWith(TestLoggerExtension.class) @@ -169,7 +169,7 @@ public void testWritingDocuments() throws Exception { expectedMap.put("e", 2); expectedMap.put("f", "2003-10-20"); expectedMap.put("g", "2012-12-12 12:12:12"); - Assertions.assertEquals(response, expectedMap); + assertThat(response).isEqualTo(expectedMap); } @Test @@ -219,7 +219,7 @@ public void testWritingDocumentsFromTableApi() throws Exception { expectedMap.put("e", 2); expectedMap.put("f", "2003-10-20"); expectedMap.put("g", "2012-12-12 12:12:12"); - Assertions.assertEquals(response, expectedMap); + assertThat(response).isEqualTo(expectedMap); } @Test @@ -302,7 +302,7 @@ public void testWritingDocumentsNoPrimaryKey() throws Exception { HashSet> expectedSet = new HashSet<>(); expectedSet.add(expectedMap1); expectedSet.add(expectedMap2); - Assertions.assertEquals(resultSet, expectedSet); + assertThat(resultSet).isEqualTo(expectedSet); } @Test @@ -333,7 +333,7 @@ public void testWritingDocumentsWithDynamicIndex() throws Exception { Map expectedMap = new HashMap<>(); expectedMap.put("a", 1); expectedMap.put("b", "2012-12-12 12:12:12"); - Assertions.assertEquals(response, expectedMap); + assertThat(response).isEqualTo(expectedMap); } @Test @@ -391,6 +391,6 @@ public void testWritingDocumentsWithDynamicIndexFromSystemTime() throws Exceptio Map expectedMap = new HashMap<>(); expectedMap.put("a", 1); expectedMap.put("b", "2012-12-12 12:12:12"); - Assertions.assertEquals(response, expectedMap); + assertThat(response).isEqualTo(expectedMap); } } diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkITCase.java b/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkITCase.java index 38b9354..52b6e5e 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkITCase.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkITCase.java @@ -29,29 +29,29 @@ import org.apache.flink.test.util.AbstractTestBase; import org.apache.http.HttpHost; -import org.junit.ClassRule; -import org.junit.Test; +import org.junit.jupiter.api.Test; import org.opensearch.client.RestHighLevelClient; import org.opensearch.testcontainers.OpensearchContainer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.function.Function; -import static org.hamcrest.CoreMatchers.instanceOf; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.Assert.fail; +import static org.assertj.core.api.Assertions.assertThatThrownBy; /** IT cases for the {@link OpensearchSink}. 
*/ +@Testcontainers public class OpensearchSinkITCase extends AbstractTestBase { private static final Logger LOG = LoggerFactory.getLogger(OpensearchSinkITCase.class); - @ClassRule - public static final OpensearchContainer OS_CONTAINER = + @Container + private static final OpensearchContainer OS_CONTAINER = OpensearchUtil.createOpensearchContainer(DockerImageVersions.OPENSEARCH_1, LOG); @Test @@ -68,27 +68,22 @@ public void testOpensearchSinkWithSmile() throws Exception { @Test public void testNullAddresses() { - try { - createOpensearchSink(1, null, SourceSinkDataTestKit.getJsonSinkFunction("test")); - } catch (IllegalArgumentException | NullPointerException expectedException) { - // test passes - return; - } - - fail(); + assertThatThrownBy( + () -> + createOpensearchSink( + 1, null, SourceSinkDataTestKit.getJsonSinkFunction("test"))) + .isInstanceOfAny(IllegalArgumentException.class, NullPointerException.class); } @Test public void testEmptyAddresses() { - try { - createOpensearchSink( - 1, Collections.emptyList(), SourceSinkDataTestKit.getJsonSinkFunction("test")); - } catch (IllegalArgumentException expectedException) { - // test passes - return; - } - - fail(); + assertThatThrownBy( + () -> + createOpensearchSink( + 1, + Collections.emptyList(), + SourceSinkDataTestKit.getJsonSinkFunction("test"))) + .isInstanceOf(IllegalArgumentException.class); } @Test @@ -104,14 +99,9 @@ public void testInvalidOpensearchCluster() throws Exception { SourceSinkDataTestKit.getJsonSinkFunction("test"), "123.123.123.123")); // incorrect ip address - try { - env.execute("Opensearch Sink Test"); - } catch (JobExecutionException expectedException) { - assertThat(expectedException.getCause(), instanceOf(JobException.class)); - return; - } - - fail(); + assertThatThrownBy(() -> env.execute("Opensearch Sink Test")) + .isInstanceOf(JobExecutionException.class) + .hasCauseInstanceOf(JobException.class); } private OpensearchSink> createOpensearchSink( diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/testutils/SourceSinkDataTestKit.java b/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/testutils/SourceSinkDataTestKit.java index fe97d57..293484d 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/testutils/SourceSinkDataTestKit.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/testutils/SourceSinkDataTestKit.java @@ -23,7 +23,6 @@ import org.apache.flink.streaming.connectors.opensearch.OpensearchSinkFunction; import org.apache.flink.streaming.connectors.opensearch.RequestIndexer; -import org.junit.Assert; import org.opensearch.action.get.GetRequest; import org.opensearch.action.get.GetResponse; import org.opensearch.action.index.IndexRequest; @@ -37,6 +36,8 @@ import java.util.HashMap; import java.util.Map; +import static org.assertj.core.api.Assertions.assertThat; + /** * This class contains utilities and a pre-defined source function and Opensearch Sink function used * to simulate and verify data used in tests. 
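One behavioral detail of the JUnit 5 migration above: under @Testcontainers, a static @Container field is started once and shared by every test method in the class, while an instance field would be restarted per test. The IT case therefore keeps a single shared cluster, as sketched here with an illustrative image tag:

    import org.opensearch.testcontainers.OpensearchContainer;
    import org.testcontainers.junit.jupiter.Container;
    import org.testcontainers.junit.jupiter.Testcontainers;
    import org.testcontainers.utility.DockerImageName;

    @Testcontainers
    class ContainerLifecycleSketch {
        // static: started once for the whole class, mirroring OpensearchSinkITCase
        @Container
        private static final OpensearchContainer SHARED =
                new OpensearchContainer(DockerImageName.parse("opensearchproject/opensearch:1.3.0"));
    }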
@@ -132,7 +133,7 @@ public static void verifyProducedSinkData(RestHighLevelClient client, String ind for (int i = 0; i < NUM_ELEMENTS; i++) { GetResponse response = client.get(new GetRequest(index, Integer.toString(i)), RequestOptions.DEFAULT); - Assert.assertEquals(DATA_PREFIX + i, response.getSource().get(DATA_FIELD_NAME)); + assertThat(response.getSource().get(DATA_FIELD_NAME)).isEqualTo(DATA_PREFIX + i); } } From b08f16164f31c8a2173fabde92c3b44f5bbb380b Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Wed, 23 Nov 2022 08:28:51 -0500 Subject: [PATCH 12/18] Address code review comments Signed-off-by: Andriy Redko --- .../0c16f106-1632-4ba5-aa26-eb3ab7c7d43e | 1 - .../connector/opensearch/sink/OpensearchSink.java | 2 ++ .../connector/opensearch/sink/RequestIndexer.java | 4 ++-- .../architecture/TestCodeArchitectureTest.java | 14 -------------- pom.xml | 7 ------- 5 files changed, 4 insertions(+), 24 deletions(-) diff --git a/flink-connector-opensearch/archunit-violations/0c16f106-1632-4ba5-aa26-eb3ab7c7d43e b/flink-connector-opensearch/archunit-violations/0c16f106-1632-4ba5-aa26-eb3ab7c7d43e index 338d495..e69de29 100644 --- a/flink-connector-opensearch/archunit-violations/0c16f106-1632-4ba5-aa26-eb3ab7c7d43e +++ b/flink-connector-opensearch/archunit-violations/0c16f106-1632-4ba5-aa26-eb3ab7c7d43e @@ -1 +0,0 @@ -org.apache.flink.connector.opensearch.sink.OpensearchEmitter.emit(java.lang.Object, org.apache.flink.api.connector.sink2.SinkWriter$Context, org.apache.flink.connector.opensearch.sink.RequestIndexer): Argument leaf type org.apache.flink.connector.opensearch.sink.RequestIndexer does not satisfy: reside outside of package 'org.apache.flink..' or reside in any package ['..shaded..'] or annotated with @Public or annotated with @PublicEvolving or annotated with @Deprecated \ No newline at end of file diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSink.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSink.java index 1f2b21d..b23b9fc 100644 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSink.java +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/OpensearchSink.java @@ -19,6 +19,7 @@ package org.apache.flink.connector.opensearch.sink; import org.apache.flink.annotation.PublicEvolving; +import org.apache.flink.annotation.VisibleForTesting; import org.apache.flink.api.connector.sink2.Sink; import org.apache.flink.api.connector.sink2.SinkWriter; import org.apache.flink.connector.base.DeliveryGuarantee; @@ -85,6 +86,7 @@ public SinkWriter createWriter(InitContext context) throws IOException { context.getMailboxExecutor()); } + @VisibleForTesting DeliveryGuarantee getDeliveryGuarantee() { return deliveryGuarantee; } diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/RequestIndexer.java b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/RequestIndexer.java index 227a526..6ee0fa5 100644 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/RequestIndexer.java +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/connector/opensearch/sink/RequestIndexer.java @@ -18,7 +18,7 @@ package org.apache.flink.connector.opensearch.sink; -import org.apache.flink.annotation.Internal; +import org.apache.flink.annotation.PublicEvolving; import 
org.opensearch.action.delete.DeleteRequest; import org.opensearch.action.index.IndexRequest; @@ -28,7 +28,7 @@ * Users add multiple delete, index or update requests to a {@link RequestIndexer} to prepare them * for sending to an Opensearch cluster. */ -@Internal +@PublicEvolving public interface RequestIndexer { /** * Add multiple {@link DeleteRequest} to the indexer to prepare for sending requests to diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/architecture/TestCodeArchitectureTest.java b/flink-connector-opensearch/src/test/java/org/apache/flink/architecture/TestCodeArchitectureTest.java index 9692731..7e44a8c 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/architecture/TestCodeArchitectureTest.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/architecture/TestCodeArchitectureTest.java @@ -21,13 +21,10 @@ import org.apache.flink.architecture.common.ImportOptions; import com.tngtech.archunit.core.importer.ImportOption; -import com.tngtech.archunit.core.importer.Location; import com.tngtech.archunit.junit.AnalyzeClasses; import com.tngtech.archunit.junit.ArchTest; import com.tngtech.archunit.junit.ArchTests; -import java.util.regex.Pattern; - /** Architecture tests for test code. */ @AnalyzeClasses( packages = { @@ -36,7 +33,6 @@ }, importOptions = { ImportOption.OnlyIncludeTests.class, - TestCodeArchitectureTest.IncludeES7ImportOption.class, ImportOptions.ExcludeScalaImportOption.class, ImportOptions.ExcludeShadedImportOption.class }) @@ -44,14 +40,4 @@ public class TestCodeArchitectureTest { @ArchTest public static final ArchTests COMMON_TESTS = ArchTests.in(TestCodeArchitectureTestBase.class); - - /** Only include ES7 related locations. */ - public static final class IncludeES7ImportOption implements ImportOption { - private static final Pattern OPENSEARCH = Pattern.compile(".*opensearch.*"); - - @Override - public boolean includes(Location location) { - return location.matches(OPENSEARCH); - } - } } diff --git a/pom.xml b/pom.xml index 116eb63..9e055c6 100644 --- a/pom.xml +++ b/pom.xml @@ -65,7 +65,6 @@ under the License. 15.0 2.13.4.20221013 - 4.13.2 5.8.1 3.21.0 0.22.0 @@ -314,12 +313,6 @@ under the License. import - - junit - junit - ${junit4.version} - - org.assertj assertj-core From e43736c3d52a1c139f18de7d54b5b4035011d420 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Mon, 28 Nov 2022 09:17:26 -0500 Subject: [PATCH 13/18] Address code review comments Signed-off-by: Andriy Redko --- .../streaming/connectors/opensearch/OpensearchSink.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSink.java b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSink.java index 4f64fdf..1a337fa 100644 --- a/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSink.java +++ b/flink-connector-opensearch/src/main/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSink.java @@ -17,7 +17,6 @@ package org.apache.flink.streaming.connectors.opensearch; -import org.apache.flink.annotation.Internal; import org.apache.flink.annotation.PublicEvolving; import org.apache.flink.api.java.utils.ParameterTool; import org.apache.flink.configuration.Configuration; @@ -71,8 +70,11 @@ * OpensearchSinkFunction} for processing. 
* * @param Type of the elements handled by this sink + * @deprecated This sink has been deprecated in favor of {@link + * org.apache.flink.connector.opensearch.sink.OpensearchSink} */ -@Internal +@Deprecated +@PublicEvolving public class OpensearchSink extends RichSinkFunction implements CheckpointedFunction { private static final long serialVersionUID = -1007596293618451942L; private static final Logger LOG = LoggerFactory.getLogger(OpensearchSink.class); From 45098e3da62c09a78e6fa6d598d4143e196b5f2a Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Mon, 28 Nov 2022 17:15:45 -0500 Subject: [PATCH 14/18] Address code review comments. Added OpensearchSinkTest test case Signed-off-by: Andriy Redko --- .../opensearch/OpensearchSinkTest.java | 562 ++++++++++++++++++ 1 file changed, 562 insertions(+) create mode 100644 flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkTest.java diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkTest.java b/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkTest.java new file mode 100644 index 0000000..eee2883 --- /dev/null +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkTest.java @@ -0,0 +1,562 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.streaming.connectors.opensearch; + +import org.apache.flink.api.common.functions.RuntimeContext; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.core.testutils.CheckedThread; +import org.apache.flink.streaming.api.operators.StreamSink; +import org.apache.flink.streaming.connectors.opensearch.util.NoOpFailureHandler; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.streaming.util.MockStreamingRuntimeContext; +import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness; + +import org.apache.http.HttpHost; +import org.apache.http.HttpResponse; +import org.apache.http.entity.ByteArrayEntity; +import org.apache.http.entity.ContentType; +import org.apache.http.impl.bootstrap.HttpServer; +import org.apache.http.impl.bootstrap.ServerBootstrap; +import org.apache.http.protocol.HttpRequestHandlerMapper; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; +import org.opensearch.action.ActionRequest; +import org.opensearch.action.DocWriteRequest.OpType; +import org.opensearch.action.bulk.BulkItemResponse; +import org.opensearch.action.bulk.BulkItemResponse.Failure; +import org.opensearch.action.bulk.BulkResponse; +import org.opensearch.action.index.IndexResponse; +import org.opensearch.client.Requests; +import org.opensearch.common.xcontent.ToXContent; +import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.index.shard.ShardId; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Arrays; +import java.util.Deque; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ConcurrentLinkedDeque; +import java.util.function.Consumer; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** Suite of tests for {@link OpensearchSink}. */ +public class OpensearchSinkTest { + private HttpServer server; + private final Deque> responses = new ConcurrentLinkedDeque<>(); + + @BeforeEach + public void setUp() throws IOException { + final HttpRequestHandlerMapper handlers = + (request) -> { + final String method = request.getRequestLine().getMethod(); + if (method.equalsIgnoreCase("HEAD")) { + // Connection request always OKed + return (req, resp, context) -> resp.setStatusCode(200); + } else if (method.equalsIgnoreCase("POST")) { + // Bulk responses are configured per test case + return (req, resp, context) -> responses.poll().accept(resp); + } else { + return null; + } + }; + server = ServerBootstrap.bootstrap().setHandlerMapper(handlers).create(); + server.start(); + } + + @AfterEach + public void tearDown() { + server.stop(); + server = null; + responses.clear(); + } + + /** + * Tests that any item failure in the listener callbacks is rethrown on an immediately following + * invoke call. 
+ */ + @Test + public void testItemFailureRethrownOnInvoke() throws Throwable { + final OpensearchSink.Builder builder = + new OpensearchSink.Builder<>( + Arrays.asList(new HttpHost("localhost", server.getLocalPort())), + new SimpleSinkFunction()); + builder.setBulkFlushMaxActions(1); + builder.setFailureHandler(new NoOpFailureHandler()); + + final OpensearchSink sink = builder.build(); + final OneInputStreamOperatorTestHarness testHarness = + new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink)); + + responses.add( + createResponse( + new BulkItemResponse( + 1, + OpType.INDEX, + new Failure( + "test", + "_doc", + "1", + new Exception("artificial failure for record"))))); + testHarness.open(); + + // setup the next bulk request, and its mock item failures + testHarness.processElement(new StreamRecord<>("msg")); + + assertThatThrownBy(() -> testHarness.processElement(new StreamRecord<>("next msg"))) + .getCause() + .hasMessageContaining("artificial failure for record"); + } + + /** + * Tests that any item failure in the listener callbacks is rethrown on an immediately following + * checkpoint. + */ + @Test + public void testItemFailureRethrownOnCheckpoint() throws Throwable { + final OpensearchSink.Builder builder = + new OpensearchSink.Builder<>( + Arrays.asList(new HttpHost("localhost", server.getLocalPort())), + new SimpleSinkFunction()); + builder.setBulkFlushMaxActions(1); + builder.setFailureHandler(new NoOpFailureHandler()); + + final OpensearchSink sink = builder.build(); + final OneInputStreamOperatorTestHarness testHarness = + new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink)); + + testHarness.open(); + + responses.add( + createResponse( + new BulkItemResponse( + 1, + OpType.INDEX, + new Failure( + "test", + "_doc", + "1", + new Exception("artificial failure for record"))))); + testHarness.processElement(new StreamRecord<>("msg")); + + assertThatThrownBy(() -> testHarness.snapshot(1L, 1000L)) + .getCause() + .getCause() + .hasMessageContaining("artificial failure for record"); + } + + /** + * Tests that any item failure in the listener callbacks due to flushing on an immediately + * following checkpoint is rethrown; we set a timeout because the test will not finish if the + * logic is broken. 
+ */ + @Test + @Timeout(5) + public void testItemFailureRethrownOnCheckpointAfterFlush() throws Throwable { + final OpensearchSink.Builder builder = + new OpensearchSink.Builder<>( + Arrays.asList(new HttpHost("localhost", server.getLocalPort())), + new SimpleSinkFunction()); + builder.setBulkFlushInterval(1000); + builder.setFailureHandler(new NoOpFailureHandler()); + + final OpensearchSink sink = builder.build(); + final OneInputStreamOperatorTestHarness testHarness = + new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink)); + + testHarness.open(); + + responses.add( + createResponse( + new BulkItemResponse( + 1, + OpType.INDEX, + new IndexResponse( + new ShardId("test", "-", 0), "_doc", "1", 0, 0, 1, true)))); + + responses.add( + createResponse( + new BulkItemResponse( + 2, + OpType.INDEX, + new Failure( + "test", + "_doc", + "2", + new Exception("artificial failure for record"))))); + + testHarness.processElement(new StreamRecord<>("msg-1")); + + // Await for flush to be complete + awaitForFlushToFinish(1); + + // setup the requests to be flushed in the snapshot + testHarness.processElement(new StreamRecord<>("msg-2")); + // let the snapshot-triggered flush continue (2 records in the bulk, so the 2nd one should + // fail) + testHarness.processElement(new StreamRecord<>("msg-3")); + + CheckedThread snapshotThread = + new CheckedThread() { + @Override + public void go() throws Exception { + testHarness.snapshot(1L, 1000L); + } + }; + snapshotThread.start(); + + // Await for flush to be complete + awaitForFlushToFinish(0); + + assertThatThrownBy(snapshotThread::sync) + .getCause() + .getCause() + .hasMessageContaining("artificial failure for record"); + } + + /** + * Tests that any bulk failure in the listener callbacks is rethrown on an immediately following + * invoke call. + */ + @Test + public void testBulkFailureRethrownOnInvoke() throws Throwable { + final OpensearchSink.Builder builder = + new OpensearchSink.Builder<>( + Arrays.asList(new HttpHost("localhost", server.getLocalPort())), + new SimpleSinkFunction()); + builder.setBulkFlushMaxActions(1); + builder.setFailureHandler(new NoOpFailureHandler()); + + final OpensearchSink sink = builder.build(); + final OneInputStreamOperatorTestHarness testHarness = + new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink)); + + testHarness.open(); + + // Let the whole bulk request fail + responses.add(response -> response.setStatusCode(500)); + + testHarness.processElement(new StreamRecord<>("msg")); + + assertThatThrownBy(() -> testHarness.processElement(new StreamRecord<>("next msg"))) + .getCause() + .hasMessageContaining("Unable to parse response body"); + } + + /** + * Tests that any bulk failure in the listener callbacks is rethrown on an immediately following + * checkpoint. 
+ */ + @Test + public void testBulkFailureRethrownOnCheckpoint() throws Throwable { + final OpensearchSink.Builder builder = + new OpensearchSink.Builder<>( + Arrays.asList(new HttpHost("localhost", server.getLocalPort())), + new SimpleSinkFunction()); + builder.setBulkFlushMaxActions(1); + builder.setFailureHandler(new NoOpFailureHandler()); + + final OpensearchSink sink = builder.build(); + final OneInputStreamOperatorTestHarness testHarness = + new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink)); + + testHarness.open(); + + // Let the whole bulk request fail + responses.add(response -> response.setStatusCode(500)); + + testHarness.processElement(new StreamRecord<>("msg")); + + assertThatThrownBy(() -> testHarness.snapshot(1L, 1000L)) + .getCause() + .getCause() + .hasMessageContaining("Unable to parse response body"); + } + + /** + * Tests that any bulk failure in the listener callbacks due to flushing on an immediately + * following checkpoint is rethrown; we set a timeout because the test will not finish if the + * logic is broken. + */ + @Test + @Timeout(5) + public void testBulkFailureRethrownOnOnCheckpointAfterFlush() throws Throwable { + final OpensearchSink.Builder builder = + new OpensearchSink.Builder<>( + Arrays.asList(new HttpHost("localhost", server.getLocalPort())), + new SimpleSinkFunction()); + builder.setBulkFlushInterval(500); + builder.setFailureHandler(new NoOpFailureHandler()); + + final OpensearchSink sink = builder.build(); + final OneInputStreamOperatorTestHarness testHarness = + new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink)); + + testHarness.open(); + + responses.add( + createResponse( + new BulkItemResponse( + 1, + OpType.INDEX, + new IndexResponse( + new ShardId("test", "-", 0), "_doc", "1", 0, 0, 1, true)))); + + // Let the whole bulk request fail + responses.add(response -> response.setStatusCode(500)); + + // setup the next bulk request, and let bulk request succeed + testHarness.processElement(new StreamRecord<>("msg-1")); + + // Await for flush to be complete + awaitForFlushToFinish(1); + + // setup the requests to be flushed in the snapshot + testHarness.processElement(new StreamRecord<>("msg-2")); + testHarness.processElement(new StreamRecord<>("msg-3")); + + CheckedThread snapshotThread = + new CheckedThread() { + @Override + public void go() throws Exception { + testHarness.snapshot(1L, 1000L); + } + }; + snapshotThread.start(); + + // Await for flush to be complete + awaitForFlushToFinish(0); + + assertThatThrownBy(snapshotThread::sync) + .getCause() + .getCause() + .hasMessageContaining("Unable to parse response body"); + } + + /** + * Tests that the sink correctly waits for pending requests (including re-added requests) on + * checkpoints; we set a timeout because the test will not finish if the logic is broken. 
+ */ + @Test + @Timeout(5) + public void testAtLeastOnceSink() throws Throwable { + final OpensearchSink.Builder builder = + new OpensearchSink.Builder<>( + Arrays.asList(new HttpHost("localhost", server.getLocalPort())), + new SimpleSinkFunction()); + builder.setBulkFlushInterval(500); + builder.setFailureHandler( + new DummyRetryFailureHandler()); // use a failure handler that simply + // re-adds requests + + final OpensearchSink sink = builder.build(); + final OneInputStreamOperatorTestHarness testHarness = + new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink)); + + testHarness.open(); + + // setup the next bulk request, and its mock item failures; + // it contains 1 request, which will fail and re-added to the next bulk request + responses.add( + createResponse( + new BulkItemResponse( + 1, + OpType.INDEX, + new Failure( + "test", + "_doc", + "1", + new Exception("artificial failure for record"))))); + + testHarness.processElement(new StreamRecord<>("msg")); + + // current number of pending request should be 1 due to the re-add + assertThat(sink.getNumPendingRequests()).isEqualTo(1); + + CheckedThread snapshotThread = + new CheckedThread() { + @Override + public void go() throws Exception { + testHarness.snapshot(1L, 1000L); + } + }; + snapshotThread.start(); + + // Await for flush to be complete + awaitForFlushToFinish(0); + + // since the previous flush should have resulted in a request re-add from the failure + // handler, + // we should have flushed again, and eventually be blocked before snapshot triggers the 2nd + // flush + + responses.add( + createResponse( + new BulkItemResponse( + 2, + OpType.INDEX, + new IndexResponse( + new ShardId("test", "-", 0), "_doc", "2", 0, 0, 1, true)))); + + // current number of pending request should be 1 due to the re-add + assertThat(sink.getNumPendingRequests()).isEqualTo(1); + + // Await for flush to be complete + awaitForFlushToFinish(0); + + // the snapshot should finish with no exceptions + snapshotThread.sync(); + + testHarness.close(); + } + + /** + * This test is meant to assure that testAtLeastOnceSink is valid by testing that if flushing is + * disabled, the snapshot method does indeed finishes without waiting for pending requests; we + * set a timeout because the test will not finish if the logic is broken. 
+ */ + @Test + @Timeout(5) + public void testDoesNotWaitForPendingRequestsIfFlushingDisabled() throws Exception { + final OpensearchSink.Builder builder = + new OpensearchSink.Builder<>( + Arrays.asList(new HttpHost("localhost", server.getLocalPort())), + new SimpleSinkFunction()); + + final OpensearchSink sink = builder.build(); + sink.disableFlushOnCheckpoint(); // disable flushing + + final OneInputStreamOperatorTestHarness testHarness = + new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink)); + + testHarness.open(); + + responses.add( + createResponse( + new BulkItemResponse( + 1, + OpType.INDEX, + new Failure( + "test", + "_doc", + "1", + new Exception("artificial failure for record"))))); + + testHarness.processElement(new StreamRecord<>("msg-1")); + + // the snapshot should not block even though we haven't flushed the bulk request + testHarness.snapshot(1L, 1000L); + + assertThatThrownBy(() -> testHarness.close()) + .getCause() + .hasMessageContaining("artificial failure for record"); + } + + @Test + public void testOpenAndCloseInSinkFunction() throws Exception { + final SimpleClosableSinkFunction sinkFunction = new SimpleClosableSinkFunction<>(); + final OpensearchSink.Builder builder = + new OpensearchSink.Builder<>( + Arrays.asList(new HttpHost("localhost", server.getLocalPort())), + sinkFunction); + builder.setFailureHandler(new DummyRetryFailureHandler()); + + final OpensearchSink sink = builder.build(); + sink.setRuntimeContext(new MockStreamingRuntimeContext(false, 1, 0)); + sink.open(new Configuration()); + sink.close(); + + assertThat(sinkFunction.openCalled).isTrue(); + assertThat(sinkFunction.closeCalled).isTrue(); + } + + private static class SimpleSinkFunction implements OpensearchSinkFunction { + private static final long serialVersionUID = -176739293659135148L; + + @Override + public void process(String element, RuntimeContext ctx, RequestIndexer indexer) { + Map json = new HashMap<>(); + json.put("data", element); + + indexer.add(Requests.indexRequest().index("index").type("type").id("id").source(json)); + } + } + + private static class SimpleClosableSinkFunction + implements OpensearchSinkFunction { + + private static final long serialVersionUID = 1872065917794006848L; + + private boolean openCalled; + private boolean closeCalled; + + @Override + public void open() { + openCalled = true; + } + + @Override + public void close() { + closeCalled = true; + } + + @Override + public void process(String element, RuntimeContext ctx, RequestIndexer indexer) {} + } + + private static class DummyRetryFailureHandler implements ActionRequestFailureHandler { + private static final long serialVersionUID = 5400023700099200745L; + + @Override + public void onFailure( + ActionRequest action, Throwable failure, int restStatusCode, RequestIndexer indexer) + throws Throwable { + indexer.add(action); + } + } + + private Consumer createResponse(BulkItemResponse item) { + return response -> { + try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + response.setStatusCode(200); + try (XContentBuilder builder = + new XContentBuilder(JsonXContent.jsonXContent, baos)) { + final BulkResponse bulkResponse = + new BulkResponse(new BulkItemResponse[] {item}, 200); + bulkResponse.toXContent(builder, ToXContent.EMPTY_PARAMS); + } + response.setEntity( + new ByteArrayEntity(baos.toByteArray(), ContentType.APPLICATION_JSON)); + } catch (final IOException ex) { + response.setStatusCode(500); + } + }; + } + + private void awaitForFlushToFinish(int n) throws InterruptedException { + 
while (responses.size() > n) { + Thread.sleep(10); + } + } +} From 9dc4d725d82dfb5eafc3e79f63feb302f1bad24d Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Tue, 29 Nov 2022 08:57:14 -0500 Subject: [PATCH 15/18] Address code review comments Signed-off-by: Andriy Redko --- .../opensearch/OpensearchSinkTest.java | 52 ++++++++++--------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkTest.java b/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkTest.java index eee2883..a635c65 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkTest.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkTest.java @@ -56,7 +56,10 @@ import java.util.HashMap; import java.util.Map; import java.util.concurrent.ConcurrentLinkedDeque; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.LockSupport; import java.util.function.Consumer; +import java.util.function.Supplier; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -209,7 +212,7 @@ public void testItemFailureRethrownOnCheckpointAfterFlush() throws Throwable { testHarness.processElement(new StreamRecord<>("msg-1")); // Await for flush to be complete - awaitForFlushToFinish(1); + awaitForCondition(() -> responses.size() == 1); // setup the requests to be flushed in the snapshot testHarness.processElement(new StreamRecord<>("msg-2")); @@ -227,7 +230,7 @@ public void go() throws Exception { snapshotThread.start(); // Await for flush to be complete - awaitForFlushToFinish(0); + awaitForCondition(responses::isEmpty); assertThatThrownBy(snapshotThread::sync) .getCause() @@ -330,7 +333,7 @@ public void testBulkFailureRethrownOnOnCheckpointAfterFlush() throws Throwable { testHarness.processElement(new StreamRecord<>("msg-1")); // Await for flush to be complete - awaitForFlushToFinish(1); + awaitForCondition(() -> responses.size() == 1); // setup the requests to be flushed in the snapshot testHarness.processElement(new StreamRecord<>("msg-2")); @@ -346,7 +349,7 @@ public void go() throws Exception { snapshotThread.start(); // Await for flush to be complete - awaitForFlushToFinish(0); + awaitForCondition(responses::isEmpty); assertThatThrownBy(snapshotThread::sync) .getCause() @@ -359,16 +362,15 @@ public void go() throws Exception { * checkpoints; we set a timeout because the test will not finish if the logic is broken. 
*/ @Test - @Timeout(5) + @Timeout(50) public void testAtLeastOnceSink() throws Throwable { final OpensearchSink.Builder builder = new OpensearchSink.Builder<>( Arrays.asList(new HttpHost("localhost", server.getLocalPort())), new SimpleSinkFunction()); builder.setBulkFlushInterval(500); - builder.setFailureHandler( - new DummyRetryFailureHandler()); // use a failure handler that simply - // re-adds requests + // use a failure handler that simply re-adds requests + builder.setFailureHandler(new DummyRetryFailureHandler()); final OpensearchSink sink = builder.build(); final OneInputStreamOperatorTestHarness testHarness = @@ -389,6 +391,14 @@ public void testAtLeastOnceSink() throws Throwable { "1", new Exception("artificial failure for record"))))); + responses.add( + createResponse( + new BulkItemResponse( + 2, + OpType.INDEX, + new IndexResponse( + new ShardId("test", "-", 0), "_doc", "2", 0, 0, 1, true)))); + testHarness.processElement(new StreamRecord<>("msg")); // current number of pending request should be 1 due to the re-add @@ -404,26 +414,20 @@ public void go() throws Exception { snapshotThread.start(); // Await for flush to be complete - awaitForFlushToFinish(0); + awaitForCondition(() -> responses.size() == 1); // since the previous flush should have resulted in a request re-add from the failure // handler, // we should have flushed again, and eventually be blocked before snapshot triggers the 2nd // flush - responses.add( - createResponse( - new BulkItemResponse( - 2, - OpType.INDEX, - new IndexResponse( - new ShardId("test", "-", 0), "_doc", "2", 0, 0, 1, true)))); - - // current number of pending request should be 1 due to the re-add - assertThat(sink.getNumPendingRequests()).isEqualTo(1); + // current number of pending request should be 1 due to the re-add, since the + // failureRequestIndexer will be called only on the next bulk flush interval, we may need + // to wait for numPendingRequests to be updated. 
+ awaitForCondition(() -> sink.getNumPendingRequests() == 1); // Await for flush to be complete - awaitForFlushToFinish(0); + awaitForCondition(responses::isEmpty); // the snapshot should finish with no exceptions snapshotThread.sync(); @@ -536,7 +540,7 @@ public void onFailure( } } - private Consumer createResponse(BulkItemResponse item) { + private static Consumer createResponse(BulkItemResponse item) { return response -> { try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { response.setStatusCode(200); @@ -554,9 +558,9 @@ private Consumer createResponse(BulkItemResponse item) { }; } - private void awaitForFlushToFinish(int n) throws InterruptedException { - while (responses.size() > n) { - Thread.sleep(10); + private static void awaitForCondition(Supplier condition) { + while (!condition.get()) { + LockSupport.parkNanos(TimeUnit.MILLISECONDS.toNanos(10)); } } } From d8445bb0abdd699b6e4b9f3cd259084c50073527 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Tue, 29 Nov 2022 10:37:59 -0500 Subject: [PATCH 16/18] Address code review comments Signed-off-by: Andriy Redko --- .../opensearch/OpensearchSinkTest.java | 44 ++++++++++++++----- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkTest.java b/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkTest.java index a635c65..9d29b2e 100644 --- a/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkTest.java +++ b/flink-connector-opensearch/src/test/java/org/apache/flink/streaming/connectors/opensearch/OpensearchSinkTest.java @@ -56,8 +56,9 @@ import java.util.HashMap; import java.util.Map; import java.util.concurrent.ConcurrentLinkedDeque; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.locks.LockSupport; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; import java.util.function.Consumer; import java.util.function.Supplier; @@ -68,6 +69,8 @@ public class OpensearchSinkTest { private HttpServer server; private final Deque> responses = new ConcurrentLinkedDeque<>(); + private final Lock lock = new ReentrantLock(); + private final Condition flushed = lock.newCondition(); @BeforeEach public void setUp() throws IOException { @@ -79,7 +82,15 @@ public void setUp() throws IOException { return (req, resp, context) -> resp.setStatusCode(200); } else if (method.equalsIgnoreCase("POST")) { // Bulk responses are configured per test case - return (req, resp, context) -> responses.poll().accept(resp); + return (req, resp, context) -> { + lock.lock(); + try { + responses.poll().accept(resp); + flushed.signalAll(); + } finally { + lock.unlock(); + } + }; } else { return null; } @@ -212,7 +223,7 @@ public void testItemFailureRethrownOnCheckpointAfterFlush() throws Throwable { testHarness.processElement(new StreamRecord<>("msg-1")); // Await for flush to be complete - awaitForCondition(() -> responses.size() == 1); + awaitForFlushToFinish(); // setup the requests to be flushed in the snapshot testHarness.processElement(new StreamRecord<>("msg-2")); @@ -230,7 +241,7 @@ public void go() throws Exception { snapshotThread.start(); // Await for flush to be complete - awaitForCondition(responses::isEmpty); + awaitForFlushToFinish(); assertThatThrownBy(snapshotThread::sync) .getCause() @@ -333,7 +344,7 @@ public void 
testBulkFailureRethrownOnOnCheckpointAfterFlush() throws Throwable { testHarness.processElement(new StreamRecord<>("msg-1")); // Await for flush to be complete - awaitForCondition(() -> responses.size() == 1); + awaitForFlushToFinish(); // setup the requests to be flushed in the snapshot testHarness.processElement(new StreamRecord<>("msg-2")); @@ -349,7 +360,7 @@ public void go() throws Exception { snapshotThread.start(); // Await for flush to be complete - awaitForCondition(responses::isEmpty); + awaitForFlushToFinish(); assertThatThrownBy(snapshotThread::sync) .getCause() @@ -362,7 +373,7 @@ public void go() throws Exception { * checkpoints; we set a timeout because the test will not finish if the logic is broken. */ @Test - @Timeout(50) + @Timeout(5) public void testAtLeastOnceSink() throws Throwable { final OpensearchSink.Builder builder = new OpensearchSink.Builder<>( @@ -414,7 +425,7 @@ public void go() throws Exception { snapshotThread.start(); // Await for flush to be complete - awaitForCondition(() -> responses.size() == 1); + awaitForFlushToFinish(); // since the previous flush should have resulted in a request re-add from the failure // handler, @@ -427,7 +438,7 @@ public void go() throws Exception { awaitForCondition(() -> sink.getNumPendingRequests() == 1); // Await for flush to be complete - awaitForCondition(responses::isEmpty); + awaitForFlushToFinish(); // the snapshot should finish with no exceptions snapshotThread.sync(); @@ -558,9 +569,18 @@ private static Consumer createResponse(BulkItemResponse item) { }; } - private static void awaitForCondition(Supplier condition) { + private static void awaitForCondition(Supplier condition) throws InterruptedException { while (!condition.get()) { - LockSupport.parkNanos(TimeUnit.MILLISECONDS.toNanos(10)); + Thread.sleep(10); + } + } + + private void awaitForFlushToFinish() throws InterruptedException { + lock.lock(); + try { + flushed.await(); + } finally { + lock.unlock(); } } } From 81d49d6c20c24ed7e76f61eb16fcb4436666db10 Mon Sep 17 00:00:00 2001 From: Chesnay Schepler Date: Thu, 8 Dec 2022 12:54:31 +0100 Subject: [PATCH 17/18] Update pom.xml --- pom.xml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pom.xml b/pom.xml index 9e055c6..fd8800c 100644 --- a/pom.xml +++ b/pom.xml @@ -197,16 +197,6 @@ under the License. flink-test-utils ${flink.version} test - - - log4j - log4j - - - org.slf4j - slf4j-log4j12 - - From 0b8d0d9c36267d0dfa80e306a58902c05cb407bc Mon Sep 17 00:00:00 2001 From: Chesnay Schepler Date: Thu, 8 Dec 2022 12:58:12 +0100 Subject: [PATCH 18/18] pom cleanup --- flink-connector-opensearch-e2e-tests/pom.xml | 2 +- flink-connector-opensearch/pom.xml | 3 +-- flink-sql-connector-opensearch/pom.xml | 2 +- pom.xml | 7 +------ 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/flink-connector-opensearch-e2e-tests/pom.xml b/flink-connector-opensearch-e2e-tests/pom.xml index 2510ccc..6fd49a0 100644 --- a/flink-connector-opensearch-e2e-tests/pom.xml +++ b/flink-connector-opensearch-e2e-tests/pom.xml @@ -26,7 +26,7 @@ under the License. org.apache.flink flink-connector-opensearch-parent - 1.0.0-SNAPSHOT + 1.0-SNAPSHOT flink-connector-opensearch-e2e-tests diff --git a/flink-connector-opensearch/pom.xml b/flink-connector-opensearch/pom.xml index ab4f718..fdc52b2 100644 --- a/flink-connector-opensearch/pom.xml +++ b/flink-connector-opensearch/pom.xml @@ -26,8 +26,7 @@ under the License. org.apache.flink flink-connector-opensearch-parent - 1.0.0-SNAPSHOT - .. 
+ 1.0-SNAPSHOT flink-connector-opensearch diff --git a/flink-sql-connector-opensearch/pom.xml b/flink-sql-connector-opensearch/pom.xml index 9f3fd72..d993ab4 100644 --- a/flink-sql-connector-opensearch/pom.xml +++ b/flink-sql-connector-opensearch/pom.xml @@ -26,7 +26,7 @@ under the License. org.apache.flink flink-connector-opensearch-parent - 1.0.0-SNAPSHOT + 1.0-SNAPSHOT flink-sql-connector-opensearch diff --git a/pom.xml b/pom.xml index fd8800c..079703b 100644 --- a/pom.xml +++ b/pom.xml @@ -29,7 +29,7 @@ under the License. org.apache.flink flink-connector-opensearch-parent - 1.0.0-SNAPSHOT + 1.0-SNAPSHOT Flink : Connectors : Opensearch : Parent pom https://flink.apache.org @@ -58,9 +58,6 @@ under the License. - UTF-8 - UTF-8 - 1.16.0 15.0 @@ -77,8 +74,6 @@ under the License. 1.7.36 2.17.2 - - flink-connector-opensearch-parent
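
The API changes in patches 12 and 13 (RequestIndexer promoted to @PublicEvolving, and the SinkFunction-based OpensearchSink deprecated in favor of the Sink V2 org.apache.flink.connector.opensearch.sink.OpensearchSink) imply a migration path for code like the SimpleSinkFunction used in the tests above. The following sketch is illustrative only and not part of the patch series: the three-argument emit shape is taken from the OpensearchEmitter signature quoted in patch 12's archunit-violations entry, while the builder methods (setHosts, setEmitter, setBulkFlushMaxActions, setDeliveryGuarantee) and the sinkTo attachment are assumed from the new OpensearchSinkBuilder API and may differ in detail.

import org.apache.flink.api.connector.sink2.SinkWriter;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.opensearch.sink.OpensearchSink;
import org.apache.flink.connector.opensearch.sink.OpensearchSinkBuilder;
import org.apache.flink.connector.opensearch.sink.RequestIndexer;
import org.apache.flink.streaming.api.datastream.DataStream;

import org.apache.http.HttpHost;
import org.opensearch.client.Requests;

import java.util.HashMap;
import java.util.Map;

/** Hypothetical migration sketch, not part of the patch series. */
public final class SinkV2MigrationSketch {

    // Builds the assumed Sink V2 equivalent of the deprecated
    // OpensearchSink.Builder + SimpleSinkFunction combination from the tests.
    public static OpensearchSink<String> buildSink(HttpHost host) {
        return new OpensearchSinkBuilder<String>()
                .setHosts(host)
                // The emitter replaces OpensearchSinkFunction; the RequestIndexer
                // parameter is the interface made @PublicEvolving in patch 12.
                .setEmitter(
                        (String element, SinkWriter.Context context, RequestIndexer indexer) -> {
                            Map<String, Object> json = new HashMap<>();
                            json.put("data", element);
                            indexer.add(Requests.indexRequest().index("index").source(json));
                        })
                // flush after every record, mirroring setBulkFlushMaxActions(1) in the tests
                .setBulkFlushMaxActions(1)
                .setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE)
                .build();
    }

    public static void attach(DataStream<String> stream, HttpHost host) {
        // Sink V2 sinks are attached with sinkTo(...) rather than addSink(...)
        stream.sinkTo(buildSink(host));
    }
}

Note the design shift this implies: flush-on-checkpoint is no longer a boolean toggle on the sink; choosing DeliveryGuarantee.AT_LEAST_ONCE makes the writer flush pending bulk requests on checkpoint, which is the behaviour the at-least-once tests above exercise through snapshot().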