Merge pull request tensorflow#1996 from ROCmSoftwarePlatform/develop-upstream-sync-230220

Develop upstream sync 230220
fsx950223 · Feb 23, 2023 · ecf7f52 · ecf7f52
2 parents 736ffc2 + cdcd909
commit ecf7f52
Show file tree

Hide file tree

Showing 913 changed files with 23,905 additions and 18,896 deletions.
diff --git a/.github/bot_config.yml b/.github/bot_config.yml
@@ -17,7 +17,7 @@
 assignees:
    - synandi
    - tiruk007
-   - gaikwadrahul8
+   - tilakrayal
    - pjpratik
 # A list of assignees for compiler folder
 compiler_assignees:

diff --git a/RELEASE.md b/RELEASE.md
@@ -1,21 +1,24 @@
 # Release 2.13.0
 
-# Breaking Changes
+## Breaking Changes
 
 * <DOCUMENT BREAKING CHANGES HERE>
 * <THIS SECTION SHOULD CONTAIN API, ABI AND BEHAVIORAL BREAKING CHANGES>
 
-# Known Caveats
+## Known Caveats
 
 * <CAVEATS REGARDING THE RELEASE (BUT NOT BREAKING CHANGES).>
 * <ADDING/BUMPING DEPENDENCIES SHOULD GO HERE>
 * <KNOWN LACK OF SUPPORT ON SOME PLATFORM, SHOULD GO HERE>
 
-# Major Features and Improvements
+## Major Features and Improvements
 
 *   `tf.lite`:
 
     *   Add 16-bit and 64-bit float type support for built-in op `cast`.
+    *   The Python TF Lite Interpreter bindings now have an option
+        `experimental_disable_delegate_clustering` to turn off delegate
+        clustering.
 
 *   `tf.keras`
 
@@ -27,14 +30,20 @@
         graph). This can be used for integrating metrics from external Python
         libraries (like sklearn or pycocotools) into Keras as first-class Keras
         metrics.
+    *   The `SidecarEvaluatorModelExport` callback has been added to Keras as
+        `keras.callbacks.SidecarEvaluatorModelExport`. This callback allows for
+        exporting the best-scoring model as evaluated by a
+        `SidecarEvaluator` evaluator. The evaluator regularly evaluates the
+        model and exports it if the user-defined comparison function determines
+        that it is an improvement.
 
-# Bug Fixes and Other Changes
+## Bug Fixes and Other Changes
 
 * <SIMILAR TO ABOVE SECTION, BUT FOR OTHER IMPORTANT CHANGES / BUG FIXES>
 * <IF A CHANGE CLOSES A GITHUB ISSUE, IT SHOULD BE DOCUMENTED HERE>
 * <NOTES SHOULD BE GROUPED PER AREA>
 
-# Thanks to our Contributors
+## Thanks to our Contributors
 
 This release contains contributions from many people at Google, as well as:
 
@@ -202,7 +211,15 @@ This release contains contributions from many people at Google, as well as:
         `rerandomize_each_iteration=True`, the `sample_from_datasets()`
         operation will use a different (deterministic) sequence of numbers every
         epoch.
-
+    *   Added a new field, `warm_start`, to
+        `tf.data.experimental.OptimizationOptions`. If it is set to `True`,
+        tf.data will start background threads of asynchronous
+        transformations upon iterator creation (as opposed to upon first call
+        to `GetNext`). To enable this behavior, set `warm_start=True` in
+        `tf.data.experimental.OptimizationOptions`. It should be noted that this
+        possibly improves the latency of the initial 'GetNext' call at the
+        expense of requiring more memory to hold prefetched elements between
+        the time of iterator construction and usage.
 *   `tf.test`:
 
     *   Added `tf.test.experimental.sync_devices`, which is useful for

diff --git a/tensorflow/c/c_api_internal.h b/tensorflow/c/c_api_internal.h
@@ -115,6 +115,9 @@ struct TF_OperationDescription {
 
 struct TF_Operation {
   tensorflow::Node node;
+
+ private:
+  ~TF_Operation() = default;
 };
 
 struct TF_Session {

diff --git a/tensorflow/c/experimental/next_pluggable_device/BUILD b/tensorflow/c/experimental/next_pluggable_device/BUILD
@@ -12,17 +12,17 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         "//tensorflow/c:c_api",
-        "//tensorflow/c:kernels",
         "//tensorflow/c:kernels_experimental_hdrs",
+        "//tensorflow/c:kernels_hdrs",
         "//tensorflow/c:tf_status_helper",
         "//tensorflow/c:tf_status_internal",
         "//tensorflow/c:tf_tensor_internal",
-        "//tensorflow/compiler/jit:xla_launch_util",
+        "//tensorflow/compiler/jit:variable_info",
+        "//tensorflow/compiler/jit:variable_info_util",
         "//tensorflow/compiler/xla/pjrt:pjrt_c_api_client",
         "//tensorflow/compiler/xla/pjrt:pjrt_client",
         "//tensorflow/compiler/xla/pjrt/c:pjrt_c_api_hdrs",
         "//tensorflow/core:framework",
-        "//tensorflow/core/common_runtime/next_pluggable_device",
         "//tensorflow/core/common_runtime/next_pluggable_device:plugin_resource",
         "//tensorflow/core/platform:status",
         "//tensorflow/core/tfrt/common:async_value_tensor",

diff --git a/tensorflow/c/experimental/next_pluggable_device/c_api.cc b/tensorflow/c/experimental/next_pluggable_device/c_api.cc
@@ -26,10 +26,10 @@ limitations under the License.
 #include "tensorflow/c/tf_status_internal.h"
 #include "tensorflow/c/tf_tensor.h"
 #include "tensorflow/c/tf_tensor_internal.h"
-#include "tensorflow/compiler/jit/xla_launch_util.h"
+#include "tensorflow/compiler/jit/variable_info.h"
+#include "tensorflow/compiler/jit/variable_info_util.h"
 #include "tensorflow/compiler/xla/pjrt/pjrt_c_api_client.h"
 #include "tensorflow/compiler/xla/pjrt/pjrt_client.h"
-#include "tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device.h"
 #include "tensorflow/core/common_runtime/next_pluggable_device/plugin_resource.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/platform/status.h"
@@ -44,13 +44,6 @@ TF_Device* TF_GetDevice(TF_OpKernelContext* ctx) {
   return reinterpret_cast<TF_Device*>(cc_ctx->device());
 }
 
-size_t TF_GetDeviceOrdinal(TF_Device* device) {
-  // TODO(chuanhao): make GetDeviceOrdinal a virtual member function in the base
-  // device class, instead of casting to `NextPluggableDevice`.
-  auto cc_device = reinterpret_cast<tensorflow::NextPluggableDevice*>(device);
-  return cc_device->GetDeviceOrdinal();
-}
-
 // --------------------------  Resource  ---------------------------------------
 void TF_CreatePluginResource(TF_OpKernelContext* ctx,
                              const char* container_name,
@@ -101,7 +94,7 @@ struct TF_VariableInfo {
   TF_VariableInfo() = delete;
   // TF_VariableInfo is constructed here by TensorFlow, and will be passed to
   // plugin as an opaque pointer. Plugin will need to call C APIs below to
-  // operate on TF_VaribleInfo (such as allocate temp tensor for the `var` held
+  // operate on TF_VariableInfo (such as allocate temp tensor for the `var` held
   // by the underlying tensorflow::VariableInfo).
   TF_VariableInfo(int index, const std::string& name, tensorflow::Var* var) {
     var_info = tensorflow::VariableInfo{index, name, var};
@@ -258,7 +251,7 @@ void TF_CreateAndSetPjRtCApiClient(const char* device_type, TF_Status* status) {
 
 PJRT_Client* TF_GetPjRtCClient(const char* device_type, TF_Status* status) {
   tsl::StatusOr<xla::PjRtClient*> pjrt_client =
-      tensorflow::GetOrCreatePjRtClient(tensorflow::DeviceType(device_type));
+      tensorflow::GetPjRtClient(tensorflow::DeviceType(device_type));
   if (!pjrt_client.ok()) {
     tensorflow::Set_TF_Status_from_Status(status, pjrt_client.status());
     return nullptr;
@@ -312,7 +305,7 @@ void TF_CreatePjRtBuffer(TF_Tensor* c_tensor, PJRT_Buffer* c_buffer,
     return;
   }
   auto pjrt_client =
-      tensorflow::GetOrCreatePjRtClient(tensorflow::DeviceType(device_type));
+      tensorflow::GetPjRtClient(tensorflow::DeviceType(device_type));
   if (!pjrt_client.ok()) {
     tensorflow::Set_TF_Status_from_Status(status, pjrt_client.status());
     return;

diff --git a/tensorflow/c/experimental/next_pluggable_device/c_api.h b/tensorflow/c/experimental/next_pluggable_device/c_api.h
@@ -60,8 +60,6 @@ typedef struct TF_VariableInfo TF_VariableInfo;
 // but in theory this is a C API for every kind of device.
 TF_CAPI_EXPORT extern TF_Device* TF_GetDevice(TF_OpKernelContext* ctx);
 
-TF_CAPI_EXPORT extern size_t TF_GetDeviceOrdinal(TF_Device* device);
-
 // --------------------------  Resource  ---------------------------------------
 // Create a `tensorflow::PluginResource` to the ResourceMgr provided by the
 // `ctx`. The `tensorflow::PluginResource` wraps a resource by plugin (as a

diff --git a/tensorflow/c/experimental/stream_executor/stream_executor_test_util.cc b/tensorflow/c/experimental/stream_executor/stream_executor_test_util.cc
@@ -83,9 +83,6 @@ void SynchronizeAllActivity(const SP_Device* const device,
 TF_Bool HostCallback(const SP_Device* const device, SP_Stream stream,
                      SE_StatusCallbackFn const callback_fn,
                      void* const callback_arg) {
-  TSL_Status* status_ignored = TSL_NewStatus();
-  callback_fn(callback_arg, status_ignored);
-  TSL_DeleteStatus(status_ignored);
   return true;
 }
 

diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc
@@ -67,23 +67,6 @@ void UpdateEdge(TF_Graph* graph, TF_Output new_src, TF_Input dst,
   TF_UpdateEdge(graph, new_src, dst, status);
 }
 
-void RemoveAllControlInputs(TF_Graph* graph, TF_Operation* op) {
-  mutex_lock l(graph->mu);
-  std::vector<const Edge*> control_edges;
-  for (const Edge* edge : op->node.in_edges()) {
-    if (!edge->IsControlEdge()) continue;
-    control_edges.push_back(edge);
-  }
-  for (const Edge* edge : control_edges) {
-    graph->graph.RemoveControlEdge(edge);
-  }
-}
-
-void SetRequireShapeInferenceFns(TF_Graph* graph, bool require) {
-  mutex_lock l(graph->mu);
-  graph->refiner.set_require_shape_inference_fns(require);
-}
-
 void ExtendSession(TF_Session* session, TF_Status* status) {
   ExtendSessionGraphHelper(session, status);
   session->extend_before_run = false;

diff --git a/tensorflow/c/python_api.h b/tensorflow/c/python_api.h
@@ -48,12 +48,6 @@ void SetRequestedDevice(TF_Graph* graph, TF_Operation* op, const char* device);
 void UpdateEdge(TF_Graph* graph, TF_Output new_src, TF_Input dst,
                 TF_Status* status);
 
-void RemoveAllControlInputs(TF_Graph* graph, TF_Operation* op);
-
-// Sets whether ops missing a shape inference function should trigger an
-// error. The default is true.
-void SetRequireShapeInferenceFns(TF_Graph* graph, bool require);
-
 // Extends `session` with any new operations added to its associated graph.
 // Usually this happens automatically in TF_SessionRun. After this is called,
 // TF_SessionRun will no longer extend the session on every call.

diff --git a/tensorflow/cc/saved_model/BUILD b/tensorflow/cc/saved_model/BUILD
@@ -439,7 +439,6 @@ tf_cc_test(
 tf_cc_fuzz_test(
     name = "saved_model_fuzz",
     srcs = ["saved_model_fuzz.cc"],
-    componentid = 893731,  # Core > ML > Frameworks > TensorFlow > Core > Saved Model
     deps = [
         ":constants",
         ":loader",

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
@@ -227,6 +227,10 @@ cc_library(
 
 XLA_DEVICE_DEPS = [
     ":common",
+    ":pjrt_device_context",
+    ":variable_info",
+    ":variable_info_util",
+    ":xla_compile_util",
     ":xla_launch_util",
     ":xla_tensor",
     "@com_google_absl//absl/base",
@@ -280,6 +284,7 @@ XLA_DEVICE_DEPS = [
     "//tensorflow/core/kernels/data:prefetch_dataset_op",
     "//tensorflow/core/kernels/data:options_dataset_op",
     "//tensorflow/core/profiler/lib:traceme",
+    "//tensorflow/core/tfrt/common:async_value_tensor",
     "//tensorflow/compiler/xla/stream_executor:tf_allocator_adapter",
     "//tensorflow/compiler/xla/stream_executor/platform",
 ]
@@ -403,6 +408,50 @@ cc_library(
 
 # Internal targets below this point.
 
+cc_library(
+    name = "variable_info",
+    srcs = ["variable_info.cc"],
+    hdrs = ["variable_info.h"],
+    visibility = [
+        ":internal",
+        # We reuse VariableInfo in TFRT's implementation of TpuExecuteOp.
+        "//learning/brain/tfrt/tf_tpu:__pkg__",
+        "//learning/brain/tfrt/tpu_plugin:__pkg__",
+        "//learning/brain/tfrt/tpu_common:__pkg__",
+        "//tensorflow/core/common_runtime/next_pluggable_device:__pkg__",
+    ],
+    deps = [
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+cc_library(
+    name = "variable_info_util",
+    srcs = ["variable_info_util.cc"],
+    hdrs = ["variable_info_util.h"],
+    visibility = [
+        ":internal",
+        # We reuse VariableInfo in TFRT's implementation of TpuExecuteOp.
+        "//learning/brain/tfrt/tf_tpu:__pkg__",
+        "//learning/brain/tfrt/tpu_plugin:__pkg__",
+        "//learning/brain/tfrt/tpu_common:__pkg__",
+        "//tensorflow/core/common_runtime/next_pluggable_device:__pkg__",
+    ],
+    deps = [
+        ":variable_info",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "@com_google_absl//absl/algorithm:container",
+    ],
+)
+
 cc_library(
     name = "xla_launch_util",
     srcs = ["xla_launch_util.cc"],
@@ -416,14 +465,13 @@ cc_library(
         "//tensorflow/core/common_runtime/next_pluggable_device:__pkg__",
     ],
     deps = [
-        ":common",
+        ":variable_info",
+        ":variable_info_util",
         ":xla_tensor",
         "//tensorflow/compiler/tf2xla:common",
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:status_macros",
-        "//tensorflow/compiler/xla:statusor",
-        "//tensorflow/compiler/xla/client:client_library",
         "//tensorflow/compiler/xla/client:local_client",
         "//tensorflow/compiler/xla/service:shaped_buffer",
         "//tensorflow/compiler/xla/stream_executor:device_memory_allocator",
@@ -436,7 +484,6 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/cleanup",
-        "@com_google_absl//absl/memory",
     ],
 )
 
@@ -549,6 +596,8 @@ cc_library(
     deps = [
         ":compilability_check_util",
         ":device_compiler",
+        ":variable_info",
+        ":variable_info_util",
         ":xla_device_no_jit_rewrite_registration",
         ":xla_launch_util",
         "//tensorflow/compiler/tf2xla:xla_compiler",
@@ -1298,6 +1347,8 @@ cc_library(
         ":xla_device_compiler_client",
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla/client:local_client",
+        "//tensorflow/compiler/xla/pjrt:pjrt_client",
         "//tensorflow/compiler/xla/service:hlo_proto_cc",
         "//tensorflow/core:core_cpu_base",
         "//tensorflow/core:framework",
@@ -1355,7 +1406,7 @@ cc_library(
         "//tensorflow/core/platform:status",
         "//tensorflow/core/profiler/lib:traceme",
         "//tensorflow/core/tfrt/common:async_value_tensor",
-        "//tensorflow/core/tfrt/common:pjrt_util",
+        "//tensorflow/core/tfrt/common:create_pjrt_client_util",
     ],
 )
 
@@ -1392,6 +1443,7 @@ tf_cc_test(
     deps = [
         ":device_compiler_client",
         ":device_executable_persistor",
+        ":pjrt_device_compiler_client",
         ":xla_compilation_cache_proto_cc",
         ":xla_cpu_device",
         ":xla_cpu_jit",
@@ -1402,10 +1454,14 @@ tf_cc_test(
         "//tensorflow/compiler/xla/client:client_library",
         "//tensorflow/compiler/xla/client:executable_build_options",
         "//tensorflow/compiler/xla/client:local_client",
+        "//tensorflow/compiler/xla/pjrt:pjrt_client",
+        "//tensorflow/compiler/xla/pjrt:tfrt_cpu_pjrt_client",
         "//tensorflow/core:test",
         "//tensorflow/core/platform:errors",
         "//tensorflow/core/platform:status_matchers",
         "//tensorflow/core/platform:statusor",
+        "//tensorflow/core/tfrt/common:create_pjrt_client_util",
+        "//tensorflow/core/tfrt/common:pjrt_util",
         "@com_google_googletest//:gtest_main",
     ],
 )
@@ -1459,3 +1515,19 @@ tf_cuda_cc_test(
         "@com_google_googletest//:gtest_main",
     ],
 )
+
+tf_cuda_cc_test(
+    name = "device_context_test",
+    srcs = ["device_context_test.cc"],
+    tags = tf_cuda_tests_tags(),
+    deps = [
+        ":flags",
+        ":xla_device",
+        ":xla_gpu_device",
+        "//tensorflow/compiler/tf2xla:xla_op_registry",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:test",
+        "//tensorflow/core/framework:tensor_testutil",
+        "@com_google_googletest//:gtest_main",
+    ],
+)