Skip to content

Commit

Permalink
Merge pull request tensorflow#1996 from ROCmSoftwarePlatform/develop-…
Browse files Browse the repository at this point in the history
…upstream-sync-230220

Develop upstream sync 230220
  • Loading branch information
jayfurmanek authored Feb 23, 2023
2 parents 736ffc2 + cdcd909 commit ecf7f52
Show file tree
Hide file tree
Showing 913 changed files with 23,905 additions and 18,896 deletions.
2 changes: 1 addition & 1 deletion .github/bot_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
assignees:
- synandi
- tiruk007
- gaikwadrahul8
- tilakrayal
- pjpratik
# A list of assignees for compiler folder
compiler_assignees:
Expand Down
29 changes: 23 additions & 6 deletions RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
# Release 2.13.0

# Breaking Changes
## Breaking Changes

* <DOCUMENT BREAKING CHANGES HERE>
* <THIS SECTION SHOULD CONTAIN API, ABI AND BEHAVIORAL BREAKING CHANGES>

# Known Caveats
## Known Caveats

* <CAVEATS REGARDING THE RELEASE (BUT NOT BREAKING CHANGES).>
* <ADDING/BUMPING DEPENDENCIES SHOULD GO HERE>
* <KNOWN LACK OF SUPPORT ON SOME PLATFORM, SHOULD GO HERE>

# Major Features and Improvements
## Major Features and Improvements

* `tf.lite`:

* Add 16-bit and 64-bit float type support for built-in op `cast`.
* The Python TF Lite Interpreter bindings now have an option
`experimental_disable_delegate_clustering` to turn off delegate
clustering.

* `tf.keras`

Expand All @@ -27,14 +30,20 @@
graph). This can be used for integrating metrics from external Python
libraries (like sklearn or pycocotools) into Keras as first-class Keras
metrics.
* The `SidecarEvaluatorModelExport` callback has been added to Keras as
`keras.callbacks.SidecarEvaluatorModelExport`. This callback allows for
exporting the best-scoring model as evaluated by a
`SidecarEvaluator` evaluator. The evaluator regularly evaluates the
model and exports it if the user-defined comparison function determines
that it is an improvement.

# Bug Fixes and Other Changes
## Bug Fixes and Other Changes

* <SIMILAR TO ABOVE SECTION, BUT FOR OTHER IMPORTANT CHANGES / BUG FIXES>
* <IF A CHANGE CLOSES A GITHUB ISSUE, IT SHOULD BE DOCUMENTED HERE>
* <NOTES SHOULD BE GROUPED PER AREA>

# Thanks to our Contributors
## Thanks to our Contributors

This release contains contributions from many people at Google, as well as:

Expand Down Expand Up @@ -202,7 +211,15 @@ This release contains contributions from many people at Google, as well as:
`rerandomize_each_iteration=True`, the `sample_from_datasets()`
operation will use a different (deterministic) sequence of numbers every
epoch.

* Added a new field, `warm_start`, to
`tf.data.experimental.OptimizationOptions`. If it is set to `True`,
tf.data will start background threads of asynchronous
transformations upon iterator creation (as opposed to upon first call
to `GetNext`). To enable this behavior, set `warm_start=True` in
`tf.data.experimental.OptimizationOptions`. It should be noted that this
possibly improves the latency of the initial `GetNext` call at the
expense of requiring more memory to hold prefetched elements between
the time of iterator construction and usage.
* `tf.test`:

* Added `tf.test.experimental.sync_devices`, which is useful for
Expand Down
3 changes: 3 additions & 0 deletions tensorflow/c/c_api_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ struct TF_OperationDescription {

// C API handle type whose only data member is a tensorflow::Node.
struct TF_Operation {
tensorflow::Node node;

private:
// The destructor is private, so code outside this struct cannot destroy or
// `delete` a TF_Operation directly.
~TF_Operation() = default;
};

struct TF_Session {
Expand Down
6 changes: 3 additions & 3 deletions tensorflow/c/experimental/next_pluggable_device/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,17 @@ cc_library(
visibility = ["//visibility:public"],
deps = [
"//tensorflow/c:c_api",
"//tensorflow/c:kernels",
"//tensorflow/c:kernels_experimental_hdrs",
"//tensorflow/c:kernels_hdrs",
"//tensorflow/c:tf_status_helper",
"//tensorflow/c:tf_status_internal",
"//tensorflow/c:tf_tensor_internal",
"//tensorflow/compiler/jit:xla_launch_util",
"//tensorflow/compiler/jit:variable_info",
"//tensorflow/compiler/jit:variable_info_util",
"//tensorflow/compiler/xla/pjrt:pjrt_c_api_client",
"//tensorflow/compiler/xla/pjrt:pjrt_client",
"//tensorflow/compiler/xla/pjrt/c:pjrt_c_api_hdrs",
"//tensorflow/core:framework",
"//tensorflow/core/common_runtime/next_pluggable_device",
"//tensorflow/core/common_runtime/next_pluggable_device:plugin_resource",
"//tensorflow/core/platform:status",
"//tensorflow/core/tfrt/common:async_value_tensor",
Expand Down
17 changes: 5 additions & 12 deletions tensorflow/c/experimental/next_pluggable_device/c_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ limitations under the License.
#include "tensorflow/c/tf_status_internal.h"
#include "tensorflow/c/tf_tensor.h"
#include "tensorflow/c/tf_tensor_internal.h"
#include "tensorflow/compiler/jit/xla_launch_util.h"
#include "tensorflow/compiler/jit/variable_info.h"
#include "tensorflow/compiler/jit/variable_info_util.h"
#include "tensorflow/compiler/xla/pjrt/pjrt_c_api_client.h"
#include "tensorflow/compiler/xla/pjrt/pjrt_client.h"
#include "tensorflow/core/common_runtime/next_pluggable_device/next_pluggable_device.h"
#include "tensorflow/core/common_runtime/next_pluggable_device/plugin_resource.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/platform/status.h"
Expand All @@ -44,13 +44,6 @@ TF_Device* TF_GetDevice(TF_OpKernelContext* ctx) {
return reinterpret_cast<TF_Device*>(cc_ctx->device());
}

// Returns the result of GetDeviceOrdinal() for `device`.
// `device` is reinterpreted as a tensorflow::NextPluggableDevice* with no type
// check. NOTE(review): presumably callers must only pass handles that really
// refer to a NextPluggableDevice -- confirm against call sites.
size_t TF_GetDeviceOrdinal(TF_Device* device) {
// TODO(chuanhao): make GetDeviceOrdinal a virtual member function in the base
// device class, instead of casting to `NextPluggableDevice`.
auto cc_device = reinterpret_cast<tensorflow::NextPluggableDevice*>(device);
return cc_device->GetDeviceOrdinal();
}

// -------------------------- Resource ---------------------------------------
void TF_CreatePluginResource(TF_OpKernelContext* ctx,
const char* container_name,
Expand Down Expand Up @@ -101,7 +94,7 @@ struct TF_VariableInfo {
TF_VariableInfo() = delete;
// TF_VariableInfo is constructed here by TensorFlow, and will be passed to
// plugin as an opaque pointer. Plugin will need to call C APIs below to
// operate on TF_VaribleInfo (such as allocate temp tensor for the `var` held
// operate on TF_VariableInfo (such as allocate temp tensor for the `var` held
// by the underlying tensorflow::VariableInfo).
TF_VariableInfo(int index, const std::string& name, tensorflow::Var* var) {
var_info = tensorflow::VariableInfo{index, name, var};
Expand Down Expand Up @@ -258,7 +251,7 @@ void TF_CreateAndSetPjRtCApiClient(const char* device_type, TF_Status* status) {

PJRT_Client* TF_GetPjRtCClient(const char* device_type, TF_Status* status) {
tsl::StatusOr<xla::PjRtClient*> pjrt_client =
tensorflow::GetOrCreatePjRtClient(tensorflow::DeviceType(device_type));
tensorflow::GetPjRtClient(tensorflow::DeviceType(device_type));
if (!pjrt_client.ok()) {
tensorflow::Set_TF_Status_from_Status(status, pjrt_client.status());
return nullptr;
Expand Down Expand Up @@ -312,7 +305,7 @@ void TF_CreatePjRtBuffer(TF_Tensor* c_tensor, PJRT_Buffer* c_buffer,
return;
}
auto pjrt_client =
tensorflow::GetOrCreatePjRtClient(tensorflow::DeviceType(device_type));
tensorflow::GetPjRtClient(tensorflow::DeviceType(device_type));
if (!pjrt_client.ok()) {
tensorflow::Set_TF_Status_from_Status(status, pjrt_client.status());
return;
Expand Down
2 changes: 0 additions & 2 deletions tensorflow/c/experimental/next_pluggable_device/c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,6 @@ typedef struct TF_VariableInfo TF_VariableInfo;
// but in theory this is a C API for every kind of device.
TF_CAPI_EXPORT extern TF_Device* TF_GetDevice(TF_OpKernelContext* ctx);

TF_CAPI_EXPORT extern size_t TF_GetDeviceOrdinal(TF_Device* device);

// -------------------------- Resource ---------------------------------------
// Create a `tensorflow::PluginResource` to the ResourceMgr provided by the
// `ctx`. The `tensorflow::PluginResource` wraps a resource by plugin (as a
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,6 @@ void SynchronizeAllActivity(const SP_Device* const device,
// Calls `callback_fn` immediately with `callback_arg` and a freshly created
// status object, then returns true. `device` and `stream` are not used.
TF_Bool HostCallback(const SP_Device* const device, SP_Stream stream,
SE_StatusCallbackFn const callback_fn,
void* const callback_arg) {
// The status passed to the callback is deleted right after the call; whatever
// the callback stores in it is never inspected.
TSL_Status* status_ignored = TSL_NewStatus();
callback_fn(callback_arg, status_ignored);
TSL_DeleteStatus(status_ignored);
return true;
}

Expand Down
17 changes: 0 additions & 17 deletions tensorflow/c/python_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,23 +67,6 @@ void UpdateEdge(TF_Graph* graph, TF_Output new_src, TF_Input dst,
TF_UpdateEdge(graph, new_src, dst, status);
}

// Removes from `graph` every control edge found among the incoming edges of
// `op`. graph->mu is held for the whole call.
void RemoveAllControlInputs(TF_Graph* graph, TF_Operation* op) {
mutex_lock l(graph->mu);
// Collect the control edges first and remove them in a second pass.
// NOTE(review): presumably done so that in_edges() is not modified while it is
// being iterated -- confirm.
std::vector<const Edge*> control_edges;
for (const Edge* edge : op->node.in_edges()) {
if (!edge->IsControlEdge()) continue;
control_edges.push_back(edge);
}
for (const Edge* edge : control_edges) {
graph->graph.RemoveControlEdge(edge);
}
}

// Forwards `require` to graph->refiner.set_require_shape_inference_fns() while
// holding graph->mu.
void SetRequireShapeInferenceFns(TF_Graph* graph, bool require) {
mutex_lock l(graph->mu);
graph->refiner.set_require_shape_inference_fns(require);
}

void ExtendSession(TF_Session* session, TF_Status* status) {
ExtendSessionGraphHelper(session, status);
session->extend_before_run = false;
Expand Down
6 changes: 0 additions & 6 deletions tensorflow/c/python_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,6 @@ void SetRequestedDevice(TF_Graph* graph, TF_Operation* op, const char* device);
void UpdateEdge(TF_Graph* graph, TF_Output new_src, TF_Input dst,
TF_Status* status);

void RemoveAllControlInputs(TF_Graph* graph, TF_Operation* op);

// Sets whether ops missing a shape inference function should trigger an
// error. The default is true.
void SetRequireShapeInferenceFns(TF_Graph* graph, bool require);

// Extends `session` with any new operations added to its associated graph.
// Usually this happens automatically in TF_SessionRun. After this is called,
// TF_SessionRun will no longer extend the session on every call.
Expand Down
1 change: 0 additions & 1 deletion tensorflow/cc/saved_model/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,6 @@ tf_cc_test(
tf_cc_fuzz_test(
name = "saved_model_fuzz",
srcs = ["saved_model_fuzz.cc"],
componentid = 893731, # Core > ML > Frameworks > TensorFlow > Core > Saved Model
deps = [
":constants",
":loader",
Expand Down
82 changes: 77 additions & 5 deletions tensorflow/compiler/jit/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,10 @@ cc_library(

XLA_DEVICE_DEPS = [
":common",
":pjrt_device_context",
":variable_info",
":variable_info_util",
":xla_compile_util",
":xla_launch_util",
":xla_tensor",
"@com_google_absl//absl/base",
Expand Down Expand Up @@ -280,6 +284,7 @@ XLA_DEVICE_DEPS = [
"//tensorflow/core/kernels/data:prefetch_dataset_op",
"//tensorflow/core/kernels/data:options_dataset_op",
"//tensorflow/core/profiler/lib:traceme",
"//tensorflow/core/tfrt/common:async_value_tensor",
"//tensorflow/compiler/xla/stream_executor:tf_allocator_adapter",
"//tensorflow/compiler/xla/stream_executor/platform",
]
Expand Down Expand Up @@ -403,6 +408,50 @@ cc_library(

# Internal targets below this point.

cc_library(
name = "variable_info",
srcs = ["variable_info.cc"],
hdrs = ["variable_info.h"],
visibility = [
":internal",
# We reuse VariableInfo in TFRT's implementation of TpuExecuteOp.
"//learning/brain/tfrt/tf_tpu:__pkg__",
"//learning/brain/tfrt/tpu_plugin:__pkg__",
"//learning/brain/tfrt/tpu_common:__pkg__",
"//tensorflow/core/common_runtime/next_pluggable_device:__pkg__",
],
deps = [
"//tensorflow/core:core_cpu_internal",
"//tensorflow/core:framework",
"//tensorflow/core:framework_internal",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
],
)

cc_library(
name = "variable_info_util",
srcs = ["variable_info_util.cc"],
hdrs = ["variable_info_util.h"],
visibility = [
":internal",
# We reuse VariableInfo in TFRT's implementation of TpuExecuteOp.
"//learning/brain/tfrt/tf_tpu:__pkg__",
"//learning/brain/tfrt/tpu_plugin:__pkg__",
"//learning/brain/tfrt/tpu_common:__pkg__",
"//tensorflow/core/common_runtime/next_pluggable_device:__pkg__",
],
deps = [
":variable_info",
"//tensorflow/core:core_cpu_internal",
"//tensorflow/core:framework",
"//tensorflow/core:framework_internal",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"@com_google_absl//absl/algorithm:container",
],
)

cc_library(
name = "xla_launch_util",
srcs = ["xla_launch_util.cc"],
Expand All @@ -416,14 +465,13 @@ cc_library(
"//tensorflow/core/common_runtime/next_pluggable_device:__pkg__",
],
deps = [
":common",
":variable_info",
":variable_info_util",
":xla_tensor",
"//tensorflow/compiler/tf2xla:common",
"//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/xla:shape_util",
"//tensorflow/compiler/xla:status_macros",
"//tensorflow/compiler/xla:statusor",
"//tensorflow/compiler/xla/client:client_library",
"//tensorflow/compiler/xla/client:local_client",
"//tensorflow/compiler/xla/service:shaped_buffer",
"//tensorflow/compiler/xla/stream_executor:device_memory_allocator",
Expand All @@ -436,7 +484,6 @@ cc_library(
"//tensorflow/core:protos_all_cc",
"@com_google_absl//absl/algorithm:container",
"@com_google_absl//absl/cleanup",
"@com_google_absl//absl/memory",
],
)

Expand Down Expand Up @@ -549,6 +596,8 @@ cc_library(
deps = [
":compilability_check_util",
":device_compiler",
":variable_info",
":variable_info_util",
":xla_device_no_jit_rewrite_registration",
":xla_launch_util",
"//tensorflow/compiler/tf2xla:xla_compiler",
Expand Down Expand Up @@ -1298,6 +1347,8 @@ cc_library(
":xla_device_compiler_client",
"//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/xla:util",
"//tensorflow/compiler/xla/client:local_client",
"//tensorflow/compiler/xla/pjrt:pjrt_client",
"//tensorflow/compiler/xla/service:hlo_proto_cc",
"//tensorflow/core:core_cpu_base",
"//tensorflow/core:framework",
Expand Down Expand Up @@ -1355,7 +1406,7 @@ cc_library(
"//tensorflow/core/platform:status",
"//tensorflow/core/profiler/lib:traceme",
"//tensorflow/core/tfrt/common:async_value_tensor",
"//tensorflow/core/tfrt/common:pjrt_util",
"//tensorflow/core/tfrt/common:create_pjrt_client_util",
],
)

Expand Down Expand Up @@ -1392,6 +1443,7 @@ tf_cc_test(
deps = [
":device_compiler_client",
":device_executable_persistor",
":pjrt_device_compiler_client",
":xla_compilation_cache_proto_cc",
":xla_cpu_device",
":xla_cpu_jit",
Expand All @@ -1402,10 +1454,14 @@ tf_cc_test(
"//tensorflow/compiler/xla/client:client_library",
"//tensorflow/compiler/xla/client:executable_build_options",
"//tensorflow/compiler/xla/client:local_client",
"//tensorflow/compiler/xla/pjrt:pjrt_client",
"//tensorflow/compiler/xla/pjrt:tfrt_cpu_pjrt_client",
"//tensorflow/core:test",
"//tensorflow/core/platform:errors",
"//tensorflow/core/platform:status_matchers",
"//tensorflow/core/platform:statusor",
"//tensorflow/core/tfrt/common:create_pjrt_client_util",
"//tensorflow/core/tfrt/common:pjrt_util",
"@com_google_googletest//:gtest_main",
],
)
Expand Down Expand Up @@ -1459,3 +1515,19 @@ tf_cuda_cc_test(
"@com_google_googletest//:gtest_main",
],
)

tf_cuda_cc_test(
name = "device_context_test",
srcs = ["device_context_test.cc"],
tags = tf_cuda_tests_tags(),
deps = [
":flags",
":xla_device",
":xla_gpu_device",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/core:framework_internal",
"//tensorflow/core:test",
"//tensorflow/core/framework:tensor_testutil",
"@com_google_googletest//:gtest_main",
],
)
Loading

0 comments on commit ecf7f52

Please sign in to comment.