
Fluid bytecode c++ executor. #10220

Closed
wants to merge 16 commits into from
6 changes: 6 additions & 0 deletions CMakeLists.txt
@@ -61,6 +61,7 @@ option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF)
option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)
option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF)
option(WITH_C_DESC_EXECUTOR "Build with C++ program desc executor." OFF)

# CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE)
@@ -228,3 +229,8 @@ endif()
if(WITH_DOC)
add_subdirectory(doc)
endif()

if(WITH_C_DESC_EXECUTOR)
message(STATUS "build WITH_C_DESC_EXECUTOR")
add_subdirectory(tools/execute_program_desc)
endif()
10 changes: 9 additions & 1 deletion python/paddle/fluid/executor.py
@@ -277,7 +277,8 @@ def run(self,
fetch_var_name='fetch',
scope=None,
return_numpy=True,
use_program_cache=False):
use_program_cache=False,
save_program_to_file=""):
Contributor:
I still don't want to put "save_program_to_file" as an argument to Executor. Saving a program should be a separate API, such as "save_inference_model".
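For reference, a minimal sketch of the reviewer's suggestion, assuming the existing fluid.io.save_inference_model API; the toy network and paths here are illustrative, not part of this PR:

import paddle.fluid as fluid

# A toy network so the sketch is self-contained.
image = fluid.layers.data(name="image", shape=[784], dtype="float32")
prediction = fluid.layers.fc(input=image, size=10, act="softmax")

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

# Saving stays a separate, explicit API call instead of an
# argument threaded through Executor.run().
fluid.io.save_inference_model(dirname="./inference_model",
                              feeded_var_names=["image"],
                              target_vars=[prediction],
                              executor=exe)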

""" Run program by this Executor. Feed data by feed map, fetch result by fetch_list.

Python executor takes a program, add feed operators and fetch operators to this program according
@@ -294,6 +295,7 @@ def run(self,
:param scope: the scope used to run this program, you can switch it to different scope. default is global_scope
:param return_numpy: if convert the fetched tensor to numpy
:param use_program_cache: set use_program_cache to True if the program has not changed compared to the last step.
:param save_program_to_file: save the program desc to this file before running.
:return: result according to fetch_list.
"""
if feed is None:
@@ -333,6 +335,12 @@
fetch_var_name=fetch_var_name)

self._feed_data(program, feed, feed_var_name, scope)

# TODO(gongwb): should the program be saved in the run function?
if len(save_program_to_file) > 0:
Contributor:
I'm not sure we should do the save here...

Contributor (Author):
The program desc is modified in the run function, and sometimes the user gets no chance to save it.

Contributor:
Maybe run() shouldn't modify the program desc? Or should we save it after run is called?

with open(save_program_to_file, 'wb') as f:  # serialized desc is binary data
    f.write(program.desc.serialize_to_string())

self.executor.run(program.desc, scope, 0, True, True)
outs = self._fetch_data(fetch_list, fetch_var_name, scope)
if return_numpy:
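For context, a minimal usage sketch of the new argument; the toy network, feed data, and file name are illustrative, not from this PR:

import numpy
import paddle.fluid as fluid

x = fluid.layers.data(name="x", shape=[1], dtype="float32")
y = fluid.layers.fc(input=x, size=1)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

# Dump the exact program desc (with feed/fetch ops added) that run() executes.
outs = exe.run(fluid.default_main_program(),
               feed={"x": numpy.ones((4, 1), dtype="float32")},
               fetch_list=[y],
               save_program_to_file="main_program.pb")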
1 change: 1 addition & 0 deletions python/paddle/fluid/parallel_executor.py
@@ -51,6 +51,7 @@ def __init__(self,
gradients of each device and scaled gradients would be
aggregated. Otherwise, a customized scale value should be fed
to the network.
save_program_to_file: Save the program desc that will be run to this file.

Returns:
A ParallelExecutor object.
2 changes: 2 additions & 0 deletions tools/execute_program_desc/CMakeLists.txt
@@ -0,0 +1,2 @@
cc_binary(execute_program_desc SRCS execute_program_desc.cc DEPS proto_desc memory executor prune init
profiler feed_fetch_method parallel_executor ${GLOB_OP_LIB})
127 changes: 127 additions & 0 deletions tools/execute_program_desc/execute_program_desc.cc
@@ -0,0 +1,127 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <cctype>  // for toupper
#include <iostream>
#include <memory>  // for std::unique_ptr
#include <string>

#include "gflags/gflags.h"
#include "paddle/fluid/framework/details/op_registry.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/init.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/pybind/pybind.h"

DEFINE_string(start_up_proto, "", "start up proto file");
DEFINE_string(loop_proto, "", "loop proto file");
DEFINE_string(executor_device, "CPU", "executor's place: GPU or CPU");
DEFINE_int32(executor_device_id, 0, "GPU device id");

bool read_from_file(const std::string& file, char** buf, int64_t* buf_len) {
FILE* f = fopen(file.c_str(), "rb");
if (NULL == f) {
fprintf(stderr, "open %s error\n", file.c_str());
return false;
}

fseek(f, 0, SEEK_END);
int64_t fsize = ftell(f);
fseek(f, 0, SEEK_SET); // same as rewind(f);

*buf = static_cast<char*>(malloc(fsize + 1));
  if (fread(*buf, fsize, 1, f) != 1) {
    fprintf(stderr, "read %s error\n", file.c_str());
    free(*buf);  // avoid leaking the buffer on a short read
    *buf = NULL;
    fclose(f);
    return false;
  }

*buf_len = fsize;

(*buf)[fsize] = 0;
fclose(f);
return true;
}

using namespace paddle; // NOLINT

std::unique_ptr<framework::ProgramDesc> load_desc(const std::string& file) {
  char* buf = NULL;
  int64_t buf_len = 0;

  if (!read_from_file(file, &buf, &buf_len)) {
    return NULL;
  }

  // buf comes from malloc() inside read_from_file(), so pair it with free()
  // when this scope exits.
  std::unique_ptr<char, decltype(&free)> guard(buf, free);

  framework::proto::ProgramDesc proto;
  if (!proto.ParseFromArray(buf, buf_len)) {
    fprintf(stderr, "parse from %s error!\n", file.c_str());
    return NULL;
  }

return std::unique_ptr<framework::ProgramDesc>(
new framework::ProgramDesc(proto));
}

int main(int argc, char** argv) {
// init.
google::ParseCommandLineFlags(&argc, &argv, true);
framework::InitGLOG(argv[0]);
framework::InitDevices(true);

// check arguments.
if (FLAGS_start_up_proto.empty()) {
fprintf(stderr, "please set start_up_proto's path\n");
return -1;
}

if (FLAGS_loop_proto.empty()) {
fprintf(stderr, "please set loop_proto's path\n");
return -1;
}

  framework::Scope scope;

  std::unique_ptr<framework::ProgramDesc> start_up =
      load_desc(FLAGS_start_up_proto);
  std::unique_ptr<framework::ProgramDesc> loop = load_desc(FLAGS_loop_proto);
  if (!start_up || !loop) {
    fprintf(stderr, "load program desc error\n");
    return -1;
  }

std::string place_str = FLAGS_executor_device;
std::transform(place_str.begin(),
place_str.end(),
place_str.begin(),
[](unsigned char ch) { return toupper(ch); });

std::unique_ptr<framework::Executor> exe;
if (place_str == "CPU") {
platform::CPUPlace place;
exe.reset(new framework::Executor(place));
} else if (place_str == "GPU") {
platform::CUDAPlace place(FLAGS_executor_device_id);
exe.reset(new framework::Executor(place));
} else {
fprintf(stderr, "unkown device:%s\n", FLAGS_executor_device.c_str());
return -1;
}

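  // Run the start-up program once to create and initialize the variables in
  // the scope, then run the loop (main) program on the same scope.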
exe->Run(*start_up, &scope, 0, false, true);
exe->Run(*loop, &scope, 0, false, true);

return 0;
}
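For completeness, a hedged sketch of how the two proto files this tool expects could be produced on the Python side, using the save_program_to_file argument added above for the loop program; the toy network and file names are illustrative:

import numpy
import paddle.fluid as fluid

x = fluid.layers.data(name="x", shape=[1], dtype="float32")
loss = fluid.layers.mean(fluid.layers.fc(input=x, size=1))
fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

# The startup program is not modified by run(), so serialize it directly.
with open("start_up.proto", "wb") as f:
    f.write(fluid.default_startup_program().desc.serialize_to_string())

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

# The main program gains feed/fetch ops inside run(), so capture the desc
# that is actually executed.
exe.run(fluid.default_main_program(),
        feed={"x": numpy.ones((4, 1), dtype="float32")},
        fetch_list=[loss],
        save_program_to_file="loop.proto")

The binary can then replay both programs without Python, e.g. execute_program_desc --start_up_proto=start_up.proto --loop_proto=loop.proto --executor_device=GPU --executor_device_id=0.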