logger manager #45909

Merged: 16 commits, merged on Sep 20, 2022. Changes shown from 11 commits.
4 changes: 2 additions & 2 deletions paddle/scripts/paddle_build.sh
@@ -991,7 +991,7 @@ function generate_upstream_develop_api_spec() {
mkdir -p ${PADDLE_ROOT}/build/pr_whl && mv ${PADDLE_ROOT}/build/python/dist/*.whl ${PADDLE_ROOT}/build/pr_whl/
echo "pr_whl_size: ${pr_whl_size}"

rm -rf ${PADDLE_ROOT}/build/Makefile ${PADDLE_ROOT}/build/CMakeCache.txt
rm -rf ${PADDLE_ROOT}/build/Makefile ${PADDLE_ROOT}/build/CMakeCache.txt ${PADDLE_ROOT}/build/python/paddle
cmake_change=`git diff --name-only upstream/$BRANCH | grep "cmake/external" || true`

cd ${PADDLE_ROOT}
@@ -3367,7 +3367,7 @@ function build_pr_and_develop() {
mkdir ${PADDLE_ROOT}/build/pr_whl && cp ${PADDLE_ROOT}/build/python/dist/*.whl ${PADDLE_ROOT}/build/pr_whl
rm -f ${PADDLE_ROOT}/build/python/dist/*.whl && rm -f ${PADDLE_ROOT}/build/python/build/.timestamp
if [[ ${cmake_change} ]];then
rm -rf ${PADDLE_ROOT}/build/Makefile ${PADDLE_ROOT}/build/CMakeCache.txt
rm -rf ${PADDLE_ROOT}/build/Makefile ${PADDLE_ROOT}/build/CMakeCache.txt ${PADDLE_ROOT}/build/python/paddle
rm -rf ${PADDLE_ROOT}/build/third_party
fi

1 change: 0 additions & 1 deletion python/paddle/distributed/__init__.py
@@ -65,7 +65,6 @@
from paddle.fluid.dygraph.parallel import ParallelEnv # noqa: F401

from . import cloud_utils # noqa: F401
from . import utils # noqa: F401

from .sharding import * # noqa: F401

2 changes: 1 addition & 1 deletion python/paddle/distributed/auto_parallel/converter.py
@@ -16,7 +16,7 @@
import warnings
import logging
import numpy as np
from .utils import get_logger
from ..utils.log_utils import get_logger


class Converter(object):
2 changes: 1 addition & 1 deletion python/paddle/distributed/auto_parallel/dist_saver.py
@@ -27,7 +27,7 @@
from .utils import get_dist_attr
from .converter import Converter
from .process_group import _g_process_group_map
from .utils import get_logger
from ..utils.log_utils import get_logger


def check_filename(re_exp, filename):
2 changes: 1 addition & 1 deletion python/paddle/distributed/auto_parallel/parallelizer.py
@@ -24,7 +24,7 @@
import time
import paddle
from paddle.fluid.backward import append_backward
from paddle.distributed.utils import get_logger
from paddle.distributed.utils.log_utils import get_logger
from paddle.distributed.fleet import cloud_utils
import paddle.fluid.core as core
from paddle.fluid import program_guard
6 changes: 2 additions & 4 deletions python/paddle/distributed/cloud_utils.py
@@ -14,10 +14,8 @@

import os
import paddle
from paddle.distributed.utils import get_cluster
from paddle.distributed.utils import logger
from paddle.distributed.utils import get_gpus
from paddle.distributed.utils import get_cluster_from_args
from paddle.distributed.utils.launch_utils import get_cluster, get_gpus, get_cluster_from_args
from paddle.distributed.utils.launch_utils import logger

__all__ = []
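
Note (not part of the diff): the rewritten imports above suggest the old paddle.distributed.utils module has been split into a utils package with log_utils and launch_utils submodules. The sketch below shows the equivalent imports under that assumed layout; the paths are taken from this PR's diffs, but the exact contents of each submodule are not verified here.

# Import paths as they appear in this PR; that every symbol lives in these
# submodules is an assumption, not a confirmed API surface.
from paddle.distributed.utils.log_utils import get_logger
from paddle.distributed.utils.launch_utils import (
    get_cluster,
    get_cluster_from_args,
    get_gpus,
    logger,
)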

4 changes: 4 additions & 0 deletions python/paddle/distributed/fleet/__init__.py
@@ -32,6 +32,7 @@
from .model import distributed_model
from .optimizer import distributed_optimizer
from .scaler import distributed_scaler
from .utils import log_util

__all__ = [ #noqa
"CommunicateTopology", "UtilBase", "HybridCommunicateGroup",
@@ -90,3 +91,6 @@
shrink = fleet.shrink
get_hybrid_communicate_group = fleet.get_hybrid_communicate_group
distributed_scaler = distributed_scaler
set_log_level = log_util.set_log_level
get_log_level_code = log_util.get_log_level_code
@gongweibao (Contributor), Sep 14, 2022:
get_log_level one is enough!

Contributor Author:
keeping

get_log_level_name = log_util.get_log_level_name
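
Note (not part of the diff): a hedged usage sketch of the helpers exported above. fleet.init's new log_level argument appears later in this PR (fleet.py); the return values of get_log_level_code and get_log_level_name are assumptions based on their names, not verified behaviour.

# Hypothetical usage of the exports added above; return values are assumed.
import paddle.distributed.fleet as fleet

fleet.init(log_level="DEBUG")       # init() gains a log_level argument in this PR
fleet.set_log_level("WARNING")      # raise the threshold after init
code = fleet.get_log_level_code()   # assumed: numeric level, e.g. 30 for WARNING
name = fleet.get_log_level_name()   # assumed: level name, e.g. "WARNING"
print(code, name)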
12 changes: 4 additions & 8 deletions python/paddle/distributed/fleet/elastic/manager.py
@@ -26,13 +26,9 @@
from paddle.distributed.fleet import cloud_utils
from paddle.distributed.fleet import launch_utils

logger = logging.getLogger("ELASTIC")
logger.setLevel(logging.INFO)
formatter = logging.Formatter(
fmt='%(name)s %(levelname)s %(asctime)s %(message)s')
ch = logging.StreamHandler()
ch.setFormatter(formatter)
logger.addHandler(ch)
from paddle.distributed.utils.log_utils import get_logger

logger = get_logger("INFO", "ELASTIC")

ELASTIC_EXIT_CODE = 101
ELASTIC_AUTO_PARALLEL_EXIT_CODE = 102
@@ -354,7 +350,7 @@ def pre_hook(self):
stderr=subprocess.PIPE,
shell=True).communicate()
if err:
logger.warn("pre_hook exec failed")
logger.warning("pre_hook exec failed")
else:
logger.info(f"pre_hook exec result: {out.decode('utf-8').strip()}")

64 changes: 40 additions & 24 deletions python/paddle/distributed/fleet/fleet.py
@@ -13,7 +13,6 @@
# limitations under the License.

import copy
import warnings
import paddle
import os
from types import MethodType
@@ -32,6 +31,8 @@
from .meta_parallel import model_parallel_random_seed
from paddle import _C_ops, _legacy_C_ops
from paddle.fluid import core
from .utils.log_util import logger, set_log_level
import logging

__all__ = []

@@ -54,7 +55,7 @@ def apply_ir_passes(main_program, startup_program, config):
# RawProgramOptimizer also inserts coalesce_tensor
# into program. These two procedures may conflict
# in which vars are to be fused.
warnings.warn(
logger.warning(
'Currently, the fuse_all_optimizer_ops pass has conflict with fuse_all_reduce_ops pass. Disable the fuse_all_optimizer_ops pass temporarily.'
)
build_strategy.fuse_all_optimizer_ops = False
@@ -83,7 +84,7 @@ def __impl__(*args, **kwargs):

if cls._role_maker is not None and cls._role_maker._is_non_distributed(
) is True:
warnings.warn(
logger.warning(
"%s() function doesn't work when use non_distributed fleet." %
(func.__name__))
return
@@ -165,7 +166,7 @@ def __init__(self):
self._context = {}
self.user_defined_optimizer = paddle.optimizer.Optimizer(0.0)

def init(self, role_maker=None, is_collective=False, strategy=None):
def init(self,
role_maker=None,
is_collective=False,
strategy=None,
log_level="INFO"):
"""
Initialize role_maker in Fleet.

@@ -183,6 +188,8 @@ def init(self, role_maker=None, is_collective=False, strategy=None):
is False.
strategy (DistributedStrategy): Extra properties for distributed training.
For details, please refer to paddle.distributed.fleet.DistributedStrategy. Default: None.
log_level (Integer, String, optional): An ``Integer`` or ``String`` value that determines
the logging level. Default is "INFO".


Returns:
@@ -218,7 +225,18 @@ def init(self, role_maker=None, is_collective=False, strategy=None):
strategy = fleet.DistributedStrategy()
fleet.init(strategy=strategy)

Examples5:

.. code-block:: python

import paddle.distributed.fleet as fleet
strategy = fleet.DistributedStrategy()
fleet.init(log_level = "DEBUG")

"""

set_log_level(log_level)

if strategy is None:
strategy = DistributedStrategy()
self._user_defined_strategy = copy.deepcopy(strategy)
@@ -262,12 +280,12 @@ def init(self, role_maker=None, is_collective=False, strategy=None):
self._hcg = tp.HybridCommunicateGroup(self._topology)
return
if parallel_helper._is_parallel_ctx_initialized():
warnings.warn(
logger.warning(
"The dygraph parallel environment has been initialized.")
else:
# FLAGS_nccl_nrings is used for dynamic graph multi-stream communication
if "FLAGS_nccl_nrings" in os.environ:
warnings.warn(
logger.warning(
"You have set the environment variable FLAGS_nccl_nrings "
"outside the program, so the nccl_comm_num in "
"DistributedStrategy will not take effect here.")
@@ -282,7 +300,7 @@ def init(self, role_maker=None, is_collective=False, strategy=None):
if tp._HYBRID_PARALLEL_GROUP is None:
self._init_hybrid_parallel_env()
else:
warnings.warn(
logger.warning(
"The dygraph hybrid parallel environment has been initialized."
)
elif self._is_collective:
@@ -851,9 +869,6 @@ def save_inference_model(self,
fleet.init_server()

"""
# warnings.warn(
# "'save_inference_model' is a deprecated, will be deleted after v2.2.0, Please use fleet.save instead."
# )

self._runtime_handle._save_inference_model(executor, dirname,
feeded_var_names,
@@ -903,10 +918,6 @@ def save_persistables(self, executor, dirname, main_program=None, mode=0):
fleet.save_persistables(exe, "dirname", paddle.static.default_main_program())

"""
# warnings.warn(
# "'save_persistables' is a deprecated, will be deleted after v2.2.0, Please use fleet.save instead."
# )

self._runtime_handle._save_persistables(executor, dirname, main_program,
mode)

@@ -1016,7 +1027,7 @@ def distributed_optimizer(self, optimizer, strategy=None):

if strategy is not None:
if self._is_collective:
warnings.warn(
logger.warning(
"It is recommended to use DistributedStrategy "
"in fleet.init(). The strategy here is only for compatibility. "
"If the strategy in fleet.distributed_optimizer() is "
@@ -1305,8 +1316,9 @@ def _minimize_impl(self,
copy_user_defined_strategy, can_not_apply_optimizer_list)

context["valid_strategy"] = copy.deepcopy(valid_strategy)
# print("valid_strategy:", context["valid_strategy"])
# print("user_defined_strategy:", context["user_defined_strategy"])
logger.debug("valid_strategy: " + str(context["valid_strategy"]))
logger.debug("user_defined_strategy: " +
str(context["user_defined_strategy"]))

applied_meta_list = self.strategy_compiler._get_applied_meta_list()
applied_graph_list = self.strategy_compiler._get_applied_graph_list()
@@ -1336,17 +1348,19 @@ def _minimize_impl(self,
no_grad_set=no_grad_set)

if meta_optimizer:
# print("before minimize program id:", id(loss.block.program))
Contributor:
logger.debug()?

Contributor Author:
done

logger.debug("before minimize program id: " +
str(id(loss.block.program)))
optimize_ops, params_grads = meta_optimizer.minimize(
loss, startup_program, parameter_list, no_grad_set=no_grad_set)
# print("after minimize program id:", id(loss.block.program))
Contributor:
Same as before

Contributor Author:
done


logger.debug("after minimize program id: " +
str(id(loss.block.program)))
default_program = paddle.static.default_main_program()
# print("default program id:", id(default_program))
logger.debug("default program id: " + str(id(default_program)))

if id(default_program) != id(loss.block.program):
paddle.fluid.framework.switch_main_program(loss.block.program)
# print("default program id after switch:", id(default_program))
logger.debug("default program id after switch: " +
str(id(default_program)))

else:
optimize_ops, params_grads = self.user_defined_optimizer.minimize(
Expand All @@ -1356,7 +1370,8 @@ def _minimize_impl(self,
context["program_params_grads"] = params_grads

if graph_optimizer:
# print("before graph minimize program id:", id(loss.block.program))
logger.debug("before graph minimize program id: " +
str(id(loss.block.program)))
optimize_ops, params_grads = graph_optimizer.minimize(
loss, startup_program, parameter_list, no_grad_set=no_grad_set)
# since we do not encourage users to use graph operations
@@ -1455,7 +1470,8 @@ def _minimize_losses_impl(self,
if v or k not in opt_info:
opt_info[k] = v
program._fleet_opt = opt_info
# print("fleet base opt info:", id(program), program._fleet_opt)
logger.debug("fleet base opt info: " + str(id(program)) +
str(program._fleet_opt))

if self._runtime_handle is None:
self._runtime_handle = RuntimeFactory()._create_runtime(context)
@@ -30,15 +30,8 @@
from .sharding import utils
# FIXME: import *
from .sharding.utils import *

import logging

logger = logging.getLogger(__name__)
formatter = logging.Formatter(fmt='%(asctime)s %(levelname)-8s %(message)s',
datefmt='%Y-%m-%d %H:%M:%S')
ch = logging.StreamHandler()
ch.setFormatter(formatter)
logger.addHandler(ch)
from ..utils.log_util import logger

__all__ = []

@@ -33,7 +33,7 @@
import paddle
from paddle import nn
from paddle.distributed import collective
from paddle.distributed.utils import get_logger
from paddle.distributed.utils.log_utils import get_logger

from .group_sharded_storage import GradStorage
from .group_sharded_optimizer_stage2 import GroupShardedOptimizerStage2
4 changes: 2 additions & 2 deletions python/paddle/distributed/fleet/optimizer.py
@@ -13,7 +13,6 @@
# limitations under the License.

import copy
import warnings
import paddle
import os
import numpy as np
@@ -22,6 +21,7 @@
from .meta_optimizers import HybridParallelOptimizer, HeterParallelOptimizer
from paddle.fluid import core
from paddle.distributed import fleet
from .utils.log_util import logger


def _dygraph_distributed_optimizer(optimizer, strategy=None):
@@ -52,7 +52,7 @@ def _dygraph_distributed_optimizer(optimizer, strategy=None):

if strategy is not None:
if fleet_env._is_collective:
warnings.warn(
logger.warning(
"It is recommended to use DistributedStrategy "
"in fleet_env.init(). The strategy here is only for compatibility. "
"If the strategy in fleet_env.distributed_optimizer() is "
@@ -14,7 +14,6 @@
import os
import six
import numpy as np
import warnings

from paddle import framework
import paddle