From 93c39779b49a40df5e05c503dba423e381d58daf Mon Sep 17 00:00:00 2001 From: Zhou Wei <52485244+zhouwei25@users.noreply.github.com> Date: Mon, 16 Nov 2020 10:43:51 +0800 Subject: [PATCH 01/56] open a part of GPU unittest for windows (#28378) * open a part of GPU unittest for windows * open a part of GPU unittest for windows --- CMakeLists.txt | 30 +++-- cmake/init.cmake | 10 +- paddle/scripts/paddle_build.bat | 223 +++++++++++++++++++++++--------- 3 files changed, 184 insertions(+), 79 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 91820123da4831..2faa0a2bbbcb3f 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -74,25 +74,39 @@ if(WIN32) endforeach(flag_var) endif() - # windows build turn off warnings. + # windows build turn off warnings, use parallel compiling. foreach(flag_var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}") + set(${flag_var} "${${flag_var}} /MP /bigobj") endforeach(flag_var) foreach(flag_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS) set(${flag_var} "${${flag_var}} /w") endforeach(flag_var) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838 /MP") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838 /MP") - message(STATUS "Using parallel compiling (/MP)") - set(PADDLE_LINK_FLAGS "/IGNORE:4006 /IGNORE:4098 /IGNORE:4217 /IGNORE:4221") - set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} ${PADDLE_LINK_FLAGS}") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${PADDLE_LINK_FLAGS}") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${PADDLE_LINK_FLAGS}") + # Windows Remove /Zi, /ZI for Release, MinSizeRel builds + foreach(flag_var + CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL) + if(${flag_var} MATCHES "/Z[iI]") + string(REGEX REPLACE "/Z[iI]" "" ${flag_var} "${${flag_var}}") + endif() + endforeach(flag_var) + + foreach(flag_var + CMAKE_STATIC_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS + CMAKE_EXE_LINKER_FLAGS) + set(${flag_var} "${${flag_var}} /IGNORE:4006 /IGNORE:4098 /ignore:4049 /IGNORE:4217 /IGNORE:4221") + if(${flag_var} MATCHES "/INCREMENTAL" AND NOT ${flag_var} MATCHES "/INCREMENTAL:NO") + string(REGEX REPLACE "/INCREMENTAL" "/INCREMENTAL:NO" ${flag_var} "${${flag_var}}") + endif() + endforeach(flag_var) + + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838") else(WIN32) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations -Wno-deprecated-declarations") endif(WIN32) diff --git a/cmake/init.cmake b/cmake/init.cmake index 5f36a9adf1ae63..aea02088750df4 100644 --- a/cmake/init.cmake +++ b/cmake/init.cmake @@ -1,7 +1,7 @@ # Attention: cmake will append these flags to compile command automatically. 
# So if you want to add global option, change this file rather than flags.cmake -# NOT WIN32 +# Linux # DEBUG: default: "-g" # RELEASE: default: "-O3 -DNDEBUG" # RELWITHDEBINFO: default: "-O2 -g -DNDEBUG" @@ -17,6 +17,8 @@ if(NOT WIN32) set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG") set(CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG") +else() + set(WIN_PROPS ${CMAKE_SOURCE_DIR}/cmake/paddle_win.props) endif() if(WITH_GPU) @@ -25,9 +27,3 @@ if(WITH_GPU) set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG") set(CMAKE_CUDA_FLAGS_MINSIZEREL "-O1 -DNDEBUG") endif() - -if(WIN32) - set(WIN_PROPS ${CMAKE_SOURCE_DIR}/cmake/paddle_win.props) - set(CMAKE_CXX_FLAGS_RELEASE "-O3 -Os -DNDEBUG") -endif() - diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat index d557cad1c4c6fc..450cb7546fd4c3 100644 --- a/paddle/scripts/paddle_build.bat +++ b/paddle/scripts/paddle_build.bat @@ -1,3 +1,4 @@ +@ECHO OFF rem Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. rem rem Licensed under the Apache License, Version 2.0 (the "License"); @@ -22,15 +23,16 @@ setlocal rem -------clean up environment----------- set work_dir=%cd% set cache_dir=%work_dir:Paddle=cache% +if not exist %cache_dir%\tools ( + git clone https://github.com/zhouwei25/tools.git %cache_dir%\tools +) taskkill /f /im op_function_generator.exe wmic process where name="op_function_generator.exe" call terminate rem ------initialize common variable------ -if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0" if not defined BRANCH set BRANCH=develop if not defined TENSORRT_ROOT set TENSORRT_ROOT="C:/TensorRT-5.1.5.0" if not defined WITH_MKL set WITH_MKL=ON -if not defined WITH_GPU set WITH_GPU=OFF if not defined WITH_AVX set WITH_AVX=ON if not defined WITH_TESTING set WITH_TESTING=ON if not defined WITH_PYTHON set WITH_PYTHON=ON @@ -60,7 +62,7 @@ setlocal enabledelayedexpansion git show-ref --verify --quiet refs/heads/last_pr if %ERRORLEVEL% EQU 0 ( git diff HEAD last_pr --stat --name-only - git diff HEAD last_pr --stat --name-only | findstr "cmake CMakeLists.txt paddle_build.bat" + git diff HEAD last_pr --stat --name-only | findstr ".cmake CMakeLists.txt paddle_build.bat" if !ERRORLEVEL! 
EQU 0 ( rmdir build /s/q ) @@ -71,19 +73,19 @@ if %ERRORLEVEL% EQU 0 ( git branch last_pr ) -for /F %%# in ('wmic os get localdatetime^|findstr 20') do set datetime=%%# -set day_now=%datetime:~6,2% -set day_before=-1 -set /p day_before=< %cache_dir%\day.txt -if %day_now% NEQ %day_before% ( - echo %day_now% > %cache_dir%\day.txt - type %cache_dir%\day.txt - rmdir build /s/q - goto :mkbuild -) +:: for /F %%# in ('wmic os get localdatetime^|findstr 20') do set datetime=%%# +:: set day_now=%datetime:~6,2% +:: set day_before=-1 +:: set /p day_before=< %cache_dir%\day.txt +:: if %day_now% NEQ %day_before% ( +:: echo %day_now% > %cache_dir%\day.txt +:: type %cache_dir%\day.txt +:: rmdir build /s/q +:: goto :mkbuild +:: ) :: git diff HEAD origin/develop --stat --name-only -:: git diff HEAD origin/develop --stat --name-only | findstr "cmake CMakeLists.txt paddle_build.bat" +:: git diff HEAD origin/develop --stat --name-only | findstr ".cmake CMakeLists.txt paddle_build.bat" :: if %ERRORLEVEL% EQU 0 ( :: rmdir build /s/q :: ) @@ -117,13 +119,12 @@ pip install gym --user pip install -U -r %work_dir%\python\requirements.txt --user pip install -U -r %work_dir%\python\unittest_py\requirements.txt --user if %ERRORLEVEL% NEQ 0 ( - call paddle_winci\Scripts\deactivate.bat 2>NUL echo pip install requirements.txt failed! exit /b 7 ) rem ------pre install clcache and init config---------- -pip install clcache +pip install clcache --user :: set USE_CLCACHE to enable clcache set USE_CLCACHE=1 :: In some scenarios, CLCACHE_HARDLINK can save one file copy. @@ -133,29 +134,9 @@ set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000 :: set maximum cache size to 20G clcache.exe -M 21474836480 -rem ------set cache third_party------ -if not exist %cache_dir%\tools ( - git clone https://github.com/zhouwei25/tools.git %cache_dir%\tools -) - -if "%WITH_TPCACHE%"=="OFF" ( - set THIRD_PARTY_PATH=%work_dir:\=/%/build/third_party - goto :CASE_%1 -) - -echo set -ex > cache.sh -echo md5_content=$(cat %work_dir:\=/%/cmake/external/*.cmake ^|md5sum ^| awk '{print $1}') >> cache.sh -echo echo ${md5_content}^>md5.txt >> cache.sh - -%cache_dir%\tools\busybox64.exe cat cache.sh -%cache_dir%\tools\busybox64.exe bash cache.sh - -set /p md5=< md5.txt -if "%WITH_GPU%"=="ON" ( - set THIRD_PARTY_PATH=%cache_dir:\=/%/third_party_GPU/%md5% -) else ( - set THIRD_PARTY_PATH=%cache_dir:\=/%/third_party/%md5% -) +rem ------show summary of current environment---------- +python %work_dir%\tools\summary_env.py +%cache_dir%\tools\busybox64.exe bash %work_dir%\tools\get_cpu_info.sh goto :CASE_%1 @@ -166,52 +147,88 @@ echo "wincheck_openbals: run Windows OPENBLAS/CPU CI tasks on Windows" exit /b 1 :CASE_wincheck_mkl + +rem ------initialize cmake variable for mkl------ set WITH_MKL=ON set WITH_GPU=OFF set MSVC_STATIC_CRT=ON + call :cmake || goto cmake_error call :build || goto build_error call :test_whl_pacakage || goto test_whl_pacakage_error call :unit_test || goto unit_test_error call :test_inference || goto test_inference_error -call :check_change_of_unittest || goto check_change_of_unittest_error +:: call :check_change_of_unittest || goto check_change_of_unittest_error goto:success :CASE_wincheck_openblas -set WITH_MKL=OFF + +rem ------initialize cmake variable for openblas------ +set WITH_MKL=ON set WITH_GPU=ON set MSVC_STATIC_CRT=OFF rem Temporarily turn off WITH_INFERENCE_API_TEST on GPU due to compile hang set WITH_INFERENCE_API_TEST=OFF + call :cmake || goto cmake_error call :build || goto build_error call :test_whl_pacakage || goto 
test_whl_pacakage_error +:: call :unit_test || goto unit_test_error :: call :test_inference || goto test_inference_error +:: call :check_change_of_unittest || goto check_change_of_unittest_error goto:success rem "Other configurations are added here" rem :CASE_wincheck_others rem call ... - rem --------------------------------------------------------------------------------------------- :cmake echo ======================================== echo Step 1. Cmake ... echo ======================================== + call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 for /F %%# in ('wmic os get localdatetime^|findstr 20') do set start=%%# set start=%start:~4,10% + +@ECHO ON +if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0 +set PATH=%CUDA_TOOLKIT_ROOT_DIR%\bin;%CUDA_TOOLKIT_ROOT_DIR%\libnvvp;%PATH% +set CUDA_PATH=%CUDA_TOOLKIT_ROOT_DIR% + +rem ------set third_party cache dir------ + +if "%WITH_TPCACHE%"=="OFF" ( + set THIRD_PARTY_PATH=%work_dir:\=/%/build/third_party + goto :cmake_impl +) + +echo set -ex > cache.sh +echo md5_content=$(cat %work_dir:\=/%/cmake/external/*.cmake ^|md5sum ^| awk '{print $1}') >> cache.sh +echo echo ${md5_content}^>md5.txt >> cache.sh + +%cache_dir%\tools\busybox64.exe cat cache.sh +%cache_dir%\tools\busybox64.exe bash cache.sh + +set /p md5=< md5.txt +if "%WITH_GPU%"=="ON" ( + set THIRD_PARTY_PATH=%cache_dir:\=/%/third_party_GPU/%md5% +) else ( + set THIRD_PARTY_PATH=%cache_dir:\=/%/third_party/%md5% +) + +:cmake_impl echo cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% ^ --DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_TOOLKIT_ROOT_DIR% ^ --DON_INFER=%ON_INFER% -DWITH_INFERENCE_API_TEST=%WITH_INFERENCE_API_TEST% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% ^ +-DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DON_INFER=%ON_INFER% ^ +-DWITH_INFERENCE_API_TEST=%WITH_INFERENCE_API_TEST% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% ^ -DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% -DWITH_STATIC_LIB=%WITH_STATIC_LIB% ^ -DTENSORRT_ROOT=%TENSORRT_ROOT% -DMSVC_STATIC_CRT=%MSVC_STATIC_CRT% cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% ^ --DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_TOOLKIT_ROOT_DIR% ^ --DON_INFER=%ON_INFER% -DWITH_INFERENCE_API_TEST=%WITH_INFERENCE_API_TEST% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% ^ +-DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DON_INFER=%ON_INFER% ^ +-DWITH_INFERENCE_API_TEST=%WITH_INFERENCE_API_TEST% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% ^ -DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% -DWITH_STATIC_LIB=%WITH_STATIC_LIB% ^ -DTENSORRT_ROOT=%TENSORRT_ROOT% -DMSVC_STATIC_CRT=%MSVC_STATIC_CRT% goto:eof @@ -224,6 +241,7 @@ exit /b 7 rem --------------------------------------------------------------------------------------------- :build +@ECHO OFF echo ======================================== echo Step 2. Buile Paddle ... echo ======================================== @@ -270,6 +288,7 @@ exit /b 7 rem --------------------------------------------------------------------------------------------- :test_whl_pacakage +@ECHO OFF echo ======================================== echo Step 3. Test pip install whl package ... 
echo ======================================== @@ -282,7 +301,7 @@ call :timestamp "%start%" "%end%" "Build" tree /F %cd%\paddle_inference_install_dir\paddle %cache_dir%\tools\busybox64.exe du -h -d 0 -k %cd%\paddle_inference_install_dir\paddle\lib > lib_size.txt set /p libsize=< lib_size.txt -@ECHO OFF + for /F %%i in ("%libsize%") do ( set /a libsize_m=%%i/1024 echo "Windows Paddle_Inference Size: !libsize_m!M" @@ -303,17 +322,19 @@ if %ERRORLEVEL% NEQ 0 ( exit /b 1 ) +set CUDA_VISIBLE_DEVICES=0 python %work_dir%\paddle\scripts\installation_validate.py goto:eof :test_whl_pacakage_error -echo 1 > %cache_dir%\error_code.txt -type %cache_dir%\error_code.txt +::echo 1 > %cache_dir%\error_code.txt +::type %cache_dir%\error_code.txt echo Test import paddle failed, will exit! exit /b 1 rem --------------------------------------------------------------------------------------------- :unit_test +@ECHO OFF echo ======================================== echo Step 4. Running unit tests ... echo ======================================== @@ -339,6 +360,7 @@ if %errorlevel%==0 ( set PATH=%THIRD_PARTY_PATH:/=\%\install\openblas\lib;%THIRD_PARTY_PATH:/=\%\install\openblas\bin;^ %THIRD_PARTY_PATH:/=\%\install\zlib\bin;%THIRD_PARTY_PATH:/=\%\install\mklml\lib;^ %THIRD_PARTY_PATH:/=\%\install\mkldnn\bin;%THIRD_PARTY_PATH:/=\%\install\warpctc\bin;%PATH% + if "%NIGHTLY_MODE%"=="ON" ( set nightly_label="()" ) else ( @@ -348,12 +370,82 @@ if "%NIGHTLY_MODE%"=="ON" ( echo ======================================== ) +if "%WITH_GPU%"=="ON" ( + goto:parallel_test_base_gpu +) else ( + goto:parallel_test_base_cpu +) + +:parallel_test_base_gpu +echo ======================================== +echo Running GPU unit tests in parallel way ... +echo ======================================== + +set FLAGS_fraction_of_gpu_memory_to_use=0.75 + +nvidia-smi -L +for /F %%# in ('nvidia-smi -L ^| findstr "GPU" /C /I') do set CUDA_DEVICE_COUNT=%%# +if !errorlevel! 
NEQ 0 exit /b 8 + +rem TODO: fix these unittest that is bound to fail +rem /*==================Disabled Windows==============================*/ +set diable_wingpu_test=tensor_util_test^|lod_tensor_test^|selected_rows_test^|broadcast_op_test^|fused_broadcast_op_test^|assign_op_test^|save_load_op_test^|save_load_combine_op_test^|im2col_test^|^ +beam_search_test^|test_analysis_predictor^|test_model^|test_add_reader_dependency^|test_bilateral_slice_op^|test_buffer_shared_memory_reuse_pass^|test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass^|^ +test_cholesky_op^|test_dataloader_early_reset^|test_dataloader_keep_order^|test_dataloader_unkeep_order^|test_decoupled_py_reader^|test_decoupled_py_reader_data_check^|test_eager_deletion_delete_vars^|^ +test_eager_deletion_while_op^|test_feed_data_check_shape_type^|test_fetch_lod_tensor_array^|test_fetch_unmerged^|test_fleet_base_single^|test_fuse_all_reduce_pass^|test_fuse_elewise_add_act_pass^|^ +test_fuse_optimizer_pass^|test_generator_dataloader^|test_gpu_package_without_gpu_device^|test_ir_memory_optimize_ifelse_op^|test_ir_memory_optimize_nlp^|test_lr_scheduler^|^ +test_multiprocess_dataloader_iterable_dataset_dynamic^|test_multiprocess_dataloader_iterable_dataset_static^|test_nvprof^|test_parallel_dygraph_sync_batch_norm^|test_parallel_executor_drop_scope^|^ +test_parallel_executor_dry_run^|test_parallel_executor_feed_persistable_var^|test_parallel_executor_fetch_isolated_var^|test_parallel_executor_inference_feed_partial_data^|test_parallel_executor_mnist^|^ +test_parallel_executor_seresnext_base_gpu^|test_parallel_executor_seresnext_with_fuse_all_reduce_gpu^|test_parallel_executor_seresnext_with_reduce_gpu^|test_parallel_executor_test_while_train^|^ +test_parallel_ssa_graph_inference_feed_partial_data^|test_partial_eager_deletion_transformer^|test_program_prune_backward^|test_prune^|test_py_reader_combination^|test_py_reader_pin_memory^|^ +test_py_reader_push_pop^|test_py_reader_using_executor^|test_reader_reset^|test_sync_batch_norm_op^|test_update_loss_scaling_op^|test_imperative_static_runner_while^|test_parallel_executor_crf^|^ +test_parallel_executor_profiler^|test_parallel_executor_transformer^|test_parallel_executor_transformer_auto_growth^|test_parallel_executor_seresnext_base_cpu^|test_yolov3^|^ +test_parallel_executor_seresnext_with_reduce_cpu^|test_parallel_executor_seresnext_with_fuse_all_reduce_cpu^|test_flags_use_mkldnn^|test_spawn_and_init_parallel_env^|test_train_recognize_digits^|^ +test_optimizer_in_control_flow^|test_fuse_bn_act_pass^|test_fuse_bn_add_act_pass^|test_activation_mkldnn_op^|test_tsm +rem /*===============================================================*/ + +rem these unittest that cost long time, diabled temporarily, greater than 10s +set long_time_test=test_trilinear_interp_v2_op^|best_fit_allocator_test^|timer_test^|best_fit_allocator_test^|test_image_classification^|test_recognize_digits^|decorator_test^|test_callbacks^|^ +test_dataset_cifar^|test_dataset_imdb^|test_dataset_movielens^|test_datasets^|test_pretrained_model^|test_concat_op^|test_elementwise_add_op^|test_elementwise_sub_op^|test_gather_op^|test_gather_nd_op^|^ +test_sequence_concat^|test_sequence_conv^|test_sequence_pool^|test_sequence_slice_op^|test_space_to_depth_op^|test_activation_nn_grad^|test_activation_op^|test_auto_growth_gpu_memory_limit^|^ 
+test_bicubic_interp_op^|test_bicubic_interp_v2_op^|test_bilinear_interp_v2_op^|test_conv2d_op^|test_conv3d_op^|test_conv3d_transpose_part2_op^|test_conv_nn_grad^|test_crop_tensor_op^|^ +test_cross_entropy2_op^|test_cross_op^|test_deformable_conv_v1_op^|test_dropout_op^|test_dygraph_multi_forward^|test_elementwise_div_op^|test_elementwise_nn_grad^|test_empty_op^|^ +test_fused_elemwise_activation_op^|test_group_norm_op^|test_gru_op^|test_gru_unit_op^|test_imperative_lod_tensor_to_selected_rows^|test_imperative_optimizer^|test_imperative_ptb_rnn^|^ +test_imperative_save_load^|test_imperative_selected_rows_to_lod_tensor^|test_imperative_star_gan_with_gradient_penalty^|test_imperative_transformer_sorted_gradient^|test_layer_norm_op^|^ +test_lstm_cudnn_op^|test_masked_select_op^|test_matmul_v2_op^|test_multiclass_nms_op^|test_naive_best_fit_gpu_memory_limit^|test_nearest_interp_v2_op^|test_nn_grad^|test_norm_nn_grad^|^ +test_normal^|test_pool3d_op^|test_pool2d_op^|test_prroi_pool_op^|test_regularizer^|test_regularizer_api^|test_sgd_op^|test_softmax_with_cross_entropy_op^|test_static_save_load^|^ +test_trilinear_interp_op^|test_trilinear_interp_v2_op^|test_weight_decay^|test_bilinear_interp_op^|test_nearest_interp_op^|test_sequence_conv^|test_transformer^|test_imperative_out_scale^|^ +test_imperative_qat^|test_imperative_qat_channelwise^|test_quantization_pass^|test_beam_search_decoder^|test_argsort_op^|test_eager_deletion_gru_net^|test_lstmp_op^|test_label_semantic_roles^|^ +test_graph^|test_user_defined_quantization + +set /a end=CUDA_DEVICE_COUNT-1 + +set parallel_test='' + +for /L %%# in (0,1,%end%) do ( + set CUDA_VISIBLE_DEVICES=%%# + ctest.exe -I %%#,,%CUDA_DEVICE_COUNT% -R %parallel_test% -E "%disable_ut_quickly%|%diable_wingpu_test%|%long_time_test%" -LE %nightly_label% --output-on-failure -C Release -j 2 --repeat until-pass:4 after-timeout:4 + if !errorlevel! NEQ 0 exit /b 8 +) + +for /L %%# in (0,1,%end%) do ( + set CUDA_VISIBLE_DEVICES=%%# + ctest.exe -I %%#,,%CUDA_DEVICE_COUNT% -E "%disable_ut_quickly%|%parallel_test%|%diable_wingpu_test%|%long_time_test%" -LE %nightly_label% --output-on-failure -C Release -j 1 --repeat until-pass:4 after-timeout:4 + if !errorlevel! NEQ 0 exit /b 8 +) +goto:eof + +:parallel_test_base_cpu +echo ======================================== +echo Running CPU unit tests in parallel way ... +echo ======================================== ctest.exe -E "(%disable_ut_quickly%)" -LE %nightly_label% --output-on-failure -C Release -j 8 --repeat until-pass:4 after-timeout:4 + goto:eof :unit_test_error -echo 8 > %cache_dir%\ -type %cache_dir%\error_code.txt +:: echo 8 > %cache_dir%\error_code.txt +:: type %cache_dir%\error_code.txt for /F %%# in ('wmic os get localdatetime^|findstr 20') do set end=%%# set end=%end:~4,10% call :timestamp "%start%" "%end%" "1 card TestCases Total" @@ -363,6 +455,7 @@ exit /b 8 rem --------------------------------------------------------------------------------------------- :test_inference +@ECHO OFF echo ======================================== echo Step 5. Testing fluid library for inference ... echo ======================================== @@ -377,18 +470,18 @@ cd %work_dir%\paddle\fluid\inference\api\demo_ci goto:eof :test_inference_error -echo 1 > %cache_dir%\error_code.txt -type %cache_dir%\error_code.txt +::echo 1 > %cache_dir%\error_code.txt +::type %cache_dir%\error_code.txt echo Testing fluid library for inference failed! 
exit /b 1 rem --------------------------------------------------------------------------------------------- :check_change_of_unittest +@ECHO OFF echo ======================================== echo Step 6. Check whether deleting a unit test ... echo ======================================== -@ECHO OFF cd /d %work_dir%\build echo set -e> check_change_of_unittest.sh echo set +x>> check_change_of_unittest.sh @@ -398,6 +491,7 @@ echo BRANCH=%BRANCH%>> check_change_of_unittest.sh echo if [ "${GITHUB_API_TOKEN}" == "" ] ^|^| [ "${GIT_PR_ID}" == "" ];then>> check_change_of_unittest.sh echo exit 0 >> check_change_of_unittest.sh echo fi>> check_change_of_unittest.sh +echo set -x>> check_change_of_unittest.sh echo cat ^<^> check_change_of_unittest.sh echo ============================================ >> check_change_of_unittest.sh echo Generate unit tests.spec of this PR. >> check_change_of_unittest.sh @@ -411,8 +505,8 @@ echo UPSTREAM_URL='https://github.com/PaddlePaddle/Paddle'>> check_change_of_un echo origin_upstream_url=`git remote -v ^| awk '{print $1, $2}' ^| uniq ^| grep upstream ^| awk '{print $2}'`>> check_change_of_unittest.sh echo if [ "$origin_upstream_url" == "" ]; then>> check_change_of_unittest.sh echo git remote add upstream $UPSTREAM_URL.git>> check_change_of_unittest.sh -echo elif [ "$origin_upstream_url" != "$UPSTREAM_URL" ] \>> check_change_of_unittest.sh -echo ^&^& [ "$origin_upstream_url" != "$UPSTREAM_URL.git" ]; then>> check_change_of_unittest.sh +echo elif [ "$origin_upstream_url" ^!= "$UPSTREAM_URL" ] ^\>> check_change_of_unittest.sh +echo ^&^& [ "$origin_upstream_url" ^!= "$UPSTREAM_URL.git" ]; then>> check_change_of_unittest.sh echo git remote remove upstream>> check_change_of_unittest.sh echo git remote add upstream $UPSTREAM_URL.git>> check_change_of_unittest.sh echo fi>> check_change_of_unittest.sh @@ -422,9 +516,10 @@ echo fi>> check_change_of_unittest.sh echo git checkout -b origin_pr >> check_change_of_unittest.sh echo git checkout -f $BRANCH >> check_change_of_unittest.sh echo cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% ^ --DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_TOOLKIT_ROOT_DIR% ^ --DON_INFER=%ON_INFER% -DWITH_INFERENCE_API_TEST=%WITH_INFERENCE_API_TEST% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% ^ --DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% >> check_change_of_unittest.sh +-DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DON_INFER=%ON_INFER% ^ +-DWITH_INFERENCE_API_TEST=%WITH_INFERENCE_API_TEST% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% ^ +-DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% -DWITH_STATIC_LIB=%WITH_STATIC_LIB% ^ +-DTENSORRT_ROOT=%TENSORRT_ROOT% -DMSVC_STATIC_CRT=%MSVC_STATIC_CRT% >> check_change_of_unittest.sh echo cat ^<^> check_change_of_unittest.sh echo ============================================ >> check_change_of_unittest.sh echo Generate unit tests.spec of develop. 
>> check_change_of_unittest.sh @@ -433,10 +528,11 @@ echo EOF>> check_change_of_unittest.sh echo spec_path=$(pwd)/UNITTEST_DEV.spec>> check_change_of_unittest.sh echo ctest -N ^| awk -F ':' '{print $2}' ^| sed '/^^$/d' ^| sed '$d' ^> ${spec_path}>> check_change_of_unittest.sh echo unittest_spec_diff=`python $(pwd)/../tools/diff_unittest.py $(pwd)/UNITTEST_DEV.spec $(pwd)/UNITTEST_PR.spec`>> check_change_of_unittest.sh -echo if [ "$unittest_spec_diff" != "" ]; then>> check_change_of_unittest.sh -echo # approval_user_list: XiaoguangHu01 46782768,luotao1 6836917,phlrain 43953930,lanxianghit 47554610, zhouwei25 52485244, kolinwei 22165420>> check_change_of_unittest.sh +echo if [ "$unittest_spec_diff" ^!= "" ]; then>> check_change_of_unittest.sh +echo set +x>> check_change_of_unittest.sh echo approval_line=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000`>> check_change_of_unittest.sh -echo if [ "$approval_line" != "" ]; then>> check_change_of_unittest.sh +echo set -x>> check_change_of_unittest.sh +echo if [ "$approval_line" ^!= "" ]; then>> check_change_of_unittest.sh echo APPROVALS=`echo ${approval_line} ^|python $(pwd)/../tools/check_pr_approval.py 1 22165420 52485244 6836917`>> check_change_of_unittest.sh echo echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}">> check_change_of_unittest.sh echo if [ "${APPROVALS}" == "FALSE" ]; then>> check_change_of_unittest.sh @@ -458,13 +554,12 @@ echo git checkout -f origin_pr >> check_change_of_unittest.sh goto:eof :check_change_of_unittest_error -echo 1 > %cache_dir%\error_code.txt -type %cache_dir%\error_code.txt exit /b 1 :timestamp setlocal enabledelayedexpansion +@ECHO OFF set start=%~1 set dd=%start:~2,2% set /a dd=100%dd%%%100 From a24d186814f8580fa7faa155bd5db14243fbc68b Mon Sep 17 00:00:00 2001 From: danleifeng <52735331+danleifeng@users.noreply.github.com> Date: Mon, 16 Nov 2020 11:19:28 +0800 Subject: [PATCH 02/56] fix nccl init failed in parallel dygraph mode (#28497) --- paddle/fluid/imperative/nccl_context.cc | 34 ++++++++++++-------- python/paddle/distributed/parallel.py | 41 ++++++++++++++----------- 2 files changed, 44 insertions(+), 31 deletions(-) diff --git a/paddle/fluid/imperative/nccl_context.cc b/paddle/fluid/imperative/nccl_context.cc index abee311d08cf38..9c2c9925a34e80 100644 --- a/paddle/fluid/imperative/nccl_context.cc +++ b/paddle/fluid/imperative/nccl_context.cc @@ -49,16 +49,20 @@ void NCCLParallelContext::RecvNCCLID(const std::string &ep, address.sin_port = htons(port); int try_times = 0; + int retry_time = 0; while (true) { if (bind(server_fd, (struct sockaddr *)&address, sizeof(address)) < 0) { + retry_time = 3 * (try_times + 1); LOG(WARNING) << "Socket bind worker " << ep - << (try_times < 5 ? " failed, try again after 3 seconds." - : " failed, try again after 3 seconds. " - "Bind on endpoint %s failed. " - "Please confirm whether the " - "communication port or GPU card is " - "occupied."); - std::this_thread::sleep_for(std::chrono::seconds(3)); + << (try_times < 9 + ? " failed, try again after " + + std::to_string(retry_time) + " seconds." + : " failed, try again after " + + std::to_string(retry_time) + + " seconds. Bind on endpoint " + ep + + " failed. 
Please confirm whether the " + "communication port or GPU card is occupied."); + std::this_thread::sleep_for(std::chrono::seconds(retry_time)); ++try_times; continue; } @@ -129,16 +133,20 @@ void NCCLParallelContext::SendNCCLID(const std::string &ep, } int try_times = 0; + int retry_time = 0; while (true) { if (connect(sock, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0) { + retry_time = 3 * (try_times + 1); LOG(WARNING) << "Socket connect worker " << ep - << (try_times < 5 - ? " failed, try again after 3 seconds." - : " failed, try again after 3 seconds. Maybe that " - "some process is occupied the GPUs of this node " - "now, and you should kill those process manually."); - std::this_thread::sleep_for(std::chrono::seconds(3)); + << (try_times < 9 + ? " failed, try again after " + std::to_string(retry_time) + + " seconds." + : " failed, try again after " + std::to_string(retry_time) + + " seconds. Maybe that some process is occupied the " + "GPUs of this node now, and you should kill those " + "process manually."); + std::this_thread::sleep_for(std::chrono::seconds(retry_time)); ++try_times; continue; } diff --git a/python/paddle/distributed/parallel.py b/python/paddle/distributed/parallel.py index 16b031e116acdc..9b6691dac7545a 100644 --- a/python/paddle/distributed/parallel.py +++ b/python/paddle/distributed/parallel.py @@ -125,7 +125,7 @@ def _check_var_exists(var_name): if ParallelEnv().world_size < 2: return - # 3: init gloo context + # 3: init gloo context (step 1: httpsever start) ep_rank_0 = ParallelEnv().trainer_endpoints[0].split(":") ep_rank = ParallelEnv().trainer_endpoints[ParallelEnv().rank].split(":") manager = Manager() @@ -138,22 +138,6 @@ def _check_var_exists(var_name): http_server.daemon = True http_server_d["running"] = True http_server.start() - wait_server_ready([ParallelEnv().trainer_endpoints[0]]) - - gloo_strategy = core.GlooParallelStrategy() - gloo_strategy.rank = ParallelEnv().rank - gloo_strategy.rank_num = ParallelEnv().world_size - gloo_strategy.ip_address = ep_rank_0[0] - gloo_strategy.ip_port = int(ep_rank_0[1]) - default_init_timeout_seconds = 3600 - default_run_timeout_seconds = 9999999 - gloo_strategy.init_seconds = default_init_timeout_seconds - gloo_strategy.run_seconds = default_run_timeout_seconds - gloo = core.GlooParallelContext(gloo_strategy) - gloo.init() - if ParallelEnv().rank == 0: - http_server_d["running"] = False - http_server.join() # 4. init NCCL ParallelStrategy strategy = ParallelStrategy() @@ -165,7 +149,7 @@ def _check_var_exists(var_name): strategy.current_endpoint = ParallelEnv().current_endpoint # NOTE(chenweihang): [ why config global place here? ] - # the dygraph mode will be set to default mode, + # the dygraph mode will be set to default mode, # users will not call `dygraph.guard` or `enable_dygraph` # directly, if they want to switch default place, # they need to call a function to change default place, @@ -177,6 +161,27 @@ def _check_var_exists(var_name): parallel_helper._set_parallel_ctx(core.NCCLParallelContext(strategy, place)) parallel_helper._init_parallel_ctx() + # 5: init gloo context (step 2: gloo init) + # dividing init_gloo into two part beacause nccl and gloo + # are separately looking for free ports which sometimes + # leads to port-conflict. 
+ wait_server_ready([ParallelEnv().trainer_endpoints[0]]) + + gloo_strategy = core.GlooParallelStrategy() + gloo_strategy.rank = ParallelEnv().rank + gloo_strategy.rank_num = ParallelEnv().world_size + gloo_strategy.ip_address = ep_rank_0[0] + gloo_strategy.ip_port = int(ep_rank_0[1]) + default_init_timeout_seconds = 3600 + default_run_timeout_seconds = 9999999 + gloo_strategy.init_seconds = default_init_timeout_seconds + gloo_strategy.run_seconds = default_run_timeout_seconds + gloo = core.GlooParallelContext(gloo_strategy) + gloo.init() + if ParallelEnv().rank == 0: + http_server_d["running"] = False + http_server.join() + def get_rank(): """ From 1de3cdd0abd947f2830915e5f2d9bedcb7297c98 Mon Sep 17 00:00:00 2001 From: LielinJiang <50691816+LielinJiang@users.noreply.github.com> Date: Mon, 16 Nov 2020 11:26:56 +0800 Subject: [PATCH 03/56] Fix summary api for rnn gru lstm (#28566) * fix summary for rnn gru lstm --- python/paddle/hapi/model_summary.py | 3 +++ python/paddle/tests/test_model.py | 29 +++++++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/python/paddle/hapi/model_summary.py b/python/paddle/hapi/model_summary.py index c6288ea40c59e7..babbe962a95252 100644 --- a/python/paddle/hapi/model_summary.py +++ b/python/paddle/hapi/model_summary.py @@ -244,6 +244,9 @@ def hook(layer, input, output): (not (layer == model) or depth < 1)): hooks.append(layer.register_forward_post_hook(hook)) + # For rnn, gru and lstm layer + elif hasattr(layer, 'could_use_cudnn') and layer.could_use_cudnn: + hooks.append(layer.register_forward_post_hook(hook)) if isinstance(input_size, tuple): input_size = [input_size] diff --git a/python/paddle/tests/test_model.py b/python/paddle/tests/test_model.py index a3b33d6f253be1..ab7a3654e582c9 100644 --- a/python/paddle/tests/test_model.py +++ b/python/paddle/tests/test_model.py @@ -295,6 +295,12 @@ def test_predict_without_inputs(self): np.testing.assert_equal(output[0].shape[0], len(self.test_dataset)) fluid.disable_dygraph() + def test_summary_gpu(self): + paddle.disable_static(self.device) + rnn = paddle.nn.LSTM(16, 32, 2) + params_info = paddle.summary( + rnn, [(-1, 23, 16), ((2, None, 32), (2, -1, 32))]) + class MyModel(paddle.nn.Layer): def __init__(self): @@ -512,14 +518,33 @@ def _get_param_from_state_dict(state_dict): model.summary(input_size=(20), dtype='float32') def test_summary_nlp(self): - paddle.enable_static() + def _get_param_from_state_dict(state_dict): + params = 0 + for k, v in state_dict.items(): + params += np.prod(v.numpy().shape) + return params + nlp_net = paddle.nn.GRU(input_size=2, hidden_size=3, num_layers=3, direction="bidirectional") paddle.summary(nlp_net, (1, 1, 2)) + rnn = paddle.nn.LSTM(16, 32, 2) - paddle.summary(rnn, [(-1, 23, 16), ((2, None, 32), (2, -1, 32))]) + params_info = paddle.summary( + rnn, [(-1, 23, 16), ((2, None, 32), (2, -1, 32))]) + gt_params = _get_param_from_state_dict(rnn.state_dict()) + np.testing.assert_allclose(params_info['total_params'], gt_params / 2.0) + + rnn = paddle.nn.GRU(16, 32, 2, direction='bidirectional') + params_info = paddle.summary(rnn, (4, 23, 16)) + gt_params = _get_param_from_state_dict(rnn.state_dict()) + np.testing.assert_allclose(params_info['total_params'], gt_params / 2.0) + + rnn = paddle.nn.SimpleRNN(16, 32, 2, direction='bidirectional') + params_info = paddle.summary(rnn, (4, 23, 16)) + gt_params = _get_param_from_state_dict(rnn.state_dict()) + np.testing.assert_allclose(params_info['total_params'], gt_params / 2.0) def test_summary_dtype(self): 
input_shape = (3, 1) From 1c3eef4cee16b327c0a305c4eebe6dc369fd1121 Mon Sep 17 00:00:00 2001 From: LielinJiang <50691816+LielinJiang@users.noreply.github.com> Date: Mon, 16 Nov 2020 11:28:03 +0800 Subject: [PATCH 04/56] Fix vgg error when num_classes is given (#28557) * fix vgg num classes --- python/paddle/tests/test_vision_models.py | 3 +++ python/paddle/vision/models/vgg.py | 5 +---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/paddle/tests/test_vision_models.py b/python/paddle/tests/test_vision_models.py index 5f35a1e0e5a4ba..a25a8f373c29c4 100644 --- a/python/paddle/tests/test_vision_models.py +++ b/python/paddle/tests/test_vision_models.py @@ -71,6 +71,9 @@ def test_resnet101(self): def test_resnet152(self): self.models_infer('resnet152') + def test_vgg16_num_classes(self): + vgg16 = models.__dict__['vgg16'](pretrained=False, num_classes=10) + def test_lenet(self): input = InputSpec([None, 1, 28, 28], 'float32', 'x') lenet = paddle.Model(models.__dict__['LeNet'](), input) diff --git a/python/paddle/vision/models/vgg.py b/python/paddle/vision/models/vgg.py index bb158569d3bc9f..00f6cccbdfe9f1 100644 --- a/python/paddle/vision/models/vgg.py +++ b/python/paddle/vision/models/vgg.py @@ -107,10 +107,7 @@ def make_layers(cfg, batch_norm=False): def _vgg(arch, cfg, batch_norm, pretrained, **kwargs): - model = VGG(make_layers( - cfgs[cfg], batch_norm=batch_norm), - num_classes=1000, - **kwargs) + model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs) if pretrained: assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format( From 90805e2df7b6fcd0bf78e8fa10fcbe98ef74c936 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Mon, 16 Nov 2020 11:28:52 +0800 Subject: [PATCH 05/56] Register op_version for new attribute use_addto (#28463) * register op_version for addto * upgrade pass capability * change eq to le * change eq to le * fix merge --- .../ir/conv_affine_channel_fuse_pass.cc | 4 +- .../fluid/framework/ir/conv_bn_fuse_pass.cc | 4 +- .../ir/conv_elementwise_add2_act_fuse_pass.cc | 4 +- .../ir/conv_elementwise_add_act_fuse_pass.cc | 3 +- .../ir/conv_elementwise_add_fuse_pass.cc | 3 +- .../conv_activation_mkldnn_fuse_pass.cc | 10 +-- .../ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc | 4 +- .../conv_concat_relu_mkldnn_fuse_pass.cc | 4 +- .../conv_elementwise_add_mkldnn_fuse_pass.cc | 71 ++++++++++--------- .../ir/mkldnn/depthwise_conv_mkldnn_pass.cc | 4 +- .../ir/quant_conv2d_dequant_fuse_pass.cc | 5 +- .../ir_passes/tensorrt_subgraph_pass.cc | 8 ++- paddle/fluid/operators/conv_op.cc | 35 +++++++++ 13 files changed, 106 insertions(+), 53 deletions(-) diff --git a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc index 9c984a23e377d7..c0ebf6de9de23b 100644 --- a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc @@ -238,11 +238,11 @@ REGISTER_PASS(conv_eltwiseadd_affine_channel_fuse_pass, REGISTER_PASS_CAPABILITY(conv_affine_channel_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() - .EQ("conv2d", 0) + .LE("conv2d", 1) .EQ("affine_channel", 0)); REGISTER_PASS_CAPABILITY(conv_eltwiseadd_affine_channel_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() - .EQ("conv2d", 0) + .LE("conv2d", 1) .EQ("elementwise_add", 0) .EQ("affine_channel", 0)); diff --git 
a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc index a915015bf55bd8..72ac7c3b0e8ab8 100644 --- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc @@ -383,11 +383,11 @@ REGISTER_PASS(depthwise_conv_eltwiseadd_bn_fuse_pass, REGISTER_PASS_CAPABILITY(conv_bn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() - .EQ("conv2d", 0) + .LE("conv2d", 1) .EQ("batch_norm", 0)); REGISTER_PASS_CAPABILITY(conv_eltwiseadd_bn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() - .EQ("conv2d", 0) + .LE("conv2d", 1) .EQ("elementwise_add", 0) .EQ("batch_norm", 0)); diff --git a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc index ad6af69ae02e4f..545beb34e78df5 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc @@ -12,7 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h" + #include + #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { @@ -119,7 +121,7 @@ REGISTER_PASS(conv_elementwise_add2_act_fuse_pass, REGISTER_PASS_CAPABILITY(conv_elementwise_add2_act_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() - .EQ("conv2d", 0) + .LE("conv2d", 1) .EQ("elementwise_add", 0) .EQ("relu", 0) .EQ("identity", 0)); diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc index 93e6e13ff7092c..d01a2f2622347c 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h" + #include #include "paddle/fluid/framework/ir/graph_viz_pass.h" @@ -107,7 +108,7 @@ REGISTER_PASS(conv_elementwise_add_act_fuse_pass, REGISTER_PASS_CAPABILITY(conv_elementwise_add_act_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() - .EQ("conv2d", 0) + .LE("conv2d", 1) .EQ("elementwise_add", 0) .EQ("relu", 0) .EQ("identity", 0)); diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc index e4396f227f7f52..e34a2d96581531 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.h" + #include #include "paddle/fluid/framework/ir/graph_viz_pass.h" @@ -93,5 +94,5 @@ REGISTER_PASS(conv_elementwise_add_fuse_pass, REGISTER_PASS_CAPABILITY(conv_elementwise_add_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() - .EQ("conv2d", 0) + .LE("conv2d", 1) .EQ("elementwise_add", 0)); diff --git a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc index c33398553ecd2c..d0bdeb9ad8c460 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h" + #include + #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" @@ -107,7 +109,7 @@ REGISTER_PASS(conv_relu_mkldnn_fuse_pass, REGISTER_PASS_CAPABILITY(conv_relu_mkldnn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() - .EQ("conv2d", 0) + .LE("conv2d", 1) .EQ("relu", 0)); REGISTER_PASS(conv_leaky_relu_mkldnn_fuse_pass, @@ -115,7 +117,7 @@ REGISTER_PASS(conv_leaky_relu_mkldnn_fuse_pass, REGISTER_PASS_CAPABILITY(conv_leaky_relu_mkldnn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() - .EQ("conv2d", 0) + .LE("conv2d", 1) .LE("leaky_relu", 1)); REGISTER_PASS(conv_relu6_mkldnn_fuse_pass, @@ -123,7 +125,7 @@ REGISTER_PASS(conv_relu6_mkldnn_fuse_pass, REGISTER_PASS_CAPABILITY(conv_relu6_mkldnn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() - .EQ("conv2d", 0) + .LE("conv2d", 1) .EQ("relu6", 0)); REGISTER_PASS(conv_swish_mkldnn_fuse_pass, @@ -131,5 +133,5 @@ REGISTER_PASS(conv_swish_mkldnn_fuse_pass, REGISTER_PASS_CAPABILITY(conv_swish_mkldnn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() - .EQ("conv2d", 0) + .LE("conv2d", 1) .EQ("swish", 0)); diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc index 716c49dcb12d9b..b0849d74b6153f 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc @@ -13,8 +13,10 @@ // limitations under the License. #include "paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h" + #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" @@ -150,7 +152,7 @@ REGISTER_PASS(conv_bias_mkldnn_fuse_pass, REGISTER_PASS_CAPABILITY(conv_bias_mkldnn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() - .EQ("conv2d", 0) + .LE("conv2d", 1) .EQ("elementwise_add", 0)); REGISTER_PASS(conv_transpose_bias_mkldnn_fuse_pass, diff --git a/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.cc index 76e10212550114..c4d7a12037293e 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.cc @@ -13,7 +13,9 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.h" + #include + #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" @@ -128,6 +130,6 @@ REGISTER_PASS(conv_concat_relu_mkldnn_fuse_pass, REGISTER_PASS_CAPABILITY(conv_concat_relu_mkldnn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() - .EQ("conv2d", 0) + .LE("conv2d", 1) .EQ("concat", 0) .EQ("relu", 0)); diff --git a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc index 2fb131aceaad28..a837b42b3ead48 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc @@ -13,11 +13,13 @@ // limitations under the License. #include "paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.h" + #include #include #include #include #include + #include "paddle/fluid/framework/ir/graph_traits.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -226,19 +228,20 @@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseConvAsX( pattern->NewNode(elementwise_add_pattern.elementwise_add_y_repr())); conv_output->AsIntermediate(); - auto get_node_from_elementwise_add = [&elementwise_add_pattern]( - const GraphPatternDetector::subgraph_t& subgraph) + auto get_node_from_elementwise_add = + [&elementwise_add_pattern]( + const GraphPatternDetector::subgraph_t& subgraph) -> std::tuple { - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op, - elementwise_add_pattern); - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_y, elementwise_add_y, - elementwise_add_pattern); - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out, - elementwise_add_pattern); - - return std::make_tuple(elementwise_add_op, elementwise_add_y, - elementwise_add_out); - }; + GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op, + elementwise_add_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_y, elementwise_add_y, + elementwise_add_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out, + elementwise_add_pattern); + + return std::make_tuple(elementwise_add_op, elementwise_add_y, + elementwise_add_out); + }; return ExecuteHandleOnGraph( &gpd, graph_with_stats, @@ -263,19 +266,20 @@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseConvAsY( conv_output); conv_output->AsIntermediate(); - auto get_node_from_elementwise_add = [&elementwise_add_pattern]( - const GraphPatternDetector::subgraph_t& subgraph) + auto get_node_from_elementwise_add = + [&elementwise_add_pattern]( + const GraphPatternDetector::subgraph_t& subgraph) -> std::tuple { - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op, - elementwise_add_pattern); - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_x, elementwise_add_x, - elementwise_add_pattern); - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out, - elementwise_add_pattern); - - return std::make_tuple(elementwise_add_op, elementwise_add_x, - elementwise_add_out); - }; + GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op, + elementwise_add_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_x, elementwise_add_x, + elementwise_add_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out, + elementwise_add_pattern); + + return std::make_tuple(elementwise_add_op, elementwise_add_x, + elementwise_add_out); + }; return 
ExecuteHandleOnGraph( &gpd, graph_with_stats, @@ -302,16 +306,17 @@ GraphWithStats ResidualConnectionMKLDNNFusePass::FuseProjectionConv( conv_x_output->AsIntermediate(); conv_y_output->AsIntermediate(); - auto get_node_from_elementwise_add = [&elementwise_add_pattern]( - const GraphPatternDetector::subgraph_t& subgraph) + auto get_node_from_elementwise_add = + [&elementwise_add_pattern]( + const GraphPatternDetector::subgraph_t& subgraph) -> std::tuple { - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op, - elementwise_add_pattern); - GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out, - elementwise_add_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op, + elementwise_add_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out, + elementwise_add_pattern); - return std::make_tuple(elementwise_add_op, elementwise_add_out); - }; + return std::make_tuple(elementwise_add_op, elementwise_add_out); + }; return ExecuteHandleOnGraph( &gpd, graph_with_stats, @@ -345,5 +350,5 @@ REGISTER_PASS(conv_elementwise_add_mkldnn_fuse_pass, REGISTER_PASS_CAPABILITY(conv_elementwise_add_mkldnn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() - .EQ("conv2d", 0) + .LE("conv2d", 1) .EQ("elementwise_add", 0)); diff --git a/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.cc index b2c0afdc754fb7..39f47406a77ca9 100644 --- a/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.cc @@ -63,5 +63,5 @@ REGISTER_PASS(depthwise_conv_mkldnn_pass, paddle::framework::ir::DepthwiseConvMKLDNNPass); REGISTER_PASS_CAPABILITY(depthwise_conv_mkldnn_pass) .AddCombination( - paddle::framework::compatible::OpVersionComparatorCombination().EQ( - "depthwise_conv2d", 0)); + paddle::framework::compatible::OpVersionComparatorCombination().LE( + "depthwise_conv2d", 1)); diff --git a/paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc b/paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc index 895c396e1e614f..96c5546d21208b 100644 --- a/paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc +++ b/paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.h" + #include #include #include #include #include "paddle/fluid/framework/ir/graph_viz_pass.h" -#include "paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { @@ -331,7 +332,7 @@ REGISTER_PASS_CAPABILITY(quant_conv2d_dequant_fuse_pass); REGISTER_PASS_CAPABILITY(tensorrt_subgraph_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() - .EQ("conv2d", 0) + .LE("conv2d", 1) .EQ("fc", 0) .LE("conv2d_transpose", 1) .EQ("fake_quantize_abs_max", 0) diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc index 08f3d609fa3e6a..bf0d87da91f534 100644 --- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h" + #include #include #include @@ -20,7 +22,6 @@ #include "paddle/fluid/framework/ir/subgraph_detector.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/inference/analysis/helper.h" -#include "paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h" #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/fluid/inference/tensorrt/op_teller.h" @@ -309,6 +310,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp( min_input_shape, max_input_shape, opt_input_shape, disable_trt_plugin_fp16); trt_engine->SetUseOSS(Get("use_oss")); + trt_engine->SetWithErnie( graph->Has(framework::ir::kEmbEltwiseLayernormPass) && graph->Has(framework::ir::kMultiheadMatmulPass)); @@ -367,13 +369,13 @@ REGISTER_PASS(tensorrt_subgraph_pass, REGISTER_PASS_CAPABILITY(tensorrt_subgraph_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() - .EQ("conv2d", 0) + .LE("conv2d", 1) .EQ("pool2d", 0) .EQ("relu", 0) .EQ("softmax", 0) .EQ("sigmoid", 0) .EQ("hard_swish", 0) - .EQ("depthwise_conv2d", 0) + .LE("depthwise_conv2d", 1) .EQ("batch_norm", 0) .EQ("concat", 0) .EQ("tanh", 0) diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index ef8a2b38f20b99..76ff1084fa61b4 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -18,6 +18,8 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/op_version_registry.h" + #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/operators/conv_cudnn_op_cache.h" #include "paddle/fluid/platform/cudnn_helper.h" @@ -817,3 +819,36 @@ REGISTER_OP_CPU_KERNEL( conv3d_grad_grad, ops::GemmConvDoubleGradKernel, ops::GemmConvDoubleGradKernel); + +REGISTER_OP_VERSION(conv2d) + .AddCheckpoint( + R"ROC( + Upgrade conv2d, add a new attribute [use_addto]. + )ROC", + paddle::framework::compatible::OpVersionDesc().NewAttr( + "use_addto", + "In order to support new feature (inplace addto strategy) for " + "gradient accumulation.", + false)); + +REGISTER_OP_VERSION(depthwise_conv2d) + .AddCheckpoint( + R"ROC( + Upgrade depthwise_conv2d, add a new attribute [use_addto]. + )ROC", + paddle::framework::compatible::OpVersionDesc().NewAttr( + "use_addto", + "In order to support new feature (inplace addto strategy) for " + "gradient accumulation.", + false)); + +REGISTER_OP_VERSION(conv3d) + .AddCheckpoint( + R"ROC( + Upgrade conv3d, add a new attribute [use_addto]. + )ROC", + paddle::framework::compatible::OpVersionDesc().NewAttr( + "use_addto", + "In order to support new feature (inplace addto strategy) for " + "gradient accumulation.", + false)); From f962bd343217a080c45ace61e104609cc8ea1ffd Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Mon, 16 Nov 2020 12:25:59 +0800 Subject: [PATCH 06/56] Fix cudnn workspace limit in cudnn-8 (#28611) --- paddle/fluid/operators/conv_cudnn_helper.h | 49 +++++++++++++++++++--- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/operators/conv_cudnn_helper.h b/paddle/fluid/operators/conv_cudnn_helper.h index 55502eaf4e5495..2ba58a6dae5b35 100644 --- a/paddle/fluid/operators/conv_cudnn_helper.h +++ b/paddle/fluid/operators/conv_cudnn_helper.h @@ -19,6 +19,7 @@ limitations under the License. 
*/ #include #include #include + #include "paddle/fluid/framework/conv_search_cache.h" #include "paddle/fluid/framework/operator_kernel_configs.h" #include "paddle/fluid/operators/conv_cudnn_op_cache.h" @@ -101,6 +102,24 @@ inline int MaxBwdFilterAlgos(cudnnHandle_t cudnn_handle) { return max_algos; } +template +void ChooseAlgoByWorkspace(PerfType* perf_results, size_t perf_num, + size_t workspace_byte, AlgoType* algo) { + for (size_t i = 0; i < perf_num; ++i) { + auto result = perf_results[i]; + if (result.status == CUDNN_STATUS_SUCCESS && + result.memory < workspace_byte) { + *algo = result.algo; + VLOG(3) << " algo: " << result.algo << ", time: " << result.time + << " ms, wksp = " << result.memory + << ", status = " << result.status; + return; + } + } + VLOG(3) << "Can not find alog that requires memory < " + << static_cast(workspace_byte) / (1 << 20) << " MB"; +} + template void ChooseAlgo(const std::vector& perf_results, size_t workspace_byte, AlgoType* algo) { @@ -219,7 +238,10 @@ struct SearchAlgorithm { if (workspace_size > workspace_size_limit) { #if CUDNN_VERSION >= 8000 - workspace_size_limit = workspace_size; + // cudnnGetConvolutionForwardAlgorithm is removed in CUDNN-8 + ChooseAlgoByWorkspace(perf_results.get(), + kNUM_CUDNN_FWD_ALGS, + workspace_size_limit, &algo); #else VLOG(1) << "Fallback to non-v7 method to find conv algorithm becasue " "the workspace size request(" @@ -316,7 +338,6 @@ struct SearchAlgorithm { size_t workspace_size = 0; bool has_got_workspace_size = true; algo_t algo; - #if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1) auto& dev_ctx = ctx.template device_context(); if (dev_ctx.GetComputeCapability() >= 70 && dtype == CUDNN_DATA_HALF) { @@ -362,9 +383,10 @@ struct SearchAlgorithm { if (workspace_size > workspace_size_limit) { has_got_workspace_size = false; #if CUDNN_VERSION >= 8000 - // There is no cudnnGetConvolutionBackwardDataAlgorithm in CUDNN 8 - // version. - workspace_size_limit = workspace_size; + // cudnnGetConvolutionBackwardDataAlgorithm is removed in CUDNN-8 + ChooseAlgoByWorkspace(perf_results.get(), + kNUM_CUDNN_BWD_DATA_ALGS, + workspace_size_limit, &algo); #else VLOG(1) << "Fallback to non-v7 method to find conv algorithm becasue " "the workspace size request(" @@ -493,6 +515,23 @@ struct SearchAlgorithm { workspace_size = GetWorkspaceSize(args, algo); if (workspace_size > workspace_size_limit) { workspace_size = workspace_size_limit; +#if CUDNN_VERSION >= 8000 + // cudnnGetConvolutionBackwardFilterAlgorithm is removed in CUDNN-8 + ChooseAlgoByWorkspace(perf_results.get(), + kNUM_CUDNN_BWD_FILTER_ALGS, + workspace_size_limit, &algo); +#else + VLOG(1) << "Fallback to non-v7 method to find conv algorithm becasue " + "the workspace size request(" + << workspace_size << ") exceeds the limit(" + << workspace_size_limit << ")"; + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnGetConvolutionBackwardFilterAlgorithm( + args.handle, args.idesc.desc(), args.odesc.desc(), + args.cdesc.desc(), args.wdesc.desc(), + CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, + workspace_size_limit, &algo)); +#endif } #else PADDLE_ENFORCE_CUDA_SUCCESS( From 8b97bb2e1f4e8cfe5bee0f97daac266d854b73c4 Mon Sep 17 00:00:00 2001 From: Wilber Date: Mon, 16 Nov 2020 12:42:36 +0800 Subject: [PATCH 07/56] Update cmake for arm ft and fix a bug for Predictor dtor. 
(#28586) --- paddle/fluid/inference/api/analysis_predictor.cc | 10 ++++++++-- python/CMakeLists.txt | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 20bea8e568e467..7bfdb2107c9a99 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -175,9 +175,15 @@ bool AnalysisPredictor::PrepareScope( status_is_cloned_ = true; } else { paddle::framework::InitDevices(false); - scope_.reset(new paddle::framework::Scope(), [&](framework::Scope *scope) { + scope_.reset(new paddle::framework::Scope(), [](framework::Scope *scope) { delete scope; - memory::Release(place_); +#ifdef PADDLE_WITH_CUDA + for (int dev_id = 0; dev_id < paddle::platform::GetCUDADeviceCount(); + ++dev_id) { + memory::Release(platform::CUDAPlace(dev_id)); + } +#endif + memory::Release(platform::CPUPlace()); }); status_is_cloned_ = false; } diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 34edb0280b0ba7..0be09c1ec6340a 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -106,7 +106,7 @@ if(APPLE) message(FATAL_ERROR "install_name_tool not found, please check.\n") endif() endif() -if(LINUX AND NOT WITH_SW) +if(LINUX AND NOT WITH_SW AND NOT WITH_ARM) find_program(PATCHELF_EXECUTABLE patchelf) if(NOT PATCHELF_EXECUTABLE) message(FATAL_ERROR "patchelf not found, please install it.\n" From f7dd889ca443aaf1248947a1af65107b9779370d Mon Sep 17 00:00:00 2001 From: qingqing01 Date: Mon, 16 Nov 2020 13:52:31 +0800 Subject: [PATCH 08/56] Support squeezed label as input in paddle.metric.Accuracy (#28535) * Support squeezed label as input in paddle.metric.Accuracy * Revert cifar and fix UT --- python/paddle/metric/metrics.py | 1 + python/paddle/tests/test_metrics.py | 28 +++++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/python/paddle/metric/metrics.py b/python/paddle/metric/metrics.py index fed659562cbb0c..510b99c03008d5 100644 --- a/python/paddle/metric/metrics.py +++ b/python/paddle/metric/metrics.py @@ -244,6 +244,7 @@ def compute(self, pred, label, *args): Tensor: Correct mask, a tensor with shape [batch_size, topk]. 
""" pred = paddle.argsort(pred, descending=True)[:, :self.maxk] + label = paddle.reshape(label, (-1, 1)) correct = pred == label return paddle.cast(correct, dtype='float32') diff --git a/python/paddle/tests/test_metrics.py b/python/paddle/tests/test_metrics.py index f05cdf9c6da10b..b1f53168e62cec 100644 --- a/python/paddle/tests/test_metrics.py +++ b/python/paddle/tests/test_metrics.py @@ -28,6 +28,7 @@ def accuracy(pred, label, topk=(1, )): maxk = max(topk) pred = np.argsort(pred)[:, ::-1][:, :maxk] + label = label.reshape(-1, 1) correct = (pred == np.repeat(label, maxk, 1)) batch_size = label.shape[0] @@ -47,13 +48,18 @@ def convert_to_one_hot(y, C): class TestAccuracy(unittest.TestCase): - def test_acc(self): + def test_acc(self, squeeze_y=False): paddle.disable_static() x = paddle.to_tensor( np.array([[0.1, 0.2, 0.3, 0.4], [0.1, 0.4, 0.3, 0.2], [0.1, 0.2, 0.4, 0.3], [0.1, 0.2, 0.3, 0.4]])) - y = paddle.to_tensor(np.array([[0], [1], [2], [3]])) + + y = np.array([[0], [1], [2], [3]]) + if squeeze_y: + y = y.squeeze() + + y = paddle.to_tensor(y) m = paddle.metric.Accuracy(name='my_acc') @@ -61,7 +67,8 @@ def test_acc(self): self.assertEqual(m.name(), ['my_acc']) correct = m.compute(x, y) - # check results + # check shape and results + self.assertEqual(correct.shape, [4, 1]) self.assertEqual(m.update(correct), 0.75) self.assertEqual(m.accumulate(), 0.75) @@ -80,6 +87,9 @@ def test_acc(self): self.assertEqual(m.count[0], 0.0) paddle.enable_static() + def test_1d_label(self): + self.test_acc(True) + class TestAccuracyDynamic(unittest.TestCase): def setUp(self): @@ -87,12 +97,15 @@ def setUp(self): self.class_num = 5 self.sample_num = 1000 self.name = None + self.squeeze_label = False def random_pred_label(self): label = np.random.randint(0, self.class_num, (self.sample_num, 1)).astype('int64') pred = np.random.randint(0, self.class_num, (self.sample_num, 1)).astype('int32') + if self.squeeze_label: + label = label.squeeze() pred_one_hot = convert_to_one_hot(pred, self.class_num) pred_one_hot = pred_one_hot.astype('float32') @@ -123,9 +136,17 @@ def setUp(self): self.class_num = 10 self.sample_num = 1000 self.name = "accuracy" + self.squeeze_label = True class TestAccuracyStatic(TestAccuracyDynamic): + def setUp(self): + self.topk = (1, ) + self.class_num = 5 + self.sample_num = 1000 + self.name = None + self.squeeze_label = True + def test_main(self): main_prog = fluid.Program() startup_prog = fluid.Program() @@ -164,6 +185,7 @@ def setUp(self): self.class_num = 10 self.sample_num = 100 self.name = "accuracy" + self.squeeze_label = False class TestPrecision(unittest.TestCase): From c4d22c845b951438e7e84311c9e6eefa49c8f526 Mon Sep 17 00:00:00 2001 From: YUNSHEN XIE <1084314248@qq.com> Date: Mon, 16 Nov 2020 14:29:12 +0800 Subject: [PATCH 09/56] modified timeout value for some ut (#28616) --- python/paddle/fluid/tests/book/CMakeLists.txt | 1 + python/paddle/fluid/tests/unittests/CMakeLists.txt | 10 +++++++++- python/paddle/fluid/tests/unittests/rnn/CMakeLists.txt | 6 ++++-- python/paddle/tests/CMakeLists.txt | 2 +- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/python/paddle/fluid/tests/book/CMakeLists.txt b/python/paddle/fluid/tests/book/CMakeLists.txt index e78ba297bf1255..2c816a12bd3ebb 100644 --- a/python/paddle/fluid/tests/book/CMakeLists.txt +++ b/python/paddle/fluid/tests/book/CMakeLists.txt @@ -12,3 +12,4 @@ set_tests_properties(test_image_classification PROPERTIES TIMEOUT 120) set_tests_properties(test_label_semantic_roles PROPERTIES TIMEOUT 120) 
set_tests_properties(test_machine_translation PROPERTIES TIMEOUT 120) set_tests_properties(test_rnn_encoder_decoder PROPERTIES TIMEOUT 120) +set_tests_properties(test_fit_a_line PROPERTIES TIMEOUT 120) diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 6fcc8b9691703c..6e78f7d90149e2 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -648,7 +648,7 @@ set_tests_properties(test_activation_nn_grad PROPERTIES TIMEOUT 120) set_tests_properties(test_empty_op PROPERTIES TIMEOUT 120) set_tests_properties(test_parallel_executor_transformer PROPERTIES TIMEOUT 120) set_tests_properties(test_elementwise_div_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_regularizer_api PROPERTIES TIMEOUT 120) +set_tests_properties(test_regularizer_api PROPERTIES TIMEOUT 150) set_tests_properties(test_multiclass_nms_op PROPERTIES TIMEOUT 120) set_tests_properties(test_ir_memory_optimize_nlp PROPERTIES TIMEOUT 120) set_tests_properties(test_add_reader_dependency PROPERTIES TIMEOUT 120) @@ -754,6 +754,13 @@ set_tests_properties(test_imperative_resnet_sorted_gradient PROPERTIES TIMEOUT 2 set_tests_properties(test_imperative_se_resnext PROPERTIES TIMEOUT 200) set_tests_properties(test_matmul_v2_op PROPERTIES TIMEOUT 120) set_tests_properties(test_slice_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_strided_slice_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_translated_layer PROPERTIES TIMEOUT 120) +set_tests_properties(test_parallel_executor_inference_feed_partial_data PROPERTIES TIMEOUT 120) +set_tests_properties(test_pad3d_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_dataloader_keep_order PROPERTIES TIMEOUT 120) +set_tests_properties(test_mean_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_dataloader_unkeep_order PROPERTIES TIMEOUT 120) if(WITH_COVERAGE) set_tests_properties(test_parallel_dygraph_sparse_embedding PROPERTIES TIMEOUT 120) set_tests_properties(test_parallel_dygraph_transformer PROPERTIES TIMEOUT 120) @@ -776,5 +783,6 @@ endif() if(WITH_GPU) set_tests_properties(test_imperative_auto_mixed_precision PROPERTIES TIMEOUT 120) set_tests_properties(test_parallel_dygraph_sync_batch_norm PROPERTIES TIMEOUT 120) + set_tests_properties(test_rank_attention_op PROPERTIES TIMEOUT 120) endif() set_tests_properties(test_inplace_addto_strategy PROPERTIES TIMEOUT 120) diff --git a/python/paddle/fluid/tests/unittests/rnn/CMakeLists.txt b/python/paddle/fluid/tests/unittests/rnn/CMakeLists.txt index 0606594c8c25f3..ffc78d33347b70 100644 --- a/python/paddle/fluid/tests/unittests/rnn/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/rnn/CMakeLists.txt @@ -4,5 +4,7 @@ string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") foreach(TEST_OP ${TEST_OPS}) py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) -set_tests_properties(test_rnn_nets_static PROPERTIES TIMEOUT 120) -set_tests_properties(test_rnn_nets PROPERTIES TIMEOUT 120) +if(NOT WIN32) + set_tests_properties(test_rnn_nets_static PROPERTIES TIMEOUT 120) + set_tests_properties(test_rnn_nets PROPERTIES TIMEOUT 120) +endif() diff --git a/python/paddle/tests/CMakeLists.txt b/python/paddle/tests/CMakeLists.txt index 50466be0c1b1f9..b9d05261f1ce02 100644 --- a/python/paddle/tests/CMakeLists.txt +++ b/python/paddle/tests/CMakeLists.txt @@ -47,6 +47,6 @@ set_tests_properties(test_datasets PROPERTIES TIMEOUT 120) set_tests_properties(test_dataset_wmt PROPERTIES TIMEOUT 120) 
set_tests_properties(test_vision_models PROPERTIES TIMEOUT 120) set_tests_properties(test_dataset_uci_housing PROPERTIES TIMEOUT 120) -set_tests_properties(test_dataset_imdb PROPERTIES TIMEOUT 120) +set_tests_properties(test_dataset_imdb PROPERTIES TIMEOUT 150) set_tests_properties(test_callbacks PROPERTIES TIMEOUT 120) set_tests_properties(test_pretrained_model PROPERTIES TIMEOUT 600) From a3bc3bcd4854057079f2f9447d8872c25ed3af28 Mon Sep 17 00:00:00 2001 From: Guo Sheng Date: Mon, 16 Nov 2020 14:32:58 +0800 Subject: [PATCH 10/56] Fix scaled_params append error in AdamW. (#28633) Fix no_grad setting in AdamW. test=develop --- python/paddle/optimizer/adamw.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python/paddle/optimizer/adamw.py b/python/paddle/optimizer/adamw.py index 2cf3881d046761..0ffff675903573 100644 --- a/python/paddle/optimizer/adamw.py +++ b/python/paddle/optimizer/adamw.py @@ -15,6 +15,7 @@ from .optimizer import Optimizer from .adam import Adam from ..fluid import framework +from ..fluid.dygraph import base as imperative_base import paddle from paddle.fluid.dygraph.parallel import apply_collective_grads @@ -171,13 +172,14 @@ def _scale_parameters(self, params_and_grads): learning_rate = self._learning_rate() with param.block.program._optimized_guard( [param, grad]), framework.name_scope('weight decay'): + scaled_params.append( + (param, grad, param * self._coeff * learning_rate)) if param.name not in self._params_name: - scaled_params.append( - (param, grad, param * self._coeff * learning_rate)) self._params_name.add(param.name) param = param * self._coeff return scaled_params + @imperative_base.no_grad def minimize(self, loss, startup_program=None, @@ -207,6 +209,7 @@ def minimize(self, return optimize_ops, params_grads @framework.dygraph_only + @imperative_base.no_grad def step(self): if paddle.distributed.get_world_size() > 1: apply_collective_grads(self._parameter_list) @@ -227,7 +230,7 @@ def step(self): [param, grad]), framework.name_scope('weight decay'): updated_param = paddle.fluid.layers.elementwise_sub( x=param, y=scaled_param) - param.set_value(updated_param.numpy()) + paddle.fluid.layers.assign(input=updated_param, output=param) self._apply_optimize( loss=None, startup_program=None, params_grads=params_grads) From 110febdc541db8dd7e75fc3aeb614dff0fede4b7 Mon Sep 17 00:00:00 2001 From: Guo Sheng Date: Mon, 16 Nov 2020 14:33:25 +0800 Subject: [PATCH 11/56] Fix gradients with ignore_idx in softmax_with_cross_entropy (#28622) * Fix gradients with ignore_idx in softmax_with_cross_entropy. test=develop * Fix gradients with ignore_idx in softmax_with_cross_entropy on cpu. Remove softmax_with_cross_entropy from op_threshold_white_list. test=develop * Fix test_softmax_cross_entropy_op.py. 
test=develop --- .../operators/softmax_with_cross_entropy_op.cu | 13 ++++++++++--- .../fluid/operators/softmax_with_cross_entropy_op.h | 11 +++++++++-- .../unittests/test_softmax_with_cross_entropy_op.py | 6 +++--- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu index 3ac7a5a127b379..f86f02544dc980 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu @@ -37,11 +37,17 @@ __global__ void CrossEntropyGrad(T* logit_grad, const int64_t* labels, template __global__ void Scale(T* logit_grad, const T* loss_grad, const int num, - const int d, const int remain) { + const int d, const int remain, const int64_t* labels, + const int ignore_index) { CUDA_KERNEL_LOOP(index, num) { int idx_n = index / d; int idx_remain = index % remain; - logit_grad[index] *= loss_grad[idx_n * remain + idx_remain]; + int idx_lbl = idx_n * remain + idx_remain; + if (labels[idx_lbl] == ignore_index) { + logit_grad[index] = static_cast(0.); + } else { + logit_grad[index] *= loss_grad[idx_lbl]; + } } } @@ -260,6 +266,7 @@ struct HardLabelSoftmaxWithCrossEntropyFunctor { int idx_remain = idx % remain; // labels, loss view as [n, remain] int idx_lbl = idx_n * remain + idx_remain; + // It also would ignore labels not in range(class_num). if (idx_axis != labels_[idx_lbl]) { log_softmax_[idx] = exp_on_device(log_softmax_[idx]); } else { @@ -513,7 +520,7 @@ class SoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel { int num = n * d; grid = (num + block - 1) / block; Scale<<>>(logit_grad_data, loss_grad_data, num, - d, remain); + d, remain, label_data, ignore_index); } } }; diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.h b/paddle/fluid/operators/softmax_with_cross_entropy_op.h index cebd466f361d1e..93f2552c3cee90 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op.h +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.h @@ -82,6 +82,7 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel { } const bool soft_label = context.Attr("soft_label"); + auto ignore_index = context.Attr("ignore_index"); const int rank = logit_grad->dims().size(); const int axis = CanonicalAxis(context.Attr("axis"), rank); @@ -115,8 +116,14 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel { for (int i = 0; i < n; ++i) { for (int j = 0; j < remain; j++) { int idx = i * remain + j; - logit_grad_data[i * d + label_data[idx] * remain + j] -= - out_grad_data[idx]; + if (label_data[idx] == ignore_index) { + for (int k = 0; k < axis_dim; ++k) { + logit_grad_data[i * d + k * remain + j] = 0; + } + } else { + logit_grad_data[i * d + label_data[idx] * remain + j] -= + out_grad_data[idx]; + } } } } diff --git a/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py b/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py index df2a0a523ad1ef..0ee58d5be15e60 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py +++ b/python/paddle/fluid/tests/unittests/test_softmax_with_cross_entropy_op.py @@ -83,9 +83,9 @@ def setUp(self): self.attrs = { "numeric_stable_mode": self.numeric_stable_mode, "soft_label": self.soft_label, + "ignore_index": self.ignore_index, } - if self.ignore_index >= 0: - self.attrs['ignore_index'] = self.ignore_index + if self.axis != -1: self.attrs['axis'] = self.axis @@ -93,7 +93,7 @@ def 
test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(["Logits"], "Loss", max_relative_error=0.05) + self.check_grad(["Logits"], "Loss", max_relative_error=5e-5) class TestSoftmaxWithCrossEntropyOpNoCudnn(TestSoftmaxWithCrossEntropyOp): From cf2c42a937137a4c6d0468ba497dac3f41e010b7 Mon Sep 17 00:00:00 2001 From: YUNSHEN XIE <1084314248@qq.com> Date: Mon, 16 Nov 2020 14:36:35 +0800 Subject: [PATCH 12/56] fix exec nightly error on mac (#28567) --- paddle/scripts/paddle_build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 14bd5a7ae89326..4c74653b7a06aa 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -563,12 +563,12 @@ EOF if [ ${NIGHTLY_MODE:-OFF} == "ON" ]; then nightly_label="" else - nightly_label="RUN_TYPE=NIGHTLY|RUN_TYPE=DIST:NIGHTLY|RUN_TYPE=EXCLUSIVE:NIGHTLY" + nightly_label="(RUN_TYPE=NIGHTLY|RUN_TYPE=DIST:NIGHTLY|RUN_TYPE=EXCLUSIVE:NIGHTLY)" echo "=========================================" echo "Unittests with nightly labels are only run at night" echo "=========================================" fi - ctest -E "($disable_ut_quickly)" -LE "($nightly_label)" --output-on-failure -j $2 | tee $tmpfile + ctest -E "($disable_ut_quickly)" -LE ${nightly_label} --output-on-failure -j $2 | tee $tmpfile failed_test_lists='' collect_failed_tests mactest_error=0 From 2b1e7e5b02f6f63f6eee6b0e7dd64a8649d1a2c7 Mon Sep 17 00:00:00 2001 From: GaoWei8 <53294385+GaoWei8@users.noreply.github.com> Date: Mon, 16 Nov 2020 15:24:37 +0800 Subject: [PATCH 13/56] Polish where english doc (#28595) --- python/paddle/tensor/search.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/python/paddle/tensor/search.py b/python/paddle/tensor/search.py index 3da4228fc8b204..f5e0dc4c05bfb6 100644 --- a/python/paddle/tensor/search.py +++ b/python/paddle/tensor/search.py @@ -495,9 +495,6 @@ def sort(x, axis=-1, descending=False, name=None): def where(condition, x, y, name=None): """ - :alias_main: paddle.where - :alias: paddle.where,paddle.tensor.where,paddle.tensor.search.where - Return a tensor of elements selected from either $x$ or $y$, depending on $condition$. .. math:: @@ -510,28 +507,27 @@ def where(condition, x, y, name=None): Args: - condition(Variable): The condition to choose x or y. - x(Variable): x is a Tensor Variable with data type float32, float64, int32, int64. - y(Variable): y is a Tensor Variable with data type float32, float64, int32, int64. + condition(Tensor): The condition to choose x or y. + x(Tensor): x is a Tensor with data type float32, float64, int32, int64. + y(Tensor): y is a Tensor with data type float32, float64, int32, int64. name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. Returns: - Variable: A Tensor with the same data dype as x. + Tensor: A Tensor with the same data dype as x. Examples: .. 
code-block:: python import paddle - paddle.disable_static() x = paddle.to_tensor([0.9383, 0.1983, 3.2, 1.2]) y = paddle.to_tensor([1.0, 1.0, 1.0, 1.0]) out = paddle.where(x>1, x, y) - print(out.numpy()) + print(out) #out: [1.0, 1.0, 3.2, 1.2] """ if not in_dygraph_mode(): From c5c273c13e861c2f22ab6c639d2cafa2facfeb8c Mon Sep 17 00:00:00 2001 From: Huihuang Zheng Date: Mon, 16 Nov 2020 15:26:35 +0800 Subject: [PATCH 14/56] [Dy2stat] Fix Using Tuple for Transpose in Dy2stat (#28574) PaddleSeg uses tuple as parameter of transpose in dygraph code: https://github.com/PaddlePaddle/PaddleSeg/blob/release/v0.7.0/dygraph/paddleseg/models/danet.py#L152 However, in dy2stat, static code doesn't support the perm as a tuple. This PR fixed it. --- python/paddle/fluid/layers/nn.py | 21 ++++++----- .../tests/unittests/test_transpose_op.py | 35 +++++++++++++++++++ 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 760f5ce58bf268..3ac43df872e377 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -5459,7 +5459,7 @@ def transpose(x, perm, name=None): Args: x (Variable): The input Tensor. It is a N-D Tensor of data types float32, float64, int32. - perm (list): Permute the input according to the data of perm. + perm (list|tuple): Permute the input according to the data of perm. name (str): The name of this layer. It is optional. Returns: @@ -5492,14 +5492,12 @@ def transpose(x, perm, name=None): .. code-block:: python - # use append_batch_size=False to avoid prepending extra - # batch size in shape - import paddle.fluid as fluid - x = fluid.layers.data(name='x', shape=[2, 3, 4], - dtype='float32', append_batch_size=False) - x_transposed = fluid.layers.transpose(x, perm=[1, 0, 2]) - print x_transposed.shape - #(3L, 2L, 4L) + import paddle + + x = paddle.randn([2, 3, 4]) + x_transposed = paddle.transpose(x, perm=[1, 0, 2]) + print(x_transposed.shape) + # [3L, 2L, 4L] """ if in_dygraph_mode(): @@ -5509,8 +5507,9 @@ def transpose(x, perm, name=None): check_variable_and_dtype( x, 'x', ['float16', 'float32', 'float64', 'int32', 'int64'], 'transpose') - check_type(perm, 'perm', list, 'transpose') - + check_type(perm, 'perm', (list, tuple), 'transpose') + if isinstance(perm, tuple): + perm = list(perm) if len(perm) != len(x.shape): raise ValueError( "Input(perm) is the permutation of dimensions of Input(x), " diff --git a/python/paddle/fluid/tests/unittests/test_transpose_op.py b/python/paddle/fluid/tests/unittests/test_transpose_op.py index 56333211469db5..f72df8cbe46409 100644 --- a/python/paddle/fluid/tests/unittests/test_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_transpose_op.py @@ -21,6 +21,7 @@ import paddle.fluid as fluid from paddle.fluid import Program, program_guard +paddle.enable_static() class TestTransposeOp(OpTest): def setUp(self): @@ -113,6 +114,7 @@ def initTestCase(self): class TestTransposeOpError(unittest.TestCase): def test_errors(self): + paddle.enable_static() with program_guard(Program(), Program()): x = fluid.layers.data(name='x', shape=[10, 5, 3], dtype='float64') @@ -149,6 +151,39 @@ def test_each_elem_value_check(): self.assertRaises(ValueError, test_each_elem_value_check) +class TestTransposeApi(unittest.TestCase): + def test_static_out(self): + paddle.enable_static() + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.static.data(name='x', shape=[2, 3, 4], dtype='float32') + x_trans1 = paddle.transpose(x, perm=[1, 0, 2]) + 
x_trans2 = paddle.transpose(x, perm=(2, 1, 0)) + place = paddle.CPUPlace() + exe = paddle.static.Executor(place) + x_np = np.random.random([2, 3, 4]).astype("float32") + result1, result2 = exe.run(feed={"x": x_np}, fetch_list=[x_trans1, x_trans2]) + expected_result1 = np.transpose(x_np, [1, 0, 2]) + expected_result2 = np.transpose(x_np, (2, 1, 0)) + + np.testing.assert_array_equal(result1, expected_result1) + np.testing.assert_array_equal(result2, expected_result2) + + def test_dygraph_out(self): + # This is an old test before 2.0 API so we need to disable static + # to trigger dygraph + paddle.disable_static() + x = paddle.randn([2, 3, 4]) + x_trans1 = paddle.transpose(x, perm=[1, 0, 2]) + x_trans2 = paddle.transpose(x, perm=(2, 1, 0)) + x_np = x.numpy() + expected_result1 = np.transpose(x_np, [1, 0, 2]) + expected_result2 = np.transpose(x_np, (2, 1, 0)) + + np.testing.assert_array_equal(x_trans1.numpy(), expected_result1) + np.testing.assert_array_equal(x_trans2.numpy(), expected_result2) + # This is an old test before 2.0 API so we enable static again after + # dygraph test + paddle.enable_static() class TestTAPI(unittest.TestCase): def test_out(self): From 89d27de90fe97abe9f1e5e12a8c42895ba2b699e Mon Sep 17 00:00:00 2001 From: Kaipeng Deng Date: Mon, 16 Nov 2020 16:23:01 +0800 Subject: [PATCH 15/56] DataLoader support not auto collate batch (#28425) * DataLoader support not auto collate batch. test=develop --- .../fluid/dataloader/dataloader_iter.py | 34 +++++--- python/paddle/fluid/dataloader/fetcher.py | 49 +++++++----- python/paddle/fluid/reader.py | 36 ++++++++- .../test_multiprocess_dataloader_dynamic.py | 45 ++++++++++- .../test_multiprocess_dataloader_exception.py | 4 +- ...ess_dataloader_iterable_dataset_dynamic.py | 43 ++++++++++- ...cess_dataloader_iterable_dataset_static.py | 75 ++++++++++++++++++ .../test_multiprocess_dataloader_static.py | 77 +++++++++++++++++++ 8 files changed, 327 insertions(+), 36 deletions(-) diff --git a/python/paddle/fluid/dataloader/dataloader_iter.py b/python/paddle/fluid/dataloader/dataloader_iter.py index d32a543eb495fa..ee30484ae9a0fb 100644 --- a/python/paddle/fluid/dataloader/dataloader_iter.py +++ b/python/paddle/fluid/dataloader/dataloader_iter.py @@ -36,6 +36,7 @@ from ..framework import in_dygraph_mode from ..multiprocess_utils import CleanupFuncRegistrar, _cleanup_mmap, _set_SIGCHLD_handler from .fetcher import _IterableDatasetFetcher, _MapDatasetFetcher +from .batch_sampler import _InfiniteIterableSampler __all__ = ['get_worker_info'] @@ -100,11 +101,13 @@ class _DatasetKind(object): ITER = 1 @staticmethod - def create_fetcher(kind, dataset, collate_fn, drop_last): + def create_fetcher(kind, dataset, auto_collate_batch, collate_fn, drop_last): if kind == _DatasetKind.MAP: - return _MapDatasetFetcher(dataset, collate_fn, drop_last) + return _MapDatasetFetcher(dataset, auto_collate_batch, + collate_fn, drop_last) elif kind == _DatasetKind.ITER: - return _IterableDatasetFetcher(dataset, collate_fn, drop_last) + return _IterableDatasetFetcher(dataset, auto_collate_batch, + collate_fn, drop_last) else: raise NotImplementedError("unknown Dataset kind {}".format(kind)) @@ -221,8 +224,7 @@ def __init__(self, loader): self._places = loader.places self._return_list = loader.return_list self._batch_sampler = loader.batch_sampler - self._sampler_iter = iter(loader.batch_sampler) - self._collate_fn = loader.collate_fn or default_collate_fn + self._auto_collate_batch = loader.auto_collate_batch self._num_workers = loader.num_workers 
self._use_buffer_reader = loader.use_buffer_reader self._use_shared_memory = loader.use_shared_memory @@ -231,6 +233,16 @@ def __init__(self, loader): self._dataset_kind = loader.dataset_kind self._pin_memory = loader.pin_memory + if self._auto_collate_batch: + self._sampler_iter = iter(loader.batch_sampler) + self._collate_fn = loader.collate_fn or default_collate_fn + else: + if self._dataset_kind == _DatasetKind.MAP: + self._sampler_iter = iter(list(range(len(self._dataset)))) + else: + self._sampler_iter = iter(_InfiniteIterableSampler(self._dataset, 1)) + self._collate_fn = loader.collate_fn + # LoDTensorBlockingQueue instance for create_py_reader and a thread # to put mini-batch data to self._blocking_queue, mini-batch data # will be get from: @@ -257,7 +269,8 @@ def __init__(self, loader): super(_DataLoaderIterSingleProcess, self).__init__(loader) self._dataset_fetcher = _DatasetKind.create_fetcher( - self._dataset_kind, self._dataset, self._collate_fn, True) + self._dataset_kind, self._dataset, self._auto_collate_batch, + self._collate_fn, True) # NOTE: len(self._places) batch data compose as an output # iteration, set blocking_queue can cache 2 iteration datas @@ -367,7 +380,7 @@ def __del__(self): # NOTE(chenweihang): _worker_loop must be top level method to be pickled def _worker_loop(dataset, dataset_kind, indices_queue, out_queue, done_event, - collate_fn, init_fn, worker_id, num_workers, + auto_collate_batch, collate_fn, init_fn, worker_id, num_workers, use_shared_memory): try: # NOTE: [ mmap files clear ] When the child process exits unexpectedly, @@ -388,7 +401,7 @@ def _worker_loop(dataset, dataset_kind, indices_queue, out_queue, done_event, if init_fn is not None: init_fn(worker_id) fetcher = _DatasetKind.create_fetcher(dataset_kind, dataset, - collate_fn, True) + auto_collate_batch, collate_fn, True) except: init_exception = Exception("init_fn failed in worker {}: " \ "{}".format(worker_id, sys.exc_info())) @@ -511,8 +524,9 @@ def _init_workers(self): target=_worker_loop, args=(self._dataset, self._dataset_kind, indices_queue, self._data_queue, self._workers_done_event, - self._collate_fn, self._worker_init_fn, i, - self._num_workers, self._use_shared_memory)) + self._auto_collate_batch, self._collate_fn, + self._worker_init_fn, i, self._num_workers, + self._use_shared_memory)) worker.daemon = True worker.start() self._workers.append(worker) diff --git a/python/paddle/fluid/dataloader/fetcher.py b/python/paddle/fluid/dataloader/fetcher.py index 001b8b931da233..9382a704223704 100644 --- a/python/paddle/fluid/dataloader/fetcher.py +++ b/python/paddle/fluid/dataloader/fetcher.py @@ -14,8 +14,9 @@ class _DatasetFetcher(object): - def __init__(self, dataset, collate_fn, drop_last): + def __init__(self, dataset, auto_collate_batch, collate_fn, drop_last): self.dataset = dataset + self.auto_collate_batch = auto_collate_batch self.collate_fn = collate_fn self.drop_last = drop_last @@ -25,29 +26,41 @@ def fetch(self, batch_indices): class _IterableDatasetFetcher(_DatasetFetcher): - def __init__(self, dataset, collate_fn, drop_last): - super(_IterableDatasetFetcher, self).__init__(dataset, collate_fn, - drop_last) + def __init__(self, dataset, auto_collate_batch, collate_fn, drop_last): + super(_IterableDatasetFetcher, self).__init__(dataset, auto_collate_batch, + collate_fn, drop_last) self.dataset_iter = iter(dataset) def fetch(self, batch_indices): - data = [] - for _ in batch_indices: - try: - data.append(next(self.dataset_iter)) - except StopIteration: - break - if len(data) 
== 0 or (self.drop_last and - len(data) < len(batch_indices)): - raise StopIteration - return self.collate_fn(data) + if self.auto_collate_batch: + data = [] + for _ in batch_indices: + try: + data.append(next(self.dataset_iter)) + except StopIteration: + break + if len(data) == 0 or (self.drop_last and + len(data) < len(batch_indices)): + raise StopIteration + else: + data = next(self.dataset_iter) + + if self.collate_fn: + data = self.collate_fn(data) + return data class _MapDatasetFetcher(_DatasetFetcher): - def __init__(self, dataset, collate_fn, drop_last): - super(_MapDatasetFetcher, self).__init__(dataset, collate_fn, drop_last) + def __init__(self, dataset, auto_collate_batch, collate_fn, drop_last): + super(_MapDatasetFetcher, self).__init__(dataset, auto_collate_batch, collate_fn, drop_last) def fetch(self, batch_indices): - data = [self.dataset[idx] for idx in batch_indices] - return self.collate_fn(data) + if self.auto_collate_batch: + data = [self.dataset[idx] for idx in batch_indices] + else: + data = self.dataset[batch_indices] + + if self.collate_fn: + data = self.collate_fn(data) + return data diff --git a/python/paddle/fluid/reader.py b/python/paddle/fluid/reader.py index 0e7fd35f5842e6..4a50b3bc0c7dc5 100644 --- a/python/paddle/fluid/reader.py +++ b/python/paddle/fluid/reader.py @@ -163,6 +163,21 @@ class DataLoader(object): For :code:`batch_sampler` please see :code:`paddle.io.BatchSampler` + **Disable automatic batching** + + In certain cases such as some NLP tasks, instead of automatic batching, + handling batching manually in dataset is needed by users. For these + cases, automatic batching is disabled if both :attr:`batch_size` and + :attr:`batch_sampler` is set as None, each data got from :attr:`dataset` + should be batched data and will be processed with function define by + :attr:`collate_fn` or :attr:`default_collate_fn`. + + + .. note:: + When automatic batching is disabled, :attr:`default_collate_fn` will + do nothing to data from dataset. + + Args: dataset(Dataset): the dataset to load data from, should be an instance of subclass of :code:`paddle.io.Dataset` or @@ -185,7 +200,7 @@ class DataLoader(object): batch_sampler(BatchSampler): an instance of `paddle.io.BatchSampler` to generate batch indices to draw samples from :attr:`dataset` and combine a batch. Default None. 
- batch_size(int): sample number in a mini-batch, a substitution + batch_size(int|None): sample number in a mini-batch, a substitution parameter for :attr:`batch_sampler`, if :attr:`batch_sampler` is not set, a default `paddle.io.BatchSampler` will be used and initialize by :attr:`batch_size`, :attr:`shuffle` and @@ -358,10 +373,15 @@ def __init__(self, "batch_size/shuffle/drop_last should not be set when " \ "batch_sampler is given" self.batch_sampler = batch_sampler + self.batch_size = None + elif batch_size is None: + self.batch_sampler = None + self.batch_size = None else: - assert batch_size is not None and batch_size > 0, \ - "batch_size should be a positive value when " \ + assert batch_size > 0, \ + "batch_size should be None or a positive value when " \ "batch_sampler is not given" + self.batch_size = batch_size if isinstance(dataset, IterableDataset): self.batch_sampler = _InfiniteIterableSampler(dataset, batch_size) @@ -372,13 +392,21 @@ def __init__(self, shuffle=shuffle, drop_last=drop_last) + self.auto_collate_batch = self.batch_sampler is not None + self.pin_memory = False if in_dygraph_mode(): self.pin_memory = True if use_pinned_memory( ) is None else use_pinned_memory() def __len__(self): - return len(self.batch_sampler) + if self.dataset_kind == _DatasetKind.ITER: + raise ValueError("length of IterableDataset not supported") + else: + if self.batch_size is None: + return len(self.dataset) + else: + return len(self.batch_sampler) def __iter__(self): if self.num_workers == 0: diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py index 1bb720673e4f33..c89354adf751c6 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py @@ -27,7 +27,7 @@ from paddle.fluid.dygraph.nn import Linear from paddle.fluid.dygraph.base import to_variable -from test_multiprocess_dataloader_static import RandomDataset, prepare_places +from test_multiprocess_dataloader_static import RandomDataset, RandomBatchedDataset, prepare_places from test_multiprocess_dataloader_static import EPOCH_NUM, BATCH_SIZE, IMAGE_SIZE, SAMPLE_NUM, CLASS_NUM @@ -122,5 +122,48 @@ def test_main(self): self.assertLess(diff, 1e-2) +class TestDygraphDataLoaderWithBatchedDataset(TestDygraphDataLoader): + def run_main(self, num_workers, places): + fluid.default_startup_program().random_seed = 1 + fluid.default_main_program().random_seed = 1 + with fluid.dygraph.guard(places[0]): + fc_net = SimpleFCNet() + optimizer = fluid.optimizer.Adam(parameter_list=fc_net.parameters()) + + dataset = RandomBatchedDataset(SAMPLE_NUM, CLASS_NUM) + dataloader = DataLoader( + dataset, + num_workers=num_workers, + batch_size=None, + drop_last=True) + assert len(dataloader) == int(SAMPLE_NUM / BATCH_SIZE) + + step_list = [] + loss_list = [] + start_t = time.time() + for _ in six.moves.range(EPOCH_NUM): + step = 0 + for image, label in dataloader(): + out = fc_net(image) + loss = fluid.layers.cross_entropy(out, label) + avg_loss = fluid.layers.reduce_mean(loss) + avg_loss.backward() + optimizer.minimize(avg_loss) + fc_net.clear_gradients() + + loss_list.append(np.mean(avg_loss.numpy())) + step += 1 + step_list.append(step) + + end_t = time.time() + ret = { + "time": end_t - start_t, + "step": step_list, + "loss": np.array(loss_list) + } + print("time cost", ret['time'], 'step_list', ret['step']) + return ret + + if __name__ == 
'__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py index 6fd14b40bc9108..74fe359cd7d597 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py @@ -188,7 +188,7 @@ def _collate_fn(sample_list): indices_queue.put(None) _worker_loop(loader._dataset, 0, indices_queue, loader._data_queue, loader._workers_done_event, - _collate_fn, _init_fn, 0, 1, + True, _collate_fn, _init_fn, 0, 1, loader._use_shared_memory) self.assertTrue(False) except AssertionError: @@ -232,7 +232,7 @@ def _collate_fn(sample_list): loader._workers_done_event.set() _worker_loop(loader._dataset, 0, indices_queue, loader._data_queue, loader._workers_done_event, - _collate_fn, _init_fn, 0, 1, + True, _collate_fn, _init_fn, 0, 1, loader._use_shared_memory) self.assertTrue(True) except AssertionError: diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py index af332d8e432092..0533a0d09fa0de 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py @@ -27,7 +27,7 @@ from paddle.fluid.dygraph.nn import Linear from paddle.fluid.dygraph.base import to_variable -from test_multiprocess_dataloader_iterable_dataset_static import RandomDataset, prepare_places +from test_multiprocess_dataloader_iterable_dataset_static import RandomDataset, RandomBatchedDataset, prepare_places from test_multiprocess_dataloader_iterable_dataset_static import EPOCH_NUM, BATCH_SIZE, IMAGE_SIZE, SAMPLE_NUM, CLASS_NUM @@ -119,5 +119,46 @@ def test_main(self): 0] +class TestDygraphDataLoaderWithBatchedDataset(TestDygraphDataLoader): + def run_main(self, num_workers, places): + fluid.default_startup_program().random_seed = 1 + fluid.default_main_program().random_seed = 1 + with fluid.dygraph.guard(places[0]): + fc_net = SimpleFCNet() + optimizer = fluid.optimizer.Adam(parameter_list=fc_net.parameters()) + + dataset = RandomBatchedDataset(SAMPLE_NUM, CLASS_NUM) + dataloader = DataLoader( + dataset, + num_workers=num_workers, + batch_size=None, + drop_last=True) + + step_list = [] + loss_list = [] + start_t = time.time() + for _ in six.moves.range(EPOCH_NUM): + step = 0 + for image, label in dataloader(): + out = fc_net(image) + loss = fluid.layers.cross_entropy(out, label) + avg_loss = fluid.layers.reduce_mean(loss) + avg_loss.backward() + optimizer.minimize(avg_loss) + fc_net.clear_gradients() + + loss_list.append(np.mean(avg_loss.numpy())) + step += 1 + step_list.append(step) + + end_t = time.time() + ret = { + "time": end_t - start_t, + "step": step_list, + "loss": np.array(loss_list) + } + print("time cost", ret['time'], 'step_list', ret['step']) + return ret + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py index e64e11d156ec74..4615bf85ce69f4 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py +++ 
b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py @@ -167,5 +167,80 @@ def test_main(self): 0] +class RandomBatchedDataset(IterableDataset): + def __init__(self, sample_num, class_num): + self.sample_num = sample_num // BATCH_SIZE + self.class_num = class_num + + def __iter__(self): + for i in range(self.sample_num): + np.random.seed(i) + images = [] + labels = [] + for _ in range(BATCH_SIZE): + image = np.random.random([IMAGE_SIZE]).astype('float32') + label = np.random.randint(0, self.class_num - 1, + (1, )).astype('int64') + images.append(image) + labels.append(label) + yield np.stack(images, axis=0), np.stack(labels, axis=0) + + +class TestStaticDataLoaderWithBatchedDataset(TestStaticDataLoader): + def run_main(self, num_workers, places): + scope = fluid.Scope() + with fluid.scope_guard(scope): + startup_prog, main_prog, image, label, loss = simple_fc_net_static() + + dataset = RandomBatchedDataset(SAMPLE_NUM, CLASS_NUM) + dataloader = DataLoader( + dataset, + feed_list=[image, label], + places=places, + num_workers=num_workers, + batch_size=None, + drop_last=True) + + exe = fluid.Executor(place=places[0]) + exe.run(startup_prog) + + prog = fluid.CompiledProgram(main_prog) + if len(places) > 1: + prog = prog.with_data_parallel( + loss_name=loss.name, places=places) + + step_list = [] + loss_list = [] + start_t = time.time() + for i in six.moves.range(EPOCH_NUM): + step = 0 + for d in dataloader: + assert len(d) == len(places), "{} != {}".format( + len(d), len(places)) + for i, item in enumerate(d): + image = item['image'] + label = item['label'] + assert image.shape() == [BATCH_SIZE, IMAGE_SIZE] + assert label.shape() == [BATCH_SIZE, 1] + assert image._place()._equals(places[i]) + assert label._place()._equals(places[i]) + L, = exe.run(program=prog, + feed=d, + fetch_list=[loss], + use_program_cache=True) + loss_list.append(np.mean(L)) + step += 1 + step_list.append(step) + + end_t = time.time() + ret = { + "time": end_t - start_t, + "step": step_list, + "loss": np.array(loss_list) + } + print("time cost", ret['time'], 'step_list', ret['step']) + return ret + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py index c01e2e75b8195c..5ec907c290b946 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py @@ -215,5 +215,82 @@ def test_multi_place(self): assert isinstance(d[1], list) +class RandomBatchedDataset(Dataset): + def __init__(self, sample_num, class_num): + self.sample_num = int(sample_num / BATCH_SIZE) + self.class_num = class_num + + def __getitem__(self, idx): + np.random.seed(idx) + images = [] + labels = [] + for _ in range(BATCH_SIZE): + image = np.random.random([IMAGE_SIZE]).astype('float32') + label = np.random.randint(0, self.class_num - 1, (1, )).astype('int64') + images.append(image) + labels.append(label) + return np.stack(images, axis=0), np.stack(labels, axis=0) + + def __len__(self): + return self.sample_num + + +class TestStaticDataLoaderWithBatchedDataset(TestStaticDataLoader): + def run_main(self, num_workers, places): + scope = fluid.Scope() + with fluid.scope_guard(scope): + startup_prog, main_prog, image, label, loss = simple_fc_net_static() + + dataset = RandomBatchedDataset(SAMPLE_NUM, CLASS_NUM) + dataloader = DataLoader( + dataset, + 
feed_list=[image, label], + places=places, + num_workers=num_workers, + batch_size=None, + drop_last=True) + assert len(dataloader) == int(SAMPLE_NUM / BATCH_SIZE) + + exe = fluid.Executor(place=places[0]) + exe.run(startup_prog) + + prog = fluid.CompiledProgram(main_prog) + if len(places) > 1: + prog = prog.with_data_parallel( + loss_name=loss.name, places=places) + + step_list = [] + loss_list = [] + start_t = time.time() + for _ in six.moves.range(EPOCH_NUM): + step = 0 + for d in dataloader: + assert len(d) == len(places), "{} != {}".format( + len(d), len(places)) + for i, item in enumerate(d): + image = item['image'] + label = item['label'] + assert image.shape() == [BATCH_SIZE, IMAGE_SIZE] + assert label.shape() == [BATCH_SIZE, 1] + assert image._place()._equals(places[i]) + assert label._place()._equals(places[i]) + L, = exe.run(program=prog, + feed=d, + fetch_list=[loss], + use_program_cache=True) + loss_list.append(np.mean(L)) + step += 1 + step_list.append(step) + + end_t = time.time() + ret = { + "time": end_t - start_t, + "step": step_list, + "loss": np.array(loss_list) + } + print("time cost", ret['time'], 'step_list', ret['step']) + return ret + + if __name__ == '__main__': unittest.main() From b889a0cee25ad81f02e740f1f2942fecfca8e11b Mon Sep 17 00:00:00 2001 From: pangyoki Date: Mon, 16 Nov 2020 16:45:45 +0800 Subject: [PATCH 16/56] add gaussian_random op_version (#28602) --- paddle/fluid/operators/gaussian_random_op.cc | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/paddle/fluid/operators/gaussian_random_op.cc b/paddle/fluid/operators/gaussian_random_op.cc index 17a71c67b8a084..fd2f48265ca6f4 100644 --- a/paddle/fluid/operators/gaussian_random_op.cc +++ b/paddle/fluid/operators/gaussian_random_op.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/fill_constant_op.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" @@ -197,3 +198,19 @@ REGISTER_OP_CPU_KERNEL(gaussian_random, ops::CPUGaussianRandomKernel, REGISTER_OP_CPU_KERNEL(gaussian_random_batch_size_like, ops::CPUGaussianRandomBatchSizeLikeKernel, ops::CPUGaussianRandomBatchSizeLikeKernel); +REGISTER_OP_VERSION(gaussian_random) + .AddCheckpoint( + R"ROC( + Upgrade gaussian_random add new inputs [ShapeTensor] and [ShapeTensorList] + and modify the attribute of [shape])ROC", + paddle::framework::compatible::OpVersionDesc() + .NewInput("ShapeTensor", + "The output shape supports Tensor type. ShapeTensor is " + "dispensable.") + .NewInput("ShapeTensorList", + "The output shape supports list filled with Tensor. 
" + "ShapeTensorList is dispensable.") + .ModifyAttr( + "shape", + "Add the default value of shape, the default value is {}.", + {})); From 72e068f1ba158fdd67511193f8fa567d6d791a8a Mon Sep 17 00:00:00 2001 From: pangyoki Date: Mon, 16 Nov 2020 18:06:39 +0800 Subject: [PATCH 17/56] fix test_multinomial (#28558) * fix test_multinomial * fix test_multinomial add 0 prob --- .../tests/unittests/test_multinomial_op.py | 64 +++++++++---------- 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_multinomial_op.py b/python/paddle/fluid/tests/unittests/test_multinomial_op.py index b22f6b80df79a0..957c06eca89c38 100644 --- a/python/paddle/fluid/tests/unittests/test_multinomial_op.py +++ b/python/paddle/fluid/tests/unittests/test_multinomial_op.py @@ -22,6 +22,26 @@ import numpy as np +def sample_output_one_dimension(out, dim): + # count numbers of different categories + sample_prob = np.zeros(dim).astype("float32") + sample_index_prob = np.unique(out, return_counts=True) + sample_prob[sample_index_prob[0]] = sample_index_prob[1] + sample_prob /= sample_prob.sum() + return sample_prob + + +def sample_output_two_dimension(out, shape): + num_dist = shape[0] + out_list = np.split(out, num_dist, axis=0) + sample_prob = np.zeros(shape).astype("float32") + for i in range(num_dist): + sample_index_prob = np.unique(out_list[i], return_counts=True) + sample_prob[i][sample_index_prob[0]] = sample_index_prob[1] + sample_prob /= sample_prob.sum(axis=-1, keepdims=True) + return sample_prob + + class TestMultinomialOp(OpTest): def setUp(self): paddle.enable_static() @@ -39,10 +59,7 @@ def test_check_output(self): self.check_output_customized(self.verify_output) def sample_output(self, out): - # count numbers of different categories - sample_prob = np.unique(out, return_counts=True)[1].astype("float32") - sample_prob /= sample_prob.sum() - return sample_prob + return sample_output_one_dimension(out, 4) def verify_output(self, outs): # normalize the input to get the probability @@ -62,14 +79,7 @@ def init_data(self): self.attrs = {"num_samples": 100000, "replacement": True} def sample_output(self, out): - out_list = np.split(out, 3, axis=0) - count_array = [0] * 3 - for i in range(3): - count_array[i] = np.unique( - out_list[i], return_counts=True)[1].astype("float32") - sample_prob = np.stack(count_array, axis=0) - sample_prob /= sample_prob.sum(axis=-1, keepdims=True) - return sample_prob + return sample_output_two_dimension(out, [3, 4]) class TestMultinomialOp3(TestMultinomialOp): @@ -91,15 +101,12 @@ class TestMultinomialApi(unittest.TestCase): def test_dygraph(self): # input probability is a vector, and replacement is True paddle.disable_static() - x = paddle.rand([4]) + x_numpy = np.random.rand(4) + x = paddle.to_tensor(x_numpy) out = paddle.multinomial(x, num_samples=100000, replacement=True) - x_numpy = x.numpy() paddle.enable_static() - sample_prob = np.unique( - out.numpy(), return_counts=True)[1].astype("float32") - sample_prob /= sample_prob.sum() - + sample_prob = sample_output_one_dimension(out.numpy(), 4) prob = x_numpy / x_numpy.sum(axis=-1, keepdims=True) self.assertTrue( np.allclose( @@ -109,18 +116,11 @@ def test_dygraph(self): def test_dygraph2(self): # input probability is a matrix, and replacement is True paddle.disable_static() - x = paddle.rand([3, 4]) + x_numpy = np.random.rand(3, 4) + x = paddle.to_tensor(x_numpy) out = paddle.multinomial(x, num_samples=100000, replacement=True) - x_numpy = x.numpy() - - out_list = np.split(out.numpy(), 3, 
axis=0) - count_array = [0] * 3 - for i in range(3): - count_array[i] = np.unique( - out_list[i], return_counts=True)[1].astype("float32") - sample_prob = np.stack(count_array, axis=0) - sample_prob /= sample_prob.sum(axis=-1, keepdims=True) + sample_prob = sample_output_two_dimension(out.numpy(), [3, 4]) prob = x_numpy / x_numpy.sum(axis=-1, keepdims=True) self.assertTrue( np.allclose( @@ -131,9 +131,9 @@ def test_dygraph2(self): def test_dygraph3(self): # replacement is False. number of samples must be less than number of categories. paddle.disable_static() - x = paddle.rand([1000]) + x_numpy = np.random.rand(1000) + x = paddle.to_tensor(x_numpy) out = paddle.multinomial(x, num_samples=100, replacement=False) - x_numpy = x.numpy() unique_out = np.unique(out.numpy()) self.assertEqual( @@ -158,9 +158,7 @@ def test_static(self): x_np = np.random.rand(4).astype('float32') out = exe.run(train_program, feed={'x': x_np}, fetch_list=[out]) - sample_prob = np.unique(out, return_counts=True)[1].astype("float32") - sample_prob /= sample_prob.sum() - + sample_prob = sample_output_one_dimension(out, 4) prob = x_np / x_np.sum(axis=-1, keepdims=True) self.assertTrue( np.allclose( From 804271cff9f43cd06409962b3bef80827374fa25 Mon Sep 17 00:00:00 2001 From: lidanqing Date: Mon, 16 Nov 2020 11:42:01 +0100 Subject: [PATCH 18/56] Op version python mkldnn_inplace test (#28354) * add mkldnn inplace op version test * update mkldnn_inplace fuse pass * update the inplace test --- .../ir/mkldnn/mkldnn_inplace_pass.cc | 8 +++ .../test_mkldnn_inplace_fuse_pass.py | 56 +++++++++++++++++++ tools/static_mode_white_list.py | 1 + 3 files changed, 65 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.cc index 7bd94bf55ea21f..d655837f743369 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.cc @@ -17,10 +17,12 @@ #include #include #include +#include #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_info.h" +#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" namespace paddle { @@ -215,3 +217,9 @@ void MKLDNNInPlacePass::ApplyImpl(ir::Graph* graph) const { } // namespace paddle REGISTER_PASS(mkldnn_inplace_pass, paddle::framework::ir::MKLDNNInPlacePass); +REGISTER_PASS_CAPABILITY(mkldnn_inplace_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("softmax", 0) + .EQ("elementwise_add", 0) + .EQ("tanh", 0)); diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py new file mode 100644 index 00000000000000..4215e56de2cc73 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py @@ -0,0 +1,56 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import PassVersionChecker + + +class MkldnnInplacePassTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + paddle.enable_static() + data = fluid.data( + name="data", shape=[-1, 3, 100, 100], dtype="float32") + conv_out_1 = fluid.layers.conv2d( + data, num_filters=3, filter_size=3, bias_attr=False) + softmax_out = fluid.layers.softmax(conv_out_1) + relu_out = fluid.layers.relu(conv_out_1) + eltwise_out = fluid.layers.elementwise_add( + softmax_out, relu_out, axis=-1) + + self.pass_name = 'mkldnn_inplace_pass' + self.feeds = { + "data": np.random.random((1, 3, 100, 100)).astype("float32") + } + self.fetch_list = [softmax_out, relu_out, eltwise_out] + self.enable_mkldnn = True + + def test_check_output(self): + use_gpu = False + self.check_output_with_option(use_gpu) + + def test_pass_compatible(self): + self.assertTrue(PassVersionChecker.IsCompatible(self.pass_name)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tools/static_mode_white_list.py b/tools/static_mode_white_list.py index 6a2a121cd616f9..1f153442aff6c6 100644 --- a/tools/static_mode_white_list.py +++ b/tools/static_mode_white_list.py @@ -611,6 +611,7 @@ 'test_mkldnn_matmul_op_output_fuse_pass', 'test_mkldnn_matmul_transpose_reshape_fuse_pass', 'test_mkldnn_scale_matmul_fuse_pass', + 'test_mkldnn_inplace_fuse_pass', 'test_batch_fc_op', 'test_c_comm_init_all_op', 'test_conv2d_fusion_op', From 2cb71c0cde2835e4f7e0d6862f49ab1c56f029c4 Mon Sep 17 00:00:00 2001 From: "joanna.wozna.intel" Date: Mon, 16 Nov 2020 11:43:33 +0100 Subject: [PATCH 19/56] Add checkpoint to quantize (#28612) * Add checkpoint to quantize * Change bfloat16 option --- .../framework/ir/mkldnn/cpu_bfloat16_pass.cc | 6 ++ paddle/fluid/operators/quantize_op.cc | 8 +++ .../ir/inference/inference_pass_test.py | 5 ++ .../test_mkldnn_cpu_bfloat16_pass.py | 58 +++++++++++++++++++ 4 files changed, 77 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc index df498865245fc8..ae93025e784e38 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/string/pretty_log.h" @@ -157,3 +158,8 @@ void CPUBFloat16Pass::ApplyImpl(ir::Graph* graph) const { } // namespace paddle REGISTER_PASS(cpu_bfloat16_pass, paddle::framework::ir::CPUBFloat16Pass); + +REGISTER_PASS_CAPABILITY(cpu_bfloat16_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination().GE( + "quantize", 1)); diff --git a/paddle/fluid/operators/quantize_op.cc b/paddle/fluid/operators/quantize_op.cc index ee5829319d2a62..f21243de834177 100644 --- a/paddle/fluid/operators/quantize_op.cc +++ b/paddle/fluid/operators/quantize_op.cc @@ -13,6 +13,7 @@ * limitations under the License. */ #include "paddle/fluid/operators/quantize_op.h" +#include "paddle/fluid/framework/op_version_registry.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif @@ -54,3 +55,10 @@ void QuantOpMaker::Make() { namespace ops = paddle::operators; REGISTER_OPERATOR(quantize, ops::QuantOp, ops::QuantOpMaker); + +REGISTER_OP_VERSION(quantize) + .AddCheckpoint( + R"ROC( Add a new attribute [bfloat16])ROC", + paddle::framework::compatible::OpVersionDesc().NewAttr( + "bfloat16", "If true, float32 input is converted to bfloat16", + false)); diff --git a/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py b/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py index 0209bb344ece7c..18715f10c5cd36 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/inference_pass_test.py @@ -43,6 +43,7 @@ def __init__(self, methodName='runTest'): self.fetch_list = None self.enable_mkldnn = False + self.enable_mkldnn_bfloat16 = False self.enable_trt = False self.trt_parameters = None self.enable_lite = False @@ -125,6 +126,8 @@ def _get_analysis_config(self, self.trt_parameters.use_calib_mode) elif use_mkldnn: config.enable_mkldnn() + if self.enable_mkldnn_bfloat16: + config.enable_mkldnn_bfloat16() return config @@ -251,6 +254,8 @@ def check_output_with_option(self, len(outs) == len(mkldnn_outputs), "The number of outputs is different between CPU and MKLDNN. ") + if self.enable_mkldnn_bfloat16: + atol = 0.01 for out, mkldnn_output in zip(outs, mkldnn_outputs): self.assertTrue( np.allclose( diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py new file mode 100644 index 00000000000000..0a4d460d1fbbf4 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py @@ -0,0 +1,58 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +from paddle.fluid.core import PassVersionChecker + + +class TestMKLDNNCpuBfloat16Pass(InferencePassTest): + def setUp(self): + self.init_data() + with fluid.program_guard(self.main_program, self.startup_program): + x = fluid.data( + name='x', shape=[-1] + self.shape_x, dtype=self.d_type) + y = fluid.data( + name='y', shape=[-1] + self.shape_y, dtype=self.d_type) + out = fluid.layers.matmul(x, y) + out = fluid.layers.transpose(out, perm=[0, 1, 2, 3]) + out = fluid.layers.reshape(out, [0, 0, 0, 0]) + out = fluid.layers.fc(out, size=1) + + self.feeds = { + "x": + np.random.random([self.bs] + self.shape_x).astype(self.d_type), + "y": + np.random.random([self.bs] + self.shape_y).astype(self.d_type) + } + self.fetch_list = [out] + + def init_data(self): + self.bs = 8 + self.d_type = np.float32 + self.shape_x = [12, 10, 1] + self.shape_y = [12, 1, 64] + self.enable_mkldnn = True + self.enable_mkldnn_bfloat16 = True + + def test_check_output(self): + use_gpu = False + self.check_output_with_option(use_gpu, flatten=True) + self.assertTrue(PassVersionChecker.IsCompatible('cpu_bfloat16_pass')) + + +if __name__ == "__main__": + unittest.main() From ece1e4cd9de38375a2b5007fe5e8d69a521d9a7b Mon Sep 17 00:00:00 2001 From: Kaipeng Deng Date: Mon, 16 Nov 2020 19:19:39 +0800 Subject: [PATCH 20/56] Add weighted random sampler (#28545) * add WeightedRandomSampler. test=develop --- python/paddle/fluid/dataloader/sampler.py | 87 +++++++++++++++++- .../tests/unittests/test_batch_sampler.py | 92 +++++++++++++++++-- python/paddle/io/__init__.py | 3 +- 3 files changed, 171 insertions(+), 11 deletions(-) diff --git a/python/paddle/fluid/dataloader/sampler.py b/python/paddle/fluid/dataloader/sampler.py index 5c75fafe8b2238..7207ebcbacfdb0 100644 --- a/python/paddle/fluid/dataloader/sampler.py +++ b/python/paddle/fluid/dataloader/sampler.py @@ -16,8 +16,11 @@ from __future__ import division import numpy as np +from .. 
import core
 
-__all__ = ["Sampler", "SequenceSampler", "RandomSampler"]
+__all__ = [
+    "Sampler", "SequenceSampler", "RandomSampler", "WeightedRandomSampler"
+]
 
 
 class Sampler(object):
@@ -234,3 +237,85 @@ def __iter__(self):
 
     def __len__(self):
         return self.num_samples
+
+
+def _weighted_sample(weights, num_samples, replacement=True):
+    if isinstance(weights, core.LoDTensor):
+        weights = weights.numpy()
+    if isinstance(weights, (list, tuple)):
+        weights = np.array(weights)
+    assert isinstance(weights, np.ndarray), \
+        "weights should be paddle.Tensor, numpy.ndarray, list or tuple"
+    assert len(weights.shape) <= 2, \
+        "weights should be a 1-D or 2-D array"
+    weights = weights.reshape((-1, weights.shape[-1]))
+    assert np.all(weights >= 0.), \
+        "weights should be positive value"
+    assert not np.any(weights == np.inf), \
+        "weights should not be INF"
+    assert not np.any(weights == np.nan), \
+        "weights should not be NaN"
+
+    non_zeros = np.sum(weights > 0., axis=1)
+    assert np.all(non_zeros > 0), \
+        "weights should have positive values"
+    if not replacement:
+        assert np.all(non_zeros >= num_samples), \
+            "weights positive value number should not " \
+            "be less than num_samples when replacement=False"
+
+    weights = weights / weights.sum(axis=1)
+    rets = []
+    for i in range(weights.shape[0]):
+        ret = np.random.choice(weights.shape[1], num_samples, replacement,
+                               weights[i])
+        rets.append(ret)
+    return np.array(rets)
+
+
+class WeightedRandomSampler(Sampler):
+    """
+    Random sample with given weights (probabilities). The sample index will be in
+    the range [0, len(weights) - 1]. If :attr:`replacement` is True, an index can be
+    sampled multiple times.
+
+    Args:
+        weights(numpy.ndarray|paddle.Tensor|list|tuple): sequence of weights,
+            should be numpy array, paddle.Tensor, list or tuple
+        num_samples(int): set sample number to draw from sampler.
+        replacement(bool): Whether to draw samples with replacement, default True
+
+    Returns:
+        Sampler: a Sampler that yields sample indices randomly by the given weights
+
+    Examples:
+
+        ..
code-block:: python + + from paddle.io import WeightedRandomSampler + + sampler = WeightedRandomSampler(weights=[0.1, 0.3, 0.5, 0.7, 0.2], + num_samples=5, + replacement=True) + + for index in sampler: + print(index) + """ + + def __init__(self, weights, num_samples, replacement=True): + if not isinstance(num_samples, int) or num_samples <= 0: + raise ValueError("num_samples should be a positive integer") + if not isinstance(replacement, bool): + raise ValueError("replacement should be a boolean value") + self.weights = weights + self.num_samples = num_samples + self.replacement = replacement + + def __iter__(self): + idxs = _weighted_sample(self.weights, self.num_samples, + self.replacement) + return iter(idxs.reshape((-1)).tolist()) + + def __len__(self): + mul = np.prod(self.weights.shape) // self.weights.shape[-1] + return self.num_samples * mul diff --git a/python/paddle/fluid/tests/unittests/test_batch_sampler.py b/python/paddle/fluid/tests/unittests/test_batch_sampler.py index 4faef77dad40dd..4c323a2511f5b6 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_sampler.py +++ b/python/paddle/fluid/tests/unittests/test_batch_sampler.py @@ -16,8 +16,10 @@ import unittest +import numpy as np import paddle.fluid as fluid -from paddle.io import BatchSampler, Dataset, Sampler, SequenceSampler, RandomSampler +from paddle.io import BatchSampler, Dataset, Sampler, SequenceSampler, \ + RandomSampler, WeightedRandomSampler from paddle.io import DistributedBatchSampler @@ -195,14 +197,86 @@ def test_main(self): pass -class TestDistributedBatchSamplerWithSampler(TestBatchSampler): - def init_batch_sampler(self): - dataset = RandomDataset(1000, 10) - bs = DistributedBatchSampler( - dataset=dataset, - batch_size=self.batch_size, - drop_last=self.drop_last) - return bs +class TestWeightedRandomSampler(unittest.TestCase): + def init_probs(self, total, pos): + pos_probs = np.random.random((pos, )).astype('float32') + probs = np.zeros((total, )).astype('float32') + probs[:pos] = pos_probs + np.random.shuffle(probs) + return probs + + def test_replacement(self): + probs = self.init_probs(20, 10) + sampler = WeightedRandomSampler(probs, 30, True) + assert len(sampler) == 30 + for idx in iter(sampler): + assert probs[idx] > 0. + + def test_no_replacement(self): + probs = self.init_probs(20, 10) + sampler = WeightedRandomSampler(probs, 10, False) + assert len(sampler) == 10 + idxs = [] + for idx in iter(sampler): + assert probs[idx] > 0. 
+ idxs.append(idx) + assert len(set(idxs)) == len(idxs) + + def test_assert(self): + # all zeros + probs = np.zeros((10, )).astype('float32') + sampler = WeightedRandomSampler(probs, 10, True) + try: + for idx in iter(sampler): + pass + self.assertTrue(False) + except AssertionError: + self.assertTrue(True) + + # not enough pos + probs = self.init_probs(10, 5) + sampler = WeightedRandomSampler(probs, 10, False) + try: + for idx in iter(sampler): + pass + self.assertTrue(False) + except AssertionError: + self.assertTrue(True) + + # neg probs + probs = -1.0 * np.ones((10, )).astype('float32') + sampler = WeightedRandomSampler(probs, 10, True) + try: + for idx in iter(sampler): + pass + self.assertTrue(False) + except AssertionError: + self.assertTrue(True) + + def test_raise(self): + # float num_samples + probs = self.init_probs(10, 5) + try: + sampler = WeightedRandomSampler(probs, 2.3, True) + self.assertTrue(False) + except ValueError: + self.assertTrue(True) + + # neg num_samples + probs = self.init_probs(10, 5) + try: + sampler = WeightedRandomSampler(probs, -1, True) + self.assertTrue(False) + except ValueError: + self.assertTrue(True) + + # no-bool replacement + probs = self.init_probs(10, 5) + try: + sampler = WeightedRandomSampler(probs, 5, 5) + self.assertTrue(False) + except ValueError: + self.assertTrue(True) if __name__ == '__main__': diff --git a/python/paddle/io/__init__.py b/python/paddle/io/__init__.py index b4e437a97dd22b..e8b07528019c51 100644 --- a/python/paddle/io/__init__.py +++ b/python/paddle/io/__init__.py @@ -27,9 +27,10 @@ 'Sampler', 'SequenceSampler', 'RandomSampler', + 'WeightedRandomSampler', ] from ..fluid.io import DataLoader from ..fluid.dataloader import Dataset, IterableDataset, BatchSampler, get_worker_info, \ TensorDataset, Sampler, SequenceSampler, RandomSampler, DistributedBatchSampler, \ - ComposeDataset, ChainDataset + ComposeDataset, ChainDataset, WeightedRandomSampler From a972c33fd7b93a24cc199ad4f3ae01ea371d3972 Mon Sep 17 00:00:00 2001 From: wangchaochaohu Date: Mon, 16 Nov 2020 19:33:33 +0800 Subject: [PATCH 21/56] refine gather OP performance for dynamic mode (#28587) --- paddle/fluid/operators/gather_op.cc | 9 +++++++++ python/paddle/tensor/manipulation.py | 5 ++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/operators/gather_op.cc b/paddle/fluid/operators/gather_op.cc index 648afe7e8215fe..162766546b3c26 100644 --- a/paddle/fluid/operators/gather_op.cc +++ b/paddle/fluid/operators/gather_op.cc @@ -93,6 +93,15 @@ class GatherGradOp : public framework::OperatorWithKernel { ctx, framework::GradVarName("Out")), ctx.device_context()); } + framework::OpKernelType GetKernelTypeForVar( + const std::string& var_name, const framework::Tensor& tensor, + const framework::OpKernelType& expected_kernel_type) const override { + if (var_name == "Axis") { + return expected_kernel_type; + } + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), tensor.layout()); + } }; class GatherOpMaker : public framework::OpProtoAndCheckerMaker { diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 4a01f7e7fa311e..adad9cfdc26671 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -785,9 +785,12 @@ def gather(x, index, axis=None, name=None): if axis is None: axis = 0 axis_tensor = axis + if not isinstance(axis, Variable) and axis == 0: + return paddle.fluid.layers.gather(input=x, index=index, overwrite=True) if not isinstance(axis, 
Variable): with device_guard("cpu"): - axis_tensor = fill_constant(shape=[1], dtype='int64', value=axis) + axis_tensor = fill_constant( + shape=[1], dtype='int64', value=axis, force_cpu=True) if in_dygraph_mode(): return core.ops.gather(x, index, axis_tensor) From 8f2656ef5ca4ab16f06d94b8ca9392d3f0f760ae Mon Sep 17 00:00:00 2001 From: wawltor Date: Mon, 16 Nov 2020 20:21:46 +0800 Subject: [PATCH 22/56] fix the gradient bug for the topk v2 fix the gradient bug for the topk v2 --- paddle/fluid/operators/top_k_function_cuda.h | 12 ++++--- .../fluid/tests/unittests/test_top_k_v2_op.py | 32 +++++++++++-------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/paddle/fluid/operators/top_k_function_cuda.h b/paddle/fluid/operators/top_k_function_cuda.h index 57891699fd2ad7..0fd5f2ac01df3f 100644 --- a/paddle/fluid/operators/top_k_function_cuda.h +++ b/paddle/fluid/operators/top_k_function_cuda.h @@ -335,6 +335,7 @@ __global__ void AssignGrad(T* x_grad, const int64_t* indices, const T* out_grad, for (size_t j = 0; j < cols; ++j) { x_grad[i * cols + j] = 0; } + __syncthreads(); for (size_t j = 0; j < k; ++j) { size_t idx = indices[i * k + j]; x_grad[i * cols + idx] = out_grad[i * k + j]; @@ -349,15 +350,16 @@ __global__ void AssignGradWithAxis(const T* grad_out, const int64_t* indices, int raw_height, int k) { // raw_height is the length of topk axis for (int i = blockIdx.x; i < pre; i += gridDim.x) { - const int& base_index = i * post * k; - const int& base_grad = i * post * raw_height; + int base_index = i * post * k; + int base_grad = i * post * raw_height; for (int j = threadIdx.x; j < raw_height * post; j += blockDim.x) { grad_in[base_grad + j] = static_cast(0); } + __syncthreads(); for (int j = threadIdx.x; j < k * post; j += blockDim.x) { - const int64_t idx_ij = indices[base_index + j]; - const int64_t in_ij = base_grad + (idx_ij * post) + (j % post); - grad_in[in_ij] = grad_out[idx_ij]; + int64_t idx_ij = indices[base_index + j]; + int64_t in_ij = base_grad + (idx_ij * post) + (j % post); + grad_in[in_ij] = grad_out[base_index + j]; } } } diff --git a/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py b/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py index b9d96f329b5bb4..94dcf151150ff2 100644 --- a/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py @@ -64,34 +64,38 @@ def test_check_grad(self): class TestTopkOp1(TestTopkOp): - def init_args(self): - self.k = 3 - self.axis = 0 - self.largest = True - - -class TestTopkOp2(TestTopkOp): def init_args(self): self.k = 3 self.axis = 0 self.largest = False -class TestTopkOp3(TestTopkOp): +class TestTopkOp2(TestTopkOp): def init_args(self): self.k = 4 self.axis = 0 self.largest = False -class TestTopkOp4(TestTopkOp): +class TestTopkOp3(OpTest): def init_args(self): - self.k = 4 - self.axis = 0 - self.largest = False + self.k = 6 + self.axis = 1 + self.largest = True + def setUp(self): + self.op_type = "top_k_v2" + self.dtype = np.float64 + self.input_data = np.random.rand(16, 100) + self.init_args() + self.inputs = {'X': self.input_data} + self.attrs = {'k': self.k, 'axis': self.axis, 'largest': self.largest} + output, indices = numpy_topk( + self.input_data, axis=self.axis, k=self.k, largest=self.largest) + self.outputs = {'Out': output, 'Indices': indices} -class TestTopkOp5(TestTopkOp): + +class TestTopkOp4(TestTopkOp): def init_args(self): self.k = 3 self.axis = 1 @@ -109,7 +113,7 @@ def setUp(self): self.outputs = {'Out': output, 'Indices': indices} 
-class TestTopkOp6(TestTopkOp): +class TestTopkOp5(TestTopkOp): def init_args(self): self.k = 3 self.axis = 1 From b2f7ab6636d5e7fcc3bfd655c071416f190ed619 Mon Sep 17 00:00:00 2001 From: lilong12 Date: Mon, 16 Nov 2020 20:58:29 +0800 Subject: [PATCH 23/56] bug fix, test=develop (#28648) --- paddle/fluid/operators/collective/recv_v2_op.cu.cc | 2 +- paddle/fluid/operators/collective/send_v2_op.cu.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/operators/collective/recv_v2_op.cu.cc b/paddle/fluid/operators/collective/recv_v2_op.cu.cc index f0dd8aee23588c..892056f21359dd 100644 --- a/paddle/fluid/operators/collective/recv_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/recv_v2_op.cu.cc @@ -26,6 +26,7 @@ template class RecvOpV2CUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { +#if defined(PADDLE_WITH_NCCL) && NCCL_VERSION_CODE >= 2703 int rid = ctx.Attr("ring_id"); PADDLE_ENFORCE_GE( rid, 0, @@ -44,7 +45,6 @@ class RecvOpV2CUDAKernel : public framework::OpKernel { framework::proto::VarType::Type type = framework::proto::VarType::Type(data_type); -#if defined(PADDLE_WITH_NCCL) && NCCL_VERSION_CODE >= 2703 cudaStream_t stream = nullptr; auto place = ctx.GetPlace(); auto comm = platform::NCCLCommContext::Instance().Get(rid, place); diff --git a/paddle/fluid/operators/collective/send_v2_op.cu.cc b/paddle/fluid/operators/collective/send_v2_op.cu.cc index 9f925b2eede027..4de3f47ccc66b3 100644 --- a/paddle/fluid/operators/collective/send_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/send_v2_op.cu.cc @@ -26,6 +26,7 @@ template class SendOpV2CUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { +#if defined(PADDLE_WITH_NCCL) && NCCL_VERSION_CODE >= 2703 auto x = ctx.Input("X"); int numel = x->numel(); @@ -42,7 +43,6 @@ class SendOpV2CUDAKernel : public framework::OpKernel { "The peer (%d) for send_v2 op must be non-negative.", peer)); cudaStream_t stream = nullptr; auto place = ctx.GetPlace(); -#if defined(PADDLE_WITH_NCCL) && NCCL_VERSION_CODE >= 2703 auto comm = platform::NCCLCommContext::Instance().Get(rid, place); if (ctx.Attr("use_calc_stream")) { auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); From d1e84f3e9e46f0776653d014faf318319c56679c Mon Sep 17 00:00:00 2001 From: cc <52520497+juncaipeng@users.noreply.github.com> Date: Mon, 16 Nov 2020 21:18:37 +0800 Subject: [PATCH 24/56] Add some ops for cacluating output scale, test=develop (#28644) --- .../fluid/contrib/slim/quantization/quantization_pass.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index eba881a2637aec..68bf9ecd80be4c 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -69,6 +69,10 @@ "hard_swish", "hard_sigmoid", "conv2d_transpose", + "gru", + "bilinear_interp", + "nearest_interp", + "trilinear_interp", ] # list op real input and output names, to avoid processing input such as AxisTensor. 
@@ -114,6 +118,7 @@ "scale": [["X"], ["Out"]], "hard_swish": [["X"], ["Out"]], "hard_sigmoid": [["X"], ["Out"]], + "gru": [["Input", "Weight"], ["Hidden"]], } _conv_ops = ['conv2d', 'depthwise_conv2d', 'conv2d_transpose'] From 361a53930f9162bb79af4f0d985350b44e84c762 Mon Sep 17 00:00:00 2001 From: Zhou Wei <52485244+zhouwei25@users.noreply.github.com> Date: Mon, 16 Nov 2020 22:26:00 +0800 Subject: [PATCH 25/56] fix doc of save/load (#28645) --- python/paddle/framework/io.py | 2 -- python/paddle/optimizer/lr.py | 6 ++++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/paddle/framework/io.py b/python/paddle/framework/io.py index 945c8160b47fbd..d794fce5e378dd 100644 --- a/python/paddle/framework/io.py +++ b/python/paddle/framework/io.py @@ -228,7 +228,6 @@ def save(obj, path): emb = paddle.nn.Embedding(10, 10) layer_state_dict = emb.state_dict() paddle.save(layer_state_dict, "emb.pdparams") - scheduler = paddle.optimizer.lr.NoamDecay( d_model=0.01, warmup_steps=100, verbose=True) adam = paddle.optimizer.Adam( @@ -320,7 +319,6 @@ def load(path, **configs): emb = paddle.nn.Embedding(10, 10) layer_state_dict = emb.state_dict() paddle.save(layer_state_dict, "emb.pdparams") - scheduler = paddle.optimizer.lr.NoamDecay( d_model=0.01, warmup_steps=100, verbose=True) adam = paddle.optimizer.Adam( diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index e4fb54c229f212..2d5dc5d998e638 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -1434,7 +1434,8 @@ class CosineAnnealingDecay(LRScheduler): loss.backward() sgd.step() sgd.clear_gradients() - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch # train on static graph mode paddle.enable_static() @@ -1460,7 +1461,8 @@ class CosineAnnealingDecay(LRScheduler): 'y': np.random.randn(3, 4, 5).astype('float32') }, fetch_list=loss.name) - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch """ def __init__(self, From a083c76ab406093a5b6ffa9befd3c7b04c991b23 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Mon, 16 Nov 2020 22:26:44 +0800 Subject: [PATCH 26/56] adjust signal failed wait time (#28640) --- .../fluid/tests/unittests/test_imperative_signal_handler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py b/python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py index d783a2cc752d2a..775bf7941aaff9 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py @@ -49,7 +49,7 @@ def __test_process__(): test_process.start() set_child_signal_handler(id(self), test_process.pid) - time.sleep(5) + time.sleep(10) except SystemError as ex: self.assertIn("Fatal", cpt.get_exception_message(ex)) exception = ex @@ -67,7 +67,7 @@ def __test_process__(): test_process.start() set_child_signal_handler(id(self), test_process.pid) - time.sleep(3) + time.sleep(10) except SystemError as ex: self.assertIn("Segmentation fault", cpt.get_exception_message(ex)) exception = ex @@ -85,7 +85,7 @@ def __test_process__(): test_process.start() set_child_signal_handler(id(self), test_process.pid) - time.sleep(3) + time.sleep(10) except SystemError as ex: self.assertIn("Bus error", cpt.get_exception_message(ex)) exception = ex From 
2cd10fc4657da0f29e4e77b6df1b25ec4707a3b2 Mon Sep 17 00:00:00 2001 From: zhupengyang Date: Tue, 17 Nov 2020 09:38:13 +0800 Subject: [PATCH 27/56] fix 2.0 api docs (#28445) --- python/paddle/fluid/layers/nn.py | 40 +++----- python/paddle/nn/functional/activation.py | 45 ++++----- python/paddle/nn/layer/activation.py | 39 +++---- python/paddle/tensor/creation.py | 54 ++++------ python/paddle/tensor/random.py | 118 ++++++++++++++-------- python/paddle/tensor/stat.py | 28 ++--- 6 files changed, 154 insertions(+), 170 deletions(-) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 3ac43df872e377..2feca60430dc04 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -9730,15 +9730,13 @@ def swish(x, beta=1.0, name=None): return out -@deprecated(since="2.0.0", update_to="paddle.nn.functional.prelu") +@deprecated(since="2.0.0", update_to="paddle.static.nn.prelu") def prelu(x, mode, param_attr=None, name=None): """ - :api_attr: Static Graph - - Equation: + prelu activation. .. math:: - y = \max(0, x) + \\alpha * \min(0, x) + prelu(x) = max(0, x) + \\alpha * min(0, x) There are three modes for the activation: @@ -9748,34 +9746,28 @@ def prelu(x, mode, param_attr=None, name=None): channel: Elements in same channel share same alpha. element: All elements do not share alpha. Each element has its own alpha. - Args: - x (Variable): The input Tensor or LoDTensor with data type float32. + Parameters: + x (Tensor): The input Tensor or LoDTensor with data type float32. mode (str): The mode for weight sharing. - param_attr(ParamAttr|None): The parameter attribute for the learnable - weight (alpha), it can be create by ParamAttr. None by default. - For detailed information, please refer to :ref:`api_fluid_ParamAttr`. - name(str|None): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. + param_attr (ParamAttr|None, optional): The parameter attribute for the learnable + weight (alpha), it can be create by ParamAttr. None by default. + For detailed information, please refer to :ref:`api_fluid_ParamAttr`. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. Returns: - Variable: - - output(Variable): The tensor or LoDTensor with the same shape as input. - The data type is float32. + Tensor: A tensor with the same shape and data type as x. Examples: .. code-block:: python - import paddle.fluid as fluid import paddle - paddle.enable_static() - from paddle.fluid.param_attr import ParamAttr - x = fluid.data(name="x", shape=[None,5,10,10], dtype="float32") - mode = 'channel' - output = fluid.layers.prelu( - x,mode,param_attr=ParamAttr(name='alpha')) + + x = paddle.to_tensor([-1., 2., 3.]) + param = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0.2)) + out = paddle.static.nn.prelu(x, 'all', param) + # [-0.2, 2., 3.] """ check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'prelu') diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py index fd86c2e9fa760d..e7adc7106a4f09 100644 --- a/python/paddle/nn/functional/activation.py +++ b/python/paddle/nn/functional/activation.py @@ -79,9 +79,8 @@ def elu(x, alpha=1.0, name=None): import paddle import paddle.nn.functional as F - import numpy as np - x = paddle.to_tensor(np.array([[-1,6],[1,15.6]])) + x = paddle.to_tensor([[-1., 6.], [1., 15.6]]) out = F.elu(x, alpha=0.2) # [[-0.12642411 6. ] # [ 1. 
15.6 ]] @@ -131,11 +130,14 @@ def gelu(x, approximate=False, name=None): import paddle import paddle.nn.functional as F - import numpy as np - x = paddle.to_tensor(np.array([[-1, 0.5],[1, 1.5]])) - out1 = F.gelu(x) # [-0.158655 0.345731 0.841345 1.39979] - out2 = F.gelu(x, True) # [-0.158808 0.345714 0.841192 1.39957] + x = paddle.to_tensor([[-1, 0.5], [1, 1.5]]) + out1 = F.gelu(x) + # [[-0.15865529, 0.34573123], + # [ 0.84134471, 1.39978933]] + out2 = F.gelu(x, True) + # [[-0.15880799, 0.34571400], + # [ 0.84119201, 1.39957154]] """ if in_dygraph_mode(): @@ -181,11 +183,8 @@ def hardshrink(x, threshold=0.5, name=None): import paddle import paddle.nn.functional as F - import numpy as np - - paddle.disable_static() - x = paddle.to_tensor(np.array([-1, 0.3, 2.5])) + x = paddle.to_tensor([-1, 0.3, 2.5]) out = F.hardshrink(x) # [-1., 0., 2.5] """ @@ -385,11 +384,8 @@ def leaky_relu(x, negative_slope=0.01, name=None): import paddle import paddle.nn.functional as F - import numpy as np - - paddle.disable_static() - x = paddle.to_tensor(np.array([-2, 0, 1], 'float32')) + x = paddle.to_tensor([-2., 0., 1.]) out = F.leaky_relu(x) # [-0.02, 0., 1.] """ @@ -1147,8 +1143,10 @@ def log_softmax(x, axis=-1, dtype=None, name=None): .. math:: - log\\_softmax[i, j] = log(softmax(x)) - = log(\\frac{\exp(X[i, j])}{\\sum_j(exp(X[i, j])}) + \\begin{aligned} + log\\_softmax[i, j] &= log(softmax(x)) \\\\ + &= log(\\frac{\\exp(X[i, j])}{\\sum_j(\\exp(X[i, j])}) + \\end{aligned} Parameters: x (Tensor): The input Tensor with data type float32, float64. @@ -1174,16 +1172,13 @@ def log_softmax(x, axis=-1, dtype=None, name=None): import paddle import paddle.nn.functional as F - import numpy as np - - paddle.disable_static() - x = np.array([[[-2.0, 3.0, -4.0, 5.0], - [3.0, -4.0, 5.0, -6.0], - [-7.0, -8.0, 8.0, 9.0]], - [[1.0, -2.0, -3.0, 4.0], - [-5.0, 6.0, 7.0, -8.0], - [6.0, 7.0, 8.0, 9.0]]], 'float32') + x = [[[-2.0, 3.0, -4.0, 5.0], + [3.0, -4.0, 5.0, -6.0], + [-7.0, -8.0, 8.0, 9.0]], + [[1.0, -2.0, -3.0, 4.0], + [-5.0, 6.0, 7.0, -8.0], + [6.0, 7.0, 8.0, 9.0]]] x = paddle.to_tensor(x) out1 = F.log_softmax(x) out2 = F.log_softmax(x, dtype='float64') diff --git a/python/paddle/nn/layer/activation.py b/python/paddle/nn/layer/activation.py index 32979bae34d803..520762107db07e 100644 --- a/python/paddle/nn/layer/activation.py +++ b/python/paddle/nn/layer/activation.py @@ -70,9 +70,8 @@ class ELU(layers.Layer): .. code-block:: python import paddle - import numpy as np - x = paddle.to_tensor(np.array([[-1,6],[1,15.6]])) + x = paddle.to_tensor([[-1. ,6.], [1., 15.6]]) m = paddle.nn.ELU(0.2) out = m(x) # [[-0.12642411 6. ] @@ -166,11 +165,8 @@ class Hardshrink(layers.Layer): .. code-block:: python import paddle - import numpy as np - paddle.disable_static() - - x = paddle.to_tensor(np.array([-1, 0.3, 2.5])) + x = paddle.to_tensor([-1, 0.3, 2.5]) m = paddle.nn.Hardshrink() out = m(x) # [-1., 0., 2.5] """ @@ -293,11 +289,10 @@ class Hardtanh(layers.Layer): .. code-block:: python import paddle - import numpy as np - x = paddle.to_tensor(np.array([-1.5, 0.3, 2.5])) + x = paddle.to_tensor([-1.5, 0.3, 2.5]) m = paddle.nn.Hardtanh() - out = m(x) # # [-1., 0.3, 1.] + out = m(x) # [-1., 0.3, 1.] """ def __init__(self, min=-1.0, max=1.0, name=None): @@ -397,9 +392,8 @@ class ReLU(layers.Layer): .. code-block:: python import paddle - import numpy as np - x = paddle.to_tensor(np.array([-2, 0, 1]).astype('float32')) + x = paddle.to_tensor([-2., 0., 1.]) m = paddle.nn.ReLU() out = m(x) # [0., 0., 1.] 
""" @@ -613,7 +607,7 @@ class Hardsigmoid(layers.Layer): import paddle - m = paddle.nn.Sigmoid() + m = paddle.nn.Hardsigmoid() x = paddle.to_tensor([-4., 5., 1.]) out = m(x) # [0., 1, 0.666667] """ @@ -1016,8 +1010,10 @@ class LogSoftmax(layers.Layer): .. math:: - Out[i, j] = log(softmax(x)) - = log(\\frac{\exp(X[i, j])}{\\sum_j(exp(X[i, j])}) + \\begin{aligned} + Out[i, j] &= log(softmax(x)) \\\\ + &= log(\\frac{\\exp(X[i, j])}{\\sum_j(\\exp(X[i, j])}) + \\end{aligned} Parameters: axis (int, optional): The axis along which to perform log_softmax @@ -1035,16 +1031,13 @@ class LogSoftmax(layers.Layer): .. code-block:: python import paddle - import numpy as np - - paddle.disable_static() - x = np.array([[[-2.0, 3.0, -4.0, 5.0], - [3.0, -4.0, 5.0, -6.0], - [-7.0, -8.0, 8.0, 9.0]], - [[1.0, -2.0, -3.0, 4.0], - [-5.0, 6.0, 7.0, -8.0], - [6.0, 7.0, 8.0, 9.0]]]) + x = [[[-2.0, 3.0, -4.0, 5.0], + [3.0, -4.0, 5.0, -6.0], + [-7.0, -8.0, 8.0, 9.0]], + [[1.0, -2.0, -3.0, 4.0], + [-5.0, 6.0, 7.0, -8.0], + [6.0, 7.0, 8.0, 9.0]]] m = paddle.nn.LogSoftmax() x = paddle.to_tensor(x) out = m(x) diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index a69bc64c4cf669..622ae3c584ef04 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -300,9 +300,6 @@ def ones(shape, dtype=None, name=None): def ones_like(x, dtype=None, name=None): """ - :alias_main: paddle.ones_like - :alias: paddle.tensor.ones_like, paddle.tensor.creation.ones_like - This OP returns a Tensor filled with the value 1, with the same shape and data type (use ``dtype`` if ``dtype`` is not None) as ``x``. @@ -323,18 +320,16 @@ def ones_like(x, dtype=None, name=None): Raise: TypeError: If ``dtype`` is not None and is not bool, float16, float32, - float64, int32 or int64. + float64, int32 or int64. Examples: .. code-block:: python import paddle - paddle.disable_static() - x = paddle.to_tensor([1,2,3]) - out1 = paddle.zeros_like(x) # [1., 1., 1.] - out2 = paddle.zeros_like(x, dtype='int32') # [1, 1, 1] + out1 = paddle.ones_like(x) # [1., 1., 1.] + out2 = paddle.ones_like(x, dtype='int32') # [1, 1, 1] """ return full_like(x=x, fill_value=1, dtype=dtype, name=name) @@ -380,9 +375,6 @@ def zeros(shape, dtype=None, name=None): def zeros_like(x, dtype=None, name=None): """ - :alias_main: paddle.zeros_like - :alias: paddle.tensor.zeros_like, paddle.tensor.creation.zeros_like - This OP returns a Tensor filled with the value 0, with the same shape and data type (use ``dtype`` if ``dtype`` is not None) as ``x``. @@ -403,16 +395,14 @@ def zeros_like(x, dtype=None, name=None): Raise: TypeError: If ``dtype`` is not None and is not bool, float16, float32, - float64, int32 or int64. + float64, int32 or int64. Examples: .. code-block:: python import paddle - paddle.disable_static() - - x = paddle.to_tensor([1,2,3]) + x = paddle.to_tensor([1, 2, 3]) out1 = paddle.zeros_like(x) # [0., 0., 0.] out2 = paddle.zeros_like(x, dtype='int32') # [0, 0, 0] @@ -519,9 +509,6 @@ def full(shape, fill_value, dtype=None, name=None): def arange(start=0, end=None, step=1, dtype=None, name=None): """ - :alias_main: paddle.arange - :alias: paddle.tensor.arange, paddle.tensor.creation.arange - This OP returns a 1-D Tensor with spaced values within a given interval. 
Values are generated into the half-open interval [``start``, ``end``) with @@ -552,33 +539,30 @@ def arange(start=0, end=None, step=1, dtype=None, name=None): Returns: Tensor: A 1-D Tensor with values from the interval [``start``, ``end``) - taken with common difference ``step`` beginning from ``start``. Its - data type is set by ``dtype``. + taken with common difference ``step`` beginning from ``start``. Its + data type is set by ``dtype``. Raises: TypeError: If ``dtype`` is not int32, int64, float32, float64. - examples: - + Examples: .. code-block:: python - import paddle - - paddle.disable_static() + import paddle - out1 = paddle.arange(5) - # [0, 1, 2, 3, 4] + out1 = paddle.arange(5) + # [0, 1, 2, 3, 4] - out2 = paddle.arange(3, 9, 2.0) - # [3, 5, 7] + out2 = paddle.arange(3, 9, 2.0) + # [3, 5, 7] - # use 4.999 instead of 5.0 to avoid floating point rounding errors - out3 = paddle.arange(4.999, dtype='float32') - # [0., 1., 2., 3., 4.] + # use 4.999 instead of 5.0 to avoid floating point rounding errors + out3 = paddle.arange(4.999, dtype='float32') + # [0., 1., 2., 3., 4.] - start_var = paddle.to_tensor([3]) - out4 = paddle.arange(start_var, 7) - # [3, 4, 5, 6] + start_var = paddle.to_tensor([3]) + out4 = paddle.arange(start_var, 7) + # [3, 4, 5, 6] """ if dtype is None: diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py index 7e4d3d7bf9279b..934008dc969f16 100644 --- a/python/paddle/tensor/random.py +++ b/python/paddle/tensor/random.py @@ -252,16 +252,14 @@ def standard_normal(shape, dtype=None, name=None): import paddle - paddle.disable_static() - # example 1: attr shape is a list which doesn't contain Tensor. out1 = paddle.standard_normal(shape=[2, 3]) # [[-2.923464 , 0.11934398, -0.51249987], # random # [ 0.39632758, 0.08177969, 0.2692008 ]] # random # example 2: attr shape is a list which contains Tensor. - dim1 = paddle.full([1], 2, "int64") - dim2 = paddle.full([1], 3, "int32") + dim1 = paddle.to_tensor([2], 'int64') + dim2 = paddle.to_tensor([3], 'int32') out2 = paddle.standard_normal(shape=[dim1, dim2, 2]) # [[[-2.8852394 , -0.25898588], # random # [-0.47420555, 0.17683524], # random @@ -272,8 +270,7 @@ def standard_normal(shape, dtype=None, name=None): # example 3: attr shape is a Tensor, the data type must be int64 or int32. shape_tensor = paddle.to_tensor([2, 3]) - result_3 = paddle.standard_normal(shape_tensor) - + out3 = paddle.standard_normal(shape_tensor) # [[-2.878077 , 0.17099959, 0.05111201] # random # [-0.3761474, -1.044801 , 1.1870178 ]] # random @@ -281,7 +278,58 @@ def standard_normal(shape, dtype=None, name=None): return gaussian(shape=shape, mean=0.0, std=1.0, dtype=dtype, name=name) -randn = standard_normal +def randn(shape, dtype=None, name=None): + """ + This OP returns a Tensor filled with random values sampled from a standard + normal distribution with mean 0 and standard deviation 1, with ``shape`` + and ``dtype``. + + Args: + shape (list|tuple|Tensor): The shape of the output Tensor. If ``shape`` + is a list or tuple, the elements of it should be integers or Tensors + (with the shape [1], and the data type int32 or int64). If ``shape`` + is a Tensor, it should be a 1-D Tensor(with the data type int32 or + int64). + dtype (str|np.dtype, optional): The data type of the output Tensor. + Supported data types: float32, float64. + Default is None, use global default dtype (see ``get_default_dtype`` + for details). + name (str, optional): Name for the operation (optional, default is None). 
+ For more information, please refer to :ref:`api_guide_Name`. + + Returns: + Tensor: A Tensor filled with random values sampled from a standard + normal distribution with mean 0 and standard deviation 1, with + ``shape`` and ``dtype``. + + Examples: + .. code-block:: python + + import paddle + + # example 1: attr shape is a list which doesn't contain Tensor. + out1 = paddle.randn(shape=[2, 3]) + # [[-2.923464 , 0.11934398, -0.51249987], # random + # [ 0.39632758, 0.08177969, 0.2692008 ]] # random + + # example 2: attr shape is a list which contains Tensor. + dim1 = paddle.to_tensor([2], 'int64') + dim2 = paddle.to_tensor([3], 'int32') + out2 = paddle.randn(shape=[dim1, dim2, 2]) + # [[[-2.8852394 , -0.25898588], # random + # [-0.47420555, 0.17683524], # random + # [-0.7989969 , 0.00754541]], # random + # [[ 0.85201347, 0.32320443], # random + # [ 1.1399018 , 0.48336947], # random + # [ 0.8086993 , 0.6868893 ]]] # random + + # example 3: attr shape is a Tensor, the data type must be int64 or int32. + shape_tensor = paddle.to_tensor([2, 3]) + out3 = paddle.randn(shape_tensor) + # [[-2.878077 , 0.17099959, 0.05111201] # random + # [-0.3761474, -1.044801 , 1.1870178 ]] # random + """ + return standard_normal(shape, dtype, name) def normal(mean=0.0, std=1.0, shape=None, name=None): @@ -322,8 +370,6 @@ def normal(mean=0.0, std=1.0, shape=None, name=None): import paddle - paddle.disable_static() - out1 = paddle.normal(shape=[2, 3]) # [[ 0.17501129 0.32364586 1.561118 ] # random # [-1.7232178 1.1545963 -0.76156676]] # random @@ -381,7 +427,7 @@ def uniform(shape, dtype=None, min=-1.0, max=1.0, seed=0, name=None): Examples: - :: + .. code-block:: text Input: shape = [1, 2] @@ -423,33 +469,27 @@ def uniform(shape, dtype=None, min=-1.0, max=1.0, seed=0, name=None): import paddle - paddle.disable_static() - # example 1: # attr shape is a list which doesn't contain Tensor. - result_1 = paddle.tensor.random.uniform(shape=[3, 4]) - # [[ 0.84524226, 0.6921872, 0.56528175, 0.71690357], - # [-0.34646994, -0.45116323, -0.09902662, -0.11397249], - # [ 0.433519, 0.39483607, -0.8660099, 0.83664286]] + out1 = paddle.uniform(shape=[3, 4]) + # [[ 0.84524226, 0.6921872, 0.56528175, 0.71690357], # random + # [-0.34646994, -0.45116323, -0.09902662, -0.11397249], # random + # [ 0.433519, 0.39483607, -0.8660099, 0.83664286]] # random # example 2: # attr shape is a list which contains Tensor. - dim_1 = paddle.full([1], 2, "int64") - dim_2 = paddle.full([1], 3, "int32") - result_2 = paddle.tensor.random.uniform(shape=[dim_1, dim_2]) - # [[-0.9951253, 0.30757582, 0.9899647 ], - # [ 0.5864527, 0.6607096, -0.8886161 ]] + dim1 = paddle.to_tensor([2], 'int64') + dim2 = paddle.to_tensor([3], 'int32') + out2 = paddle.uniform(shape=[dim1, dim2]) + # [[-0.9951253, 0.30757582, 0.9899647 ], # random + # [ 0.5864527, 0.6607096, -0.8886161]] # random # example 3: # attr shape is a Tensor, the data type must be int64 or int32. 
shape_tensor = paddle.to_tensor([2, 3]) - result_3 = paddle.tensor.random.uniform(shape_tensor) - # if shape_tensor's value is [2, 3] - # result_3 is: - # [[-0.8517412, -0.4006908, 0.2551912 ], - # [ 0.3364414, 0.36278176, -0.16085452]] - - + out3 = paddle.uniform(shape_tensor) + # [[-0.8517412, -0.4006908, 0.2551912 ], # random + # [ 0.3364414, 0.36278176, -0.16085452]] # random """ if dtype is None: dtype = paddle.framework.get_default_dtype() @@ -517,8 +557,6 @@ def randint(low=0, high=None, shape=[1], dtype=None, name=None): import paddle - paddle.disable_static() - # example 1: # attr shape is a list which doesn't contain Tensor. out1 = paddle.randint(low=-5, high=5, shape=[3]) @@ -526,18 +564,16 @@ def randint(low=0, high=None, shape=[1], dtype=None, name=None): # example 2: # attr shape is a list which contains Tensor. - dim1 = paddle.full([1], 2, "int64") - dim2 = paddle.full([1], 3, "int32") - out2 = paddle.randint(low=-5, high=5, shape=[dim1, dim2], dtype="int32") + dim1 = paddle.to_tensor([2], 'int64') + dim2 = paddle.to_tensor([3], 'int32') + out2 = paddle.randint(low=-5, high=5, shape=[dim1, dim2]) # [[0, -1, -3], # random # [4, -2, 0]] # random # example 3: # attr shape is a Tensor - shape_tensor = paddle.to_tensor(3) - result_3 = paddle.randint(low=-5, high=5, shape=shape_tensor) - + out3 = paddle.randint(low=-5, high=5, shape=shape_tensor) # [-2, 2, 3] # random # example 4: @@ -611,8 +647,6 @@ def randperm(n, dtype="int64", name=None): import paddle - paddle.disable_static() - out1 = paddle.randperm(5) # [4, 1, 2, 3, 0] # random @@ -668,15 +702,14 @@ def rand(shape, dtype=None, name=None): import paddle - paddle.disable_static() # example 1: attr shape is a list which doesn't contain Tensor. out1 = paddle.rand(shape=[2, 3]) # [[0.451152 , 0.55825245, 0.403311 ], # random # [0.22550228, 0.22106001, 0.7877319 ]] # random # example 2: attr shape is a list which contains Tensor. - dim1 = paddle.full([1], 2, "int64") - dim2 = paddle.full([1], 3, "int32") + dim1 = paddle.to_tensor([2], 'int64') + dim2 = paddle.to_tensor([3], 'int32') out2 = paddle.rand(shape=[dim1, dim2, 2]) # [[[0.8879919 , 0.25788337], # random # [0.28826773, 0.9712097 ], # random @@ -687,8 +720,7 @@ def rand(shape, dtype=None, name=None): # example 3: attr shape is a Tensor, the data type must be int64 or int32. shape_tensor = paddle.to_tensor([2, 3]) - result_3 = paddle.rand(shape_tensor) - + out3 = paddle.rand(shape_tensor) # [[0.22920267, 0.841956 , 0.05981819], # random # [0.4836288 , 0.24573246, 0.7516129 ]] # random diff --git a/python/paddle/tensor/stat.py b/python/paddle/tensor/stat.py index 3873d893bd7c34..9e565d4e5223cd 100644 --- a/python/paddle/tensor/stat.py +++ b/python/paddle/tensor/stat.py @@ -56,17 +56,13 @@ def mean(x, axis=None, keepdim=False, name=None): .. code-block:: python import paddle - import numpy as np - paddle.disable_static() - - x = np.array([[[1, 2, 3, 4], - [5, 6, 7, 8], - [9, 10, 11, 12]], - [[13, 14, 15, 16], - [17, 18, 19, 20], - [21, 22, 23, 24]]], 'float32') - x = paddle.to_tensor(x) + x = paddle.to_tensor([[[1., 2., 3., 4.], + [5., 6., 7., 8.], + [9., 10., 11., 12.]], + [[13., 14., 15., 16.], + [17., 18., 19., 20.], + [21., 22., 23., 24.]]]) out1 = paddle.mean(x) # [12.5] out2 = paddle.mean(x, axis=-1) @@ -145,12 +141,8 @@ def var(x, axis=None, unbiased=True, keepdim=False, name=None): .. 
code-block:: python import paddle - import numpy as np - - paddle.disable_static() - x = np.array([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]]) - x = paddle.to_tensor(x) + x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]]) out1 = paddle.var(x) # [2.66666667] out2 = paddle.var(x, axis=1) @@ -208,12 +200,8 @@ def std(x, axis=None, unbiased=True, keepdim=False, name=None): .. code-block:: python import paddle - import numpy as np - - paddle.disable_static() - x = np.array([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]]) - x = paddle.to_tensor(x) + x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]]) out1 = paddle.std(x) # [1.63299316] out2 = paddle.std(x, axis=1) From 65aac81191c00dcbe79cd191f595be942a8bb749 Mon Sep 17 00:00:00 2001 From: cc <52520497+juncaipeng@users.noreply.github.com> Date: Tue, 17 Nov 2020 10:07:30 +0800 Subject: [PATCH 28/56] Fix fake_quant error when cout > 1024, test=develop (#28603) --- paddle/fluid/operators/fake_dequantize_op.cu | 16 ++++----- paddle/fluid/operators/fake_quantize_op.cu | 34 ++++++++++++++------ 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/paddle/fluid/operators/fake_dequantize_op.cu b/paddle/fluid/operators/fake_dequantize_op.cu index 54a92b055a39d4..a89c430c7ab24e 100644 --- a/paddle/fluid/operators/fake_dequantize_op.cu +++ b/paddle/fluid/operators/fake_dequantize_op.cu @@ -62,14 +62,14 @@ __global__ void DequantizeOneScaleQuantAxis1(const T* in, const T* scale, T max_range, const int num, const int cin, const int cout, T* out) { - int cout_wh_size = num / cin; - int wh_size = cout_wh_size / cout; + int bid = blockIdx.x; + T s = scale[bid % cout]; - T s = scale[blockIdx.x]; - const T* in_current = in + threadIdx.x * cout_wh_size + blockIdx.x * wh_size; - T* out_current = out + threadIdx.x * cout_wh_size + blockIdx.x * wh_size; + int wh_size = num / (cin * cout); + const T* in_current = in + bid * wh_size; + T* out_current = out + bid * wh_size; - for (int i = 0; i < wh_size; i++) { + for (int i = threadIdx.x; i < wh_size; i += blockDim.x) { out_current[i] = in_current[i] * s / max_range; } } @@ -107,8 +107,8 @@ struct ChannelDequantizeFunctor { in_data, scale_factor, max_range, num, in_dims[0], out_data); } else if (quant_axis == 1) { // Dequantize weight of Cin * Cout * W * H - int grid = in_dims[1]; - int block = in_dims[0]; + int grid = in_dims[0] * in_dims[1]; + int block = 1024; DequantizeOneScaleQuantAxis1<<>>( in_data, scale_factor, max_range, num, in_dims[0], in_dims[1], out_data); diff --git a/paddle/fluid/operators/fake_quantize_op.cu b/paddle/fluid/operators/fake_quantize_op.cu index 8bc14dde863682..26dcf8bf39cf28 100644 --- a/paddle/fluid/operators/fake_quantize_op.cu +++ b/paddle/fluid/operators/fake_quantize_op.cu @@ -131,7 +131,7 @@ __global__ void FindChannelAbsMaxKernelQuantAxis1(const T* in, const int n, } __syncthreads(); } - if (tid == 0) { + if (tid == 0 && shared_max_data[0] > out[bid]) { out[bid] = shared_max_data[0]; } } @@ -148,20 +148,36 @@ struct FindChannelAbsMaxFunctor { quant_axis)); const int num = in_tensor.numel(); auto in_dims = in_tensor.dims(); - int channel = in_dims[quant_axis]; const T* in_data = in_tensor.data(); if (quant_axis == 0) { - int grid = channel; + int cout = in_dims[0]; + int grid = cout; int block = 1024; FindChannelAbsMaxKernelQuantAxis0< T><<>>( - in_data, num, channel, out_abs_max); + in_data, num, cout, out_abs_max); } else if (quant_axis == 1) { - int grid = in_dims[1]; - int block = in_dims[0]; - FindChannelAbsMaxKernelQuantAxis1< - T><<>>( - in_data, num, in_dims[0], in_dims[1], 
out_abs_max); + int cin = in_dims[0]; + int cout = in_dims[1]; + int grid = cout; + int max_threads = 1024; + + cudaMemset(out_abs_max, 0, sizeof(T) * cout); + + for (int i = 0; i < cin / max_threads; i++) { + int block = max_threads; + FindChannelAbsMaxKernelQuantAxis1< + T><<>>( + in_data, num, cin, cout, out_abs_max); + in_data += num / cin; + } + + int block = cin % max_threads; + if (block > 0) { + FindChannelAbsMaxKernelQuantAxis1< + T><<>>( + in_data, num, in_dims[0], in_dims[1], out_abs_max); + } } } }; From 68ee7f731250664077be675aac49b358ee93ed78 Mon Sep 17 00:00:00 2001 From: wangchaochaohu Date: Tue, 17 Nov 2020 10:15:04 +0800 Subject: [PATCH 29/56] fix overwrite for gather OP of API2.0(#28659) --- python/paddle/tensor/manipulation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index adad9cfdc26671..060f9a1a919041 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -786,7 +786,7 @@ def gather(x, index, axis=None, name=None): axis = 0 axis_tensor = axis if not isinstance(axis, Variable) and axis == 0: - return paddle.fluid.layers.gather(input=x, index=index, overwrite=True) + return paddle.fluid.layers.gather(input=x, index=index, overwrite=False) if not isinstance(axis, Variable): with device_guard("cpu"): axis_tensor = fill_constant( From 57dab959ca53a42ad1bccd8fd0344f32e2074a13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=9F=B3=E6=99=93=E4=BC=9F?= <39303645+Shixiaowei02@users.noreply.github.com> Date: Tue, 17 Nov 2020 10:30:49 +0800 Subject: [PATCH 30/56] add datanorm op new scale_w register (#28657) Co-authored-by: yaoxuefeng6 --- paddle/fluid/operators/data_norm_op.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/paddle/fluid/operators/data_norm_op.cc b/paddle/fluid/operators/data_norm_op.cc index 5df2bbdf95144d..45e77a99e6b3eb 100644 --- a/paddle/fluid/operators/data_norm_op.cc +++ b/paddle/fluid/operators/data_norm_op.cc @@ -19,6 +19,7 @@ limitations under the License. 
*/ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif +#include "paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace operators { @@ -755,3 +756,10 @@ REGISTER_OP_CPU_KERNEL( data_norm_grad, ops::DataNormGradKernel, ops::DataNormGradKernel); +REGISTER_OP_VERSION(data_norm) + .AddCheckpoint( + R"ROC( + upgrad data_norm op by adding scale_w to support scale and shift.)ROC", + paddle::framework::compatible::OpVersionDesc().NewInput( + "scale_w", + "scale_w is used to do scale duirng data_norm like batchnorm ")); From 8040fa2bca72224c66ba6700dcc7e8ae79ea0554 Mon Sep 17 00:00:00 2001 From: Aurelius84 Date: Tue, 17 Nov 2020 11:43:29 +0800 Subject: [PATCH 31/56] Fix output dtype inconsistent with input (#28649) * fix output dtyp inconsistent with input * refine code --- python/paddle/fluid/tests/unittests/test_gather_op.py | 9 +++++++++ python/paddle/tensor/manipulation.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_gather_op.py b/python/paddle/fluid/tests/unittests/test_gather_op.py index 2e4b52c282d567..946027a22f8838 100644 --- a/python/paddle/fluid/tests/unittests/test_gather_op.py +++ b/python/paddle/fluid/tests/unittests/test_gather_op.py @@ -19,6 +19,7 @@ from op_test import OpTest import paddle import paddle.fluid as fluid +from paddle.framework import core def gather_numpy(x, index, axis): @@ -298,5 +299,13 @@ def test_index_type(): self.assertRaises(TypeError, test_index_type) +class TestCheckOutType(unittest.TestCase): + def test_out_type(self): + data = paddle.static.data(shape=[16, 10], dtype='int64', name='x') + index = paddle.static.data(shape=[4], dtype='int64', name='index') + out = paddle.gather(data, index) + self.assertTrue(out.dtype == core.VarDesc.VarType.INT64) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 060f9a1a919041..bdda90315ac9c7 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -804,7 +804,7 @@ def gather(x, index, axis=None, name=None): check_type(axis, 'axis', (int), 'gather') helper = LayerHelper('gather', **locals()) - dtype = helper.input_dtype() + dtype = helper.input_dtype('x') out = helper.create_variable_for_type_inference(dtype) helper.append_op( type="gather", From d71c3463b04c345520be6e14736c674c030d2d06 Mon Sep 17 00:00:00 2001 From: Double_V Date: Tue, 17 Nov 2020 13:20:30 +0800 Subject: [PATCH 32/56] fix pool exclusive and delete disable_static (#28655) * fix pool exclusive and delete disable_static, test=develop * fix pool1d exclusive, test=develop --- python/paddle/nn/functional/pooling.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index 40166f4d36e94e..829056f5767d7c 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -200,7 +200,6 @@ def avg_pool1d(x, .. 
code-block:: python import paddle import paddle.nn.functional as F - paddle.disable_static() data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) out = F.avg_pool1d(data, kernel_size=2, stride=2, padding=0) # out shape: [1, 3, 16] @@ -253,7 +252,7 @@ def avg_pool1d(x, "use_cudnn": True, "ceil_mode": ceil_mode, "use_mkldnn": False, - "exclusive": not exclusive, + "exclusive": exclusive, "data_format": data_format, }) @@ -314,7 +313,6 @@ def avg_pool2d(x, import paddle import paddle.nn.functional as F import numpy as np - paddle.disable_static() # avg pool2d x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) out = F.avg_pool2d(x, @@ -365,7 +363,7 @@ def avg_pool2d(x, "use_cudnn": True, "ceil_mode": ceil_mode, "use_mkldnn": False, - "exclusive": not exclusive, + "exclusive": exclusive, "data_format": data_format, }) @@ -481,7 +479,7 @@ def avg_pool3d(x, "use_cudnn": True, "ceil_mode": ceil_mode, "use_mkldnn": False, - "exclusive": not exclusive, + "exclusive": exclusive, "data_format": data_format, }) @@ -538,7 +536,6 @@ def max_pool1d(x, .. code-block:: python import paddle import paddle.nn.functional as F - paddle.disable_static() data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding=0) # pool_out shape: [1, 3, 16] @@ -661,7 +658,6 @@ def max_pool2d(x, import paddle import paddle.nn.functional as F import numpy as np - paddle.disable_static() # max pool2d x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) out = F.max_pool2d(x, @@ -791,7 +787,7 @@ def max_pool3d(x, import paddle import paddle.nn.functional as F import numpy as np - paddle.disable_static() + # max pool3d x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32)) output = F.max_pool2d(x, @@ -905,7 +901,7 @@ def adaptive_avg_pool1d(x, output_size, name=None): # import paddle import paddle.nn.functional as F - paddle.disable_static() + data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) pool_out = F.adaptive_average_pool1d(data, output_size=16) # pool_out shape: [1, 3, 16]) @@ -982,7 +978,7 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): # import paddle import numpy as np - paddle.disable_static() + input_data = np.random.rand(2, 3, 32, 32) x = paddle.to_tensor(input_data) # x.shape is [2, 3, 32, 32] @@ -1086,7 +1082,7 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): # avg(input[:, :, dstart:dend, hstart: hend, wstart: wend]) import paddle import numpy as np - paddle.disable_static() + input_data = np.random.rand(2, 3, 8, 32, 32) x = paddle.to_tensor(input_data) # x.shape is [2, 3, 8, 32, 32] @@ -1186,7 +1182,7 @@ def adaptive_max_pool1d(x, output_size, return_mask=False, name=None): # import paddle import paddle.nn.functional as F - paddle.disable_static() + data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) pool_out = F.adaptive_max_pool1d(data, output_size=16) # pool_out shape: [1, 3, 16]) @@ -1266,7 +1262,7 @@ def adaptive_max_pool2d(x, output_size, return_mask=False, name=None): # import paddle import numpy as np - paddle.disable_static() + input_data = np.random.rand(2, 3, 32, 32) x = paddle.to_tensor(input_data) # x.shape is [2, 3, 32, 32] @@ -1356,7 +1352,7 @@ def adaptive_max_pool3d(x, output_size, return_mask=False, name=None): # import paddle import numpy as np - paddle.disable_static() + input_data = 
np.random.rand(2, 3, 8, 32, 32) x = paddle.to_tensor(input_data) # x.shape is [2, 3, 8, 32, 32] From bf143652ac104f3b4ad3a5084bb91c4681a16140 Mon Sep 17 00:00:00 2001 From: Zhou Wei <52485244+zhouwei25@users.noreply.github.com> Date: Tue, 17 Nov 2020 13:38:16 +0800 Subject: [PATCH 33/56] fix lstm OP compile error on windows (#28667) * add unittest and check unittest for windows * fix lstm OP compile error on windows --- CMakeLists.txt | 11 +---------- paddle/fluid/operators/CMakeLists.txt | 3 ++- paddle/scripts/paddle_build.bat | 7 ++++++- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2faa0a2bbbcb3f..12f5b6f8bd8976 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,7 +81,7 @@ if(WIN32) CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}") - set(${flag_var} "${${flag_var}} /MP /bigobj") + set(${flag_var} "${${flag_var}} /MP") endforeach(flag_var) foreach(flag_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS) set(${flag_var} "${${flag_var}} /w") @@ -96,15 +96,6 @@ if(WIN32) endif() endforeach(flag_var) - foreach(flag_var - CMAKE_STATIC_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS - CMAKE_EXE_LINKER_FLAGS) - set(${flag_var} "${${flag_var}} /IGNORE:4006 /IGNORE:4098 /ignore:4049 /IGNORE:4217 /IGNORE:4221") - if(${flag_var} MATCHES "/INCREMENTAL" AND NOT ${flag_var} MATCHES "/INCREMENTAL:NO") - string(REGEX REPLACE "/INCREMENTAL" "/INCREMENTAL:NO" ${flag_var} "${${flag_var}}") - endif() - endforeach(flag_var) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838") else(WIN32) diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index ca80ada7b6ea78..3b9d3e7e9374e6 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -64,7 +64,7 @@ if(WITH_COVERAGE OR WIN32 OR WITH_NV_JETSON) SET(OP_MKL_DEPS ${OP_MKL_DEPS} pyramid_hash_op) endif() -register_operators(EXCLUDES py_func_op warpctc_op dgc_op +register_operators(EXCLUDES py_func_op warpctc_op dgc_op lstm_op sync_batch_norm_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS}) if (WITH_GPU) @@ -79,6 +79,7 @@ if (WITH_GPU) else() op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale) endif() +op_library(lstm_op DEPS ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS} lstm_compute) set(COMMON_OP_DEPS ${OP_HEADER_DEPS}) diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat index 450cb7546fd4c3..ff5562a25096fd 100644 --- a/paddle/scripts/paddle_build.bat +++ b/paddle/scripts/paddle_build.bat @@ -265,7 +265,12 @@ echo Build third_party successfully! 
set build_times=1 :build_paddle echo Build Paddle the %build_times% time: -msbuild /m:%PARALLEL_PROJECT_COUNT% /p:TrackFileAccess=false /p:CLToolExe=clcache.exe /p:CLToolPath=%PYTHON_ROOT%\Scripts /p:Configuration=Release /verbosity:minimal paddle.sln +if "%WITH_GPU%"=="OFF" ( + msbuild /m:%PARALLEL_PROJECT_COUNT% /p:Configuration=Release /verbosity:minimal paddle.sln +) else ( + msbuild /m:%PARALLEL_PROJECT_COUNT% /p:TrackFileAccess=false /p:CLToolExe=clcache.exe /p:CLToolPath=%PYTHON_ROOT%\Scripts /p:Configuration=Release /verbosity:minimal paddle.sln +) + if %ERRORLEVEL% NEQ 0 ( set /a build_times=%build_times%+1 if %build_times% GTR 1 ( From 82f0b5ea5c6627c6d60120c65db945d690c4f450 Mon Sep 17 00:00:00 2001 From: littletomatodonkey <2120160898@bit.edu.cn> Date: Tue, 17 Nov 2020 13:57:32 +0800 Subject: [PATCH 34/56] adapt pad const (#28585) * adapt pad const * fix comment and rm fluid import * rm stdout * fix note --- .../fluid/tests/unittests/test_pad3d_op.py | 31 ++++++++++++++++++- python/paddle/nn/functional/common.py | 6 ++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_pad3d_op.py b/python/paddle/fluid/tests/unittests/test_pad3d_op.py index c29352bb51af68..88d3d80a14c78a 100644 --- a/python/paddle/fluid/tests/unittests/test_pad3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_pad3d_op.py @@ -251,7 +251,9 @@ def _get_numpy_out(self, mode, value=0, data_format="NCDHW"): - if data_format == "NCDHW": + if mode == "constant" and len(pad) == len(input_data.shape) * 2: + pad = np.reshape(pad, (-1, 2)).tolist() + elif data_format == "NCDHW": pad = [ (0, 0), (0, 0), @@ -316,6 +318,7 @@ def test_dygraph_1(self): paddle.disable_static() input_shape = (1, 2, 3, 4, 5) pad = [1, 2, 1, 1, 3, 4] + pad_3 = [1, 2, 1, 1, 3, 4, 5, 6, 7, 8] mode = "constant" value = 100 input_data = np.random.rand(*input_shape).astype(np.float32) @@ -323,6 +326,8 @@ def test_dygraph_1(self): input_data, pad, mode, value, data_format="NCDHW") np_out2 = self._get_numpy_out( input_data, pad, mode, value, data_format="NDHWC") + np_out3 = self._get_numpy_out( + input_data, pad_3, mode, value, data_format="NCDHW") tensor_data = paddle.to_tensor(input_data) y1 = F.pad(tensor_data, @@ -335,14 +340,21 @@ def test_dygraph_1(self): mode=mode, value=value, data_format="NDHWC") + y3 = F.pad(tensor_data, + pad=pad_3, + mode=mode, + value=value, + data_format="NCDHW") self.assertTrue(np.allclose(y1.numpy(), np_out1)) self.assertTrue(np.allclose(y2.numpy(), np_out2)) + self.assertTrue(np.allclose(y3.numpy(), np_out3)) def test_dygraph_2(self): paddle.disable_static() input_shape = (2, 3, 4, 5) pad = [1, 1, 3, 4] + pad_3 = [1, 2, 1, 1, 3, 4, 5, 6] mode = "constant" value = 100 input_data = np.random.rand(*input_shape).astype(np.float32) @@ -350,6 +362,8 @@ def test_dygraph_2(self): input_data, pad, mode, value, data_format="NCHW") np_out2 = self._get_numpy_out( input_data, pad, mode, value, data_format="NHWC") + np_out3 = self._get_numpy_out( + input_data, pad_3, mode, value, data_format="NCHW") tensor_data = paddle.to_tensor(input_data) tensor_pad = paddle.to_tensor(pad, dtype="int32") @@ -364,14 +378,21 @@ def test_dygraph_2(self): mode=mode, value=value, data_format="NHWC") + y3 = F.pad(tensor_data, + pad=pad_3, + mode=mode, + value=value, + data_format="NCHW") self.assertTrue(np.allclose(y1.numpy(), np_out1)) self.assertTrue(np.allclose(y2.numpy(), np_out2)) + self.assertTrue(np.allclose(y3.numpy(), np_out3)) def test_dygraph_3(self): paddle.disable_static() 
input_shape = (3, 4, 5) pad = [3, 4] + pad_3 = [3, 4, 5, 6, 7, 8] mode = "constant" value = 100 input_data = np.random.rand(*input_shape).astype(np.float32) @@ -379,6 +400,8 @@ def test_dygraph_3(self): input_data, pad, mode, value, data_format="NCL") np_out2 = self._get_numpy_out( input_data, pad, mode, value, data_format="NLC") + np_out3 = self._get_numpy_out( + input_data, pad_3, mode, value, data_format="NCL") tensor_data = paddle.to_tensor(input_data) tensor_pad = paddle.to_tensor(pad, dtype="int32") @@ -392,9 +415,15 @@ def test_dygraph_3(self): mode=mode, value=value, data_format="NLC") + y3 = F.pad(tensor_data, + pad=pad_3, + mode=mode, + value=value, + data_format="NCL") self.assertTrue(np.allclose(y1.numpy(), np_out1)) self.assertTrue(np.allclose(y2.numpy(), np_out2)) + self.assertTrue(np.allclose(y3.numpy(), np_out3)) class TestPad1dAPI(unittest.TestCase): diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 1cf3599e846b95..5c5e3f37916da1 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -1158,6 +1158,9 @@ def alpha_dropout(x, p=0.5, training=True, name=None): def pad(x, pad, mode='constant', value=0, data_format="NCHW", name=None): """ Pad tensor according to 'pad' and 'mode'. + If mode is 'constant' and length of pad is twice as length of x dimension, + then the padding will be started from the first dimension and moved back onto x + according to 'pad' and 'value'. If mode is 'reflect', pad[0] and pad[1] must be no greater than width-1. The height and depth dimension has the same condition. @@ -1273,6 +1276,9 @@ def pad(x, pad, mode='constant', value=0, data_format="NCHW", name=None): unsqueezed_dim = [] + if mode == "constant" and isinstance(pad, list) and len(pad) == x_dim * 2: + return layers.pad(x, pad, pad_value=value) + if isinstance(pad, Variable): if data_format in ["NCL", "NCHW", "NCDHW"]: data_format = "NCDHW" From 912a5c30b44bd6d254093ee815c0596dbb94e1cd Mon Sep 17 00:00:00 2001 From: wangchaochaohu Date: Tue, 17 Nov 2020 14:36:52 +0800 Subject: [PATCH 35/56] fix the matmul_v2 test for cuda11 (#28635) --- python/paddle/fluid/tests/unittests/test_matmul_v2_op.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py b/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py index 640771df23b726..a6667db6227f98 100644 --- a/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py @@ -148,8 +148,8 @@ class TestMatMuklOp6(TestMatMulV2Op): """ def config(self): - self.x_shape = (1, 2, 100, 1) - self.y_shape = (100, ) + self.x_shape = (1, 2, 102, 1) + self.y_shape = (102, ) self.trans_x = True self.trans_y = False From 80d2024644da4a02387c6542e508b8369d7e2efc Mon Sep 17 00:00:00 2001 From: lilong12 Date: Tue, 17 Nov 2020 16:30:26 +0800 Subject: [PATCH 36/56] bug fix, test=develop (#28674) --- paddle/fluid/platform/dynload/nccl.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/paddle/fluid/platform/dynload/nccl.cc b/paddle/fluid/platform/dynload/nccl.cc index 2c40c48ee08497..cfc98561e87e9c 100644 --- a/paddle/fluid/platform/dynload/nccl.cc +++ b/paddle/fluid/platform/dynload/nccl.cc @@ -25,6 +25,14 @@ void *nccl_dso_handle; NCCL_RAND_ROUTINE_EACH(DEFINE_WRAP); +#if NCCL_VERSION_CODE >= 2212 +NCCL_RAND_ROUTINE_EACH_AFTER_2212(DEFINE_WRAP) +#endif + +#if NCCL_VERSION_CODE >= 2703 +NCCL_RAND_ROUTINE_EACH_AFTER_2703(DEFINE_WRAP) +#endif + } // 
namespace dynload } // namespace platform } // namespace paddle From cdc4e6620d8eb91a98c3aa5b440c369a18752f8f Mon Sep 17 00:00:00 2001 From: LielinJiang <50691816+LielinJiang@users.noreply.github.com> Date: Tue, 17 Nov 2020 17:24:38 +0800 Subject: [PATCH 37/56] fix lenet num classes (#28642) --- python/paddle/vision/models/lenet.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/paddle/vision/models/lenet.py b/python/paddle/vision/models/lenet.py index 119be85db54b90..2fb50fc17b9e9f 100644 --- a/python/paddle/vision/models/lenet.py +++ b/python/paddle/vision/models/lenet.py @@ -49,7 +49,8 @@ def __init__(self, num_classes=10): if num_classes > 0: self.fc = nn.Sequential( - nn.Linear(400, 120), nn.Linear(120, 84), nn.Linear(84, 10)) + nn.Linear(400, 120), + nn.Linear(120, 84), nn.Linear(84, num_classes)) def forward(self, inputs): x = self.features(inputs) From 6d8d3d4c22ba0bbed57912ca831a26e5340d1c92 Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Tue, 17 Nov 2020 11:59:10 +0100 Subject: [PATCH 38/56] [oneDNN] Layer norm bf16 kernel (#28619) --- .../framework/ir/graph_pattern_detector.cc | 4 +- paddle/fluid/operators/layer_norm_op.cc | 35 ++++ .../operators/mkldnn/layer_norm_mkldnn_op.cc | 177 ++++++++++++++++++ paddle/fluid/platform/mkldnn_reuse.h | 6 + .../mkldnn/test_layer_norm_bf16_mkldnn_op.py | 146 +++++++++++++++ .../mkldnn/test_layer_norm_mkldnn_op.py | 151 +++++++++++++++ .../mkldnn/test_sum_bf16_mkldnn_op.py | 2 +- .../tests/unittests/test_layer_norm_op.py | 11 +- tools/static_mode_white_list.py | 2 + 9 files changed, 528 insertions(+), 6 deletions(-) create mode 100644 paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc create mode 100644 python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_bf16_mkldnn_op.py create mode 100644 python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_mkldnn_op.py diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index 5704dd09cf287e..5546a0e372603e 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -2102,8 +2102,8 @@ PDNode *patterns::Bfloat16Placement::operator()( const std::unordered_set &bfloat16_enabled_op_types) { std::unordered_set supported_op_types = std::unordered_set({"concat", "conv2d", "fusion_gru", "gelu", - "reshape2", "softmax", "sum", - "transpose2"}); + "layer_norm", "reshape2", "softmax", + "sum", "transpose2"}); if (!bfloat16_enabled_op_types.empty()) { supported_op_types = bfloat16_enabled_op_types; } diff --git a/paddle/fluid/operators/layer_norm_op.cc b/paddle/fluid/operators/layer_norm_op.cc index 89d8b57505da24..79e3d3b90a93ae 100644 --- a/paddle/fluid/operators/layer_norm_op.cc +++ b/paddle/fluid/operators/layer_norm_op.cc @@ -15,6 +15,10 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/layer_norm_op.h" #include +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif + namespace paddle { namespace operators { @@ -91,6 +95,25 @@ class LayerNormOp : public framework::OperatorWithKernel { ctx->SetOutputDim("Variance", {left}); ctx->ShareLoD("X", "Y"); } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext &ctx) const { + framework::LibraryType library = framework::LibraryType::kPlain; + framework::DataLayout layout = framework::DataLayout::kAnyLayout; + +#ifdef PADDLE_WITH_MKLDNN + if (library == framework::LibraryType::kPlain && + platform::CanMKLDNNBeUsed(ctx)) { + library = framework::LibraryType::kMKLDNN; + layout = framework::DataLayout::kMKLDNN; + } +#endif + + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace(), + layout, library); + } }; class LayerNormOpMaker : public framework::OpProtoAndCheckerMaker { @@ -134,6 +157,18 @@ class LayerNormOpMaker : public framework::OpProtoAndCheckerMaker { "greater than zero. But received [%d].", begin_norm_axis)); }); + AddAttr("use_mkldnn", + "(bool, default false) Only used in mkldnn kernel") + .SetDefault(false); + AddAttr( + "mkldnn_data_type", + "(string, default \"float32\"). Data type of mkldnn kernel") + .SetDefault("float32") + .InEnum({"float32", "bfloat16"}); + AddAttr("is_test", + "(bool, default false) Set to true for inference only, false " + "for training. Some layers may run faster when this is true.") + .SetDefault(false); AddComment(R"DOC( Assume feature vectors exist on dimensions diff --git a/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc new file mode 100644 index 00000000000000..22261e948aa7b6 --- /dev/null +++ b/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc @@ -0,0 +1,177 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/layer_norm_op.h" +#include "paddle/fluid/platform/mkldnn_reuse.h" + +namespace paddle { +namespace operators { + +template +class LayerNormMKLDNNHandler + : public platform::MKLDNNHandlerT { + public: + LayerNormMKLDNNHandler(const std::vector& dims, const float& epsilon, + const dnnl::normalization_flags& flags, + const bool& is_test, const MKLDNNMemoryFormat fmt, + const platform::MKLDNNDeviceContext& dev_ctx, + platform::Place cpu_place, + const std::string& uniq_name) + : platform::MKLDNNHandlerT( + dev_ctx, dev_ctx.GetEngine(), cpu_place, + platform::CreateKey(dims, uniq_name)) { + if (!this->isCached()) { + auto md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType(), fmt); + if (!is_test) { + // TODO(grygielski) Delete forcing stats_md after DNNL 1.2 is introduced + auto stats_md = dnnl::memory::desc( + {begin(dims), end(dims) - 1}, platform::MKLDNNGetDataType(), + platform::MKLDNNFormatForSize(dims.size() - 1, + MKLDNNMemoryFormat::nchw)); + this->AcquireForwardPrimitiveDescriptor( + dnnl::prop_kind::forward_training, md, stats_md, epsilon, flags); + } else { + this->AcquireForwardPrimitiveDescriptor( + dnnl::prop_kind::forward_inference, md, epsilon, flags); + } + } + } + + std::shared_ptr AcquireScaleShiftMemory() { + return this->AcquireMemoryFromPrimitive("@scaleshift_mem_p"); + } + + std::shared_ptr AcquireScaleShiftMemory( + std::vector& scaleshift_data) { + // scaleshift_data comes from temporary buffer so we need to copy it into + // created memory primitivie + auto scaleshift_mem = this->AcquireMemoryFromPrimitive( + this->fwd_pd_->weights_desc(), "@scaleshift_mem_p"); + auto data_ptr = scaleshift_mem->get_data_handle(); + std::size_t num_bytes = scaleshift_data.size() * sizeof(float); + std::memcpy(data_ptr, scaleshift_data.data(), num_bytes); + return scaleshift_mem; + } + + std::shared_ptr AcquireMeanMemory(framework::Tensor* mean) { + T* mean_data = mean->mutable_data(this->place_, + this->fwd_pd_->mean_desc().get_size()); + return this->AcquireMemoryFromPrimitive(this->fwd_pd_->mean_desc(), + mean_data, "@mean_mem_p"); + } + + std::shared_ptr AcquireVarianceMemory( + framework::Tensor* variance) { + T* variance_data = variance->mutable_data( + this->place_, this->fwd_pd_->variance_desc().get_size()); + return this->AcquireMemoryFromPrimitive(this->fwd_pd_->variance_desc(), + variance_data, "@variance_mem_p"); + } +}; + +template +class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); + auto* y = ctx.Output("Y"); + + const float epsilon = ctx.Attr("epsilon"); + const auto begin_norm_axis = ctx.Attr("begin_norm_axis"); + const bool is_test = ctx.Attr("is_test"); + + auto& dev_ctx = + ctx.template device_context(); + + auto src_tz = paddle::framework::vectorize(x->dims()); + PADDLE_ENFORCE_EQ(begin_norm_axis, (src_tz.size() - 1), + platform::errors::InvalidArgument( + "MKL-DNN Layer Norm supports only last logical " + "axis:%d as begin_norm_axis.", + (src_tz.size() - 1))); + + y->mutable_data(ctx.GetPlace()); + const bool with_scaleshift = (scale && bias); + dnnl::normalization_flags flags{}; + + if (with_scaleshift) { + flags |= dnnl::normalization_flags::use_scale_shift; + } + + LayerNormMKLDNNHandler handler(src_tz, epsilon, flags, is_test, + x->format(), dev_ctx, ctx.GetPlace(), + ctx.OutputName("Y")); + + auto src_memory = 
handler.AcquireSrcMemory(x); + auto dst_memory = handler.AcquireDstMemory(y); + + auto layer_norm_p = handler.AcquireForwardPrimitive(); + + dnnl::stream astream(dev_ctx.GetEngine()); + std::unordered_map args; + + args.insert({DNNL_ARG_SRC, *src_memory}); + args.insert({DNNL_ARG_DST, *dst_memory}); + + if (!is_test) { + auto* mean = ctx.Output("Mean"); + auto* var = ctx.Output("Variance"); + mean->mutable_data(ctx.GetPlace()); + var->mutable_data(ctx.GetPlace()); + + auto mean_memory = handler.AcquireMeanMemory(mean); + auto variance_memory = handler.AcquireVarianceMemory(var); + + args.insert({DNNL_ARG_MEAN, *mean_memory}); + args.insert({DNNL_ARG_VARIANCE, *variance_memory}); + } + + auto scaleshift_memory = handler.AcquireScaleShiftMemory(); + if (with_scaleshift) { + if (scaleshift_memory == nullptr || !is_test) { + auto scale_tz = paddle::framework::vectorize(scale->dims()); + const unsigned int C = scale_tz[0]; + + // MKLDNN requires a single piece of memory for scale and shift/bias + // data + std::vector scaleshift_data; + scaleshift_data.reserve(2 * C); + scaleshift_data.insert(scaleshift_data.begin(), scale->data(), + scale->data() + C); + + scaleshift_data.insert(scaleshift_data.end(), bias->data(), + bias->data() + C); + + scaleshift_memory = handler.AcquireScaleShiftMemory(scaleshift_data); + } + args.insert({DNNL_ARG_SCALE_SHIFT, *scaleshift_memory}); + } + + layer_norm_p->execute(astream, args); + astream.wait(); + + y->set_layout(DataLayout::kMKLDNN); + y->set_format(platform::GetMKLDNNFormat(*dst_memory)); + } +}; + +} // namespace operators +} // namespace paddle + +// TODO(jczaja): Enable FP32 when performance is good +namespace ops = paddle::operators; +REGISTER_OP_KERNEL(layer_norm, MKLDNN, ::paddle::platform::CPUPlace, + ops::LayerNormMKLDNNOpKernel); diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 54f8cb1dc88428..8649b90321c13b 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -190,6 +190,12 @@ class MKLDNNHandlerT { } } + std::shared_ptr AcquireMemoryFromPrimitive( + const std::string& suffix) { + return std::static_pointer_cast( + dev_ctx_.GetBlob(key_ + suffix)); + } + std::shared_ptr AcquireMemoryFromPrimitive( mkldnn::memory::desc md, void* ptr, const std::string& suffix) { const auto local_key = key_ + suffix; diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_bf16_mkldnn_op.py new file mode 100644 index 00000000000000..dc881a57521124 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_bf16_mkldnn_op.py @@ -0,0 +1,146 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# from paddle.fluid.tests.unittests.test_layer_norm_op import * +from __future__ import print_function +import unittest +import numpy as np + +from operator import mul +import paddle.fluid.core as core +import paddle.fluid as fluid +from paddle import enable_static +from functools import reduce + +from paddle.fluid.tests.unittests.mkldnn.test_layer_norm_mkldnn_op import TestLayerNormMKLDNNOp +from paddle.fluid.tests.unittests.mkldnn.test_layer_norm_mkldnn_op import _reference_layer_norm_naive +from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16 +from paddle.fluid.tests.unittests.op_test import _set_use_system_allocator + +np.random.random(123) + +_set_use_system_allocator(True) + + +@unittest.skipIf(not core.supports_bfloat16(), + "place does not support BF16 evaluation") +class TestLayerNormBF16MKLDNNOp(TestLayerNormMKLDNNOp): + def __assert_close(self, tensor, np_array, msg, rtol=2e-02, atol=2): + self.assertTrue( + np.allclose( + np.array(tensor), np_array, rtol=rtol, atol=atol), msg) + + def check_forward(self, + shape, + begin_norm_axis, + with_scale_bias=True, + with_is_test=False): + # attr + epsilon = 0.00001 + x_shape = shape + D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1) + scale_shape = [D] + + np.random.seed(123) + x = np.random.random_sample(x_shape).astype(np.float32) + x_bf16 = convert_float_to_uint16(x) + + if with_scale_bias: + scale = np.random.random_sample(scale_shape).astype(np.float32) + bias = np.random.random_sample(scale_shape).astype(np.float32) + else: + scale = np.array([]) + bias = np.array([]) + + # reference forward & backward + y, mean, variance = _reference_layer_norm_naive(x, scale, bias, epsilon, + begin_norm_axis) + + y_bf16 = convert_float_to_uint16(y) + + var_dict = locals() + var_names = ['x_bf16', 'mean', 'variance', 'y_bf16'] + if with_scale_bias: + var_names.append('scale') + var_names.append('bias') + ground_truth = {name: var_dict[name] for name in var_names} + + program = fluid.Program() + with fluid.program_guard(program): + block = program.global_block() + + # scale and bias are fp32 and other vars are of bf16 + for name in ground_truth: + if name == 'x_bf16' or name == 'y_bf16': + block.create_var( + name=name, + dtype='uint16', + shape=ground_truth[name].shape) + else: + block.create_var( + name=name, + dtype='float32', + shape=ground_truth[name].shape) + + inputs = {"X": block.var('x_bf16')} + if with_scale_bias: + inputs["Scale"] = block.var('scale') + inputs["Bias"] = block.var('bias') + + block.append_op( + type="layer_norm", + inputs=inputs, + outputs={ + "Y": block.var('y_bf16'), + "Mean": block.var('mean'), # share the same memory + "Variance": block.var('variance'), # share the same memory + }, + attrs={ + "epsilon": epsilon, + "begin_norm_axis": begin_norm_axis, + "use_mkldnn": True, + "is_test": with_is_test + }) + + exe = fluid.Executor(core.CPUPlace()) + + input_list = ['x_bf16'] + if with_scale_bias: + input_list.append('scale') + input_list.append('bias') + + out = exe.run(program, + feed={name: var_dict[name] + for name in input_list}, + fetch_list=['y_bf16', 'mean', 'variance']) + self.__assert_close(y_bf16, out[0], "y_bf16", 2) + if not with_is_test: + self.__assert_close(mean, out[1], "mean") + self.__assert_close(variance, out[2], "variance", 1e-3) + + def test_check_forward_with_is_test(self): + self.check_forward( + shape=[2, 3, 4, 5], begin_norm_axis=3, with_is_test=True) + + # TODO (jczaja): Enable those to test when enabling training using bf16 + def 
test_check_forward_with_scale_and_bias(self): + pass + + def test_check_forward_without_scale_and_bias(self): + pass + + +if __name__ == "__main__": + enable_static() + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_mkldnn_op.py new file mode 100644 index 00000000000000..d20fb003ee93b4 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_layer_norm_mkldnn_op.py @@ -0,0 +1,151 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# from paddle.fluid.tests.unittests.test_layer_norm_op import * +from __future__ import print_function +import unittest +import numpy as np + +from operator import mul +import paddle.fluid.core as core +import paddle.fluid as fluid +from paddle import enable_static +from functools import reduce + +from paddle.fluid.tests.unittests.op_test import _set_use_system_allocator + +np.random.random(123) + +_set_use_system_allocator(True) + + +def _reference_layer_norm_naive(x, scale, beta, epsilon, begin_norm_axis=1): + x_shape = x.shape + N = reduce(mul, x_shape[0:begin_norm_axis], 1) + D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1) + x.shape = [N, D] + if scale.size == 0 and beta.size == 0: + scale = np.ones([1, D]) + beta = np.zeros([1, D]) + else: + scale = scale.reshape([1, D]) + beta = beta.reshape([1, D]) + + mean = np.mean(x, axis=1) + var = np.var(x, axis=1) + epsilon + output = scale * np.divide((x - mean.reshape([N, 1])), + (np.sqrt(var)).reshape([N, 1])) + beta + + x.shape, output.shape = x_shape, x_shape + return output, mean, var + + +class TestLayerNormMKLDNNOp(unittest.TestCase): + def setUp(self): + self.use_mkldnn = True + + def __assert_close(self, tensor, np_array, msg, atol=1e-4): + self.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), msg) + + def check_forward(self, + shape, + begin_norm_axis, + with_scale_bias=True, + with_is_test=False): + # attr + epsilon = 0.00001 + x_shape = shape + D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1) + scale_shape = [D] + + np.random.seed(123) + x = np.random.random_sample(x_shape).astype(np.float32) + + if with_scale_bias: + scale = np.random.random_sample(scale_shape).astype(np.float32) + bias = np.random.random_sample(scale_shape).astype(np.float32) + else: + scale = np.array([]) + bias = np.array([]) + + # reference forward & backward + y, mean, variance = _reference_layer_norm_naive(x, scale, bias, epsilon, + begin_norm_axis) + + var_dict = locals() + var_names = ['x', 'mean', 'variance', 'y'] + if with_scale_bias: + var_names.append('scale') + var_names.append('bias') + ground_truth = {name: var_dict[name] for name in var_names} + + program = fluid.Program() + with fluid.program_guard(program): + block = program.global_block() + + for name in ground_truth: + block.create_var( + name=name, dtype='float32', shape=ground_truth[name].shape) + + inputs = {"X": block.var('x')} + if 
with_scale_bias: + inputs["Scale"] = block.var('scale') + inputs["Bias"] = block.var('bias') + + block.append_op( + type="layer_norm", + inputs=inputs, + outputs={ + "Y": block.var('y'), + "Mean": block.var('mean'), # share the same memory + "Variance": block.var('variance'), # share the same memory + }, + attrs={ + "epsilon": epsilon, + "begin_norm_axis": begin_norm_axis, + "use_mkldnn": True, + "is_test": with_is_test + }) + + exe = fluid.Executor(core.CPUPlace()) + + input_list = ['x'] + if with_scale_bias: + input_list.append('scale') + input_list.append('bias') + + out = exe.run(program, + feed={name: var_dict[name] + for name in input_list}, + fetch_list=['y', 'mean', 'variance']) + self.__assert_close(y, out[0], "y") + if not with_is_test: + self.__assert_close(mean, out[1], "mean") + self.__assert_close(variance, out[2], "variance", 1e-3) + + def test_check_forward_with_scale_and_bias(self): + self.check_forward(shape=[2, 3, 4, 5], begin_norm_axis=3) + + def test_check_forward_without_scale_and_bias(self): + self.check_forward( + shape=[2, 3, 4, 5], begin_norm_axis=3, with_scale_bias=False) + + def test_check_forward_with_is_test(self): + self.check_forward( + shape=[2, 3, 4, 5], begin_norm_axis=3, with_is_test=True) + + +if __name__ == "__main__": + enable_static() + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_sum_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_sum_bf16_mkldnn_op.py index 05d739ae1f3f34..c71baad0c7040a 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_sum_bf16_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_sum_bf16_mkldnn_op.py @@ -25,7 +25,7 @@ @unittest.skipIf(not core.supports_bfloat16(), "place does not support BF16 evaluation") -class TestSumMKLDNN(TestSumOp): +class TestSumBF16MKLDNN(TestSumOp): def setUp(self): self.op_type = "sum" self.use_mkldnn = True diff --git a/python/paddle/fluid/tests/unittests/test_layer_norm_op.py b/python/paddle/fluid/tests/unittests/test_layer_norm_op.py index 8df7ea35ec1164..d2c07c185dd992 100644 --- a/python/paddle/fluid/tests/unittests/test_layer_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_layer_norm_op.py @@ -117,8 +117,12 @@ def check_forward_backward(self, begin_norm_axis, has_scale=True, has_bias=True, - y_grad_scale=1.0): - def test_with_place(place, shape, begin_norm_axis): + y_grad_scale=1.0, + use_mkldnn=False): + def test_with_place(place, + shape, + begin_norm_axis, + use_mkldnn=use_mkldnn): # attr epsilon = 0.00001 x_shape = shape @@ -181,7 +185,8 @@ def test_with_place(place, shape, begin_norm_axis): }, attrs={ "epsilon": epsilon, - "begin_norm_axis": begin_norm_axis + "begin_norm_axis": begin_norm_axis, + "use_mkldnn": use_mkldnn }) # generate backward op_desc grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc( diff --git a/tools/static_mode_white_list.py b/tools/static_mode_white_list.py index 1f153442aff6c6..5fe1cc722e8753 100644 --- a/tools/static_mode_white_list.py +++ b/tools/static_mode_white_list.py @@ -293,6 +293,8 @@ 'test_label_smooth_op', 'test_lamb_op', 'test_layer_norm_op', + 'test_layer_norm_mkldnn_op', + 'test_layer_norm_bf16_mkldnn_op', 'test_layer_norm_op_v2', 'test_learning_rate_scheduler', 'test_linear_interp_op', From e4f9415338d27fff9bf34424acdd8c19608be5c6 Mon Sep 17 00:00:00 2001 From: lilong12 Date: Tue, 17 Nov 2020 20:17:14 +0800 Subject: [PATCH 39/56] update doc, test=document_fix (#28498) --- python/paddle/distributed/collective.py | 6 ------ python/paddle/tensor/manipulation.py | 3 
--- 2 files changed, 9 deletions(-) diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py index b631f7bbe9d110..cb3c37975ddf44 100644 --- a/python/paddle/distributed/collective.py +++ b/python/paddle/distributed/collective.py @@ -107,7 +107,6 @@ def broadcast(tensor, src, group=0): import paddle from paddle.distributed import init_parallel_env - paddle.disable_static() paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id) init_parallel_env() if paddle.distributed.ParallelEnv().local_rank == 0: @@ -165,7 +164,6 @@ def all_reduce(tensor, op=ReduceOp.SUM, group=0): from paddle.distributed import ReduceOp from paddle.distributed import init_parallel_env - paddle.disable_static() paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id) init_parallel_env() if paddle.distributed.ParallelEnv().local_rank == 0: @@ -240,7 +238,6 @@ def reduce(tensor, dst, op=ReduceOp.SUM, group=0): import paddle from paddle.distributed import init_parallel_env - paddle.disable_static() paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id) init_parallel_env() if paddle.distributed.ParallelEnv().local_rank == 0: @@ -323,7 +320,6 @@ def all_gather(tensor_list, tensor, group=0): import paddle from paddle.distributed import init_parallel_env - paddle.disable_static() paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id) init_parallel_env() tensor_list = [] @@ -397,7 +393,6 @@ def scatter(tensor, tensor_list=None, src=0, group=0): import paddle from paddle.distributed import init_parallel_env - paddle.disable_static() paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id) init_parallel_env() if paddle.distributed.ParallelEnv().local_rank == 0: @@ -463,7 +458,6 @@ def barrier(group=0): import paddle from paddle.distributed import init_parallel_env - paddle.disable_static() paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id) init_parallel_env() paddle.distributed.barrier() diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index bdda90315ac9c7..a0e5e681c76e9f 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -1098,7 +1098,6 @@ def tile(x, repeat_times, name=None): import paddle - paddle.disable_static() data = paddle.to_tensor([1, 2, 3], dtype='int32') out = paddle.tile(data, repeat_times=[2, 1]) np_out = out.numpy() @@ -1193,8 +1192,6 @@ def expand_as(x, y, name=None): import paddle - paddle.disable_static() - data_x = paddle.to_tensor([1, 2, 3], 'int32') data_y = paddle.to_tensor([[1, 2, 3], [4, 5, 6]], 'int32') out = paddle.expand_as(data_x, data_y) From b6f86b849138d56a2e170a619979e81420bdea19 Mon Sep 17 00:00:00 2001 From: Huihuang Zheng Date: Tue, 17 Nov 2020 20:47:42 +0800 Subject: [PATCH 40/56] Fix Using "isinstance" in Loop, test=develop (#28641) Fix a bug that used in PaddleGAN model which used `isinstance` in a for loop --- .../dygraph_to_static/loop_transformer.py | 23 ++++ .../dygraph_to_static/test_isinstance.py | 112 ++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/dygraph_to_static/test_isinstance.py diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/loop_transformer.py b/python/paddle/fluid/dygraph/dygraph_to_static/loop_transformer.py index 0b6c7c45b3804f..b25ff8360be0ca 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/loop_transformer.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/loop_transformer.py @@ -22,6 +22,7 @@ 
from paddle.fluid.dygraph.dygraph_to_static.static_analysis import AstNodeWrapper from paddle.fluid.dygraph.dygraph_to_static.static_analysis import NodeVarType from paddle.fluid.dygraph.dygraph_to_static.static_analysis import StaticAnalysisVisitor +from paddle.fluid.dygraph.dygraph_to_static.utils import ast_to_source_code from paddle.fluid.dygraph.dygraph_to_static.utils import generate_name_node from paddle.fluid.dygraph.dygraph_to_static.utils import get_attribute_full_name from paddle.fluid.dygraph.dygraph_to_static.utils import ForNodeVisitor @@ -84,6 +85,9 @@ def __init__(self, root_node): self.condition_vars = defaultdict(set) self.in_condition = False + # Some names are types, we shouldn't record them as loop var names. + self.type_vars = set() + self.static_analysis_visitor = StaticAnalysisVisitor(root_node) self.node_to_wrapper_map = self.static_analysis_visitor.get_node_to_wrapper_map( ) @@ -249,6 +253,18 @@ def visit_While(self, node): self.generic_visit(node) self.current_loop.pop() + def visit_Call(self, node): + # Store type var names such as "isinstance(x, some_type_names)" and + # Remove them later + if isinstance(node.func, gast.Name) and node.func.id == 'isinstance': + type_node = node.args[1] + if isinstance(type_node, gast.Tuple): + for element in type_node.elts: + self.type_vars.add(ast_to_source_code(element)) + else: + self.type_vars.add(ast_to_source_code(type_node)) + self.generic_visit(node) + def _var_nodes_to_names(self, node_set, ctx_filter_set=None): ret = set() for node in node_set: @@ -290,6 +306,7 @@ def _remove_unnecessary_vars(self, loop_vars, loop_node): Remove unnecessary vars from before_loop_vars, after_loop_vars or in_loop_vars about loop_node. 1. Remove target vars of gast.For from before_loop_vars or after_loop_vars. 2. Remove vars only in gast.comprehension. + 3. Remove vars that are type names, for example: "isinstance(x, var_type_name)" :param loop_vars: before_loop_vars, after_loop_vars or in_loop_vars of loop_node. :param loop_node: Current loop node. """ @@ -361,6 +378,12 @@ def _remove_unnecessary_vars(self, loop_vars, loop_node): target_vars_of_for_node.add(var) removed_vars = target_vars_of_for_node | vars_of_list_generator + + # 3. Remove var type names which are stored in self.type_vars + for var in loop_vars: + if ast_to_source_code(var) in self.type_vars: + removed_vars.add(var) + return loop_vars - removed_vars diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_isinstance.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_isinstance.py new file mode 100644 index 00000000000000..a838ac6842aba9 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_isinstance.py @@ -0,0 +1,112 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License + +import numpy as np +import unittest + +import paddle +import paddle.nn as nn + + +class SimpleReturnLayer(nn.Layer): + def forward(self, x): + return x + + +class AddAttrLayer(nn.Layer): + def __init__(self): + super(AddAttrLayer, self).__init__() + self.attr = None + + def forward(self, x): + out = x + self.attr + return out + + +class IsInstanceLayer(nn.Layer): + def __init__(self, layer): + super(IsInstanceLayer, self).__init__() + self.layer = layer + + @paddle.jit.to_static + def forward(self, x): + if isinstance(self.layer, (AddAttrLayer, )): + self.layer.attr = x + res = self.layer(x) + return res + + +class SequentialLayer(nn.Layer): + def __init__(self, layers): + super(SequentialLayer, self).__init__() + self.layers = nn.LayerList(layers) + + @paddle.jit.to_static + def forward(self, x): + res = x + for layer in self.layers: + if isinstance(layer, AddAttrLayer): + layer.attr = x + res = layer(res) + return res + + +def train(model, to_static): + prog_trans = paddle.jit.ProgramTranslator.get_instance() + prog_trans.enable(to_static) + + x = paddle.ones(shape=[2, 3], dtype='int32') + out = model(x) + + return out.numpy() + + +class TestIsinstance(unittest.TestCase): + def test_isinstance_simple_return_layer(self): + model = IsInstanceLayer(SimpleReturnLayer()) + self._test_model(model) + + def test_isinstance_add_attr_layer(self): + model = IsInstanceLayer(AddAttrLayer()) + self._test_model(model) + + def test_sequential_layer(self): + layers = [] + for i in range(5): + layers.append(SimpleReturnLayer()) + layers.append(AddAttrLayer()) + model = SequentialLayer(layers) + self._test_model(model) + + def _test_model(self, model): + st_out = train(model, to_static=True) + dy_out = train(model, to_static=False) + self.assertTrue( + np.allclose(dy_out, st_out), + msg="dy_out:\n {}\n st_out:\n{}".format(dy_out, st_out)) + + +if __name__ == "__main__": + unittest.main() From 11e32baf1e0f83374e3563e7541aab085e18b1cc Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Wed, 18 Nov 2020 00:18:17 +0800 Subject: [PATCH 41/56] Add matmtl_v2 to amp list (#28693) * add matmtl_v2 to amp list * support dygraph --- python/paddle/fluid/contrib/mixed_precision/fp16_lists.py | 1 + python/paddle/fluid/dygraph/amp/auto_cast.py | 1 + 2 files changed, 2 insertions(+) diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py index a9f080c514dff0..8c467a4969e295 100644 --- a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py +++ b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py @@ -74,6 +74,7 @@ def _update_list(self): white_list = { 'conv2d', 'matmul', + 'matmul_v2', 'mul', } diff --git a/python/paddle/fluid/dygraph/amp/auto_cast.py b/python/paddle/fluid/dygraph/amp/auto_cast.py index ffb4d9f16f29f3..4ff08337875c03 100644 --- a/python/paddle/fluid/dygraph/amp/auto_cast.py +++ b/python/paddle/fluid/dygraph/amp/auto_cast.py @@ -27,6 +27,7 @@ WHITE_LIST = { 'conv2d', 'matmul', + 'matmul_v2', 'mul', } From 5050e761b83446b9642cf1fe586c7ae59c5ec2d8 Mon Sep 17 00:00:00 2001 From: Bai Yifan Date: Wed, 18 Nov 2020 11:20:04 
+0800 Subject: [PATCH 42/56] Support user-defined activation/weight quantize and preprocess. (#28570) * support user-defined quant and preprocess --- .../slim/quantization/imperative/qat.py | 46 +++- .../slim/quantization/imperative/quant_nn.py | 118 ++++++--- .../tests/test_imperative_qat_user_defined.py | 248 ++++++++++++++++++ 3 files changed, 373 insertions(+), 39 deletions(-) create mode 100644 python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index 7fc177e7ad7654..cae24177232675 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -59,7 +59,11 @@ def __init__(self, weight_quantize_type='abs_max', activation_quantize_type='moving_average_abs_max', moving_rate=0.9, - quantizable_layer_type=['Conv2D', 'Linear']): + quantizable_layer_type=['Conv2D', 'Linear'], + weight_preprocess_layer=None, + act_preprocess_layer=None, + weight_quantize_layer=None, + act_quantize_layer=None): """ The constructor for ImperativeQuantAware. @@ -81,7 +85,28 @@ def __init__(self, quantizable_op_type(list[str]): List the type of layers that will be quantized. Default is ['Conv2D', 'Linear']. The quantizable_op_type in QuantizationFreezePass and ConvertToInt8Pass must be the same as this. - + weight_preprocess_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to preprocess + weight before quantization. Using this can quickly test if user's + preprocess method works or not. The input is non-quantized + weight and function returns processed weight to be quantized. + If None, the weight will be quantized directly. Default is None. + act_preprocess_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to preprocess + activation before quantization. Using this can quickly test if user's + preprocess method works or not. The input is non-quantized + activation and function returns processed activation to be quantized. + If None, the activation will be quantized directly. Default is None. + weight_quantize_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to quantize weight. + Using this can quickly test if user's quantization method works or not. + In this layer, user should both define quantization method and + dequantization method, that is, the function's input is non-quantized + weight and returns dequantized weight. If None, will use + quantization op defined by 'weight_quantize_type'. Default is None. + act_quantize_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to quantize activation. + Using this can quickly test if user's quantization method works or not. + In this layer, user should both define quantization method and + dequantization method, that is, the function's input is non-quantized + activation and returns dequantized activation. If None, will use + quantization op defined by 'activation_quantize_type'. Default is None. Examples: .. 
code-block:: python @@ -118,6 +143,19 @@ def __init__(self, self._activation_bits = activation_bits self._moving_rate = moving_rate + self._weight_pre_layer = weight_preprocess_layer + self._act_pre_layer = act_preprocess_layer + self._weight_quant_layer = weight_quantize_layer + self._act_quant_layer = act_quantize_layer + + t_check = lambda method: method is None or issubclass(method, dygraph.layers.Layer) + assert t_check( + self._weight_pre_layer), "weight_preprocess should be nn.Layer" + assert t_check(self._act_pre_layer), "act_preprocess should be nn.Layer" + assert t_check( + self._weight_quant_layer), "weight_quantize should be nn.Layer" + assert t_check(self._act_quant_layer), "act_quantize should be nn.Layer" + quant_type = { 'abs_max', 'moving_average_abs_max', 'channel_wise_abs_max' } @@ -189,7 +227,9 @@ def _get_quantized_counterpart(self, layer): quantized_layer = quant_nn.__dict__[quantized_counterpart[index]]( layer, self._weight_bits, self._activation_bits, self._moving_rate, - self._weight_quantize_type, self._activation_quantize_type) + self._weight_quantize_type, self._activation_quantize_type, + self._weight_pre_layer, self._act_pre_layer, + self._weight_quant_layer, self._act_quant_layer) return quantized_layer diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index bbaae56439eb66..79138febd0ce87 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -332,7 +332,11 @@ def __init__(self, activation_bits=8, moving_rate=0.9, weight_quantize_type='abs_max', - activation_quantize_type='abs_max'): + activation_quantize_type='abs_max', + weight_pre_layer=None, + act_pre_layer=None, + weight_quant_layer=None, + act_quant_layer=None): super(QuantizedConv2D, self).__init__() # For Conv2D self._groups = getattr(layer, '_groups') @@ -347,26 +351,44 @@ def __init__(self, self.bias = getattr(layer, 'bias') # For FakeQuant self._conv2d_quant_axis = 0 - self._fake_quant_weight = _get_fake_quant_type( - weight_quantize_type, - name=self.weight.name, - moving_rate=moving_rate, - quant_bits=weight_bits, - dtype=self._dtype, - quant_on_weight=True, - channel_num=self.weight.shape[self._conv2d_quant_axis], - quant_axis=self._conv2d_quant_axis) - self._fake_quant_input = _get_fake_quant_type( - activation_quantize_type, - name=layer.full_name(), - moving_rate=moving_rate, - quant_bits=activation_bits, - dtype=self._dtype, - quant_on_weight=False) + + if weight_quant_layer is not None: + self._fake_quant_weight = weight_quant_layer() + else: + self._fake_quant_weight = _get_fake_quant_type( + weight_quantize_type, + name=self.weight.name, + moving_rate=moving_rate, + quant_bits=weight_bits, + dtype=self._dtype, + quant_on_weight=True, + channel_num=self.weight.shape[self._conv2d_quant_axis], + quant_axis=self._conv2d_quant_axis) + if act_quant_layer is not None: + self._fake_quant_input = act_quant_layer() + else: + self._fake_quant_input = _get_fake_quant_type( + activation_quantize_type, + name=layer.full_name(), + moving_rate=moving_rate, + quant_bits=activation_bits, + dtype=self._dtype, + quant_on_weight=False) + + self._act_preprocess = act_pre_layer( + ) if act_pre_layer is not None else None + self._weight_preprocess = weight_pre_layer( + ) if weight_pre_layer is not None else None def forward(self, input): + if self._act_preprocess is not None: + input = 
self._act_preprocess(input) quant_input = self._fake_quant_input(input) - quant_weight = self._fake_quant_weight(self.weight) + + weight = self.weight + if self._weight_preprocess is not None: + weight = self._weight_preprocess(self.weight) + quant_weight = self._fake_quant_weight(weight) if in_dygraph_mode() and self._l_type == 'conv2d': attrs = ('strides', self._stride, 'paddings', self._padding, @@ -428,7 +450,11 @@ def __init__(self, activation_bits=8, moving_rate=0.9, weight_quantize_type='abs_max', - activation_quantize_type='abs_max'): + activation_quantize_type='abs_max', + weight_pre_layer=None, + act_pre_layer=None, + weight_quant_layer=None, + act_quant_layer=None): super(QuantizedLinear, self).__init__() # For Linear self._act = getattr(layer, '_act') @@ -437,26 +463,46 @@ def __init__(self, self.bias = getattr(layer, 'bias') # For FakeQuant self._linear_quant_axis = 1 - self._fake_quant_weight = _get_fake_quant_type( - weight_quantize_type, - name=self.weight.name, - moving_rate=moving_rate, - quant_bits=weight_bits, - dtype=self._dtype, - quant_on_weight=True, - channel_num=self.weight.shape[self._linear_quant_axis], - quant_axis=self._linear_quant_axis) - self._fake_quant_input = _get_fake_quant_type( - activation_quantize_type, - name=layer.full_name(), - moving_rate=moving_rate, - quant_bits=activation_bits, - dtype=self._dtype, - quant_on_weight=False) + + if weight_quant_layer is not None: + self._fake_quant_weight = weight_quant_layer() + else: + self._fake_quant_weight = _get_fake_quant_type( + weight_quantize_type, + name=self.weight.name, + moving_rate=moving_rate, + quant_bits=weight_bits, + dtype=self._dtype, + quant_on_weight=True, + channel_num=self.weight.shape[self._linear_quant_axis], + quant_axis=self._linear_quant_axis) + + if act_quant_layer is not None: + self._fake_quant_input = act_quant_layer() + else: + self._fake_quant_input = _get_fake_quant_type( + activation_quantize_type, + name=layer.full_name(), + moving_rate=moving_rate, + quant_bits=activation_bits, + dtype=self._dtype, + quant_on_weight=False) + + self._act_preprocess = act_pre_layer( + ) if act_pre_layer is not None else None + self._weight_preprocess = weight_pre_layer( + ) if weight_pre_layer is not None else None def forward(self, input): + if self._act_preprocess is not None: + input = self._act_preprocess(input) quant_input = self._fake_quant_input(input) - quant_weight = self._fake_quant_weight(self.weight) + + weight = self.weight + if self._weight_preprocess is not None: + weight = self._weight_preprocess(self.weight) + quant_weight = self._fake_quant_weight(weight) + if in_dygraph_mode(): pre_bias = _varbase_creator(dtype=input.dtype) core.ops.matmul(quant_input, quant_weight, pre_bias, 'transpose_X', diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py new file mode 100644 index 00000000000000..29b69bbe0f8ea2 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py @@ -0,0 +1,248 @@ +# copyright (c) 2020 paddlepaddle authors. all rights reserved. +# +# licensed under the apache license, version 2.0 (the "license"); +# you may not use this file except in compliance with the license. 
+# you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2.0 +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. + +from __future__ import print_function + +import os +import numpy as np +import random +import unittest +import logging +import paddle +import paddle.nn as nn +from paddle.optimizer import Adam +from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware +from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass +from paddle.nn import Sequential +from paddle.fluid.dygraph import Conv2D +from paddle.nn import Pool2D +from paddle.fluid.dygraph import Linear +from paddle.fluid.log_helper import get_logger + +os.environ["CPU_NUM"] = "1" + +_logger = get_logger( + __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') + + +class PACT(nn.Layer): + def __init__(self, init_value=20): + super(PACT, self).__init__() + alpha_attr = paddle.ParamAttr( + name=self.full_name() + ".pact", + initializer=paddle.nn.initializer.Constant(value=init_value)) + self.alpha = self.create_parameter( + shape=[1], attr=alpha_attr, dtype='float32') + + def forward(self, x): + out_left = paddle.nn.functional.relu(x - self.alpha) + out_right = paddle.nn.functional.relu(-self.alpha - x) + x = x - out_left + out_right + return x + + +class CustomQAT(nn.Layer): + def __init__(self): + super(CustomQAT, self).__init__() + attr = paddle.ParamAttr( + initializer=paddle.nn.initializer.Constant(value=1.0)) + self.u_param = self.create_parameter( + shape=[1], attr=attr, dtype='float32') + self.l_param = self.create_parameter( + shape=[1], attr=attr, dtype='float32') + self.alpha_param = self.create_parameter( + shape=[1], attr=attr, dtype='float32') + self.upper = self.create_parameter( + shape=[1], attr=attr, dtype='float32') + self.upper.stop_gradient = True + self.lower = self.create_parameter( + shape=[1], attr=attr, dtype='float32') + self.lower.stop_gradient = True + + def forward(self, x): + def clip(x, upper, lower): + x = x + paddle.nn.functional.relu(lower - x) + x = x - paddle.nn.functional.relu(x - upper) + return x + + def phi_function(x, mi, alpha, delta): + s = 1 / (1 - alpha) + k = paddle.log(2 / alpha - 1) * (1 / delta) + x = (paddle.tanh((x - mi) * k)) * s + return x + + def dequantize(x, lower_bound, delta, interval): + x = ((x + 1) / 2 + interval) * delta + lower_bound + return x + + bit = 8 + bit_range = 2**bit - 1 + + paddle.assign(self.upper * 0.9 + self.u_param * 0.1, self.upper) + paddle.assign(self.lower * 0.9 + self.l_param * 0.1, self.lower) + x = clip(x, self.upper, self.lower) + delta = (self.upper - self.lower) / bit_range + interval = (x - self.lower) / delta + mi = (interval + 0.5) * delta + self.l_param + x = phi_function(x, mi, self.alpha_param, delta) + x = dequantize(x, self.l_param, delta, interval) + return x + + +class ImperativeLenet(paddle.nn.Layer): + def __init__(self, num_classes=10, classifier_activation='softmax'): + super(ImperativeLenet, self).__init__() + self.features = Sequential( + Conv2D( + num_channels=1, + num_filters=6, + filter_size=3, + stride=1, + padding=1), + Pool2D( + pool_size=2, pool_type='max', pool_stride=2), + Conv2D( + num_channels=6, + num_filters=16, + filter_size=5, + stride=1, + padding=0), + Pool2D( + 
pool_size=2, pool_type='max', pool_stride=2)) + + self.fc = Sequential( + Linear( + input_dim=400, output_dim=120), + Linear( + input_dim=120, output_dim=84), + Linear( + input_dim=84, output_dim=num_classes, + act=classifier_activation)) + + def forward(self, inputs): + x = self.features(inputs) + + x = paddle.flatten(x, 1) + x = self.fc(x) + return x + + +class TestUserDefinedActPreprocess(unittest.TestCase): + def setUp(self): + _logger.info("test act_preprocess") + self.imperative_qat = ImperativeQuantAware(act_preprocess_layer=PACT) + + def test_quant_aware_training(self): + imperative_qat = self.imperative_qat + seed = 1 + np.random.seed(seed) + paddle.static.default_main_program().random_seed = seed + paddle.static.default_startup_program().random_seed = seed + lenet = ImperativeLenet() + fixed_state = {} + param_init_map = {} + for name, param in lenet.named_parameters(): + p_shape = param.numpy().shape + p_value = param.numpy() + if name.endswith("bias"): + value = np.zeros_like(p_value).astype('float32') + else: + value = np.random.normal( + loc=0.0, scale=0.01, + size=np.product(p_shape)).reshape(p_shape).astype('float32') + fixed_state[name] = value + param_init_map[param.name] = value + lenet.set_dict(fixed_state) + + imperative_qat.quantize(lenet) + adam = Adam(learning_rate=0.001, parameters=lenet.parameters()) + dynamic_loss_rec = [] + + def train(model): + adam = Adam(learning_rate=0.001, parameters=model.parameters()) + epoch_num = 1 + for epoch in range(epoch_num): + model.train() + for batch_id, data in enumerate(train_reader()): + x_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape(-1, 1) + + img = paddle.to_tensor(x_data) + label = paddle.to_tensor(y_data) + out = model(img) + acc = paddle.metric.accuracy(out, label, k=1) + loss = nn.functional.loss.cross_entropy(out, label) + avg_loss = paddle.mean(loss) + avg_loss.backward() + adam.minimize(avg_loss) + model.clear_gradients() + if batch_id % 50 == 0: + _logger.info( + "Train | At epoch {} step {}: loss = {:}, acc= {:}". 
+ format(epoch, batch_id, + avg_loss.numpy(), acc.numpy())) + break + + def test(model): + model.eval() + avg_acc = [[], []] + for batch_id, data in enumerate(test_reader()): + x_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape(-1, 1) + + img = paddle.to_tensor(x_data) + label = paddle.to_tensor(y_data) + + out = model(img) + acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1) + acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5) + avg_acc[0].append(acc_top1.numpy()) + avg_acc[1].append(acc_top5.numpy()) + if batch_id % 100 == 0: + _logger.info( + "Test | step {}: acc1 = {:}, acc5 = {:}".format( + batch_id, acc_top1.numpy(), acc_top5.numpy())) + + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=512, drop_last=True) + test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=512) + train(lenet) + test(lenet) + + +class TestUserDefinedWeightPreprocess(TestUserDefinedActPreprocess): + def setUp(self): + _logger.info("test weight_preprocess") + self.imperative_qat = ImperativeQuantAware(weight_preprocess_layer=PACT) + + +class TestUserDefinedActQuantize(TestUserDefinedActPreprocess): + def setUp(self): + _logger.info("test act_quantize") + self.imperative_qat = ImperativeQuantAware(act_quantize_layer=CustomQAT) + + +class TestUserDefinedWeightQuantize(TestUserDefinedActPreprocess): + def setUp(self): + _logger.info("test weight_quantize") + self.imperative_qat = ImperativeQuantAware( + weight_quantize_layer=CustomQAT) + + +if __name__ == '__main__': + unittest.main() From 358d6bc90f0c4b463d66bc7ae25116b0f814dfb0 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Wed, 18 Nov 2020 12:46:35 +0800 Subject: [PATCH 43/56] Fix test_weight_decay_extend random failed on windows (#28643) * add debuging code * change seed & add debug message --- .../contrib/tests/test_weight_decay_extend.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py b/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py index 65d400c63262bf..9eb2fe6cbd1a15 100644 --- a/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py +++ b/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py @@ -24,6 +24,8 @@ paddle.enable_static() +SEED = 2020 + def fake_imdb_reader(word_dict_size, sample_num, @@ -87,6 +89,11 @@ def bow_net(data, class TestWeightDecay(unittest.TestCase): def setUp(self): + # set seed + np.random.seed(SEED) + paddle.seed(SEED) + paddle.framework.random._manual_program_seed(SEED) + # configs self.word_dict_len = 5147 batch_size = 2 reader = fake_imdb_reader(self.word_dict_len, batch_size * 100) @@ -114,8 +121,6 @@ def run_program(self, place, feed_list): return param_sum def check_weight_decay(self, place, model): - paddle.seed(1) - paddle.framework.random._manual_program_seed(1) main_prog = fluid.framework.Program() startup_prog = fluid.framework.Program() @@ -137,8 +142,6 @@ def check_weight_decay(self, place, model): return param_sum def check_weight_decay2(self, place, model): - paddle.seed(1) - paddle.framework.random._manual_program_seed(1) main_prog = fluid.framework.Program() startup_prog = fluid.framework.Program() @@ -173,7 +176,12 @@ def test_weight_decay(self): param_sum2 = self.check_weight_decay2(place, model) for i in range(len(param_sum1)): - assert np.isclose(a=param_sum1[i], b=param_sum2[i], rtol=5e-5) + self.assertTrue( + 
np.allclose(param_sum1[i], param_sum2[i]), + "Current place: {}, i: {}, sum1: {}, sum2: {}".format( + place, i, param_sum1[i][~np.isclose(param_sum1[ + i], param_sum2[i])], param_sum2[i][~np.isclose( + param_sum1[i], param_sum2[i])])) if __name__ == '__main__': From f78211d08233b6b66b706979f867a462d8aa8920 Mon Sep 17 00:00:00 2001 From: chalsliu <45041955+chalsliu@users.noreply.github.com> Date: Wed, 18 Nov 2020 12:51:06 +0800 Subject: [PATCH 44/56] Add delta file for precision test --- tools/get_pr_ut.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/get_pr_ut.py b/tools/get_pr_ut.py index 970f89551c579b..b166573ffe4db5 100644 --- a/tools/get_pr_ut.py +++ b/tools/get_pr_ut.py @@ -65,6 +65,12 @@ def get_pr_ut(self): else: ut_list.extend(file_ut_map.get(f)) ut_list = list(set(ut_list)) + cmd = 'wget -q --no-check-certificate https://sys-p0.bj.bcebos.com/prec/prec_delta' + os.system(cmd) + with open('prec_delta') as delta: + for ut in delta: + ut_list.append(ut.rstrip('\r\n')) + return ' '.join(ut_list) From 858ffa0c8b6ff6c10b7f62a0a47d56fa7e37362f Mon Sep 17 00:00:00 2001 From: Guo Sheng Date: Wed, 18 Nov 2020 13:04:10 +0800 Subject: [PATCH 45/56] Fix the dropout setting when not initialized in rnn_op. (#28561) test=develop --- paddle/fluid/operators/rnn_op.cu.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/operators/rnn_op.cu.cc b/paddle/fluid/operators/rnn_op.cu.cc index 568db79722324f..f38bfd5968884c 100644 --- a/paddle/fluid/operators/rnn_op.cu.cc +++ b/paddle/fluid/operators/rnn_op.cu.cc @@ -89,15 +89,16 @@ class RNNDescriptors { // ------------------- cudnn dropout descriptors --------------------- size_t state_size; - if (!is_test_ && !dropout_state->IsInitialized()) { + bool is_initialized = dropout_state->IsInitialized(); + if (!is_test_ && !is_initialized) { PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cudnnDropoutGetStatesSize(handle, &state_size)); dropout_state->mutable_data({static_cast(state_size)}, place); } - dropout_desc_.descriptor(handle, place, dropout_state->IsInitialized(), - dropout_prob_, is_test_ ? nullptr : dropout_state, - seed_, state_size); + dropout_desc_.descriptor(handle, place, is_initialized, dropout_prob_, + is_test_ ? 
nullptr : dropout_state, seed_, + state_size); // ------------------- cudnn rnn descriptors --------------------- #if CUDNN_VERSION >= 6000 From 7eeb99fe025c0946014956300930461bf3ad8fe9 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Wed, 18 Nov 2020 13:09:21 +0800 Subject: [PATCH 46/56] Add basic hook classes for dygraph & implement reduce hook (#28584) * add base hook classes and reduce hook impl * fix constructor typo * polish comment format * refactor baisc hook class design * polish design details --- paddle/fluid/imperative/basic_engine.cc | 21 +- paddle/fluid/imperative/basic_engine.h | 3 + .../fluid/imperative/gradient_accumulator.cc | 9 +- .../fluid/imperative/gradient_accumulator.h | 49 ++++ paddle/fluid/imperative/hooks.h | 233 +++++++++++++++++ paddle/fluid/imperative/op_base.h | 2 +- paddle/fluid/imperative/tests/CMakeLists.txt | 1 + paddle/fluid/imperative/tests/test_hooks.cc | 240 ++++++++++++++++++ paddle/fluid/imperative/variable_wrapper.h | 82 ++++++ 9 files changed, 637 insertions(+), 3 deletions(-) create mode 100644 paddle/fluid/imperative/hooks.h create mode 100644 paddle/fluid/imperative/tests/test_hooks.cc diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc index 9ad30506b2c3a0..e9214a8fea8174 100644 --- a/paddle/fluid/imperative/basic_engine.cc +++ b/paddle/fluid/imperative/basic_engine.cc @@ -114,6 +114,16 @@ void BasicEngine::PrepareGradAccumulators(const OpBase& op) { accumulator->IncreaseRefCnt(); + if (var->HasLeafHooks()) { + VLOG(3) << "Grad variable wrapper (" << var->Name() + << ") has leaf grad hooks."; + PADDLE_ENFORCE_NE(var->HasGradNode(), true, + platform::errors::PermissionDenied( + "Only leaf Tensor's gradient can append hook to " + "Gradientaccumulator.")); + accumulator->SetPostHooks(var->GetLeafHooks()); + } + VLOG(3) << "Prepare to acccumulate variable grad " << var->Name() << "(" << var.get() << ") with reference count " << accumulator->RefCnt(); @@ -204,6 +214,7 @@ void BasicEngine::Execute() { var->Name())); if (!var->OverridedStopGradient() && iter->second->RefCnt() == 1) { + no_need_run_accumulators_.emplace_back(iter->second.get()); continue; } @@ -220,12 +231,19 @@ void BasicEngine::Execute() { cur_op.place()); } - // Step 2: Sum Gradient + // Step 2: Sum Gradient & Call Accumulator Hooks + for (auto* accumulator : no_need_run_accumulators_) { + if (accumulator->HasPostHooks()) { + accumulator->CallBackwardPostHooks(); + } + } + for (auto& pair : need_accu_var_list_) { pair.first->Add(std::move(pair.second), cur_op.id()); } need_accu_var_list_.clear(); + no_need_run_accumulators_.clear(); VLOG(3) << "Remove op after op " << cur_op.Type() << " runs"; if (!retain_graph_) { @@ -258,6 +276,7 @@ void BasicEngine::Clear() { node_deps_.clear(); accumulators_.clear(); need_accu_var_list_.clear(); + no_need_run_accumulators_.clear(); } } // namespace imperative diff --git a/paddle/fluid/imperative/basic_engine.h b/paddle/fluid/imperative/basic_engine.h index 0906dd4f9236ec..92e7fe7eb8cd79 100644 --- a/paddle/fluid/imperative/basic_engine.h +++ b/paddle/fluid/imperative/basic_engine.h @@ -49,6 +49,9 @@ class BasicEngine : public Engine { accumulators_; std::vector>> need_accu_var_list_; + // Accumulators that does not need to perform accumulation operations, + // the ref_cnt_=1, corresponding to need_accu_var_list_ + std::vector no_need_run_accumulators_; bool retain_graph_; }; diff --git a/paddle/fluid/imperative/gradient_accumulator.cc b/paddle/fluid/imperative/gradient_accumulator.cc index 
07f1868b7fa299..00fd18e5e2564c 100644 --- a/paddle/fluid/imperative/gradient_accumulator.cc +++ b/paddle/fluid/imperative/gradient_accumulator.cc @@ -401,13 +401,15 @@ void EagerGradientAccumulator::Add(std::shared_ptr var, } } } - ++cur_cnt_; if (var_->Var().IsType()) { var_->SetType(framework::proto::VarType::LOD_TENSOR); } else if (var_->Var().IsType()) { var_->SetType(framework::proto::VarType::SELECTED_ROWS); } + + // Increase count & call post hooks + IncreaseCurCnt(); } void SortedGradientAccumulator::Add(std::shared_ptr var, @@ -520,6 +522,11 @@ void SortedGradientAccumulator::Add(std::shared_ptr var, } else if (var_->Var().IsType()) { var_->SetType(framework::proto::VarType::SELECTED_ROWS); } + + // call post hooks + if (HasPostHooks()) { + CallBackwardPostHooks(); + } } } // namespace imperative diff --git a/paddle/fluid/imperative/gradient_accumulator.h b/paddle/fluid/imperative/gradient_accumulator.h index a8ccb2a38d3c33..2d0cc6e8921590 100644 --- a/paddle/fluid/imperative/gradient_accumulator.h +++ b/paddle/fluid/imperative/gradient_accumulator.h @@ -17,6 +17,8 @@ #include #include #include + +#include "paddle/fluid/imperative/hooks.h" #include "paddle/fluid/imperative/layer.h" namespace paddle { @@ -35,9 +37,43 @@ class GradientAccumulator { inline size_t RefCnt() const { return ref_cnt_; } + /* Hook related methods */ + inline bool HasPostHooks() const { return !post_hooks_.expired(); } + + void SetPostHooks(const std::shared_ptr& hooks) { + PADDLE_ENFORCE_NOT_NULL( + hooks, platform::errors::InvalidArgument( + "The hook set to GradientAccumulator is nullptr.")); + + auto shared_hooks = post_hooks_.lock(); + if (shared_hooks != hooks) { + PADDLE_ENFORCE_EQ( + shared_hooks, nullptr, + platform::errors::PermissionDenied( + "Cannot set post hooks twice to GradientAccumulator.")); + post_hooks_ = hooks; + } + } + + // call backward post hooks, such as reduce hook + void CallBackwardPostHooks() { + PADDLE_ENFORCE_NE( + post_hooks_.expired(), true, + platform::errors::NotFound( + "The post hooks of GradientAccumulator for Tensor `%s` expired.", + var_->Name())); + auto shared_hooks = post_hooks_.lock(); + for (const auto& hook : shared_hooks->backward_hooks()) { + VLOG(3) << "call gradient accumulator backward hooks."; + (*hook)(var_); + } + } + protected: VariableWrapper* var_; size_t ref_cnt_{0}; + + std::weak_ptr post_hooks_; }; class EagerGradientAccumulator : public GradientAccumulator { @@ -47,6 +83,19 @@ class EagerGradientAccumulator : public GradientAccumulator { void Add(std::shared_ptr var, size_t trace_id, bool unchange_input) override; + private: + inline bool AccumulateCompleted() const { return cur_cnt_ == ref_cnt_; } + + void IncreaseCurCnt() { + ++cur_cnt_; + VLOG(3) << "IncreaseCurCnt: cur_cnt " << cur_cnt_ << ", ref_cnt " + << ref_cnt_; + // After all tmp gradient being accumulated to grad var, run hooks + if (AccumulateCompleted() && HasPostHooks()) { + CallBackwardPostHooks(); + } + } + private: size_t cur_cnt_{0}; }; diff --git a/paddle/fluid/imperative/hooks.h b/paddle/fluid/imperative/hooks.h new file mode 100644 index 00000000000000..1211ec6ae6c7bd --- /dev/null +++ b/paddle/fluid/imperative/hooks.h @@ -0,0 +1,233 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include 
+#include 
+#include 
+#include 
+
+#include "paddle/fluid/imperative/type_defs.h"
+#include "paddle/fluid/platform/macros.h"
+
+namespace paddle {
+namespace imperative {
+
+class VariableWrapper;
+
+/** [ Basic hook classes ]
+ *
+ * @brief OpBasePreHook is executed before the grad OpBase is executed,
+ * taking the input of the current grad OpBase as input, and
+ * executing python hooks (user-defined) or C++ hooks (developer-defined)
+ * to achieve the purpose of custom operations on the interior VarBase
+ * gradient.
+ *
+ * @note OpBasePreHook will not change the input gradient VarBase.
+ *
+ * @note [Why need to be OpBase `PreHook`, why not `PostHook`?]
+ *
+ * If set OpBase post hook, when the op executed end, the op's output
+ * gradient may not be the final state, because it may need other op's
+ * gradient output to accumulated to it. But before op can be executed,
+ * the gradient output must have been accumulated to final value.
+ *
+ * @note [Why only can be used for interior VarBase?]
+ *
+ * Because the leaf VarBase's GradVarBase has no GradOpNode, so leaf
+ * GradVarBase has no next OpBase to executed, so if need to deal with
+ * the leaf GradVarBase, cannot use OpBasePreHook. For this case, we
+ * deal with by GradAccumulatorPostHook.
+ */
+class OpBasePreHook {
+ public:
+  virtual ~OpBasePreHook() = default;
+  virtual VariableWrapperList operator()(
+      const VariableWrapperList& grad_inputs) = 0;
+};
+
+/**
+ * @brief GradAccumulatorPostHook is the Hook that operates on the current
+ * gradient after the GradientAccumulator has accumulated the gradient.
+ * Leaf GradVarBase has no next OpBase, if we want to register hook
+ * for it, we also need to wait until the leaf GradVarBase accumulation
+ * is completed, so we can add post hook to GradientAccumulator.
+ *
+ * @note GradAccumulatorPostHook will change the grad VarBase value.
+ *
+ * @note Only allow leaf VarBase hold GradientAccumulatorPostHook.
+ */
+class GradAccumulatorPostHook {
+ public:
+  virtual ~GradAccumulatorPostHook() = default;
+  virtual void operator()(VariableWrapper* var) = 0;
+};
+
+/** [ Hook for cpp functions ]
+ *
+ * Here we design three C++ hooks:
+ * 1. CppOpBasePreHook (Implement later):
+ *    - used for developer-defined C++ interior VarBase hooks
+ * 2. CppGradAccumulatorPostHook (Implement later):
+ *    - used for developer-defined C++ leaf VarBase hooks
+ * 3. LambdaGradAccumulatorPostHook:
+ *    - used for VarBase reduce in parallel training
+ *
+ * @note [Why need two types of GradAccumulatorPostHook? ]
+ *
+ * There are two types of gradient accumulation:
+ * 1. Gradient accumulation in same batch
+ * 2. Gradient accumulation across batches
+ * The order of execution between Hooks and gradient accumulation:
+ *
+ *         [ Gradient accumulation in same batch]
+ *                        |
+ *            [ leaf GradVarBase hooks ]
+ *                        |
+ *         [ Gradient accumulation across batches ]
+ *                        |
+ *          [ Gradient reduce / allreduce]
+ *
+ * Because we currently intend to accumulate these two gradient
+ * accumulation in one GradientAccumulator, we must distinguish between
+ * two types of hooks.
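+ *
+ * As a rough usage sketch (illustrative only; the concrete call sites live
+ * in variable_wrapper.h and tests/test_hooks.cc of this patch), a reduce-style
+ * post hook is attached by the framework to a leaf VarBase through its
+ * VariableWrapper:
+ *
+ *   x->SharedVar()->AddGradVarLeafBackwardHook(
+ *       std::unique_ptr<LambdaGradAccumulatorPostHook>(
+ *           new LambdaGradAccumulatorPostHook([](VariableWrapper* grad) {
+ *             // e.g. allreduce / rescale the accumulated leaf gradient
+ *           })));
+ *
+ * The GradientAccumulator then invokes such hooks once the leaf gradient
+ * has been fully accumulated (see GradientAccumulator::CallBackwardPostHooks).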
+ * + * And the LambdaGradAccumulatorPostHook does not allow users to register + * directly, and is currently only used to support the reduce strategy of + * parallel multi-card training. + */ +class LambdaGradAccumulatorPostHook : public GradAccumulatorPostHook { + public: + explicit LambdaGradAccumulatorPostHook( + std::function fn) + : fn_(std::move(fn)) {} + + void operator()(VariableWrapper* var) override { fn_(var); } + + private: + std::function fn_; +}; + +/* Hooks for python function: in pybind/imperative.cc */ + +/** Add Python Hooks later: + * - PyOpBasePreHook (Implement later): used for user-defined interior python + * VarBase hooks + * - PyGradAccumulatorPostHook (Implement later): used for user-defined leaf + * python VarBase hooks + */ + +/** [ Hook Pipeline classes ] + * + * @note [Why need hook pipeline classes?] + * + * There are 2 purposes for adding Hook pipeline here: + * + * 1. Make the code implementation cleaner. + * + * If there are no Hook pipeline, we need to add 3 hook vector into + * VariableWrapper, 1 hook vector into OpBase, 2 hook vector into + * GradientAccumulator, like: + * + * - VariableWrapper: + * std::vector> + * interior_var_hooks_; + * std::vector> + * leaf_var_hooks_; + * std::vector> + * backward_hooks_; + * + * - OpBase: + * std::vector> + * interior_var_hooks_; + * + * - GradientAccumulator: + * std::vector> + * leaf_var_hooks_; + * std::vector> + * backward_hooks_; + * + * This seems more complicated, and std::vector> + * is not easy to destruct. + * + * 2. Make the code easier to understand. + * + * From these two packages, we can clearly understand that we + * have two types of Hooks, respectively for the interior + * gradient var and leaf gradient var inside the backward + * calculation graph. + */ + +class InteriorVarHookPipeline { + public: + InteriorVarHookPipeline() = default; + + void add_hook(std::unique_ptr&& hook) { + hooks_.emplace_back(std::move(hook)); + } + + const std::vector>& hooks() const { + return hooks_; + } + + std::vector>& hooks() { return hooks_; } + + private: + std::vector> hooks_; + + DISABLE_COPY_AND_ASSIGN(InteriorVarHookPipeline); +}; + +class LeafVarHookPipeline { + public: + LeafVarHookPipeline() = default; + + void add_hook(std::unique_ptr&& hook) { + hooks_.emplace_back(std::move(hook)); + } + + const std::vector>& hooks() const { + return hooks_; + } + + std::vector>& hooks() { + return hooks_; + } + + void add_backward_hook(std::unique_ptr&& hook) { + backward_hooks_.emplace_back(std::move(hook)); + } + + const std::vector>& backward_hooks() + const { + return backward_hooks_; + } + + std::vector>& backward_hooks() { + return backward_hooks_; + } + + private: + std::vector> hooks_; + // NOTE: the `backward` here means the `whole backward process`, + // the `backward_hooks_` need to be executed after the `whole backward + // process`. 
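+  // A typical backward hook is the data-parallel reduce hook registered via
+  // VariableWrapper::AddGradVarLeafBackwardHook; it may only run after the
+  // leaf gradient has been fully accumulated by the GradientAccumulator.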
+ std::vector> backward_hooks_; + + DISABLE_COPY_AND_ASSIGN(LeafVarHookPipeline); +}; + +} // namespace imperative +} // namespace paddle diff --git a/paddle/fluid/imperative/op_base.h b/paddle/fluid/imperative/op_base.h index a4b57c404ce00b..36185af3a25257 100644 --- a/paddle/fluid/imperative/op_base.h +++ b/paddle/fluid/imperative/op_base.h @@ -176,7 +176,7 @@ class OpBase { platform::Place place_; size_t id_{-1UL}; - std::vector> backward_hooks_; + std::weak_ptr pre_hooks_; }; class GradOpNode { diff --git a/paddle/fluid/imperative/tests/CMakeLists.txt b/paddle/fluid/imperative/tests/CMakeLists.txt index e3c82474e09e2d..a8de1e6b039268 100644 --- a/paddle/fluid/imperative/tests/CMakeLists.txt +++ b/paddle/fluid/imperative/tests/CMakeLists.txt @@ -11,3 +11,4 @@ cc_test(test_gradient_accmulator SRCS test_gradient_accmulator.cc DEPS memcpy se cc_test(test_layer SRCS test_layer.cc DEPS layer proto_desc operator op_registry variable_helper mul_op memcpy) cc_test(test_prepare_op SRCS test_prepare_op.cc DEPS prepared_operator op_info split_op layer concat_and_split activation_op place) cc_test(test_tracer SRCS test_tracer.cc DEPS tracer layer proto_desc operator op_registry variable_helper mul_op reduce_sum_op elementwise_add_op memcpy) +cc_test(test_hooks SRCS test_hooks.cc DEPS tracer basic_engine layer proto_desc operator op_registry variable_helper mul_op elementwise_add_op memcpy) diff --git a/paddle/fluid/imperative/tests/test_hooks.cc b/paddle/fluid/imperative/tests/test_hooks.cc new file mode 100644 index 00000000000000..7bf5f876681bab --- /dev/null +++ b/paddle/fluid/imperative/tests/test_hooks.cc @@ -0,0 +1,240 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "glog/logging.h" +#include "gtest/gtest.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/imperative/basic_engine.h" +#include "paddle/fluid/imperative/hooks.h" +#include "paddle/fluid/imperative/tracer.h" +#include "paddle/fluid/memory/memcpy.h" + +namespace platform = paddle::platform; +namespace framework = paddle::framework; +namespace memory = paddle::memory; + +DECLARE_bool(sort_sum_gradient); + +namespace paddle { +namespace imperative { + +using vb_vector = std::vector>; +using var_pair = std::pair; + +TEST(TestHooks, TestGradVarLeafBackwardHook) { + // 1. 
prepare + Tracer tracer; + std::shared_ptr x(new VarBase(true, "x")); + std::shared_ptr y(new VarBase(true, "y")); + std::shared_ptr out(new VarBase(true, "out")); + x->SetOverridedStopGradient(false); + y->SetOverridedStopGradient(false); + + platform::CPUPlace place; + std::vector src_data(10, 2.0); + std::vector x_dims = {2, 5}; + std::vector y_dims = {5, 2}; + + auto* x_tensor = x->MutableVar()->GetMutable(); + auto* y_tensor = y->MutableVar()->GetMutable(); + + x_tensor->Resize(framework::make_ddim(x_dims)); + auto* mutable_x = x_tensor->mutable_data(place); + memory::Copy(place, mutable_x, place, src_data.data(), + sizeof(float) * src_data.size()); + + y_tensor->Resize(framework::make_ddim(y_dims)); + auto* mutable_y = y_tensor->mutable_data(place); + memory::Copy(place, mutable_y, place, src_data.data(), + sizeof(float) * src_data.size()); + + var_pair x_pair = var_pair("X", vb_vector(1, x)); + var_pair y_pair = var_pair("Y", vb_vector(1, y)); + var_pair out_pair = var_pair("Out", vb_vector(1, out)); + + NameVarBaseMap ins = {x_pair, y_pair}; + NameVarBaseMap outs = {out_pair}; + framework::AttributeMap mul_attr_map; + mul_attr_map["use_mkldnn"] = false; + + // add GradAccumulatorPostHook + auto x_var_wrapper = x->SharedVar(); + x_var_wrapper->AddGradVarLeafBackwardHook( + std::unique_ptr( + new LambdaGradAccumulatorPostHook([=](VariableWrapper* grad) { + auto* grad_tensor = + grad->MutableVar()->GetMutable(); + for (int i = 0; i < grad_tensor->numel(); ++i) { + grad_tensor->mutable_data(place)[i] *= 2.0; + } + }))); + + // 2. forward + tracer.TraceOp("mul", ins, outs, mul_attr_map, place, true); + + ASSERT_EQ(x->GradVarBase()->GradOpNum(), 0UL); + ASSERT_EQ(y->GradVarBase()->GradOpNum(), 0UL); + ASSERT_EQ(out->GradVarBase()->GradOpNum(), 1UL); + + // 3. backward + BasicEngine engine; + engine.Init(out.get()); + engine.Execute(); + + framework::LoDTensor x_grad; + framework::TensorCopySync(x->GradVar().Get(), place, + &x_grad); + for (int i = 0; i < x_grad.numel(); ++i) { + ASSERT_EQ(x_grad.data()[i], 8.0); + } + + framework::LoDTensor y_grad; + framework::TensorCopySync(y->GradVar().Get(), place, + &y_grad); + + for (int i = 0; i < y_grad.numel(); ++i) { + ASSERT_EQ(y_grad.data()[i], 4.0); + } +} + +void GradVarLeafBackwardHookWithGradAccmulatedTest() { + // 1. 
prepare + Tracer tracer; + std::shared_ptr x(new VarBase(true, "x")); + std::shared_ptr y(new VarBase(true, "y")); + std::shared_ptr z(new VarBase(true, "z")); + std::shared_ptr out_xy(new VarBase(true, "out_xy")); + std::shared_ptr out_xz(new VarBase(true, "out_xz")); + std::shared_ptr out(new VarBase(true, "out")); + x->SetOverridedStopGradient(false); + y->SetOverridedStopGradient(false); + z->SetOverridedStopGradient(false); + + platform::CPUPlace place; + std::vector src_data(10, 2.0); + std::vector x_dims = {2, 5}; + std::vector y_dims = {5, 2}; + std::vector z_dims = {5, 2}; + + auto* x_tensor = x->MutableVar()->GetMutable(); + auto* y_tensor = y->MutableVar()->GetMutable(); + auto* z_tensor = z->MutableVar()->GetMutable(); + + x_tensor->Resize(framework::make_ddim(x_dims)); + auto* mutable_x = x_tensor->mutable_data(place); + memory::Copy(place, mutable_x, place, src_data.data(), + sizeof(float) * src_data.size()); + + y_tensor->Resize(framework::make_ddim(y_dims)); + auto* mutable_y = y_tensor->mutable_data(place); + memory::Copy(place, mutable_y, place, src_data.data(), + sizeof(float) * src_data.size()); + + z_tensor->Resize(framework::make_ddim(z_dims)); + auto* mutable_z = z_tensor->mutable_data(place); + memory::Copy(place, mutable_z, place, src_data.data(), + sizeof(float) * src_data.size()); + + // add GradAccumulatorPostHook + auto x_var_wrapper = x->SharedVar(); + x_var_wrapper->AddGradVarLeafBackwardHook( + std::unique_ptr( + new LambdaGradAccumulatorPostHook([=](VariableWrapper* grad) { + auto* grad_tensor = + grad->MutableVar()->GetMutable(); + for (int i = 0; i < grad_tensor->numel(); ++i) { + grad_tensor->mutable_data(place)[i] *= 2.0; + } + }))); + + // 2. forward + var_pair x_pair = var_pair("X", vb_vector(1, x)); + var_pair y_pair = var_pair("Y", vb_vector(1, y)); + var_pair out_xy_pair = var_pair("Out", vb_vector(1, out_xy)); + NameVarBaseMap ins = {x_pair, y_pair}; + NameVarBaseMap outs = {out_xy_pair}; + framework::AttributeMap mul_attr_map; + mul_attr_map["use_mkldnn"] = false; + tracer.TraceOp("mul", ins, outs, mul_attr_map, place, true); + + var_pair z_pair = var_pair("Y", vb_vector(1, z)); + var_pair out_xz_pair = var_pair("Out", vb_vector(1, out_xz)); + ins = {x_pair, z_pair}; + outs = {out_xz_pair}; + tracer.TraceOp("mul", ins, outs, mul_attr_map, place, true); + + var_pair xy_pair = var_pair("X", vb_vector(1, out_xy)); + var_pair xz_pair = var_pair("Y", vb_vector(1, out_xz)); + var_pair out_pair = var_pair("Out", vb_vector(1, out)); + ins = {xy_pair, xz_pair}; + outs = {out_pair}; + framework::AttributeMap add_attr_map; + tracer.TraceOp("elementwise_add", ins, outs, add_attr_map, place, true); + + ASSERT_EQ(x->GradVarBase()->GradOpNum(), 0UL); + ASSERT_EQ(y->GradVarBase()->GradOpNum(), 0UL); + ASSERT_EQ(z->GradVarBase()->GradOpNum(), 0UL); + ASSERT_EQ(out->GradVarBase()->GradOpNum(), 1UL); + + // 3. 
backward + BasicEngine engine; + engine.Init(out.get()); + engine.Execute(); + + framework::LoDTensor x_grad; + framework::TensorCopySync(x->GradVar().Get(), place, + &x_grad); + for (int i = 0; i < x_grad.numel(); ++i) { + ASSERT_EQ(x_grad.data()[i], 16.0); + } + + framework::LoDTensor y_grad; + framework::TensorCopySync(y->GradVar().Get(), place, + &y_grad); + + for (int i = 0; i < y_grad.numel(); ++i) { + ASSERT_EQ(y_grad.data()[i], 4.0); + } + + framework::LoDTensor z_grad; + framework::TensorCopySync(z->GradVar().Get(), place, + &z_grad); + + for (int i = 0; i < z_grad.numel(); ++i) { + ASSERT_EQ(z_grad.data()[i], 4.0); + } +} + +TEST(TestHooks, TestGradVarLeafBackwardHookWithGradAccmulated) { + GradVarLeafBackwardHookWithGradAccmulatedTest(); +} + +TEST(TestHooks, TestGradVarLeafBackwardHookWithSortedGradAccmulated) { + FLAGS_sort_sum_gradient = true; + GradVarLeafBackwardHookWithGradAccmulatedTest(); + FLAGS_sort_sum_gradient = false; +} + +} // namespace imperative +} // namespace paddle + +USE_OP(mul); +USE_OP(mul_grad); +USE_OP(elementwise_add); +USE_OP(elementwise_add_grad); diff --git a/paddle/fluid/imperative/variable_wrapper.h b/paddle/fluid/imperative/variable_wrapper.h index d730ddc12d1053..e9b1ccc860df0f 100644 --- a/paddle/fluid/imperative/variable_wrapper.h +++ b/paddle/fluid/imperative/variable_wrapper.h @@ -16,11 +16,16 @@ #include #include +#include + #include "paddle/fluid/framework/variable.h" +#include "paddle/fluid/imperative/hooks.h" namespace paddle { namespace imperative { +class InteriorVarHookPipeline; +class LeafVarHookPipeline; class VarBase; class GradOpNode; @@ -133,6 +138,42 @@ class VariableWrapper { } } + /* Hook related method: only can be call by GradVarBase */ + + bool HasInteriorHooks() const { return interior_hooks_ != nullptr; } + + bool HasLeafHooks() const { return leaf_hooks_ != nullptr; } + + void AddGradVarInteriorHook(std::unique_ptr&& hook) { + auto interior_hooks = GetGradVarInteriorHooksSafely(); + interior_hooks->add_hook(std::move(hook)); + } + + void AddGradVarLeafHook(std::unique_ptr&& hook) { + auto leaf_hooks = GetGradVarLeafHooksSafely(); + leaf_hooks->add_hook(std::move(hook)); + } + + void AddGradVarLeafBackwardHook( + std::unique_ptr&& hook) { + auto leaf_hooks = GetGradVarLeafHooksSafely(); + leaf_hooks->add_backward_hook(std::move(hook)); + } + + const std::shared_ptr& GetInteriorHooks() const { + return interior_hooks_; + } + + std::shared_ptr& GetInteriorHooks() { + return interior_hooks_; + } + + const std::shared_ptr& GetLeafHooks() const { + return leaf_hooks_; + } + + std::shared_ptr& GetLeafHooks() { return leaf_hooks_; } + private: void SetGradVar(const std::shared_ptr& var) { auto shared_var = grad_var_.lock(); @@ -159,6 +200,41 @@ class VariableWrapper { } } + /* Hook related private methods */ + std::shared_ptr GetGradVarSafely() const { + auto shared_grad_var = grad_var_.lock(); + PADDLE_ENFORCE_NOT_NULL( + shared_grad_var, + platform::errors::PermissionDenied( + "Cannot add gradient hook on Tensor without gradient.")); + return shared_grad_var; + } + + std::shared_ptr& GetGradVarInteriorHooksSafely() { + auto shared_grad_var = GetGradVarSafely(); + PADDLE_ENFORCE_EQ(HasGradNode(), true, + platform::errors::PermissionDenied( + "Only interior Tensor in backward can register " + "interior gradient hook.")); + if (shared_grad_var->interior_hooks_ == nullptr) { + shared_grad_var->interior_hooks_ = + std::make_shared(); + } + return shared_grad_var->interior_hooks_; + } + + std::shared_ptr& GetGradVarLeafHooksSafely() 
{ + auto shared_grad_var = GetGradVarSafely(); + PADDLE_ENFORCE_EQ( + HasGradNode(), false, + platform::errors::PermissionDenied( + "Only leaf Tensor in backward can register leaf gradient hook.")); + if (shared_grad_var->leaf_hooks_ == nullptr) { + shared_grad_var->leaf_hooks_ = std::make_shared(); + } + return shared_grad_var->leaf_hooks_; + } + private: framework::Variable var_; std::string name_; @@ -173,6 +249,12 @@ class VariableWrapper { std::weak_ptr grad_var_; std::weak_ptr grad_node_; + + // NOTE: only grad var can hold hooks now + // only interior var can hold interior hooks + std::shared_ptr interior_hooks_; + // only leaf var can hold leaf hooks + std::shared_ptr leaf_hooks_; }; } // namespace imperative From db2e6cee620113590ef6f29eb65c495b7bab2d19 Mon Sep 17 00:00:00 2001 From: Shibo Tao <62922815+T8T9@users.noreply.github.com> Date: Wed, 18 Nov 2020 14:05:10 +0800 Subject: [PATCH 47/56] add two paddle-2.0 apis: paddle.static.io.save_inference_model and paddle.static.io.load_inference_model (#28606) * add two apis: paddle.static.io.save_inference_model and paddle.static.io.load_inference_mode, which are campatible with paddle.fluid.io.save_inference_model and paddle.fluid.io.load_inference_model respectively. * add unittest for new save_inference_model and load_inference_model. test=develop * enhance doc. test=develop * add paddle.enable_static() to test_inference_model_io.py. test=develop --- python/paddle/fluid/io.py | 61 +++- .../tests/unittests/rnn/test_rnn_nets.py | 8 +- .../unittests/test_inference_model_io.py | 116 +++++- python/paddle/static/__init__.py | 4 +- python/paddle/static/io.py | 335 ++++++++++++++++++ 5 files changed, 507 insertions(+), 17 deletions(-) create mode 100644 python/paddle/static/io.py diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index bb55aeb70d1f2d..29a6dcb13551a7 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -43,6 +43,8 @@ from .dataloader import * from . import core from .. import compat as cpt +from paddle.utils import deprecated +from paddle.fluid.framework import static_only batch = paddle.batch @@ -82,7 +84,10 @@ def is_parameter(var): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + + paddle.enable_static() param = fluid.default_main_program().global_block().var('fc.w') res = fluid.io.is_parameter(param) """ @@ -103,7 +108,10 @@ def is_persistable(var): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + + paddle.enable_static() param = fluid.default_main_program().global_block().var('fc.b') res = fluid.io.is_persistable(param) """ @@ -137,7 +145,10 @@ def get_program_parameter(program): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + + paddle.enable_static() data = fluid.data(name="img", shape=[64, 784]) w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') b = fluid.layers.create_parameter(shape=[200], dtype='float32', name='fc_b') @@ -162,7 +173,10 @@ def get_program_persistable_vars(program): Examples: .. 
code-block:: python + import paddle import paddle.fluid as fluid + + paddle.enable_static() data = fluid.data(name="img", shape=[64, 784]) w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32', name='fc_w') b = fluid.layers.create_parameter(shape=[200], dtype='float32', name='fc_b') @@ -202,7 +216,7 @@ def _load_program_scope(main=None, startup=None, scope=None): yield -def _get_valid_program(main_program): +def _get_valid_program(main_program=None): if main_program is None: main_program = default_main_program() elif isinstance(main_program, CompiledProgram): @@ -268,8 +282,10 @@ def save_vars(executor, Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + paddle.enable_static() main_prog = fluid.Program() startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): @@ -417,8 +433,11 @@ def save_params(executor, dirname, main_program=None, filename=None): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + + paddle.enable_static() params_path = "./my_paddle_model" image = fluid.data(name='img', shape=[None, 28, 28], dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') @@ -465,7 +484,10 @@ def _save_distributed_persistables(executor, dirname, main_program): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + + paddle.enable_static() exe = fluid.Executor(fluid.CPUPlace()) param_path = "./my_paddle_model" t = distribute_transpiler.DistributeTranspiler() @@ -634,8 +656,10 @@ def save_persistables(executor, dirname, main_program=None, filename=None): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + paddle.enable_static() dir_path = "./my_paddle_model" file_name = "persistables" image = fluid.data(name='img', shape=[None, 28, 28], dtype='float32') @@ -711,8 +735,10 @@ def load_vars(executor, Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + paddle.enable_static() main_prog = fluid.Program() startup_prog = fluid.Program() with fluid.program_guard(main_prog, startup_prog): @@ -946,8 +972,10 @@ def load_params(executor, dirname, main_program=None, filename=None): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + paddle.enable_static() exe = fluid.Executor(fluid.CPUPlace()) param_path = "./my_paddle_model" prog = fluid.default_main_program() @@ -995,8 +1023,10 @@ def load_persistables(executor, dirname, main_program=None, filename=None): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + paddle.enable_static() exe = fluid.Executor(fluid.CPUPlace()) param_path = "./my_paddle_model" prog = fluid.default_main_program() @@ -1034,7 +1064,10 @@ def _load_distributed_persistables(executor, dirname, main_program=None): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + + paddle.enable_static() exe = fluid.Executor(fluid.CPUPlace()) param_path = "./my_paddle_model" t = distribute_transpiler.DistributeTranspiler() @@ -1160,7 +1193,8 @@ def append_fetch_ops(inference_program, attrs={'col': i}) -@dygraph_not_support +@static_only +@deprecated(since="2.0.0", update_to="paddle.static.save_inference_model") def save_inference_model(dirname, feeded_var_names, target_vars, @@ -1226,8 +1260,10 @@ def save_inference_model(dirname, Examples: .. 
code-block:: python + import paddle import paddle.fluid as fluid + paddle.enable_static() path = "./infer_model" # User defined network, here a softmax regession example @@ -1370,7 +1406,8 @@ def save_inference_model(dirname, return target_var_name_list -@dygraph_not_support +@static_only +@deprecated(since="2.0.0", update_to="paddle.static.load_inference_model") def load_inference_model(dirname, executor, model_filename=None, @@ -1422,9 +1459,11 @@ def load_inference_model(dirname, Examples: .. code-block:: python + import paddle import paddle.fluid as fluid import numpy as np + paddle.enable_static() # Build the model main_prog = fluid.Program() startup_prog = fluid.Program() @@ -1540,7 +1579,10 @@ def get_parameter_value(para, executor): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + + paddle.enable_static() exe = fluid.Executor(fluid.CPUPlace()) param = fluid.default_main_program().global_block().var('fc.w') p = fluid.io.get_parameter_value(param, exe) @@ -1578,7 +1620,10 @@ def get_parameter_value_by_name(name, executor, program=None): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + + paddle.enable_static() exe = fluid.Executor(fluid.CPUPlace()) p = fluid.io.get_parameter_value('fc.w', exe) """ @@ -1686,8 +1731,10 @@ def save(program, model_path): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + paddle.enable_static() prog = fluid.default_main_program() fluid.save( prog, "./temp") @@ -1753,8 +1800,10 @@ def load(program, model_path, executor=None, var_list=None): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + paddle.enable_static() prog = fluid.default_main_program() fluid.save( prog, "./temp") @@ -1914,7 +1963,10 @@ def load_program_state(model_path, var_list=None): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + + paddle.enable_static() x = fluid.data( name="x", shape=[10, 10], dtype='float32') y = fluid.layers.fc( x, 10) z = fluid.layers.fc( y, 10) @@ -2047,7 +2099,10 @@ def set_program_state(program, state_dict): Examples: .. 
code-block:: python + import paddle import paddle.fluid as fluid + + paddle.enable_static() x = fluid.data( name="x", shape=[10, 10], dtype='float32') y = fluid.layers.fc( x, 10) z = fluid.layers.fc( y, 10) diff --git a/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py b/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py index 87bdee8a91d21b..639605a64ed289 100644 --- a/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py +++ b/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py @@ -323,10 +323,7 @@ def forward(self, input): exe = paddle.static.Executor(place) [inference_program, feed_target_names, fetch_targets] = paddle.static.load_inference_model( - dirname="./inference", - executor=exe, - model_filename="%s_infer.pdmodel" % mode, - params_filename="%s_infer.pdiparams" % mode) + "./inference/%s_infer" % mode, exe) results = exe.run(inference_program, feed={feed_target_names[0]: x.numpy()}, fetch_list=fetch_targets) @@ -345,3 +342,6 @@ def load_tests(loader, tests, pattern): for test_class in [TestSimpleRNN, TestLSTM, TestGRU]: suite.addTest(test_class(time_major, direction, device)) return suite + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_inference_model_io.py b/python/paddle/fluid/tests/unittests/test_inference_model_io.py index aa408aedf66e16..a82bc3f0f62028 100644 --- a/python/paddle/fluid/tests/unittests/test_inference_model_io.py +++ b/python/paddle/fluid/tests/unittests/test_inference_model_io.py @@ -23,6 +23,7 @@ import paddle.fluid as fluid import warnings +import paddle import paddle.fluid.executor as executor import paddle.fluid.layers as layers import paddle.fluid.optimizer as optimizer @@ -30,15 +31,17 @@ from paddle.fluid.framework import Program, program_guard from paddle.fluid.io import save_inference_model, load_inference_model, save_persistables from paddle.fluid.transpiler import memory_optimize +paddle.enable_static() -class TestBook(unittest.TestCase): - class InferModel(object): - def __init__(self, list): - self.program = list[0] - self.feed_var_names = list[1] - self.fetch_vars = list[2] +class InferModel(object): + def __init__(self, list): + self.program = list[0] + self.feed_var_names = list[1] + self.fetch_vars = list[2] + +class TestBook(unittest.TestCase): def test_fit_line_inference_model(self): MODEL_DIR = "./tmp/inference_model" UNI_MODEL_DIR = "./tmp/inference_model1" @@ -88,10 +91,10 @@ def test_fit_line_inference_model(self): six.moves.reload_module(executor) # reload to build a new scope - model_0 = self.InferModel(load_inference_model(MODEL_DIR, exe)) + model_0 = InferModel(load_inference_model(MODEL_DIR, exe)) with open(os.path.join(UNI_MODEL_DIR, 'model'), "rb") as f: model_str = f.read() - model_1 = self.InferModel( + model_1 = InferModel( load_inference_model(None, exe, model_str, params_str)) for model in [model_0, model_1]: @@ -192,6 +195,103 @@ def test_save_inference_model(self): [MODEL_DIR, ["x", "y"], [avg_cost], [], cp_prog]) +class TestSaveInferenceModelNew(unittest.TestCase): + def test_save_and_load_inference_model(self): + MODEL_DIR = "./tmp/inference_model5" + init_program = fluid.default_startup_program() + program = fluid.default_main_program() + + # fake program without feed/fetch + with program_guard(program, init_program): + x = layers.data(name='x', shape=[2], dtype='float32') + y = layers.data(name='y', shape=[1], dtype='float32') + + y_predict = layers.fc(input=x, size=1, act=None) + + cost = layers.square_error_cost(input=y_predict, label=y) + 
avg_cost = layers.mean(cost) + + sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) + sgd_optimizer.minimize(avg_cost, init_program) + + place = core.CPUPlace() + exe = executor.Executor(place) + exe.run(init_program, feed={}, fetch_list=[]) + + tensor_x = np.array([[1, 1], [1, 2], [5, 2]]).astype("float32") + tensor_y = np.array([[-2], [-3], [-7]]).astype("float32") + for i in six.moves.xrange(3): + exe.run(program, + feed={'x': tensor_x, + 'y': tensor_y}, + fetch_list=[avg_cost]) + + self.assertRaises(ValueError, paddle.static.save_inference_model, + None, ['x', 'y'], [avg_cost], exe) + self.assertRaises(ValueError, paddle.static.save_inference_model, + MODEL_DIR + "/", [x, y], [avg_cost], exe) + self.assertRaises(ValueError, paddle.static.save_inference_model, + MODEL_DIR, ['x', 'y'], [avg_cost], exe) + self.assertRaises(ValueError, paddle.static.save_inference_model, + MODEL_DIR, 'x', [avg_cost], exe) + self.assertRaises(ValueError, paddle.static.save_inference_model, + MODEL_DIR, [x, y], ['avg_cost'], exe) + self.assertRaises(ValueError, paddle.static.save_inference_model, + MODEL_DIR, [x, y], 'avg_cost', exe) + + model_path = MODEL_DIR + "_isdir.pdmodel" + os.makedirs(model_path) + self.assertRaises(ValueError, paddle.static.save_inference_model, + MODEL_DIR + "_isdir", [x, y], [avg_cost], exe) + os.rmdir(model_path) + + params_path = MODEL_DIR + "_isdir.pdmodel" + os.makedirs(params_path) + self.assertRaises(ValueError, paddle.static.save_inference_model, + MODEL_DIR + "_isdir", [x, y], [avg_cost], exe) + os.rmdir(params_path) + + paddle.static.io.save_inference_model(MODEL_DIR, [x, y], [avg_cost], exe) + + self.assertTrue(os.path.exists(MODEL_DIR + ".pdmodel")) + self.assertTrue(os.path.exists(MODEL_DIR + ".pdiparams")) + + expected = exe.run(program, + feed={'x': tensor_x, + 'y': tensor_y}, + fetch_list=[avg_cost])[0] + + six.moves.reload_module(executor) # reload to build a new scope + + self.assertRaises(ValueError, paddle.static.load_inference_model, + None, exe) + self.assertRaises(ValueError, paddle.static.load_inference_model, + MODEL_DIR + "/", exe) + self.assertRaises(ValueError, paddle.static.load_inference_model, + [MODEL_DIR], exe) + self.assertRaises(ValueError, paddle.static.load_inference_model, + MODEL_DIR, exe, pserver_endpoints=None) + self.assertRaises(ValueError, paddle.static.load_inference_model, + MODEL_DIR, exe, unsupported_param=None) + self.assertRaises((TypeError, ValueError), paddle.static.load_inference_model, + None, exe, model_filename="illegal", params_filename="illegal") + + model = InferModel(paddle.static.io.load_inference_model(MODEL_DIR, exe)) + + outs = exe.run(model.program, + feed={ + model.feed_var_names[0]: tensor_x, + model.feed_var_names[1]: tensor_y + }, + fetch_list=model.fetch_vars) + actual = outs[0] + + self.assertEqual(model.feed_var_names, ["x", "y"]) + self.assertEqual(len(model.fetch_vars), 1) + self.assertEqual(expected, actual) + + + class TestLoadInferenceModelError(unittest.TestCase): def test_load_model_not_exist(self): place = core.CPUPlace() diff --git a/python/paddle/static/__init__.py b/python/paddle/static/__init__.py index a6ce4379824f07..bca045852fd06e 100644 --- a/python/paddle/static/__init__.py +++ b/python/paddle/static/__init__.py @@ -23,6 +23,8 @@ ] from . 
import nn +from .io import save_inference_model +from .io import load_inference_model from ..fluid import Scope #DEFINE_ALIAS from .input import data #DEFINE_ALIAS from .input import InputSpec #DEFINE_ALIAS @@ -48,8 +50,6 @@ from ..fluid.param_attr import WeightNormParamAttr #DEFINE_ALIAS from ..fluid.io import save #DEFINE_ALIAS from ..fluid.io import load #DEFINE_ALIAS -from ..fluid.io import save_inference_model #DEFINE_ALIAS -from ..fluid.io import load_inference_model #DEFINE_ALIAS from ..fluid.io import load_program_state #DEFINE_ALIAS from ..fluid.io import set_program_state #DEFINE_ALIAS from ..fluid.layers import create_parameter #DEFINE_ALIAS diff --git a/python/paddle/static/io.py b/python/paddle/static/io.py new file mode 100644 index 00000000000000..b30dfa8429fd97 --- /dev/null +++ b/python/paddle/static/io.py @@ -0,0 +1,335 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + + +import errno +import inspect +import logging +import os +import six + +import paddle +from paddle.fluid import core, Variable, CompiledProgram, program_guard, default_main_program, Program +from paddle.fluid.framework import static_only +from paddle.fluid import layers + +from paddle.fluid.io import _get_valid_program, save_vars, _save_distributed_persistables +from paddle.fluid.io import prepend_feed_ops, append_fetch_ops, save_persistables +from paddle.fluid.io import load_persistables, _endpoints_replacement +from paddle.fluid.log_helper import get_logger + + +__all__ = [ + 'save_inference_model', + 'load_inference_model', +] + +_logger = get_logger( + __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') + + +def _check_args(caller, args, supported_args=[], deprecated_args=[]): + for arg in args: + if arg in deprecated_args: + raise ValueError("argument '{}' in function '{}' is deprecated, only {} are supported.".format(arg, caller, supported_args)) + elif arg not in supported_args: + raise ValueError( + "function '{}' doesn't support argument '{}',\n only {} are supported.".format(caller, arg, supported_args)) + + +@static_only +def save_inference_model(path_prefix, feed_vars, fetch_vars, executor): + """ + :api_attr: Static Graph + + Save current model and its parameters to given path. i.e. + Given path_prefix = "/path/to/modelname", after invoking + save_inference_model(path_prefix, feed_vars, fetch_vars, executor), + you will find two files named modelname.pdmodel and modelname.pdiparams + under "/path/to", which represent your model and parameters respectively. + + Args: + path_prefix(str): Directory path to save model + model name without suffix. + feed_vars(Variable | list[Variable]): Variables needed by inference. + fetch_vars(Variable | list[Variable]): Variables returned by inference. + executor(Executor): The executor that saves the inference model. You can refer + to :ref:`api_guide_executor_en` for more details. 
+ Returns: + None + + Raises: + ValueError: If `feed_vars` is not a Variable or a list of Variable, an exception is thrown. + ValueError: If `fetch_vars` is not a Variable or a list of Variable, an exception is thrown. + + Examples: + .. code-block:: python + + import paddle + import paddle.fluid as fluid + + paddle.enable_static() + + path_prefix = "./infer_model" + + # User defined network, here a softmax regession example + image = fluid.data(name='img', shape=[None, 28, 28], dtype='float32') + label = fluid.data(name='label', shape=[None, 1], dtype='int64') + feeder = fluid.DataFeeder(feed_list=[image, label], place=fluid.CPUPlace()) + predict = fluid.layers.fc(input=image, size=10, act='softmax') + + loss = fluid.layers.cross_entropy(input=predict, label=label) + avg_loss = fluid.layers.mean(loss) + + exe = fluid.Executor(fluid.CPUPlace()) + exe.run(fluid.default_startup_program()) + + # Feed data and train process + + # Save inference model. Note we don't save label and loss in this example + paddle.static.io.save_inference_model(path_prefix, [image], [predict], exe) + + # In this example, the save_inference_mode inference will prune the default + # main program according to the network's input node (img) and output node(predict). + # The pruned inference program is going to be saved in file "./infer_model.pdmodel" + # and parameters are going to be saved in file "./infer_model.pdiparams". + + """ + # check path_prefix, set model_path and params_path + if not isinstance(path_prefix, six.string_types): + raise ValueError("'path_prefix' should be a string.") + if path_prefix.endswith("/"): + raise ValueError("'path_prefix' should not be a directory") + path_prefix = os.path.normpath(path_prefix) + path_prefix = os.path.abspath(path_prefix) + try: + # mkdir may conflict if pserver and trainer are running on the same machine + dirname = os.path.dirname(path_prefix) + os.makedirs(dirname) + except OSError as e: + if e.errno != errno.EEXIST: + raise + model_path = path_prefix + ".pdmodel" + params_path = path_prefix + ".pdiparams" + if os.path.isdir(model_path): + raise ValueError("'{}' is an existing directory.".format(model_path)) + if os.path.isdir(params_path): + raise ValueError("'{}' is an existing directory.".format(params_path)) + + # verify feed_vars + if not isinstance(feed_vars, list): + feed_vars = [feed_vars] + if not feed_vars or not all([isinstance(var, Variable) for var in feed_vars]): + raise ValueError("'feed_vars' should be a Variable or a list of Variable.") + + # verify fetch_vars + if not isinstance(fetch_vars, list): + fetch_vars = [fetch_vars] + if not fetch_vars or not all([isinstance(var, Variable) for var in fetch_vars]): + raise ValueError("'fetch_vars' should be a Variable or a list of Variable.") + + main_program = _get_valid_program() + # remind users to set auc_states to 0 if auc op were found. + for op in main_program.global_block().ops: + # clear device of Op + device_attr_name = core.op_proto_and_checker_maker.kOpDeviceAttrName() + op._set_attr(device_attr_name, "") + if op.type == 'auc': + warnings.warn("Be sure that you have set auc states to 0 before saving inference model.") + break + + # fix the bug that the activation op's output as target will be pruned. + # will affect the inference performance. + # TODO(Superjomn) add an IR pass to remove 1-scale op. 
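+    # Each fetch var is aliased through a scale(x, 1.0) op named
+    # "save_infer_model/scale_{i}", so that the pruning pass below keeps these
+    # outputs as targets even when a fetched var is the direct output of an
+    # activation op.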
+ with program_guard(main_program): + uniq_fetch_vars = [] + for i, var in enumerate(fetch_vars): + var = layers.scale(var, 1., name="save_infer_model/scale_{}".format(i)) + uniq_fetch_vars.append(var) + fetch_vars = uniq_fetch_vars + + # save model + origin_program = main_program.clone() + main_program = main_program.clone() + global_block = main_program.global_block() + remove_op_idx = [] + for i, op in enumerate(global_block.ops): + op.desc.set_is_target(False) + if op.type == "feed" or op.type == "fetch": + remove_op_idx.append(i) + for idx in remove_op_idx[::-1]: + global_block._remove_op(idx) + main_program.desc.flush() + + feed_var_names = [var.name for var in feed_vars] + main_program = main_program._prune_with_input( + feeded_var_names=feed_var_names, targets=fetch_vars) + main_program = main_program._inference_optimize(prune_read_op=True) + fetch_var_names = [var.name for var in fetch_vars] + prepend_feed_ops(main_program, feed_var_names) + append_fetch_ops(main_program, fetch_var_names) + main_program.desc._set_version() + paddle.fluid.core.save_op_version_info(main_program.desc) + with open(model_path, "wb") as f: + f.write(main_program.desc.serialize_to_string()) + main_program._copy_dist_param_info_from(origin_program) + + # save params + dirname = os.path.dirname(params_path) + basename = os.path.basename(params_path) + save_persistables(executor, dirname, main_program, basename) + + +@static_only +def load_inference_model(path_prefix, executor, **configs): + """ + :api_attr: Static Graph + + Load inference model from a given path. By this API, you can get the model + structure(Inference Program) and model parameters. + + Args: + path_prefix(str | None): One of the following: + - Directory path to save model + model name without suffix. + - Set to None when reading the model from memory. + executor(Executor): The executor to run for loading inference model. + See :ref:`api_guide_executor_en` for more details about it. + + Returns: + list: The return of this API is a list with three elements: + (program, feed_target_names, fetch_targets). The `program` is a + ``Program`` (refer to :ref:`api_guide_Program_en`), which is used for inference. + The `feed_target_names` is a list of ``str``, which contains names of variables + that need to feed data in the inference program. The `fetch_targets` is a list of + ``Variable`` (refer to :ref:`api_guide_Program_en`). It contains variables from which + we can get inference results. + + Raises: + ValueError: If `path_prefix.pdmodel` or `path_prefix.pdiparams` doesn't exist. + + Examples: + .. 
code-block:: python + + import paddle + import paddle.fluid as fluid + import numpy as np + + paddle.enable_static() + + # Build the model + startup_prog = fluid.default_startup_program() + main_prog = fluid.default_main_program() + with fluid.program_guard(main_prog, startup_prog): + image = fluid.layers.data(name="img", shape=[64, 784], append_batch_size=False) + w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32') + b = fluid.layers.create_parameter(shape=[200], dtype='float32') + hidden_w = fluid.layers.matmul(x=image, y=w) + hidden_b = fluid.layers.elementwise_add(hidden_w, b) + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(startup_prog) + + # Save the inference model + path_prefix = "./infer_model" + paddle.static.io.save_inference_model(path_prefix, [image], [hidden_b], exe) + + [inference_program, feed_target_names, fetch_targets] = ( + paddle.static.io.load_inference_model(path_prefix, exe)) + tensor_img = np.array(np.random.random((1, 64, 784)), dtype=np.float32) + results = exe.run(inference_program, + feed={feed_target_names[0]: tensor_img}, + fetch_list=fetch_targets) + + # In this example, the inference program was saved in file + # "./infer_model.pdmodel" and parameters were saved in file + # " ./infer_model.pdiparams". + # By the inference program, feed_target_names and + # fetch_targets, we can use an executor to run the inference + # program to get the inference result. + """ + # check configs + supported_args = ('model_filename', 'params_filename') + deprecated_args = ('pserver_endpoints',) + caller = inspect.currentframe().f_code.co_name + _check_args(caller, configs, supported_args, deprecated_args) + + # load from memory + if path_prefix is None: + _logger.warning("Load inference model from memory is deprecated.") + model_filename = configs.get('model_filename', None) + params_filename = configs.get('params_filename', None) + if params_filename is None: + raise ValueError( + "params_filename cannot be None when path_prefix is None." + ) + load_dirname = path_prefix + program_desc_str = model_filename + params_filename = params_filename + # load from file + else: + # check and norm path_prefix + if not isinstance(path_prefix, six.string_types): + raise ValueError("'path_prefix' should be a string.") + if path_prefix.endswith("/"): + raise ValueError("'path_prefix' should not be a directory") + path_prefix = os.path.normpath(path_prefix) + path_prefix = os.path.abspath(path_prefix) + + # set model_path and params_path in new way, + # path_prefix represents a file path without suffix in this case. + if not configs: + model_path = path_prefix + ".pdmodel" + params_path = path_prefix + ".pdiparams" + # set model_path and params_path in old way for compatible, + # path_prefix represents a directory path. + else: + model_filename = configs.get('model_filename', None) + params_filename = configs.get('params_filename', None) + # set model_path + if model_filename is None: + model_path = os.path.join(path_prefix, "__model__") + else: + model_path = os.path.join(path_prefix, model_filename + ".pdmodel") + if not os.path.exists(model_path): + model_path = os.path.join(path_prefix, model_filename) + # set params_path + if params_filename is None: + params_path = os.path.join(path_prefix, "") + else: + params_path = os.path.join(path_prefix, params_filename + ".pdiparams") + if not os.path.exists(params_path): + params_path = os.path.join(path_prefix, params_filename) + _logger.warning("The old way to load inference model is deprecated." 
+ " model path: {}, params path: {}".format(model_path, params_path)) + with open(model_path, "rb") as f: + program_desc_str = f.read() + load_dirname = os.path.dirname(params_path) + params_filename = os.path.basename(params_path) + + program = Program.parse_from_string(program_desc_str) + if not core._is_program_version_supported(program._version()): + raise ValueError("Unsupported program version: %d\n" % + program._version()) + # Binary data also need versioning. + load_persistables(executor, load_dirname, program, params_filename) + + feed_target_names = program.desc.get_feed_target_names() + fetch_target_names = program.desc.get_fetch_target_names() + fetch_targets = [ + program.global_block().var(name) for name in fetch_target_names + ] + + return [program, feed_target_names, fetch_targets] + From 532e4bbf2a62e98fadce4eda6b9f07f235b40399 Mon Sep 17 00:00:00 2001 From: LielinJiang <50691816+LielinJiang@users.noreply.github.com> Date: Wed, 18 Nov 2020 15:04:14 +0800 Subject: [PATCH 48/56] fix docs (#28683) --- python/paddle/nn/functional/conv.py | 3 +-- python/paddle/nn/functional/loss.py | 2 -- python/paddle/nn/layer/loss.py | 2 -- python/paddle/utils/download.py | 2 +- python/paddle/vision/datasets/folder.py | 2 +- python/paddle/vision/transforms/functional.py | 10 +++------- 6 files changed, 6 insertions(+), 15 deletions(-) diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index 6df1ce368c1b0b..1b0441b0a8cca4 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -211,7 +211,7 @@ def conv1d(x, [[0, 3, 4], [2, 9, 7], [5, 6, 8]]]).astype(np.float32) - paddle.disable_static() + x_var = paddle.to_tensor(x) w_var = paddle.to_tensor(w) y_var = F.conv1d(x_var, w_var) @@ -673,7 +673,6 @@ def conv1d_transpose(x, import paddle.nn.functional as F import numpy as np - paddle.disable_static() # shape: (1, 2, 4) x=np.array([[[4, 0, 9, 7], [8, 0, 9, 2,]]]).astype(np.float32) diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index ae04cdcc931eca..fa0789b762041f 100644 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -895,8 +895,6 @@ def kl_div(input, label, reduction='mean', name=None): import numpy as np import paddle.nn.functional as F - paddle.disable_static() - shape = (5, 20) input = np.random.uniform(-10, 10, shape).astype('float32') target = np.random.uniform(-10, 10, shape).astype('float32') diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py index fdeed0ae49dfd6..b16dcae7b63292 100644 --- a/python/paddle/nn/layer/loss.py +++ b/python/paddle/nn/layer/loss.py @@ -773,8 +773,6 @@ class KLDivLoss(fluid.dygraph.Layer): import numpy as np import paddle.nn as nn - paddle.disable_static() - shape = (5, 20) x = np.random.uniform(-10, 10, shape).astype('float32') target = np.random.uniform(-10, 10, shape).astype('float32') diff --git a/python/paddle/utils/download.py b/python/paddle/utils/download.py index d8c0a2fc8c2845..7ba208574353fa 100644 --- a/python/paddle/utils/download.py +++ b/python/paddle/utils/download.py @@ -123,7 +123,7 @@ def get_weights_path_from_url(url, md5sum=None): Examples: .. 
code-block:: python - from paddle.incubate.hapi.download import get_weights_path_from_url + from paddle.utils.download import get_weights_path_from_url resnet18_pretrained_weight_url = 'https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams' local_weight_path = get_weights_path_from_url(resnet18_pretrained_weight_url) diff --git a/python/paddle/vision/datasets/folder.py b/python/paddle/vision/datasets/folder.py index d005bc4f19ebb8..8b17da9c9236bb 100644 --- a/python/paddle/vision/datasets/folder.py +++ b/python/paddle/vision/datasets/folder.py @@ -306,7 +306,7 @@ def __getitem__(self, index): index (int): Index Returns: - tuple: (sample, target) where target is class_index of the target class. + sample of specific index. """ path = self.samples[index] sample = self.loader(path) diff --git a/python/paddle/vision/transforms/functional.py b/python/paddle/vision/transforms/functional.py index 7391ae322e3598..67dff85f57014b 100644 --- a/python/paddle/vision/transforms/functional.py +++ b/python/paddle/vision/transforms/functional.py @@ -39,7 +39,7 @@ __all__ = [ 'to_tensor', 'hflip', 'vflip', 'resize', 'pad', 'rotate', 'to_grayscale', 'crop', 'center_crop', 'adjust_brightness', 'adjust_contrast', 'adjust_hue', - 'to_grayscale', 'normalize' + 'normalize' ] @@ -283,13 +283,11 @@ def center_crop(img, output_size): return F_cv2.center_crop(img, output_size) -def hflip(img, backend='pil'): +def hflip(img): """Horizontally flips the given Image or np.array. Args: img (PIL.Image|np.array): Image to be flipped. - backend (str, optional): The image proccess backend type. Options are `pil`, - `cv2`. Default: 'pil'. Returns: PIL.Image or np.array: Horizontall flipped image. @@ -576,8 +574,6 @@ def to_grayscale(img, num_output_channels=1): Args: img (PIL.Image|np.array): Image to be converted to grayscale. - backend (str, optional): The image proccess backend type. Options are `pil`, - `cv2`. Default: 'pil'. Returns: PIL.Image or np.array: Grayscale version of the image. @@ -624,7 +620,7 @@ def normalize(img, mean, std, data_format='CHW', to_rgb=False): this option will be igored. Default: False. Returns: - Tensor: Normalized mage. Data format is same as input img. + np.ndarray or Tensor: Normalized mage. Data format is same as input img. Examples: .. code-block:: python From 01a14e1be209b3300be1f36a27152cfd429533a4 Mon Sep 17 00:00:00 2001 From: LielinJiang <50691816+LielinJiang@users.noreply.github.com> Date: Wed, 18 Nov 2020 15:26:50 +0800 Subject: [PATCH 49/56] Add with_pool args for vgg (#28684) * add arg for vgg --- python/paddle/vision/models/resnet.py | 2 +- python/paddle/vision/models/vgg.py | 42 +++++++++++++++++---------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/python/paddle/vision/models/resnet.py b/python/paddle/vision/models/resnet.py index 8cf797f1719e99..1f44e0bc6dfeb1 100644 --- a/python/paddle/vision/models/resnet.py +++ b/python/paddle/vision/models/resnet.py @@ -245,7 +245,7 @@ def forward(self, x): x = self.layer3(x) x = self.layer4(x) - if self.with_pool > 0: + if self.with_pool: x = self.avgpool(x) if self.num_classes > 0: diff --git a/python/paddle/vision/models/vgg.py b/python/paddle/vision/models/vgg.py index 00f6cccbdfe9f1..f6b4c75e84f013 100644 --- a/python/paddle/vision/models/vgg.py +++ b/python/paddle/vision/models/vgg.py @@ -36,9 +36,10 @@ class VGG(nn.Layer): `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ Args: - features (nn.Layer): vgg features create by function make_layers. 
- num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer + features (nn.Layer): Vgg features create by function make_layers. + num_classes (int): Output dim of last fc layer. If num_classes <=0, last fc layer will not be defined. Default: 1000. + with_pool (bool): Use pool before the last three fc layer or not. Default: True. Examples: .. code-block:: python @@ -54,24 +55,35 @@ class VGG(nn.Layer): """ - def __init__(self, features, num_classes=1000): + def __init__(self, features, num_classes=1000, with_pool=True): super(VGG, self).__init__() self.features = features - self.avgpool = nn.AdaptiveAvgPool2D((7, 7)) - self.classifier = nn.Sequential( - nn.Linear(512 * 7 * 7, 4096), - nn.ReLU(), - nn.Dropout(), - nn.Linear(4096, 4096), - nn.ReLU(), - nn.Dropout(), - nn.Linear(4096, num_classes), ) + self.num_classes = num_classes + self.with_pool = with_pool + + if with_pool: + self.avgpool = nn.AdaptiveAvgPool2D((7, 7)) + + if num_classes > 0: + self.classifier = nn.Sequential( + nn.Linear(512 * 7 * 7, 4096), + nn.ReLU(), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(), + nn.Dropout(), + nn.Linear(4096, num_classes), ) def forward(self, x): x = self.features(x) - x = self.avgpool(x) - x = paddle.flatten(x, 1) - x = self.classifier(x) + + if self.with_pool: + x = self.avgpool(x) + + if self.num_classes > 0: + x = paddle.flatten(x, 1) + x = self.classifier(x) + return x From 628fb29c1b5c3c791f39e6bd906b28fbce61a6dd Mon Sep 17 00:00:00 2001 From: YUNSHEN XIE <1084314248@qq.com> Date: Wed, 18 Nov 2020 16:44:10 +0800 Subject: [PATCH 50/56] modified the sys adress of quickly disable file (#28660) --- tools/get_quick_disable_lt.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/get_quick_disable_lt.py b/tools/get_quick_disable_lt.py index 9b41f5e78085e5..1e3d7178922728 100644 --- a/tools/get_quick_disable_lt.py +++ b/tools/get_quick_disable_lt.py @@ -20,7 +20,11 @@ def download_file(): """Get disabled unit tests""" ssl._create_default_https_context = ssl._create_unverified_context - url = "https://sys-p0.bj.bcebos.com/prec/{}".format('disable_ut') + sysstr=sys.platform + if sysstr == 'win32': + url = "https://sys-p0.bj.bcebos.com/prec/{}".format('disable_ut_win') + else: + url = "https://sys-p0.bj.bcebos.com/prec/{}".format('disable_ut') f = requests.get(url) data = f.text status_code = f.status_code From e880c90c5a091e4a331b09b43987142b02f61ac1 Mon Sep 17 00:00:00 2001 From: YUNSHEN XIE <1084314248@qq.com> Date: Wed, 18 Nov 2020 16:44:31 +0800 Subject: [PATCH 51/56] fix error when setting ut timeout value (#28696) --- python/paddle/fluid/tests/unittests/CMakeLists.txt | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 6e78f7d90149e2..e527ba613ba554 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -621,11 +621,14 @@ endif() if (WITH_DISTRIBUTE) set_tests_properties(test_communicator_half_async PROPERTIES TIMEOUT 120) endif() + if (WITH_DISTRIBUTE AND NOT APPLE) + if(WITH_GPU) + set_tests_properties(test_c_comm_init_op PROPERTIES TIMEOUT 120) + set_tests_properties(test_fleet_checkpoint PROPERTIES TIMEOUT 120) + set_tests_properties(test_launch PROPERTIES TIMEOUT 120) + endif() set_tests_properties(test_fleet_launch PROPERTIES TIMEOUT 120) - set_tests_properties(test_c_comm_init_op PROPERTIES TIMEOUT 120) - 
set_tests_properties(test_fleet_checkpoint PROPERTIES TIMEOUT 120) - set_tests_properties(test_launch PROPERTIES TIMEOUT 120) endif() # setting timeout value as 15S From 8c75b2554aeb591d5696083258ec3b89edcf5b8b Mon Sep 17 00:00:00 2001 From: xiaoting <31891223+tink2123@users.noreply.github.com> Date: Wed, 18 Nov 2020 18:59:33 +0800 Subject: [PATCH 52/56] Support Tensor for attr_scale and attr_size (#28677) * update interpolate, test=develop * fix coverage, test=develop --- .../unittests/test_bilinear_interp_v2_op.py | 63 +++++++++++++++++++ python/paddle/nn/functional/common.py | 18 ++++-- 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py b/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py index 58312979c523bd..2ff32b2f95bb44 100755 --- a/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py @@ -623,5 +623,68 @@ def test_case(self): self.assertTrue(np.allclose(out.numpy(), expect_res)) +class TestBilinearInterpOpAPI_dy2(unittest.TestCase): + def test_case(self): + import paddle + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + with fluid.dygraph.guard(place): + input_data = np.random.random((2, 3, 6, 6)).astype("float32") + size_np = np.array([12, 12]).astype("int64") + input_x = paddle.to_tensor(input_data) + size = paddle.to_tensor(size_np) + expect_res = bilinear_interp_np( + input_data, out_h=12, out_w=12, align_corners=False) + out = interpolate( + x=input_x, size=size, mode="bilinear", align_corners=False) + self.assertTrue(np.allclose(out.numpy(), expect_res)) + + +class TestBilinearInterpOpAPI_dy3(unittest.TestCase): + def test_case(self): + import paddle + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + with fluid.dygraph.guard(place): + input_data = np.random.random((2, 3, 6, 6)).astype("float32") + size_1 = np.array([12]).astype("int64") + input_x = paddle.to_tensor(input_data) + size = paddle.to_tensor(size_1) + expect_res = bilinear_interp_np( + input_data, out_h=12, out_w=12, align_corners=False) + out = interpolate( + x=input_x, + size=[size, size], + mode="bilinear", + align_corners=False) + self.assertTrue(np.allclose(out.numpy(), expect_res)) + + +class TestBilinearInterpOpAPI_dy4(unittest.TestCase): + def test_case(self): + import paddle + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + with fluid.dygraph.guard(place): + input_data = np.random.random((2, 3, 6, 6)).astype("float32") + scale_np = np.array([2, 2]).astype("int64") + input_x = paddle.to_tensor(input_data) + scale = paddle.to_tensor(scale_np) + expect_res = bilinear_interp_np( + input_data, out_h=12, out_w=12, align_corners=False) + out = interpolate( + x=input_x, + scale_factor=scale, + mode="bilinear", + align_corners=False) + self.assertTrue(np.allclose(out.numpy(), expect_res)) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 5c5e3f37916da1..e4f145cf4234fc 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -366,10 +366,18 @@ def _is_list_or_turple_(data): if out_shape is not None and scale is not None: raise ValueError("Only one of size or scale_factor should be defined.") if out_shape is not None: - if isinstance(out_shape, Variable): + + if 
isinstance(out_shape, Variable) and not in_dygraph_mode(): out_shape.stop_gradient = True inputs['OutSize'] = out_shape + else: + if in_dygraph_mode(): + if isinstance(out_shape, Variable): + out_shape = list(out_shape.numpy()) + for i, dim in enumerate(out_shape): + if isinstance(dim, Variable): + out_shape[i] = dim.numpy()[0] if not (_is_list_or_turple_(out_shape)): raise TypeError("size should be a list or tuple or Variable.") # Validate the shape @@ -435,6 +443,8 @@ def _is_list_or_turple_(data): attrs['out_w'] = out_shape[2] else: + if in_dygraph_mode() and isinstance(scale, Variable): + scale = list(scale.numpy()) if isinstance(scale, Variable): scale.stop_gradient = True inputs["Scale"] = scale @@ -1240,7 +1250,7 @@ def pad(x, pad, mode='constant', value=0, data_format="NCHW", name=None): y = F.pad(x, [2, 3], value=1, mode='constant', data_format="NCL") print(y) # [[[1. 1. 1. 2. 3. 1. 1. 1.]]] - + # example 2 x_shape = (1, 1, 2, 3) x = paddle.arange(np.prod(x_shape), dtype="float32").reshape(x_shape) + 1 @@ -1364,7 +1374,7 @@ def cosine_similarity(x1, x2, axis=1, eps=1e-8): Examples: .. code-block:: text - + Case 0: x1 = [[0.8024077 0.9927354 0.27238318 0.8344984 ] [0.48949873 0.5797396 0.65444374 0.66510963] @@ -1380,7 +1390,7 @@ def cosine_similarity(x1, x2, axis=1, eps=1e-8): Code Examples: .. code-block:: python - + import paddle import paddle.nn as nn import numpy as np From 5a9f6889c19eeea06733632b676d233691b2cff1 Mon Sep 17 00:00:00 2001 From: JZ-LIANG <38102074+JZ-LIANG@users.noreply.github.com> Date: Wed, 18 Nov 2020 19:38:02 +0800 Subject: [PATCH 53/56] [Sharding] add new features (#28568) * add lars to fleet meta optimizer * add lamb to proto * add lamb to fleet meta optimizer * fixed syntax bug * fixed syntax bug * fixed syntax error in lamb, add config setter of lamb in distributed_strategy * trigger unitest to rerun * add new unitest func for lamb * revise unitest for lars and lamb * revise dgc meta unitest * revise lars document in distribute_strategy * revise lars lamb document in distributed_strategy.py * revise lars lamb document in distributed_strategy.py * add weight decay exclude logic to lars * restore optimzier.py * restore optimizer.py as develop except lars * add epsilon and exclude fn to distributed_sttrategy * add lars epsilon * revise unitest for fleet lars and lamb * revise lars lamb unitest for CI coverage * revise lars argument api * revise lars argument api * revise lars argument api * revise api doc of lars * fix op role * add sharding save and add_sync_comm_for_test function * add comm_analyse to utlis * revise sharding_utils * add sharding saving unittest * revise sharding utils for unittest --- .../fleet/meta_optimizers/sharding/utils.py | 139 +++++++++++++++++- .../tests/unittests/dist_sharding_save.py | 90 ++++++++++++ .../unittests/test_dist_sharding_save.py | 79 ++++++++++ .../test_fleet_sharding_meta_optimizer.py | 22 +++ 4 files changed, 324 insertions(+), 6 deletions(-) mode change 100644 => 100755 python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py create mode 100755 python/paddle/fluid/tests/unittests/dist_sharding_save.py create mode 100755 python/paddle/fluid/tests/unittests/test_dist_sharding_save.py mode change 100644 => 100755 python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py old mode 100644 new mode 100755 index 51435ebb9e5e95..2aa4bdd68c9907 --- 
a/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py @@ -11,13 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import paddle from paddle.fluid import core from functools import reduce from paddle.distributed.fleet.meta_optimizers.common import is_loss_grad_op from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY import re +import os def check_broadcast(block): @@ -126,11 +127,25 @@ def check_allreduce_sum(block): return +def get_valid_op_role(block, insert_idx): + """ + return OpRole.Forward or OpRole.Backward + """ + op_role = block.ops[insert_idx].attr('op_role') + if (insert_idx >= len(block.ops)) or ( + op_role in [int(OpRole.Backward), int(OpRole.Optimize)]): + return OpRole.Backward + if op_role in [int(OpRole.Forward), int(OpRole.Loss)]: + return OpRole.Forward + + return get_valid_op_role(block, insert_idx + 1) + + def insert_sync_calc_op(block, insert_idx, calc_dep_vars): """ _insert_sync_calc_op """ - op_role = block.ops[insert_idx].attr('op_role') + op_role = get_valid_op_role(block, insert_idx) block._insert_op_without_sync( insert_idx, type='c_sync_calc_stream', @@ -144,7 +159,7 @@ def insert_sync_comm_ops(block, insert_idx, nrings, comm_dep_vars): """ _insert_sync_comm_ops """ - op_role = block.ops[insert_idx].attr('op_role') + op_role = get_valid_op_role(block, insert_idx) for i in range(nrings): block._insert_op_without_sync( insert_idx, @@ -160,7 +175,7 @@ def insert_fill_constant_ops(block, insert_idx, fill_constant_vars): """ _add_fill_constant_ops """ - op_role = block.ops[insert_idx].attr('op_role') + op_role = get_valid_op_role(block, insert_idx) for broadcast_name in fill_constant_vars: broadcast_var = block.var(broadcast_name) block._insert_op_without_sync( @@ -180,7 +195,7 @@ def insert_cast_ops(block, insert_idx, cast_ops): """ _add_cast_ops """ - op_role = block.ops[insert_idx].attr('op_role') + op_role = get_valid_op_role(block, insert_idx) for fp16_name, fp32_name in cast_ops.items(): block._insert_op_without_sync( insert_idx, @@ -217,7 +232,7 @@ def insert_broadcast_ops(block, insert_idx, nrings, broadcast2root): _add_broadcast_ops """ ring_id = -1 - op_role = block.ops[insert_idx].attr('op_role') + op_role = get_valid_op_role(block, insert_idx) for broadcast_name, root_device in broadcast2root: ring_id = (ring_id + 1) % nrings block._insert_op_without_sync( @@ -272,3 +287,115 @@ def insert_scale_loss_grad_ops(block, scale=1.0): outputs={'Out': loss_grad_var}, attrs={'scale': scale, OP_ROLE_KEY: OpRole.Backward}) + + +def comm_analyse(main_program): + """ + Analyse the parameter size that need to be broadcast/allreduce during sharding training + """ + reduce_vars = {} + broadcast_vars = {} + block = main_program.global_block() + for op in block.ops: + if op.type == "c_broadcast": + var_name = op.desc.input_arg_names()[0] + broadcast_vars[var_name] = get_var_size(block.var(var_name)) + elif op.type == "c_allreduce_sum": + var_name = op.desc.input_arg_names()[0] + reduce_vars[var_name] = get_var_size(block.var(var_name)) + + varsize_count = {} + gap = 1 + + for k, v in broadcast_vars.items(): + print("broadcast: {}: {} KB".format(k, v)) + if (int(v / gap) in varsize_count): + varsize_count[int(v / gap)] += 1 + else: + varsize_count[int(v / gap)] = 1 + + for k, v in reduce_vars.items(): + 
print("allreduce: {}: {} KB".format(k, v)) + if (int(v / gap) in varsize_count): + varsize_count[int(v / gap)] += 1 + else: + varsize_count[int(v / gap)] = 1 + + with open("nccl_size.txt", 'w') as f: + sorted_varsize = sorted(varsize_count.items(), key=lambda x: x[0]) + for varsize, count in sorted_varsize: + print("NCCL size {}~{} KB: {}".format(varsize, varsize + 1, count)) + f.write("NCCL size {}~{} KB: {}\n".format(varsize, varsize + 1, + count)) + + +def add_sync_comm_for_test(program, dist_strategy): + """ + When clone a test prog by clone from the sharding main prog, + part of the sync_comm op maybe be pruned by mistake, this function + add the sync_comm op for the test prog. + + """ + #NOTE (liangjianzhong): only support one comm stream by now, use more than one + # comm streams will cause error. should be revise in future. + + block = program.global_block() + not_sync_vars = set([]) + for op in block.ops: + if op.type in ["c_broadcast", "c_allreduce"]: + for input_name in op.desc.input_arg_names(): + not_sync_vars.add(input_name) + if op.type == "c_sync_comm_stream": + for input_name in op.desc.input_arg_names(): + not_sync_vars.remove(input_name) + if not_sync_vars: + for nccl_id in range(dist_strategy.nccl_comm_num): + block.append_op( + type='c_sync_comm_stream', + inputs={'X': list(not_sync_vars)}, + outputs={'Out': list(not_sync_vars)}, + attrs={ + 'ring_id': nccl_id, + 'op_role': core.op_proto_and_checker_maker.OpRole.Forward + }) + return + + +def sharding_save_persistables(exe, dirname, main_program, filename=None): + """ + When use sharding, part of persistable vars are unique and are partitioned in different ranks, + and part of persistable vars are duplicated and exist in all the ranks with different values. + This function handles the model saving for sharding training. + """ + + def is_opt_vars(var): + # NOTE(liangjianzhong): The checks should be updated when add new compatible optimizer + # now only Momentum and adam are compatible with sharding + checks = [ + "_moment1_0", "_moment2_0", "_beta1_pow_acc_0", "_beta2_pow_acc_0", + "_velocity_0" + ] + for check in checks: + if var.name.endswith(check): + return True + return False + + def is_trainable(var): + return isinstance(var, + paddle.fluid.framework.Parameter) and var.trainable + + def sharding_predicate(var): + return is_trainable(var) or is_opt_vars(var) + + if int(os.environ.get('PADDLE_TRAINER_ID', 0)) == 0: + paddle.fluid.io.save_persistables( + exe, dirname, main_program=main_program, filename=None) + else: + paddle.fluid.io.save_vars( + exe, + dirname, + main_program=main_program, + predicate=sharding_predicate, + filename=None) + + return diff --git a/python/paddle/fluid/tests/unittests/dist_sharding_save.py b/python/paddle/fluid/tests/unittests/dist_sharding_save.py new file mode 100755 index 00000000000000..05578c9e4a57f8 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/dist_sharding_save.py @@ -0,0 +1,90 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +from test_dist_base import TestDistRunnerBase, runtime_main +from dist_mnist import cnn_model +# from paddle.fluid.incubate.fleet.collective import fleet +import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker +from paddle.distributed.fleet.meta_optimizers.sharding.utils import sharding_save_persistables + +import os +import six +import sys +import pickle + +# Fix seed for test +fluid.default_startup_program().random_seed = 1 +fluid.default_main_program().random_seed = 1 + +def runtime_main(): + import paddle.distributed.fleet as fleet + + # model definition + train_prog = paddle.fluid.Program() + startup_prog = paddle.fluid.Program() + role = role_maker.PaddleCloudRoleMaker(is_collective=True) + fleet.init(role) + with fluid.program_guard(train_prog, startup_prog): + with fluid.unique_name.guard(): + input_x = paddle.fluid.layers.data( + name="x", shape=[32], dtype='float32') + input_y = paddle.fluid.layers.data( + name="y", shape=[1], dtype='int64') + + fc_1 = paddle.fluid.layers.fc(input=input_x, + size=64, + act='tanh') + fc_2 = paddle.fluid.layers.fc(input=fc_1, size=256, act='tanh') + prediction = paddle.fluid.layers.fc(input=[fc_2], + size=2, + act='softmax') + cost = paddle.fluid.layers.cross_entropy( + input=prediction, label=input_y) + avg_cost = paddle.fluid.layers.mean(x=cost) + + strategy = paddle.distributed.fleet.DistributedStrategy() + strategy.sharding = True + strategy.sharding_configs = {"fuse_broadcast_MB": 0.2} + + optimizer = paddle.fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9) + optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) + optimizer.minimize(avg_cost) + + # execution + device_id = int(os.getenv("FLAGS_selected_gpus", "0")) + place = fluid.CUDAPlace(device_id) + exe = fluid.Executor(place) + exe.run(startup_prog) + dirname="./ut_sharding_save_model" + sharding_save_persistables(exe, dirname, main_program=train_prog, filename=None) + + out_losses=[] + if six.PY2: + print(pickle.dumps(out_losses)) + else: + sys.stdout.buffer.write(pickle.dumps(out_losses)) + +if __name__ == "__main__": + #NOTE(liangjianzhong): dist unittest should be imlpement using runtime_main in test_dist_base.py + # but the runtime_main in test_dist_base.py use the fleet, DistributedStrategy from + # paddle.fluid.incubate.fleet.collective which is not support by sharding (paddle.distributed.fleet). + # this should be update in future. + # runtime_main(TestDistMnist2x2) + runtime_main() + \ No newline at end of file diff --git a/python/paddle/fluid/tests/unittests/test_dist_sharding_save.py b/python/paddle/fluid/tests/unittests/test_dist_sharding_save.py new file mode 100755 index 00000000000000..b4620d7a0c5a8f --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_sharding_save.py @@ -0,0 +1,79 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
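Since sharding_save_persistables delegates to fluid.io.save_persistables on rank 0 (and fluid.io.save_vars on the other ranks), the checkpoint written by the runner above should be readable with the plain fluid loader. A hedged sketch, with exe and train_prog as placeholders and the directory name taken from the runner:

    import paddle.fluid as fluid

    # Restore the variables written by dist_sharding_save.py; rank 0 wrote the
    # complete persistable set, so loading against the full program resolves
    # every variable name (illustrative only, not part of this patch).
    fluid.io.load_persistables(exe, "./ut_sharding_save_model", main_program=train_prog)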
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import shutil +import os +import unittest +from test_dist_base import TestDistBase +import paddle + +paddle.enable_static() + + +class TestDistMnistFleetSave(TestDistBase): + def _setup_config(self): + self._sync_mode = True + self._use_reduce = False + self._use_reader_alloc = False + self._nccl2_mode = True + self._gpu_fleet_api = True + self._sharding_save = True + self._enforce_place = "GPU" + + + def _rm_temp_files(self, dirname): + shutil.rmtree(dirname) + + def _test_saved_files(self, dirname): + + sharding_save_files = sorted(os.listdir(dirname)) + + check_files = ['fc_0.b_0', 'fc_0.b_0_velocity_0', 'fc_0.w_0', 'fc_0.w_0_velocity_0', 'fc_1.b_0', + 'fc_1.b_0_velocity_0', 'fc_1.w_0', 'fc_1.w_0_velocity_0', 'fc_2.b_0', + 'fc_2.b_0_velocity_0', 'fc_2.w_0', 'fc_2.w_0_velocity_0', 'learning_rate_0'] + + if sharding_save_files != check_files: + self._rm_temp_files(dirname) + raise ValueError("Test Failed.") + self._rm_temp_files(dirname) + + return True + + def check_with_place(self, + model_file, + delta=1e-3, + check_error_log=True, + need_envs={}, + log_name=""): + required_envs = self._get_required_envs(check_error_log, need_envs) + + tr0_losses, tr1_losses = self._run_cluster_nccl2( + model_file, + required_envs, + False, + check_error_log, + log_name=log_name) + + dirname = './ut_sharding_save_model' + self._test_saved_files(dirname) + + def test_dist_train(self): + import paddle.fluid as fluid + if fluid.core.is_compiled_with_cuda(): + self.check_with_place("dist_sharding_save.py", delta=1e-5) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py old mode 100644 new mode 100755 index 6a9f3e3ba7bf35..063ff726b10e4b --- a/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py @@ -17,8 +17,11 @@ import os import paddle.distributed.fleet as fleet import paddle.distributed.fleet.base.role_maker as role_maker +import paddle.fluid.core as core +import paddle.fluid as fluid from fleet_meta_optimizer_base import TestFleetMetaOptimizer +from paddle.distributed.fleet.meta_optimizers.sharding.utils import add_sync_comm_for_test, sharding_save_persistables, comm_analyse paddle.enable_static() @@ -270,6 +273,25 @@ def test_sharding_gradient_clip(self): 'momentum' ]) + def test_sharding_clone_for_test(self): + train_prog, startup_prog = paddle.fluid.Program(), paddle.fluid.Program( + ) + avg_cost, strategy = self.net(train_prog, startup_prog) + self.set_strategy(strategy, 'sharding') + self.optimizer(avg_cost, strategy, train_prog, startup_prog) + comm_analyse(train_prog) + test_prog = train_prog.clone(for_test=True) + add_sync_comm_for_test(test_prog, strategy) + ops = [op.type for op in test_prog.global_block().ops] + + self.assertEqual(ops, ['fill_constant', 'fill_constant', 'fill_constant', 'c_sync_calc_stream', 'c_broadcast', + 'c_broadcast', 'c_broadcast', 'c_broadcast', 'c_broadcast', 'c_broadcast', 'c_sync_comm_stream', 'mul', + 'elementwise_add', 'tanh', 'mul', 'elementwise_add', 'tanh', 'mul', 'elementwise_add', 'softmax', + 'cross_entropy2', 'mean']) + + + + if __name__ == "__main__": unittest.main() From 20b12765982d7ba152b0bf90bfd6cdb71bd6cd55 Mon Sep 17 00:00:00 2001 From: wawltor 
Date: Wed, 18 Nov 2020 20:58:50 +0800 Subject: [PATCH 54/56] faster the compare ops dygraph model speed faster the compare ops dygraph model speed --- .../fluid/tests/unittests/test_compare_op.py | 9 ++ python/paddle/tensor/logic.py | 94 +++++++++++++++++-- 2 files changed, 97 insertions(+), 6 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_compare_op.py b/python/paddle/fluid/tests/unittests/test_compare_op.py index 25ae65aa7c968b..63a43432b4e555 100644 --- a/python/paddle/fluid/tests/unittests/test_compare_op.py +++ b/python/paddle/fluid/tests/unittests/test_compare_op.py @@ -93,6 +93,15 @@ def test_api(self): fetch_list=[out]) self.assertEqual((res == self.real_result).all(), True) + def test_dynamic_api(self): + paddle.disable_static() + x = paddle.to_tensor(self.input_x) + y = paddle.to_tensor(self.input_y) + op = eval("paddle.%s" % (self.op_type)) + out = op(x, y) + self.assertEqual((out.numpy() == self.real_result).all(), True) + paddle.enable_static() + def test_broadcast_api_1(self): paddle.enable_static() with program_guard(Program(), Program()): diff --git a/python/paddle/tensor/logic.py b/python/paddle/tensor/logic.py index da08270d742e54..839ecaa1fbaecd 100644 --- a/python/paddle/tensor/logic.py +++ b/python/paddle/tensor/logic.py @@ -216,7 +216,20 @@ def equal(x, y, name=None): result1 = paddle.equal(x, y) print(result1.numpy()) # result1 = [True False False] """ - out = fluid.layers.equal(x, y, name=name, cond=None) + if in_dygraph_mode(): + return core.ops.equal(x, y) + + check_variable_and_dtype(x, "x", ["float32", "float64", "int32", "int64"], + "equal") + check_variable_and_dtype(y, "y", ["float32", "float64", "int32", "int64"], + "equal") + helper = LayerHelper("equal", **locals()) + out = helper.create_variable_for_type_inference(dtype='bool') + out.stop_gradient = True + + helper.append_op( + type='equal', inputs={'X': [x], + 'Y': [y]}, outputs={'Out': [out]}) return out @@ -247,7 +260,22 @@ def greater_equal(x, y, name=None): result1 = paddle.greater_equal(x, y) print(result1.numpy()) # result1 = [True False True] """ - out = fluid.layers.greater_equal(x, y, name=name, cond=None) + if in_dygraph_mode(): + return core.ops.greater_equal(x, y) + + check_variable_and_dtype(x, "x", ["float32", "float64", "int32", "int64"], + "greater_equal") + check_variable_and_dtype(y, "y", ["float32", "float64", "int32", "int64"], + "greater_equal") + helper = LayerHelper("greater_equal", **locals()) + out = helper.create_variable_for_type_inference(dtype='bool') + out.stop_gradient = True + + helper.append_op( + type='greater_equal', + inputs={'X': [x], + 'Y': [y]}, + outputs={'Out': [out]}) return out @@ -278,7 +306,22 @@ def greater_than(x, y, name=None): result1 = paddle.greater_than(x, y) print(result1.numpy()) # result1 = [False False True] """ - out = fluid.layers.greater_than(x, y, name=name, cond=None) + if in_dygraph_mode(): + return core.ops.greater_than(x, y) + + check_variable_and_dtype(x, "x", ["float32", "float64", "int32", "int64"], + "greater_than") + check_variable_and_dtype(y, "y", ["float32", "float64", "int32", "int64"], + "greater_than") + helper = LayerHelper("greater_than", **locals()) + out = helper.create_variable_for_type_inference(dtype='bool') + out.stop_gradient = True + + helper.append_op( + type='greater_than', + inputs={'X': [x], + 'Y': [y]}, + outputs={'Out': [out]}) return out @@ -310,7 +353,20 @@ def less_equal(x, y, name=None): result1 = paddle.less_equal(x, y) print(result1.numpy()) # result1 = [True True False] """ - out = 
fluid.layers.less_equal(x, y, name=name, cond=None) + if in_dygraph_mode(): + return core.ops.less_equal(x, y) + + check_variable_and_dtype(x, "x", ["float32", "float64", "int32", "int64"], + "less_equal") + check_variable_and_dtype(y, "y", ["float32", "float64", "int32", "int64"], + "less_equal") + helper = LayerHelper("less_equal", **locals()) + out = helper.create_variable_for_type_inference(dtype='bool') + out.stop_gradient = True + + helper.append_op( + type='less_equal', inputs={'X': [x], + 'Y': [y]}, outputs={'Out': [out]}) return out @@ -342,7 +398,20 @@ def less_than(x, y, name=None): result1 = paddle.less_than(x, y) print(result1.numpy()) # result1 = [False True False] """ - out = fluid.layers.less_than(x, y, force_cpu=False, name=name, cond=None) + if in_dygraph_mode(): + return core.ops.less_than(x, y) + + check_variable_and_dtype(x, "x", ["float32", "float64", "int32", "int64"], + "less_than") + check_variable_and_dtype(y, "y", ["float32", "float64", "int32", "int64"], + "less_than") + helper = LayerHelper("less_than", **locals()) + out = helper.create_variable_for_type_inference(dtype='bool') + out.stop_gradient = True + + helper.append_op( + type='less_than', inputs={'X': [x], + 'Y': [y]}, outputs={'Out': [out]}) return out @@ -375,7 +444,20 @@ def not_equal(x, y, name=None): result1 = paddle.not_equal(x, y) print(result1.numpy()) # result1 = [False True True] """ - out = fluid.layers.not_equal(x, y, name=name, cond=None) + if in_dygraph_mode(): + return core.ops.not_equal(x, y) + + check_variable_and_dtype(x, "x", ["float32", "float64", "int32", "int64"], + "not_equal") + check_variable_and_dtype(y, "y", ["float32", "float64", "int32", "int64"], + "not_equal") + helper = LayerHelper("not_equal", **locals()) + out = helper.create_variable_for_type_inference(dtype='bool') + out.stop_gradient = True + + helper.append_op( + type='not_equal', inputs={'X': [x], + 'Y': [y]}, outputs={'Out': [out]}) return out From 19226ba8d682f1b41dfaef97761784415f1b8e0c Mon Sep 17 00:00:00 2001 From: Yiqun Liu Date: Wed, 18 Nov 2020 21:09:46 +0800 Subject: [PATCH 55/56] Simplify the timeline, to remove the prefix of each event. 
(#28723) --- tools/timeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/timeline.py b/tools/timeline.py index 44c1c09b803dfc..119018380b551c 100644 --- a/tools/timeline.py +++ b/tools/timeline.py @@ -58,7 +58,7 @@ def _create_event(self, ph, category, name, pid, tid, timestamp): event = {} event['ph'] = ph event['cat'] = category - event['name'] = name + event['name'] = name.replace("ParallelExecutor::Run/", "") event['pid'] = pid event['tid'] = tid event['ts'] = timestamp From 3d09929b1f28b978a5f34dc6139546c4d7def323 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Wed, 18 Nov 2020 22:05:41 +0800 Subject: [PATCH 56/56] Add check for non-dispensable input (#28666) * Add check for non-dispensable input * fix typo --- paddle/fluid/pybind/op_function.h | 16 ++++++++++++++-- paddle/fluid/pybind/op_function_generator.cc | 7 ++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/pybind/op_function.h b/paddle/fluid/pybind/op_function.h index 70b321f658cd2c..1e20ac958b9bbb 100644 --- a/paddle/fluid/pybind/op_function.h +++ b/paddle/fluid/pybind/op_function.h @@ -36,9 +36,15 @@ namespace pybind { static inline std::shared_ptr CastPyHandleToVarBase( const std::string& op_type, const std::string& arg_name, int arg_idx, - const py::handle& handle) { + const py::handle& handle, bool dispensable = false) { PyObject* py_obj = handle.ptr(); // get underlying PyObject if (!py_obj || py_obj == Py_None) { + if (!dispensable) { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument '%s' (position %d) must be Tensor, but got " + "%s", + op_type, arg_name, arg_idx, Py_TYPE(py_obj)->tp_name)); + } return nullptr; } try { @@ -54,9 +60,15 @@ static inline std::shared_ptr CastPyHandleToVarBase( static inline std::vector> CastPyHandleToVarBaseList(const std::string& op_type, const std::string& arg_name, int arg_idx, - const py::handle& handle) { + const py::handle& handle, bool dispensable = false) { PyObject* py_obj = handle.ptr(); // get underlying PyObject if (!py_obj || py_obj == Py_None) { + if (!dispensable) { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s(): argument '%s' (position %d) must be Tensor, but got " + "%s", + op_type, arg_name, arg_idx, Py_TYPE(py_obj)->tp_name)); + } return {}; } std::vector> result; diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc index 10914cf0ab7ba2..0f5ce841559462 100644 --- a/paddle/fluid/pybind/op_function_generator.cc +++ b/paddle/fluid/pybind/op_function_generator.cc @@ -166,10 +166,10 @@ const char* OUT_VAR_TYPE = R"(std::shared_ptr)"; const char* OUT_VAR_LIST_TYPE = R"(std::vector>)"; const char* CAST_VAR_TEMPLATE = R"( - auto %s = CastPyHandleToVarBase("%s", "%s", %d, %s);)"; + auto %s = CastPyHandleToVarBase("%s", "%s", %d, %s, %s);)"; const char* CAST_VAR_LIST_TEMPLATE = R"( - auto %s = CastPyHandleToVarBaseList("%s", "%s", %d, %s);)"; + auto %s = CastPyHandleToVarBaseList("%s", "%s", %d, %s, %s);)"; const char* ARG_TEMPLATE = R"(const %s& %s)"; @@ -263,9 +263,10 @@ GenerateOpFunctions(const std::string& module_name) { input_args_num++; const auto in_cast_type = input.duplicable() ? CAST_VAR_LIST_TEMPLATE : CAST_VAR_TEMPLATE; + auto dispensable = input.dispensable() ? "true" : "false"; ins_cast_str += paddle::string::Sprintf(in_cast_type, in_name, op_type, in_name, - arg_idx++, TempName(in_name)); + arg_idx++, TempName(in_name), dispensable); if (input.dispensable()) { const auto in_template = input.duplicable()