diff --git a/README.md b/README.md index 9ac7464971c..e93aa1d9ccb 100644 --- a/README.md +++ b/README.md @@ -179,7 +179,7 @@ Via the easy-to-use, efficient, flexible and scalable implementation, our vision ## Installation -We strongly recommend our users to install PaddleSpeech in **Linux** with *python>=3.7*. +We strongly recommend our users to install PaddleSpeech in **Linux** with *python>=3.7* and *paddlepaddle>=2.3.1*. Up to now, **Linux** supports CLI for the all our tasks, **Mac OSX** and **Windows** only supports PaddleSpeech CLI for Audio Classification, Speech-to-Text and Text-to-Speech. To install `PaddleSpeech`, please see [installation](./docs/source/install.md). diff --git a/docs/source/install.md b/docs/source/install.md index 83b64619826..6a9ff3bc83c 100644 --- a/docs/source/install.md +++ b/docs/source/install.md @@ -117,9 +117,9 @@ conda install -y -c gcc_linux-64=8.4.0 gxx_linux-64=8.4.0 ``` (Hip: Do not use the last script if you want to install by **Hard** way): ### Install PaddlePaddle -You can choose the `PaddlePaddle` version based on your system. For example, for CUDA 10.2, CuDNN7.5 install paddlepaddle-gpu 2.2.0: +You can choose the `PaddlePaddle` version based on your system. For example, for CUDA 10.2, CuDNN7.5 install paddlepaddle-gpu 2.3.1: ```bash -python3 -m pip install paddlepaddle-gpu==2.2.0 -i https://mirror.baidu.com/pypi/simple +python3 -m pip install paddlepaddle-gpu==2.3.1 -i https://mirror.baidu.com/pypi/simple ``` ### Install PaddleSpeech You can install `paddlespeech` by the following command,then you can use the `ready-made` examples in `paddlespeech` : @@ -180,9 +180,9 @@ Some users may fail to install `kaldiio` due to the default download source, you ```bash pip install pytest-runner -i https://pypi.tuna.tsinghua.edu.cn/simple ``` -Make sure you have GPU and the paddlepaddle version is right. For example, for CUDA 10.2, CuDNN7.5 install paddle 2.2.0: +Make sure you have GPU and the paddlepaddle version is right. For example, for CUDA 10.2, CuDNN7.5 install paddle 2.3.1: ```bash -python3 -m pip install paddlepaddle-gpu==2.2.0 -i https://mirror.baidu.com/pypi/simple +python3 -m pip install paddlepaddle-gpu==2.3.1 -i https://mirror.baidu.com/pypi/simple ``` ### Install PaddleSpeech in Developing Mode ```bash diff --git a/docs/source/install_cn.md b/docs/source/install_cn.md index 75f4174e06e..9f49ebad637 100644 --- a/docs/source/install_cn.md +++ b/docs/source/install_cn.md @@ -111,9 +111,9 @@ conda install -y -c gcc_linux-64=8.4.0 gxx_linux-64=8.4.0 ``` (提示: 如果你想使用**困难**方式完成安装,请不要使用最后一条命令) ### 安装 PaddlePaddle -你可以根据系统配置选择 PaddlePaddle 版本,例如系统使用 CUDA 10.2, CuDNN7.5 ,你可以安装 paddlepaddle-gpu 2.2.0: +你可以根据系统配置选择 PaddlePaddle 版本,例如系统使用 CUDA 10.2, CuDNN7.5 ,你可以安装 paddlepaddle-gpu 2.3.1: ```bash -python3 -m pip install paddlepaddle-gpu==2.2.0 -i https://mirror.baidu.com/pypi/simple +python3 -m pip install paddlepaddle-gpu==2.3.1 -i https://mirror.baidu.com/pypi/simple ``` ### 安装 PaddleSpeech 最后安装 `paddlespeech`,这样你就可以使用 `paddlespeech` 中已有的 examples: @@ -168,9 +168,9 @@ conda activate tools/venv conda install -y -c conda-forge sox libsndfile swig bzip2 libflac bc ``` ### 安装 PaddlePaddle -请确认你系统是否有 GPU,并且使用了正确版本的 paddlepaddle。例如系统使用 CUDA 10.2, CuDNN7.5 ,你可以安装 paddlepaddle-gpu 2.2.0: +请确认你系统是否有 GPU,并且使用了正确版本的 paddlepaddle。例如系统使用 CUDA 10.2, CuDNN7.5 ,你可以安装 paddlepaddle-gpu 2.3.1: ```bash -python3 -m pip install paddlepaddle-gpu==2.2.0 -i https://mirror.baidu.com/pypi/simple +python3 -m pip install paddlepaddle-gpu==2.3.1 -i https://mirror.baidu.com/pypi/simple ``` ### 用开发者模式安装 PaddleSpeech 部分用户系统由于默认源的问题,安装中会出现 kaldiio 安转出错的问题,建议首先安装 pytest-runner: diff --git a/paddlespeech/s2t/__init__.py b/paddlespeech/s2t/__init__.py index 2da68435c27..540d58c7373 100644 --- a/paddlespeech/s2t/__init__.py +++ b/paddlespeech/s2t/__init__.py @@ -18,7 +18,6 @@ import paddle from paddle import nn -from paddle.fluid import core from paddle.nn import functional as F from paddlespeech.s2t.utils.log import Log @@ -39,46 +38,6 @@ paddle.uint16 = 'uint16' paddle.cdouble = 'complex128' - -def convert_dtype_to_string(tensor_dtype): - """ - Convert the data type in numpy to the data type in Paddle - Args: - tensor_dtype(core.VarDesc.VarType): the data type in numpy. - Returns: - core.VarDesc.VarType: the data type in Paddle. - """ - dtype = tensor_dtype - if dtype == core.VarDesc.VarType.FP32: - return paddle.float32 - elif dtype == core.VarDesc.VarType.FP64: - return paddle.float64 - elif dtype == core.VarDesc.VarType.FP16: - return paddle.float16 - elif dtype == core.VarDesc.VarType.INT32: - return paddle.int32 - elif dtype == core.VarDesc.VarType.INT16: - return paddle.int16 - elif dtype == core.VarDesc.VarType.INT64: - return paddle.int64 - elif dtype == core.VarDesc.VarType.BOOL: - return paddle.bool - elif dtype == core.VarDesc.VarType.BF16: - # since there is still no support for bfloat16 in NumPy, - # uint16 is used for casting bfloat16 - return paddle.uint16 - elif dtype == core.VarDesc.VarType.UINT8: - return paddle.uint8 - elif dtype == core.VarDesc.VarType.INT8: - return paddle.int8 - elif dtype == core.VarDesc.VarType.COMPLEX64: - return paddle.complex64 - elif dtype == core.VarDesc.VarType.COMPLEX128: - return paddle.complex128 - else: - raise ValueError("Not supported tensor dtype %s" % dtype) - - if not hasattr(paddle, 'softmax'): logger.debug("register user softmax to paddle, remove this when fixed!") setattr(paddle, 'softmax', paddle.nn.functional.softmax) @@ -155,28 +114,6 @@ def new_full(x: paddle.Tensor, paddle.Tensor.new_full = new_full paddle.static.Variable.new_full = new_full - -def eq(xs: paddle.Tensor, ys: Union[paddle.Tensor, float]) -> paddle.Tensor: - if convert_dtype_to_string(xs.dtype) == paddle.bool: - xs = xs.astype(paddle.int) - return xs.equal( - paddle.to_tensor( - ys, dtype=convert_dtype_to_string(xs.dtype), place=xs.place)) - - -if not hasattr(paddle.Tensor, 'eq'): - logger.debug( - "override eq of paddle.Tensor if exists or register, remove this when fixed!" - ) - paddle.Tensor.eq = eq - paddle.static.Variable.eq = eq - -if not hasattr(paddle, 'eq'): - logger.debug( - "override eq of paddle if exists or register, remove this when fixed!") - paddle.eq = eq - - def contiguous(xs: paddle.Tensor) -> paddle.Tensor: return xs diff --git a/paddlespeech/s2t/models/u2/u2.py b/paddlespeech/s2t/models/u2/u2.py index 3af3536000f..c7750184866 100644 --- a/paddlespeech/s2t/models/u2/u2.py +++ b/paddlespeech/s2t/models/u2/u2.py @@ -318,7 +318,7 @@ def recognize( dim=1) # (B*N, i+1) # 2.6 Update end flag - end_flag = paddle.eq(hyps[:, -1], self.eos).view(-1, 1) + end_flag = paddle.equal(hyps[:, -1], self.eos).view(-1, 1) # 3. Select best of best scores = scores.view(batch_size, beam_size) diff --git a/paddlespeech/s2t/modules/align.py b/paddlespeech/s2t/modules/align.py index ad71ee02166..cacda246148 100644 --- a/paddlespeech/s2t/modules/align.py +++ b/paddlespeech/s2t/modules/align.py @@ -13,8 +13,7 @@ # limitations under the License. import paddle from paddle import nn - -from paddlespeech.s2t.modules.initializer import KaimingUniform +import math """ To align the initializer between paddle and torch, the API below are set defalut initializer with priority higger than global initializer. @@ -82,10 +81,10 @@ def __init__(self, name=None): if weight_attr is None: if global_init_type == "kaiming_uniform": - weight_attr = paddle.ParamAttr(initializer=KaimingUniform()) + weight_attr = paddle.ParamAttr(initializer=nn.initializer.KaimingUniform(fan_in=None, negative_slope=math.sqrt(5), nonlinearity='leaky_relu')) if bias_attr is None: if global_init_type == "kaiming_uniform": - bias_attr = paddle.ParamAttr(initializer=KaimingUniform()) + bias_attr = paddle.ParamAttr(initializer=nn.initializer.KaimingUniform(fan_in=None, negative_slope=math.sqrt(5), nonlinearity='leaky_relu')) super(Linear, self).__init__(in_features, out_features, weight_attr, bias_attr, name) @@ -105,10 +104,10 @@ def __init__(self, data_format='NCL'): if weight_attr is None: if global_init_type == "kaiming_uniform": - weight_attr = paddle.ParamAttr(initializer=KaimingUniform()) + weight_attr = paddle.ParamAttr(initializer=nn.initializer.KaimingUniform(fan_in=None, negative_slope=math.sqrt(5), nonlinearity='leaky_relu')) if bias_attr is None: if global_init_type == "kaiming_uniform": - bias_attr = paddle.ParamAttr(initializer=KaimingUniform()) + bias_attr = paddle.ParamAttr(initializer=nn.initializer.KaimingUniform(fan_in=None, negative_slope=math.sqrt(5), nonlinearity='leaky_relu')) super(Conv1D, self).__init__( in_channels, out_channels, kernel_size, stride, padding, dilation, groups, padding_mode, weight_attr, bias_attr, data_format) @@ -129,10 +128,10 @@ def __init__(self, data_format='NCHW'): if weight_attr is None: if global_init_type == "kaiming_uniform": - weight_attr = paddle.ParamAttr(initializer=KaimingUniform()) + weight_attr = paddle.ParamAttr(initializer=nn.initializer.KaimingUniform(fan_in=None, negative_slope=math.sqrt(5), nonlinearity='leaky_relu')) if bias_attr is None: if global_init_type == "kaiming_uniform": - bias_attr = paddle.ParamAttr(initializer=KaimingUniform()) + bias_attr = paddle.ParamAttr(initializer=nn.initializer.KaimingUniform(fan_in=None, negative_slope=math.sqrt(5), nonlinearity='leaky_relu')) super(Conv2D, self).__init__( in_channels, out_channels, kernel_size, stride, padding, dilation, groups, padding_mode, weight_attr, bias_attr, data_format) diff --git a/paddlespeech/s2t/modules/attention.py b/paddlespeech/s2t/modules/attention.py index 454f9c14774..b6d61586772 100644 --- a/paddlespeech/s2t/modules/attention.py +++ b/paddlespeech/s2t/modules/attention.py @@ -109,7 +109,7 @@ def forward_attention(self, # 1. onnx(16/-1, -1/-1, 16/0) # 2. jit (16/-1, -1/-1, 16/0, 16/4) if paddle.shape(mask)[2] > 0: # time2 > 0 - mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) + mask = mask.unsqueeze(1).equal(0) # (batch, 1, *, time2) # for last chunk, time2 might be larger than scores.size(-1) mask = mask[:, :, :, :paddle.shape(scores)[-1]] scores = scores.masked_fill(mask, -float('inf')) @@ -321,4 +321,4 @@ def forward(self, scores = (matrix_ac + matrix_bd) / math.sqrt( self.d_k) # (batch, head, time1, time2) - return self.forward_attention(v, scores, mask), new_cache \ No newline at end of file + return self.forward_attention(v, scores, mask), new_cache diff --git a/paddlespeech/s2t/modules/initializer.py b/paddlespeech/s2t/modules/initializer.py index 30a04e44fb2..cdcf2e0523a 100644 --- a/paddlespeech/s2t/modules/initializer.py +++ b/paddlespeech/s2t/modules/initializer.py @@ -12,142 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. import numpy as np -from paddle.fluid import framework -from paddle.fluid import unique_name -from paddle.fluid.core import VarDesc -from paddle.fluid.initializer import MSRAInitializer - -__all__ = ['KaimingUniform'] - - -class KaimingUniform(MSRAInitializer): - r"""Implements the Kaiming Uniform initializer - - This class implements the weight initialization from the paper - `Delving Deep into Rectifiers: Surpassing Human-Level Performance on - ImageNet Classification `_ - by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a - robust initialization method that particularly considers the rectifier - nonlinearities. - - In case of Uniform distribution, the range is [-x, x], where - - .. math:: - - x = \sqrt{\frac{1.0}{fan\_in}} - - In case of Normal distribution, the mean is 0 and the standard deviation - is - - .. math:: - - \sqrt{\\frac{2.0}{fan\_in}} - - Args: - fan_in (float32|None): fan_in for Kaiming uniform Initializer. If None, it is\ - inferred from the variable. default is None. - - Note: - It is recommended to set fan_in to None for most cases. - - Examples: - .. code-block:: python - - import paddle - import paddle.nn as nn - - linear = nn.Linear(2, - 4, - weight_attr=nn.initializer.KaimingUniform()) - data = paddle.rand([30, 10, 2], dtype='float32') - res = linear(data) - - """ - - def __init__(self, fan_in=None): - super(KaimingUniform, self).__init__( - uniform=True, fan_in=fan_in, seed=0) - - def __call__(self, var, block=None): - """Initialize the input tensor with MSRA initialization. - - Args: - var(Tensor): Tensor that needs to be initialized. - block(Block, optional): The block in which initialization ops - should be added. Used in static graph only, default None. - - Returns: - The initialization op - """ - block = self._check_block(block) - - assert isinstance(var, framework.Variable) - assert isinstance(block, framework.Block) - f_in, f_out = self._compute_fans(var) - - # If fan_in is passed, use it - fan_in = f_in if self._fan_in is None else self._fan_in - - if self._seed == 0: - self._seed = block.program.random_seed - - # to be compatible of fp16 initalizers - if var.dtype == VarDesc.VarType.FP16 or ( - var.dtype == VarDesc.VarType.BF16 and not self._uniform): - out_dtype = VarDesc.VarType.FP32 - out_var = block.create_var( - name=unique_name.generate( - ".".join(['masra_init', var.name, 'tmp'])), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False) - else: - out_dtype = var.dtype - out_var = var - - if self._uniform: - limit = np.sqrt(1.0 / float(fan_in)) - op = block.append_op( - type="uniform_random", - inputs={}, - outputs={"Out": out_var}, - attrs={ - "shape": out_var.shape, - "dtype": int(out_dtype), - "min": -limit, - "max": limit, - "seed": self._seed - }, - stop_gradient=True) - - else: - std = np.sqrt(2.0 / float(fan_in)) - op = block.append_op( - type="gaussian_random", - outputs={"Out": out_var}, - attrs={ - "shape": out_var.shape, - "dtype": int(out_dtype), - "mean": 0.0, - "std": std, - "seed": self._seed - }, - stop_gradient=True) - - if var.dtype == VarDesc.VarType.FP16 or ( - var.dtype == VarDesc.VarType.BF16 and not self._uniform): - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, - "out_dtype": var.dtype}) - - if not framework.in_dygraph_mode(): - var.op = op - return op - class DefaultInitializerContext(object): """