Merge pull request #10 from LuxDL/ap/generalize

Generalize the generators to complex numbers
LuxDL · Dec 10, 2023 · 9b101d8 · 9b101d8 · avik-pal · Dec 10, 2023
2 parents e21fbed + 5ffa3a0
commit 9b101d8
Show file tree

Hide file tree

Showing 8 changed files with 172 additions and 102 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,9 +1,10 @@
 name = "WeightInitializers"
 uuid = "d49dbf32-c5c2-4618-8acc-27bb2598ef2d"
 authors = ["Avik Pal <avikpal@mit.edu> and contributors"]
-version = "0.1.2"
+version = "0.1.3"
 
 [deps]
+PackageExtensionCompat = "65ce6f38-6b18-4e1d-a461-8949797d7930"
 PartialFunctions = "570af359-4316-4cb7-8c74-252c00c2016b"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
@@ -16,6 +17,13 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 WeightInitializersCUDAExt = "CUDA"
 
 [compat]
+CUDA = "4, 5"
+PackageExtensionCompat = "1"
 PartialFunctions = "1"
+Random = "<0.0.1, 1"
 SpecialFunctions = "2"
+Statistics = "<0.0.1, 1"
 julia = "1.6"
+
+[extras]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
diff --git a/README.md b/README.md
@@ -12,12 +12,13 @@
 [![ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://img.shields.io/badge/ColPrac-Contributor's%20Guide-blueviolet)](https://github.com/SciML/ColPrac)
 [![SciML Code Style](https://img.shields.io/static/v1?label=code%20style&message=SciML&color=9558b2&labelColor=389826)](https://github.com/SciML/SciMLStyle)
 
-This package is a light dependency providing common weight initialization schemes for deep learning models.
+This package is a light dependency providing common weight initialization schemes for deep
+learning models.
 
 ## Example
 
-These code snippets are just provided to give a high level overview
-of the functionalities of the package.
+These code snippets are just provided to give a high level overview of the functionalities
+of the package.
 
 ```julia
 using WeightInitializers, Random
@@ -54,8 +55,8 @@ weights = weights_cl(2, 5)
 
 ## API
 
-The package is meant to be working with deep learning
-libraries such as F/Lux. All the methods take as input the chosen `rng` type and the dimension for the array.
+The package is meant to work with deep learning libraries such as Flux and Lux. All the
+methods take as input the chosen `rng` and the dimensions of the AbstractArray.
 
 ```julia
 weights = init(rng, dims...)
@@ -67,8 +68,9 @@ The `rng` is optional, if not specified a default one will be used.
 weights = init(dims...)
 ```
 
-If there is the need to use keyword arguments the methods can be called with just the `rng` (optionally)
-and the keywords to get in return a function behaving like the two examples above.
+If keyword arguments are needed, the methods can be called with just the `rng`
+(optional) and the keywords; they then return a function that behaves like the two
+examples above.
 
 ```julia
 weights_init = init(rng; kwargs...)

diff --git a/ext/WeightInitializersCUDAExt.jl b/ext/WeightInitializersCUDAExt.jl
@@ -0,0 +1,22 @@
+module WeightInitializersCUDAExt
+
+using WeightInitializers, CUDA
+import WeightInitializers: __partial_apply, NUM_TO_FPOINT
+
+# RNG types whose `ones*`/`zeros*` calls are forwarded to `CUDA.ones`/`CUDA.zeros`.
+const AbstractCuRNG = Union{CUDA.RNG, CURAND.RNG}
+
+for fname in (:ones, :zeros), T in ("16", "32", "64", "C16", "C32", "C64")
+    initializer = Symbol(fname, T)
+    FT = NUM_TO_FPOINT[Symbol(T)]
+
+    # Array-producing method: the RNG argument is only used for dispatch here.
+    @eval function WeightInitializers.$(initializer)(rng::AbstractCuRNG,
+            dims::Integer...; kwargs...)
+        return CUDA.$(fname)($FT, dims...; kwargs...)
+    end
+
+    # Without dimensions, hand back a partially applied initializer instead.
+    @eval function WeightInitializers.$(initializer)(rng::AbstractCuRNG; kwargs...)
+        return __partial_apply($initializer, (rng, (; kwargs...)))
+    end
+end
+
+end
diff --git a/ext/WeightInitializersCUDAExt/WeightInitializersCUDAExt.jl b/ext/WeightInitializersCUDAExt/WeightInitializersCUDAExt.jl
diff --git a/src/WeightInitializers.jl b/src/WeightInitializers.jl
@@ -2,10 +2,18 @@ module WeightInitializers
 
 using PartialFunctions, Random, SpecialFunctions, Statistics
 
+import PackageExtensionCompat: @require_extensions
+# Module load hook: activates package extensions through PackageExtensionCompat.
+__init__() = @require_extensions
+
 include("utils.jl")
 include("initializers.jl")
 
-export zeros32, ones32, rand32, randn32
+export zeros64, ones64, rand64, randn64, zeros32, ones32, rand32, randn32, zeros16, ones16,
+    rand16, randn16
+export zerosC64, onesC64, randC64, randnC64, zerosC32, onesC32, randC32, randnC32, zerosC16,
+    onesC16, randC16, randnC16
 export glorot_normal, glorot_uniform
 export kaiming_normal, kaiming_uniform
 export truncated_normal

diff --git a/src/initializers.jl b/src/initializers.jl
@@ -1,38 +1,29 @@
-"""
-    zeros32([::AbstractRNG=_default_rng()], size...) -> Array{Float32, length(size)}
-
-Return an `Array{Float32}` of zeros of the given `size`. (`rng` is ignored)
-"""
-zeros32(::AbstractRNG, dims...) = zeros(Float32, dims...)
-
-"""
-    ones32([::AbstractRNG=_default_rng()], size...) -> Array{Float32, length(size)}
-
-Return an `Array{Float32}` of ones of the given `size`. (`rng` is ignored)
-"""
-ones32(::AbstractRNG, dims...) = ones(Float32, dims...)
-
-"""
-    randn32([::AbstractRNG=_default_rng()], size...) -> Array{Float32, length(size)}
-
-Return an `Array{Float32}` of random numbers from a standard normal distribution of the
-given `size`.
-"""
-randn32(rng::AbstractRNG, dims...) = randn(rng, Float32, dims...)
-
-"""
-    rand32([::AbstractRNG=_default_rng()], size...) -> Array{Float32, length(size)}
-
-Return an `Array{Float32}` of random numbers from a uniform distribution of the given
-`size`.
-"""
-rand32(rng::AbstractRNG, dims...) = rand(rng, Float32, dims...)
+# Generate `ones16`, `zerosC32`, `rand64`, `randnC64`, ... : one method per combination of
+# base generator and element-type suffix, each documented via `__generic_docstring`.
+for T in ("16", "32", "64", "C16", "C32", "C64"), fname in (:ones, :zeros, :rand, :randn)
+    name = Symbol(fname, T)
+    docstring = __generic_docstring(string(name))
+    FT = NUM_TO_FPOINT[Symbol(T)]
+    # Only the samplers take the RNG; `ones`/`zeros` accept it purely for a uniform API.
+    call = if fname in (:rand, :randn)
+        :($(fname)(rng, $FT, dims...; kwargs...))
+    else
+        :($(fname)($FT, dims...; kwargs...))
+    end
+    @eval begin
+        @doc $docstring
+        function $(name)(rng::AbstractRNG, dims::Integer...; kwargs...)
+            return $call
+        end
+    end
+end
 
 """
     glorot_uniform([::AbstractRNG=_default_rng()], [T=Float32], size...;
-        gain = 1) -> Array{T, length(size)}
+        gain = 1) -> AbstractArray{T, length(size)}
 
-Return an `Array{T}` of the given `size` containing random numbers drawn from a
+Return an `AbstractArray{T}` of the given `size` containing random numbers drawn from a
 uniform distribution on the interval ``[-x, x]``, where
 `x = gain * sqrt(6 / (fan_in + fan_out))`. This method is described in [1] and also known as
 Xavier initialization.
@@ -44,18 +35,18 @@ feedforward neural networks." _Proceedings of the thirteenth international confe
 artificial intelligence and statistics_. 2010.
 """
 function glorot_uniform(rng::AbstractRNG, ::Type{T}, dims::Integer...;
-        gain::Real=1) where {T <: Real}
+        gain::Number=1) where {T <: Number}
     scale = T(gain) * sqrt(T(24) / sum(_nfan(dims...)))
     return (rand(rng, T, dims...) .- T(1 // 2)) .* scale
 end
 
 """
     glorot_normal([::AbstractRNG=_default_rng()], [T=Float32], size...;
-        gain = 1) -> Array{T, length(size)}
+        gain = 1) -> AbstractArray{T, length(size)}
 
-Return an `Array{T}` of the given `size` containing random numbers drawn from a normal
-distribution with standard deviation `gain * sqrt(2 / (fan_in + fan_out))`. This method is
-described in [1] and also known as Xavier initialization.
+Return an `AbstractArray{T}` of the given `size` containing random numbers drawn from a
+normal distribution with standard deviation `gain * sqrt(2 / (fan_in + fan_out))`. This
+method is described in [1] and also known as Xavier initialization.
 
 # References
 
@@ -64,16 +55,16 @@ feedforward neural networks." _Proceedings of the thirteenth international confe
 artificial intelligence and statistics_. 2010.
 """
 function glorot_normal(rng::AbstractRNG, ::Type{T}, dims::Integer...;
-        gain::Real=1) where {T <: Real}
+        gain::Number=1) where {T <: Number}
     std = T(gain) * sqrt(T(2) / sum(_nfan(dims...)))
     return randn(rng, T, dims...) .* std
 end
 
 """
     kaiming_uniform([::AbstractRNG=_default_rng()], [T=Float32], size...;
-        gain = √T(2)) -> Array{T, length(size)}
+        gain = √T(2)) -> AbstractArray{T, length(size)}
 
-Return an `Array{T}` of the given `size` containing random numbers drawn from a
+Return an `AbstractArray{T}` of the given `size` containing random numbers drawn from a
 uniform distribution on the interval `[-x, x]`, where `x = gain * sqrt(3/fan_in)`.
 
 # References
@@ -83,17 +74,17 @@ imagenet classification." _Proceedings of the IEEE international conference on c
 vision_. 2015.
 """
 function kaiming_uniform(rng::AbstractRNG, ::Type{T}, dims::Integer...;
-        gain::Real=√T(2)) where {T <: Real}
+        gain::Number=√T(2)) where {T <: Number}
     bound = √T(3) * gain / sqrt(T(first(_nfan(dims...))))
     return (rand(rng, T, dims...) .- T(1 // 2)) .* 2 * bound
 end
 
 """
     kaiming_normal([::AbstractRNG=_default_rng()], [T=Float32], size...;
-        gain = √T(2)) -> Array{T, length(size)}
+        gain = √T(2)) -> AbstractArray{T, length(size)}
 
-Return an `Array{T}` of the given `size` containing random numbers taken from a normal
-distribution standard deviation `gain / sqrt(fan_in)`
+Return an `AbstractArray{T}` of the given `size` containing random numbers taken from a
+normal distribution with standard deviation `gain / sqrt(fan_in)`.
 
 # References
 
@@ -102,23 +93,23 @@ imagenet classification." _Proceedings of the IEEE international conference on c
 vision_. 2015.
 """
 function kaiming_normal(rng::AbstractRNG, ::Type{T}, dims::Integer...;
-        gain::Real=√T(2)) where {T <: Real}
+        gain::Number=√T(2)) where {T <: Number}
     std = gain / sqrt(T(first(_nfan(dims...))))
     return randn(rng, T, dims...) .* std
 end
 
 """
-    truncated_normal([::AbstractRNG=_default_rng()], [T=Float32], size...; mean = 0, std = 1,
-        lo = -2, hi = 2) -> Array{T, length(size)}
+    truncated_normal([::AbstractRNG=_default_rng()], [T=Float32], size...; mean = 0,
+        std = 1, lo = -2, hi = 2) -> AbstractArray{T, length(size)}
 
-Return an `Array{T}` of the given `size` where each element is drawn from a truncated normal
-distribution. The numbers are distributed like
+Return an `AbstractArray{T}` of the given `size` where each element is drawn from a
+truncated normal distribution. The numbers are distributed like
 `filter(x -> lo ≤ x ≤ hi, mean .+ std .* randn(100))`.
 """
 function truncated_normal(rng::AbstractRNG, ::Type{T}, dims::Integer...; mean=T(0),
         std=T(1), lo=-T(2), hi=T(2)) where {T <: Real}
     if (mean < lo - 2 * std) || (mean > hi + 2 * std)
-        @warn "Mean is more than 2 std outside the limits in truncated_normal, so the distribution of values may be inaccurate." maxlog=1
+        @warn "Mean is more than 2 std outside the limits in truncated_normal, so the distribution of values may be inaccurate."
     end
     l = _norm_cdf((lo - mean) / std)
     u = _norm_cdf((hi - mean) / std)
@@ -134,29 +125,34 @@ end
 # Default Fallbacks for all functions
 for initializer in (:glorot_uniform, :glorot_normal, :kaiming_uniform, :kaiming_normal,
     :truncated_normal)
+    NType = ifelse(initializer === :truncated_normal, Real, Number)
     @eval function ($initializer)(dims::Integer...; kwargs...)
         return $initializer(_default_rng(), Float32, dims...; kwargs...)
     end
     @eval function ($initializer)(rng::AbstractRNG, dims::Integer...; kwargs...)
         return $initializer(rng, Float32, dims...; kwargs...)
     end
-    @eval function ($initializer)(::Type{T}, dims::Integer...; kwargs...) where {T <: Real}
+    @eval function ($initializer)(::Type{T},
+            dims::Integer...; kwargs...) where {T <: $NType}
         return $initializer(_default_rng(), T, dims...; kwargs...)
     end
     @eval function ($initializer)(rng::AbstractRNG; kwargs...)
-        return _partial_apply($initializer, (rng, (; kwargs...)))
+        return __partial_apply($initializer, (rng, (; kwargs...)))
     end
-    @eval function ($initializer)(rng::AbstractRNG, ::Type{T}; kwargs...) where {T <: Real}
-        return _partial_apply($initializer, ((rng, T), (; kwargs...)))
+    @eval function ($initializer)(rng::AbstractRNG,
+            ::Type{T}; kwargs...) where {T <: $NType}
+        return __partial_apply($initializer, ((rng, T), (; kwargs...)))
     end
-    @eval ($initializer)(; kwargs...) = _partial_apply($initializer, (; kwargs...))
+    @eval ($initializer)(; kwargs...) = __partial_apply($initializer, (; kwargs...))
 end
 
-for initializer in (:zeros32, :ones32, :randn32, :rand32)
+for tp in ("16", "32", "64", "C16", "C32", "C64"), func in (:zeros, :ones, :randn, :rand)
+    initializer = Symbol(func, tp)
     @eval function ($initializer)(dims::Integer...; kwargs...)
         return $initializer(_default_rng(), dims...; kwargs...)
     end
     @eval function ($initializer)(rng::AbstractRNG; kwargs...)
-        return _partial_apply($initializer, (rng, (; kwargs...)))
+        return __partial_apply($initializer, (rng, (; kwargs...)))
     end
+    @eval ($initializer)(; kwargs...) = __partial_apply($initializer, (; kwargs...))
 end
diff --git a/src/utils.jl b/src/utils.jl
@@ -14,4 +14,31 @@ function _default_rng()
 end
 
 # This is needed if using `PartialFunctions.$` inside @eval block
-_partial_apply(fn, inp) = fn$inp
+__partial_apply(fn, inp) = fn$inp
+
+# Phrase describing what each base generator fills the array with. It is interpolated
+# into the docstrings built by `__generic_docstring` ("... containing <phrase>."), so the
+# entries must read naturally after "containing".
+const NAME_TO_DIST = Dict(:zeros => "zeros",
+    :ones => "ones",
+    :randn => "random numbers from a standard normal distribution",
+    :rand => "random numbers from a uniform distribution")
+# Maps a function-name suffix ("16", "C32", ...) to the element type it denotes.
+const NUM_TO_FPOINT = Dict(Symbol(16) => Float16, Symbol(32) => Float32,
+    Symbol(64) => Float64, :C16 => ComplexF16, :C32 => ComplexF32, :C64 => ComplexF64)
+
+# Split a generator name such as "randC32" into its base ("rand") and suffix ("C32").
+# The suffix is three characters when that is a key of `NUM_TO_FPOINT`, otherwise two.
+@inline function __funcname(fname::String)
+    width = haskey(NUM_TO_FPOINT, Symbol(fname[(end - 2):end])) ? 3 : 2
+    return fname[1:(end - width)], fname[(end - width + 1):end]
+end
+
+# Build the docstring text for a generated initializer such as "zeros32" or "randnC64":
+# a signature line followed by a one-sentence description of the returned array.
+@inline function __generic_docstring(fname::String)
+    base, suffix = __funcname(fname)
+    contents = NAME_TO_DIST[Symbol(base)]
+    FT = NUM_TO_FPOINT[Symbol(suffix)]
+    return """
+        $fname([::AbstractRNG=_default_rng()], size...; kwargs...) -> AbstractArray{$(FT), length(size)}
+
+    Return an `AbstractArray{$(FT)}` of the given `size` containing $(contents).
+    """
+end