Skip to content

Commit

Permalink
Increase performance (#26)
Browse files Browse the repository at this point in the history
* Allow multiple signal types - including integers

* Signal may have a different type than replicas

* Increase performance and allow multiple input types

Co-authored-by: Soeren Schoenbrod <soeren.schoenbrod@rwth-aachen.de>
  • Loading branch information
zsoerenm and Soeren Schoenbrod authored Jun 17, 2021
1 parent 8979b74 commit fa8814e
Show file tree
Hide file tree
Showing 8 changed files with 198 additions and 71 deletions.
1 change: 1 addition & 0 deletions src/Tracking.jl
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,5 @@ module Tracking
include("gpsl5.jl")
include("galileo_e1b.jl")
include("boc.jl")
include("downconvert_and_correlate.jl")
end
8 changes: 3 additions & 5 deletions src/carrier_replica.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,11 @@ function gen_carrier_replica!(
num_samples
) where T
c_re = carrier_replica.re; c_im = carrier_replica.im
carr_freq = T(upreferred(carrier_frequency / Hz))
sample_freq = T(upreferred(sampling_frequency / Hz))
twopi = T(2π)
phase = T(start_phase)
carrier_freq = upreferred(carrier_frequency / Hz)
sampling_freq = upreferred(sampling_frequency / Hz)
@avx for i in 0:num_samples - 1
c_im[i + start_sample], c_re[i + start_sample] =
sincos(twopi * (i * carr_freq / sample_freq + phase))
sincos(T(2π) * (i * T(carrier_freq) / T(sampling_freq) + T(start_phase)))
end
carrier_replica
end
Expand Down
54 changes: 24 additions & 30 deletions src/correlator.jl
Original file line number Diff line number Diff line change
Expand Up @@ -262,14 +262,19 @@ function correlate(
num_samples
) where {T <: AbstractCorrelator}
accumulators = zero_accumulators(get_accumulators(correlator), downconverted_signal)
@inbounds @fastmath for i = start_sample:num_samples + start_sample - 1
d_re = downconverted_signal.re
d_im = downconverted_signal.im
a_re = real.(accumulators)
a_im = imag.(accumulators)
@avx for i = start_sample:num_samples + start_sample - 1
for j = 1:length(accumulators)
sample_shift = correlator_sample_shifts[j] - correlator_sample_shifts[1]
accumulators[j] += downconverted_signal[i] * code[i + sample_shift]
a_re[j] += d_re[i] * code[i + sample_shift]
a_im[j] += d_im[i] * code[i + sample_shift]
end
end

T(map(+, get_accumulators(correlator), accumulators))
accumulators_result = complex.(a_re, a_im)
T(map(+, get_accumulators(correlator), accumulators_result))
end

function zero_accumulators(accumulators::SVector, signal)
Expand All @@ -292,32 +297,21 @@ function correlate(
start_sample,
num_samples,
) where {N}

accumulators = map(correlator_sample_shifts) do correlator_sample_shift
correlate_single_tap(
NumAnts(N),
correlator_sample_shift - correlator_sample_shifts[1],
start_sample,
num_samples,
downconverted_signal,
code
)
accumulators = zero(MMatrix{N, length(correlator_sample_shifts), eltype(downconverted_signal)})
a_re = real.(accumulators)
a_im = imag.(accumulators)
d_re = downconverted_signal.re
d_im = downconverted_signal.im
@avx for i = start_sample:num_samples + start_sample - 1
for k = 1:size(accumulators, 2)
for j = 1:size(accumulators, 1)
shift = correlator_sample_shifts[k] - correlator_sample_shifts[1]
a_re[j,k] += d_re[i,j] * code[shift + i]
a_im[j,k] += d_im[i,j] * code[shift + i]
end
end
end

typeof(correlator)(map(+, get_accumulators(correlator), accumulators))
end

"""
    correlate_single_tap(::NumAnts{N}, offset, start_sample, num_samples,
                         downconverted_signal, code)

Accumulate a single correlator tap over `N` signal columns.

For every sample index `i` in `start_sample:start_sample + num_samples - 1` and every
column `j` in `1:N`, the product `downconverted_signal[i, j] * code[i + offset]` is added
to the `j`-th accumulator. The accumulators start at zero, use the element type of
`downconverted_signal`, and are returned as an `SVector` of length `N`.

The loop runs under `@inbounds`, so no index is bounds-checked here; the caller has to
make sure that `i` and `i + offset` are valid indices.
"""
function correlate_single_tap(
    ::NumAnts{N},
    offset,
    start_sample,
    num_samples,
    downconverted_signal,
    code
) where N
    tap_sums = zero(MVector{N, eltype(downconverted_signal)})
    last_sample = start_sample + num_samples - 1
    @inbounds @fastmath for i = start_sample:last_sample
        # The code sample is shared by all columns of row `i`.
        code_value = code[i + offset]
        for j = 1:N
            tap_sums[j] += downconverted_signal[i, j] * code_value
        end
    end
    return SVector(tap_sums)
end
accumulators_new = SVector(eachcol(complex.(SMatrix(a_re), SMatrix(a_im)))...)
typeof(correlator)(map(+, get_accumulators(correlator), accumulators_new))
end
131 changes: 114 additions & 17 deletions src/downconvert.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
# TODO: This function might not be needed! Without @avx it can be merged with
# the two dimensional case
function downconvert!(
downconverted_signal::StructArray{Complex{T}, 1},
signal::StructArray{Complex{T}, 1},
signal::StructArray{Complex{ST}, 1},
carrier_replica::StructArray{Complex{T}, 1},
start_sample::Integer,
num_samples::Integer
) where T
) where {T, ST}
ds_re = downconverted_signal.re; ds_im = downconverted_signal.im
s_re = signal.re; s_im = signal.im
c_re = carrier_replica.re; c_im = carrier_replica.im
Expand All @@ -17,13 +15,64 @@ function downconvert!(
downconverted_signal
end

"""
    downconvert!(downconverted_signal, signal, carrier_frequency, sampling_frequency,
                 start_phase, start_sample, num_samples)

Create the carrier on the fly and downconvert `signal` with it in one loop, writing the
result into `downconverted_signal`, which is also returned.

For each index `i` in `start_sample:start_sample + num_samples - 1` the carrier sample is
taken from `sincos` of
`2π * ((i - start_sample) * carrier_frequency / sampling_frequency + start_phase)`, and
the signal sample is multiplied by the complex conjugate of that carrier sample.
Both frequencies are divided by `Hz` and passed through `upreferred` before use; all
carrier arithmetic is done in the element type `T` of `downconverted_signal`.

Note from the original author: this is a little bit faster than first creating the
replica and then downconverting, but it is not in use at the moment in order to stay
consistent with the multi-antenna method.
"""
function downconvert!(
    downconverted_signal::StructArray{Complex{T}},
    signal::StructArray{Complex{TS}},
    carrier_frequency,
    sampling_frequency,
    start_phase,
    start_sample,
    num_samples
) where {T, TS}
    out_re = downconverted_signal.re
    out_im = downconverted_signal.im
    in_re = signal.re
    in_im = signal.im
    f_carrier = upreferred(carrier_frequency / Hz)
    f_sampling = upreferred(sampling_frequency / Hz)
    @avx for i = start_sample:start_sample + num_samples - 1
        c_im, c_re = sincos(
            T(2π) * ((i - start_sample) * T(f_carrier) / T(f_sampling) + T(start_phase))
        )
        # (in_re + im * in_im) * (c_re - im * c_im), split into real and imaginary part
        out_re[i] = in_re[i] * c_re + in_im[i] * c_im
        out_im[i] = in_im[i] * c_re - in_re[i] * c_im
    end
    return downconverted_signal
end

"""
    downconvert!(downconverted_signal, signal, carrier_frequency, sampling_frequency,
                 start_phase, start_sample, num_samples)

Multi-antenna variant (two-dimensional `StructArray`s) of the method that creates the
carrier on the fly and downconverts in the same loop. The result is written into
`downconverted_signal`, which is also returned.

For each row index `i` in `start_sample:start_sample + num_samples - 1` one carrier sample
is computed from `sincos` of
`2π * ((i - start_sample) * carrier_frequency / sampling_frequency + start_phase)`;
every column `j` of `signal` in that row is then multiplied by the complex conjugate of
this carrier sample. Both frequencies are divided by `Hz` and passed through `upreferred`
before use; the carrier arithmetic is done in the element type `T` of
`downconverted_signal`.

Note from the original author: this is faster for fewer than 4 antennas, but slower
otherwise. Because of this, it is currently not in use.
"""
function downconvert!(
    downconverted_signal::StructArray{Complex{T}, 2},
    signal::StructArray{Complex{TS}, 2},
    carrier_frequency,
    sampling_frequency,
    start_phase,
    start_sample,
    num_samples
) where {T, TS}
    # The previously declared type variable `N` was never used in the signature or the
    # body; Julia warns about such unused method type variables, so it has been dropped.
    ds_re = downconverted_signal.re
    ds_im = downconverted_signal.im
    s_re = signal.re
    s_im = signal.im
    carrier_freq = upreferred(carrier_frequency / Hz)
    sampling_freq = upreferred(sampling_frequency / Hz)
    @avx for i = start_sample:start_sample + num_samples - 1
        c_im, c_re = sincos(
            T(2π) * ((i - start_sample) * T(carrier_freq) / T(sampling_freq) + T(start_phase))
        )
        for j = 1:size(s_re, 2)
            # (s_re + im * s_im) * (c_re - im * c_im), split into real and imaginary part
            ds_re[i, j] = s_re[i, j] * c_re + s_im[i, j] * c_im
            ds_im[i, j] = s_im[i, j] * c_re - s_re[i, j] * c_im
        end
    end
    return downconverted_signal
end

function downconvert!(
downconverted_signal::StructArray{Complex{T}, 2},
signal::StructArray{Complex{T}, 2},
signal::StructArray{Complex{ST}, 2},
carrier_replica::StructArray{Complex{T}, 1},
start_sample::Integer,
num_samples::Integer
) where T
) where {T, ST}
ds_re = downconverted_signal.re; ds_im = downconverted_signal.im
s_re = signal.re; s_im = signal.im
c_re = carrier_replica.re; c_im = carrier_replica.im
Expand All @@ -34,14 +83,62 @@ function downconvert!(
downconverted_signal
end

"""
    downconvert!(downconverted_signal, signal, carrier_replica, start_sample, num_samples)

Generic downconversion: multiply the rows
`start_sample:start_sample + num_samples - 1` of `signal` element-wise by the complex
conjugate of the matching entries of `carrier_replica` and store the product in the same
rows of `downconverted_signal`. Rows outside that range are left untouched.

Returns `downconverted_signal`.
"""
function downconvert!(
    downconverted_signal,
    signal,
    carrier_replica,
    start_sample::Integer,
    num_samples::Integer
)
    rows = start_sample:start_sample + num_samples - 1
    signal_rows = view(signal, rows, :)
    replica_rows = view(carrier_replica, rows)
    # A single fused broadcast writes straight into the selected rows of the output.
    downconverted_signal[rows, :] .= signal_rows .* conj.(replica_rows)
    return downconverted_signal
end
@static if VERSION >= v"1.6"
# Downconvert a one-dimensional complex `signal` that is not stored as a
# StructArray. `reinterpret(reshape, ST, signal)` exposes the components of every
# complex sample along a new leading dimension; index 1 is used as the real part
# and index 2 as the imaginary part. Each sample in
# `start_sample:num_samples + start_sample - 1` is multiplied by the complex
# conjugate of the matching carrier replica sample and written to
# `downconverted_signal`, which is returned.
function downconvert!(
    downconverted_signal::StructArray{Complex{T}, 1},
    signal::AbstractArray{Complex{ST}, 1},
    carrier_replica::StructArray{Complex{T}, 1},
    start_sample::Integer,
    num_samples::Integer
) where {T, ST}
    out_re = downconverted_signal.re
    out_im = downconverted_signal.im
    rep_re = carrier_replica.re
    rep_im = carrier_replica.im
    parts = reinterpret(reshape, ST, signal)
    @avx for i = start_sample:num_samples + start_sample - 1
        out_re[i] = parts[1, i] * rep_re[i] + parts[2, i] * rep_im[i]
        out_im[i] = parts[2, i] * rep_re[i] - parts[1, i] * rep_im[i]
    end
    return downconverted_signal
end

# Downconvert a two-dimensional complex `signal` that is not stored as a
# StructArray. `reinterpret(reshape, ST, signal)` exposes the components of every
# complex sample along a new leading dimension; index 1 is used as the real part
# and index 2 as the imaginary part. For each row `i` in
# `start_sample:num_samples + start_sample - 1` every column `j` is multiplied by
# the complex conjugate of carrier replica sample `i` and written to
# `downconverted_signal`, which is returned.
function downconvert!(
    downconverted_signal::StructArray{Complex{T}, 2},
    signal::AbstractArray{Complex{ST}, 2},
    carrier_replica::StructArray{Complex{T}, 1},
    start_sample::Integer,
    num_samples::Integer
) where {T, ST}
    out_re = downconverted_signal.re
    out_im = downconverted_signal.im
    rep_re = carrier_replica.re
    rep_im = carrier_replica.im
    parts = reinterpret(reshape, ST, signal)
    @avx for i = start_sample:num_samples + start_sample - 1, j = 1:size(parts, 3)
        out_re[i, j] = parts[1, i, j] * rep_re[i] + parts[2, i, j] * rep_im[i]
        out_im[i, j] = parts[2, i, j] * rep_re[i] - parts[1, i, j] * rep_im[i]
    end
    return downconverted_signal
end
else
# Broadcast-based downconversion for matrix signals: rows
# `start_sample:start_sample + num_samples - 1` of `signal` are multiplied
# element-wise by the complex conjugate of the matching carrier replica entries
# and stored in the same rows of `downconverted_signal`, which is returned.
function downconvert!(
    downconverted_signal,
    signal::AbstractMatrix,
    carrier_replica,
    start_sample::Integer,
    num_samples::Integer
)
    rows = start_sample:start_sample + num_samples - 1
    signal_rows = view(signal, rows, :)
    replica_rows = view(carrier_replica, rows)
    downconverted_signal[rows, :] .= signal_rows .* conj.(replica_rows)
    return downconverted_signal
end

# Broadcast-based downconversion for vector signals: entries
# `start_sample:start_sample + num_samples - 1` of `signal` are multiplied
# element-wise by the complex conjugate of the matching carrier replica entries
# and stored in the same rows of `downconverted_signal`, which is returned.
function downconvert!(
    downconverted_signal,
    signal::AbstractVector,
    carrier_replica,
    start_sample::Integer,
    num_samples::Integer
)
    rows = start_sample:start_sample + num_samples - 1
    signal_rows = view(signal, rows)
    replica_rows = view(carrier_replica, rows)
    downconverted_signal[rows, :] .= signal_rows .* conj.(replica_rows)
    return downconverted_signal
end
end
32 changes: 32 additions & 0 deletions src/downconvert_and_correlate.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#=
# This is currently slower than splitting the loop.
# See https://github.com/JuliaSIMD/LoopVectorization.jl/issues/284
function downconvert_and_correlate(
signal::StructArray{Complex{T}},
correlator::C,
code,
correlator_sample_shifts,
carrier_frequency,
sampling_frequency,
start_phase,
start_sample,
num_samples
) where {T, C <: AbstractCorrelator}
s_re = signal.re; s_im = signal.im
accumulators = zero_accumulators(get_accumulators(correlator), signal)
a_re = real.(accumulators)
a_im = imag.(accumulators)
@avx for i = start_sample:start_sample + num_samples - 1
c_im, c_re = sincos(T(2π) * ((i - start_sample) * T(upreferred(carrier_frequency / Hz)) / T(upreferred(sampling_frequency / Hz)) + T(start_phase)))
d_re = s_re[i] * c_re + s_im[i] * c_im
d_im = s_im[i] * c_re - s_re[i] * c_im
for j = 1:length(a_re)
sample_shift = correlator_sample_shifts[j] - correlator_sample_shifts[1]
a_re[j] += d_re * code[i + sample_shift]
a_im[j] += d_im * code[i + sample_shift]
end
end
accumulators_result = complex.(a_re, a_im)
C(map(+, get_accumulators(correlator), accumulators_result))
end
=#
7 changes: 3 additions & 4 deletions src/tracking_loop.jl
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,6 @@ function track(
if num_samples_left == num_samples_left_to_integrate &&
integration_time >= min_integration_time
got_correlator = true

correlator = normalize(correlator, integrated_samples)
valid_correlator = correlator
valid_correlator_carrier_phase = carrier_phase
Expand Down Expand Up @@ -280,13 +279,13 @@ end
"""
    choose(replica::CarrierReplicaCPU, signal::AbstractArray{Complex{Float64}})

Return the `carrier_f64` field of `replica`. `signal` is only used to select this method
through its `Complex{Float64}` element type.
"""
function choose(
    replica::CarrierReplicaCPU,
    signal::AbstractArray{Complex{Float64}}
)
    return replica.carrier_f64
end
function choose(replica::CarrierReplicaCPU, signal::AbstractArray{Complex{Float32}})
"""
    choose(replica::CarrierReplicaCPU, signal::AbstractArray{Complex{T}}) where T <: Number

Return the `carrier_f32` field of `replica` for a signal with any `Complex{T}` element
type. `signal` is only used for dispatch; the more specific `Complex{Float64}` method
takes precedence for `Float64` signals.
"""
function choose(
    replica::CarrierReplicaCPU,
    signal::AbstractArray{Complex{T}}
) where T <: Number
    return replica.carrier_f32
end
"""
    choose(replica::DownconvertedSignalCPU, signal::AbstractArray{Complex{Float64}})

Return the `downconverted_signal_f64` field of `replica`. `signal` is only used to select
this method through its `Complex{Float64}` element type.
"""
function choose(
    replica::DownconvertedSignalCPU,
    signal::AbstractArray{Complex{Float64}}
)
    return replica.downconverted_signal_f64
end
function choose(replica::DownconvertedSignalCPU, signal::AbstractArray{Complex{Float32}})
"""
    choose(replica::DownconvertedSignalCPU, signal::AbstractArray{Complex{T}}) where T <: Number

Return the `downconverted_signal_f32` field of `replica` for a signal with any
`Complex{T}` element type. `signal` is only used for dispatch; the more specific
`Complex{Float64}` method takes precedence for `Float64` signals.
"""
function choose(
    replica::DownconvertedSignalCPU,
    signal::AbstractArray{Complex{T}}
) where T <: Number
    return replica.downconverted_signal_f32
end

Expand Down Expand Up @@ -411,7 +410,7 @@ function resize!(ds::DownconvertedSignalCPU, b::Integer, signal::AbstractMatrix{
)
end

function resize!(ds::DownconvertedSignalCPU, b::Integer, signal::AbstractMatrix{Complex{Float32}})
function resize!(ds::DownconvertedSignalCPU, b::Integer, signal::AbstractMatrix{Complex{T}}) where T <: Number
num_ants = size(signal, 2)
DownconvertedSignalCPU(
size(ds.downconverted_signal_f32, 1) == b ?
Expand Down
3 changes: 2 additions & 1 deletion test/cn0_estimation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,10 @@ end
prn
)
correlator = EarlyPromptLateCorrelator()
signal_struct = StructArray(signal)
correlator = Tracking.correlate(
correlator,
signal,
signal_struct,
code,
correlator_sample_shifts,
start_sample,
Expand Down
Loading

0 comments on commit fa8814e

Please sign in to comment.