Skip to content

Commit

Permalink
Merge pull request #137 from szagoruyko/smoothl1
Browse files Browse the repository at this point in the history
SmoothL1 criterion
  • Loading branch information
soumith committed Sep 23, 2015
2 parents bf25188 + bcf9262 commit d7af719
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 0 deletions.
129 changes: 129 additions & 0 deletions SmoothL1Criterion.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#include "utils.h"

#include <thrust/fill.h>
#include <thrust/functional.h>
#include <thrust/device_ptr.h>
#include <thrust/reduce.h>
#include <thrust/inner_product.h>
#if CUDA_VERSION >= 7000
#include <thrust/system/cuda/execution_policy.h>
#endif

// Elementwise smooth-L1 (Huber) loss term for a single (input, target) pair:
//   0.5 * d^2        when |d| <  1
//   |d| - 0.5        otherwise, where d = x - y.
// Summed across all elements via thrust::inner_product in updateOutput.
struct smoothl1_functor
{
  smoothl1_functor() {}

  __host__ __device__ float operator()(const float& x, const float& y) const
  {
    const float d = fabsf(x - y);
    if (d < 1.f)
      return 0.5f * d * d;
    return d - 0.5f;
  }
};


// Lua binding: forward pass of SmoothL1Criterion on CUDA tensors.
// Lua stack layout: 1 = criterion object (table with `sizeAverage`),
// 2 = input tensor, 3 = target tensor.
// Reduces the elementwise smooth-L1 terms to a scalar, optionally averages
// by element count, stores it in self.output, and returns it to Lua.
static int cunn_SmoothL1Criterion_updateOutput(lua_State *L)
{
THCState *state = getCutorchState(L);
THCudaTensor *input = (THCudaTensor*)luaT_checkudata(L, 2, "torch.CudaTensor");
THCudaTensor *target = (THCudaTensor*)luaT_checkudata(L, 3, "torch.CudaTensor");
THAssert(THCudaTensor_checkGPU(state, 2, input, target));

int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage");
// Only element counts must match; shapes may differ.
luaL_argcheck(L, THCudaTensor_nElement(state, input) == THCudaTensor_nElement(state, target), 2,
"input and target need to have the same number of elements");

float sum;

long size = THCudaTensor_nElement(state, input);

// newContiguous returns a new reference (a copy only when non-contiguous);
// both references are released with THCudaTensor_free below.
input = THCudaTensor_newContiguous(state, input);
target = THCudaTensor_newContiguous(state, target);

thrust::device_ptr<float> input_data(THCudaTensor_data(state, input));
thrust::device_ptr<float> target_data(THCudaTensor_data(state, target));
// inner_product fuses the elementwise smooth-L1 term (smoothl1_functor)
// with a plus-reduction in a single kernel launch.
sum = thrust::inner_product(
#if CUDA_VERSION >= 7000
// Run on cutorch's current stream instead of the default stream.
thrust::cuda::par.on(THCState_getCurrentStream(state)),
#endif
input_data, input_data+size, target_data, (float) 0,
thrust::plus<float>(), smoothl1_functor());

if(sizeAverage)
sum /= size;

THCudaTensor_free(state, input);
THCudaTensor_free(state, target);

// Record the loss on the criterion object (self.output = sum).
lua_pushnumber(L, sum);
lua_setfield(L, 1, "output");

// Also return the scalar loss to the Lua caller.
lua_pushnumber(L, sum);
return 1;
}


// Elementwise gradient of the smooth-L1 loss w.r.t. the input:
// (x - y) clamped to [-1, 1], scaled by `norm`
// (norm = 1/size when sizeAverage is set, else 1).
struct smoothl1_updateGradInput_functor
{
  const float norm;

  smoothl1_updateGradInput_functor(float norm_) : norm(norm_) {}

  __host__ __device__ float operator()(const float& x, const float& y) const
  {
    const float d = x - y;
    // Saturated regions of the loss have constant slope +/-norm;
    // the quadratic region has slope norm * d.
    if (d > 1.f)
      return norm;
    if (d < -1.f)
      return -norm;
    return norm * d;
  }
};

// Lua binding: backward pass of SmoothL1Criterion on CUDA tensors.
// Lua stack layout: 1 = criterion object (fields `sizeAverage`, `gradInput`),
// 2 = input tensor, 3 = target tensor.
// Fills self.gradInput with d/d(input) of the (optionally averaged) loss.
static int cunn_SmoothL1Criterion_updateGradInput(lua_State *L)
{
THCState *state = getCutorchState(L);
THCudaTensor *input = (THCudaTensor*)luaT_checkudata(L, 2, "torch.CudaTensor");
THCudaTensor *target = (THCudaTensor*)luaT_checkudata(L, 3, "torch.CudaTensor");
int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage");
THCudaTensor *gradInput = (THCudaTensor*)luaT_getfieldcheckudata(L, 1, "gradInput", "torch.CudaTensor");
// Only element counts must match; shapes may differ.
luaL_argcheck(L, THCudaTensor_nElement(state, input) == THCudaTensor_nElement(state, target), 2,
"input and target need to have the same number of elements");
THAssert(THCudaTensor_checkGPU(state, 3, input, target, gradInput));

long size = THCudaTensor_nElement(state, input);
// Match forward's normalization: averaging the loss scales its gradient.
float norm = (sizeAverage ? 1./size : 1.);

// newContiguous returns a new reference (a copy only when non-contiguous);
// both references are released with THCudaTensor_free below.
input = THCudaTensor_newContiguous(state, input);
target = THCudaTensor_newContiguous(state, target);

THCudaTensor_resizeAs(state, gradInput, input);

thrust::device_ptr<float> input_data(THCudaTensor_data(state, input));
thrust::device_ptr<float> target_data(THCudaTensor_data(state, target));
thrust::device_ptr<float> gradInput_data(THCudaTensor_data(state, gradInput));

// One elementwise kernel: gradInput[i] = clamp(input[i]-target[i]) * norm.
thrust::transform(
#if CUDA_VERSION >= 7000
// Run on cutorch's current stream instead of the default stream.
thrust::cuda::par.on(THCState_getCurrentStream(state)),
#endif
input_data, input_data+size, target_data, gradInput_data,
smoothl1_updateGradInput_functor(norm));

THCudaTensor_free(state, input);
THCudaTensor_free(state, target);
// gradInput is returned to Lua via the object's field; nothing pushed here
// beyond Lua's convention of 1 (the field was resized/written in place).
return 1;
}

// Method table exposed to Lua; registered under the "nn" field of the
// torch.CudaTensor metatable by cunn_SmoothL1Criterion_init below.
static const struct luaL_Reg cunn_SmoothL1Criterion__ [] = {
{"SmoothL1Criterion_updateOutput", cunn_SmoothL1Criterion_updateOutput},
{"SmoothL1Criterion_updateGradInput", cunn_SmoothL1Criterion_updateGradInput},
{NULL, NULL}
};

// Module init: attach the SmoothL1Criterion methods to torch.CudaTensor's
// "nn" namespace, then restore the Lua stack.
void cunn_SmoothL1Criterion_init(lua_State *L)
{
luaT_pushmetatable(L, "torch.CudaTensor");
luaT_registeratname(L, cunn_SmoothL1Criterion__, "nn");
lua_pop(L,1);
}
1 change: 1 addition & 0 deletions init.cu
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ int luaopen_libcunn(lua_State *L)
cunn_Sqrt_init(L);
cunn_Threshold_init(L);
cunn_MSECriterion_init(L);
cunn_SmoothL1Criterion_init(L);
cunn_AbsCriterion_init(L);
cunn_DistKLDivCriterion_init(L);
cunn_Abs_init(L);
Expand Down
31 changes: 31 additions & 0 deletions test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -2279,6 +2279,37 @@ function cunntest.mse()
end
end

-- CPU vs GPU consistency check for nn.SmoothL1Criterion, covering both
-- sizeAverage = false and sizeAverage = true, with per-side timing.
function cunntest.SmoothL1()
   for sizeAverage = 0, 1 do
      local size = math.random(3000, 5000)
      -- Shapes intentionally differ (size x 1 x 1 vs size): the criterion
      -- only requires matching element counts.
      local input = torch.randn(size, 1, 1)
      local target = torch.randn(size)
      local crit = nn.SmoothL1Criterion(sizeAverage == 1)

      local tm = {}
      local title = string.format('SmoothL1Criterion sizeAverage %d, %d ', sizeAverage, size)
      times[title] = tm

      -- Reference run on the CPU; clone the gradient since backward
      -- may reuse its buffer.
      local timer = torch.Timer()
      local cpu_out = crit:forward(input, target)
      local cpu_gin = crit:backward(input, target):clone()
      tm.cpu = timer:time().real

      -- Same computation on the GPU.
      local gpu_input = input:cuda()
      local gpu_target = target:cuda()
      local gpu_crit = nn.SmoothL1Criterion(sizeAverage == 1):cuda()
      timer:reset()
      local gpu_out = gpu_crit:forward(gpu_input, gpu_target)
      local gpu_gin = gpu_crit:backward(gpu_input, gpu_target)
      cutorch.synchronize()
      tm.gpu = timer:time().real

      mytester:assertlt(math.abs(cpu_out - gpu_out), precision_forward, 'error on output')
      local gerr = gpu_gin:float() - cpu_gin
      mytester:assertlt(gerr:abs():max(), precision_forward, 'error on gradInput')
   end
end

function cunntest.distkldiv()
for sizeAverage = 0, 1 do
local size = math.random(3000,5000)
Expand Down
1 change: 1 addition & 0 deletions utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ void cunn_Square_init(lua_State *L);
void cunn_Sqrt_init(lua_State *L);
void cunn_Threshold_init(lua_State *L);
void cunn_MSECriterion_init(lua_State *L);
void cunn_SmoothL1Criterion_init(lua_State *L);
void cunn_AbsCriterion_init(lua_State *L);
void cunn_DistKLDivCriterion_init(lua_State *L);
void cunn_Abs_init(lua_State *L);
Expand Down

0 comments on commit d7af719

Please sign in to comment.