Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Python Initial Unit Testing and Bindings #18

Merged
merged 8 commits into from
Sep 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions python/cuspatial/cuspatial/bindings/soa_readers.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from cudf.dataframe.column import Column
from cudf.bindings.cudf_cpp import *
from cudf.core.column import Column
from cudf.core._lib import *
from libc.stdlib cimport calloc, malloc, free
from libcpp.pair cimport pair

Expand Down
50 changes: 35 additions & 15 deletions python/cuspatial/cuspatial/bindings/spatial.pyx
Original file line number Diff line number Diff line change
@@ -1,20 +1,28 @@
from cudf.bindings.cudf_cpp import *
from cudf.dataframe.column import Column
from cudf._lib.cudf import *
from cudf._lib.cudf cimport *
from cudf.core.column import Column
from cudf import Series
from libcpp.pair cimport pair

from libc.stdlib cimport calloc, malloc, free

cpdef cpp_point_in_polygon_bitmap(points_x, points_y,
poly_fpos, poly_rpos,
poly_x, poly_y):
poly_x, poly_y):
points_x = points_x.astype('float64')._column
points_y = points_y.astype('float64')._column
poly_fpos = poly_fpos.astype('int32')._column
poly_rpos = poly_rpos.astype('int32')._column
poly_x = poly_x.astype('float64')._column
poly_y = poly_y.astype('float64')._column
cdef gdf_column* c_points_x = column_view_from_column(points_x)
cdef gdf_column* c_points_y = column_view_from_column(points_y)

cdef gdf_column* c_poly_fpos = column_view_from_column(poly_fpos)
cdef gdf_column* c_poly_rpos = column_view_from_column(poly_rpos)

cdef gdf_column* c_poly_x = column_view_from_column(poly_x)
cdef gdf_column* c_poly_y = column_view_from_column(poly_y)
cdef gdf_column* c_poly_y = column_view_from_column(poly_y)
cdef gdf_column* result_bitmap = <gdf_column*>malloc(sizeof(gdf_column))

with nogil:
Expand All @@ -28,13 +36,18 @@ cpdef cpp_point_in_polygon_bitmap(points_x, points_y,
free(c_poly_fpos)
free(c_poly_rpos)
free(c_poly_x)
free(c_poly_y)
free(c_poly_y)
free(result_bitmap)
bitmap = Column.from_mem_views(data, mask)
bitmap = Column.from_mem_views(data, mask)

return bitmap

cpdef cpp_haversine_distance(x1,y1,x2,y2):
cpdef cpp_haversine_distance(x1,y1,x2,y2):
x1 = x1.astype('float64')._column
y1 = y1.astype('float64')._column
x2 = x2.astype('float64')._column
y2 = y2.astype('float64')._column

cdef gdf_column* c_x1= column_view_from_column(x1)
cdef gdf_column* c_y1 = column_view_from_column(y1)
cdef gdf_column* c_x2= column_view_from_column(x2)
Expand All @@ -51,11 +64,15 @@ cpdef cpp_haversine_distance(x1,y1,x2,y2):
free(c_x2)
free(c_y2)
free(c_h_dist)
h_dist=Column.from_mem_views(data, mask)
h_dist=Column.from_mem_views(data, mask)

return h_dist
return Series(h_dist)

cpdef cpp_lonlat2coord(cam_lon, cam_lat, in_lon, in_lat):
cam_lon = np.float64(cam_lon)
cam_lat = np.float64(cam_lat)
in_lon = in_lon.astype('float64')._column
in_lat = in_lat.astype('float64')._column
cdef gdf_scalar* c_cam_lon = gdf_scalar_from_scalar(cam_lon)
cdef gdf_scalar* c_cam_lat = gdf_scalar_from_scalar(cam_lat)
cdef gdf_column* c_in_lon = column_view_from_column(in_lon)
Expand All @@ -76,17 +93,20 @@ cpdef cpp_lonlat2coord(cam_lon, cam_lat, in_lon, in_lat):
x=Column.from_mem_views(x_data, x_mask)
y=Column.from_mem_views(y_data, y_mask)

return x,y
return Series(x), Series(y)

cpdef cpp_directed_hausdorff_distance(coor_x,coor_y,cnt):
coor_x = coor_x.astype('float64')._column
coor_y = coor_y.astype('float64')._column
cnt = cnt.astype('int32')._column
cdef gdf_column* c_coor_x = column_view_from_column(coor_x)
cdef gdf_column* c_coor_y = column_view_from_column(coor_y)
cdef gdf_column* c_cnt = column_view_from_column(cnt)
cdef gdf_column* c_dist = <gdf_column*>malloc(sizeof(gdf_column))
with nogil:
c_dist[0]=directed_hausdorff_distance(c_coor_x[0],c_coor_y[0],c_cnt[0])

dist_data, dist_mask = gdf_column_to_column_mem(c_dist)
dist_data, dist_mask = gdf_column_to_column_mem(c_dist)
dist=Column.from_mem_views(dist_data,dist_mask)

return dist
return Series(dist)
12 changes: 8 additions & 4 deletions python/cuspatial/cuspatial/bindings/trajectory.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,21 @@ from libcpp.pair cimport pair

cdef extern from "trajectory.hpp" namespace "cuspatial" nogil:

cdef int derive_trajectories(gdf_column& coor_x, gdf_column& coor_y,
gdf_column& pid, gdf_column& ts,
gdf_column& tid, gdf_column& len,
cdef int derive_trajectories(gdf_column& coor_x,
gdf_column& coor_y,
gdf_column& pid,
gdf_column& ts,
gdf_column& tid,
gdf_column& len,
gdf_column& pos) except +

cdef pair[gdf_column, gdf_column] trajectory_distance_and_speed(
const gdf_column& x,
const gdf_column& y,
const gdf_column& ts,
const gdf_column& len,
const gdf_column& pos) except +
const gdf_column& pos
) except +

cdef void trajectory_spatial_bounds(const gdf_column& x,
const gdf_column& y,
Expand Down
4 changes: 2 additions & 2 deletions python/cuspatial/cuspatial/bindings/trajectory.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from cudf.dataframe.column import Column
from cudf.bindings.cudf_cpp import *
from cudf.core.column import Column
from cudf._lib.cudf import *

from libc.stdlib cimport calloc, malloc, free
from libcpp.pair cimport pair
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import numpy as np
import time
from cudf.dataframe import columnops
from cudf.core import column
import cuspatial.bindings.spatial as gis
from scipy.spatial.distance import directed_hausdorff
from sklearn.cluster import AgglomerativeClustering,DBSCAN
Expand All @@ -26,9 +26,9 @@
py_cnt = []
for traj in in_trajs:
py_cnt.append(len(traj))
pnt_x=columnops.as_column(py_x,dtype=np.float64)
pnt_y=columnops.as_column(py_y,dtype=np.float64)
cnt=columnops.as_column(py_cnt,dtype=np.int32)
pnt_x=column.as_column(py_x,dtype=np.float64)
pnt_y=column.as_column(py_y,dtype=np.float64)
cnt=column.as_column(py_cnt,dtype=np.int32)
distance=gis.cpp_directed_hausdorff_distance(pnt_x,pnt_y,cnt)

num_set=len(cnt)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,94 +16,101 @@
import cuspatial.bindings.spatial as gis
import cuspatial.bindings.soa_readers as readers

data_dir="/home/jianting/trajcode/"
data_set="locust256"
data_dir = "/home/jianting/trajcode/"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's remove personal home directories from our released code. We need to figure out a dataset that's small enough (a few kb) to ship with cuspatial for testing.

data_set = "locust256"

#scipy_res='scipyres.mat'
#cuspatial_res='cuspatialres.mat'
#if(len(sys.argv)>=2):
# scipy_res='scipyres.mat'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Delete commented out code?

# cuspatial_res='cuspatialres.mat'
# if(len(sys.argv)>=2):
# scipy_res=sys.argv[1]
#if(len(sys.argv)>=3):
# if(len(sys.argv)>=3):
# cuspatial_res=sys.argv[2]

if(len(sys.argv)>=2):
data_set=sys.argv[1]
if len(sys.argv) >= 2:
data_set = sys.argv[1]

#reading point xy coordinate data (relative to a camera origin)
pnt_x,pnt_y=readers.cpp_read_pnt_xy_soa(data_dir+data_set+".coor");
#reading numbers of points in trajectories
cnt=readers.cpp_read_uint_soa(data_dir+data_set+".objcnt")
#reading object(vehicle) id
id=readers.cpp_read_uint_soa(data_dir+data_set+".objectid")
# reading point xy coordinate data (relative to a camera origin)
pnt_x, pnt_y = readers.cpp_read_pnt_xy_soa(data_dir + data_set + ".coor")
# reading numbers of points in trajectories
cnt = readers.cpp_read_uint_soa(data_dir + data_set + ".objcnt")
# reading object(vehicle) id
id = readers.cpp_read_uint_soa(data_dir + data_set + ".objectid")

num_traj=cnt.data.size
dist0=gis.cpp_directed_hausdorff_distance(pnt_x,pnt_y,cnt)
cuspatial_dist0=dist0.data.to_array().reshape((num_traj,num_traj))
num_traj = cnt.data.size
dist0 = gis.cpp_directed_hausdorff_distance(pnt_x, pnt_y, cnt)
cuspatial_dist0 = dist0.data.to_array().reshape((num_traj, num_traj))

start = time.time()
dist=gis.cpp_directed_hausdorff_distance(pnt_x,pnt_y,cnt)
print("dis.size={} num_traj*num_traj={}".format(dist.data.size,num_traj*num_traj))
dist = gis.cpp_directed_hausdorff_distance(pnt_x, pnt_y, cnt)
print("dis.size={} num_traj*num_traj={}".format(dist.data.size, num_traj * num_traj))
end = time.time()
print(end - start)
print("python Directed Hausdorff distance GPU end-to-end time in ms (end-to-end)={}".format((end - start)*1000))
print(end - start)
print(
"python Directed Hausdorff distance GPU end-to-end time in ms (end-to-end)={}".format(
(end - start) * 1000
)
)

start = time.time()
cuspatial_dist=dist.data.to_array().reshape((num_traj,num_traj))
cuspatial_dist = dist.data.to_array().reshape((num_traj, num_traj))
print("num_traj={}".format(num_traj))
print("cuspatial_dist[0[1]={}".format(cuspatial_dist[0][1]))

#with open(cuspatial_res, 'wb') as f:
# with open(cuspatial_res, 'wb') as f:
# pickle.dump(cuspatial_dist, f)

mis_match=0
mis_match = 0
for i in range(num_traj):
for j in range(num_traj):
if(abs(cuspatial_dist0[i][j]-cuspatial_dist[i][j])>0.00001):
mis_match=mis_match+1
print('mis_match between two rounds ={}'.format(mis_match))
for j in range(num_traj):
if abs(cuspatial_dist0[i][j] - cuspatial_dist[i][j]) > 0.00001:
mis_match = mis_match + 1
print("mis_match between two rounds ={}".format(mis_match))


x=pnt_x.data.to_array()
y=pnt_y.data.to_array()
n=cnt.data.to_array()
end = time.time()
print("data conversion time={}".format((end - start)*1000))
x = pnt_x.data.to_array()
y = pnt_y.data.to_array()
n = cnt.data.to_array()
end = time.time()
print("data conversion time={}".format((end - start) * 1000))

start = time.time()
trajs=[]
c=0
trajs = []
c = 0
for i in range(num_traj):
traj=np.zeros((n[i],2),dtype=np.float64)
for j in range(n[i]):
traj[j][0]=x[c+j]
traj[j][1]=y[c+j]
trajs.append(traj.reshape(-1,2))
c=c+n[i]
#print('c={}'.format(c))
end=time.time()
print("CPU traj prep time={}".format((end - start)*1000))
#print("trajs[0]")
#print(trajs[0])

mis_match=0
d=np.zeros((num_traj,num_traj), dtype=np.float64)
traj = np.zeros((n[i], 2), dtype=np.float64)
for j in range(n[i]):
traj[j][0] = x[c + j]
traj[j][1] = y[c + j]
trajs.append(traj.reshape(-1, 2))
c = c + n[i]
# print('c={}'.format(c))
end = time.time()
print("CPU traj prep time={}".format((end - start) * 1000))
# print("trajs[0]")
# print(trajs[0])

mis_match = 0
d = np.zeros((num_traj, num_traj), dtype=np.float64)
for i in range(num_traj):
if(i%100==99):
print("i={}".format(i))
for j in range(num_traj):
dij=directed_hausdorff(trajs[i],trajs[j])
d[i][j]=dij[0]
if(abs(d[i][j]-cuspatial_dist[i][j])>0.00001):
print('{} {} {} {}'.format(i,j,d[i][j],cuspatial_dist[i][j]))
mis_match=mis_match+1
print('mis_match={}'.format(mis_match))
end = time.time()
print("python Directed Hausdorff distance cpu end-to-end time in ms (end-to-end)={}".format((end - start)*1000))

#for val in d[0]:
if i % 100 == 99:
print("i={}".format(i))
for j in range(num_traj):
dij = directed_hausdorff(trajs[i], trajs[j])
d[i][j] = dij[0]
if abs(d[i][j] - cuspatial_dist[i][j]) > 0.00001:
print("{} {} {} {}".format(i, j, d[i][j], cuspatial_dist[i][j]))
mis_match = mis_match + 1
print("mis_match={}".format(mis_match))
end = time.time()
print(
"python Directed Hausdorff distance cpu end-to-end time in ms (end-to-end)={}".format(
(end - start) * 1000
)
)

# for val in d[0]:
# print('{}'.format(val))
#print
# print

#with open(scipy_res, 'wb') as f:
# with open(scipy_res, 'wb') as f:
# pickle.dump(d, f)

Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
import time
import cudf
from cudf.dataframe import columnops
from cudf.core import column
import cuspatial.bindings.spatial as gis

start = time.time()
#data downloaded from https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-01.csv
# data downloaded from https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-01.csv
df = cudf.read_csv("/home/jianting/hardbd19/data/nyctaxi/yellow_tripdata_2009-01.csv")
end = time.time()
print("data ingesting time (from SSD) in ms={}".format((end - start)*1000))
print("data ingesting time (from SSD) in ms={}".format((end - start) * 1000))
df.head().to_pandas().columns

start = time.time()
x1=columnops.as_column(df['Start_Lon'])
y1=columnops.as_column(df['Start_Lat'])
x2=columnops.as_column(df['End_Lon'])
y2=columnops.as_column(df['End_Lat'])
x1 = column.as_column(df["Start_Lon"])
y1 = column.as_column(df["Start_Lat"])
x2 = column.as_column(df["End_Lon"])
y2 = column.as_column(df["End_Lat"])
end = time.time()
print("data frame to gdf column conversion time in ms={}".format((end - start)*1000))
print("data frame to gdf column conversion time in ms={}".format((end - start) * 1000))

start = time.time()
h_dist=gis.cpp_haversine_distance(x1,y1,x2,y1)
# Haversine distance from each trip's start point (x1, y1) to its end point
# (x2, y2). The end latitude must be y2 (End_Lat); passing y1 twice would
# measure the distance to (End_Lon, Start_Lat) instead of the real drop-off.
h_dist = gis.cpp_haversine_distance(x1, y1, x2, y2)
end = time.time()
print("python computing distance time in ms={}".format((end - start)*1000))
#h_dist.data.to_array()
print("python computing distance time in ms={}".format((end - start) * 1000))
# h_dist.data.to_array()
Loading