Skip to content

Commit

Permalink
Merge pull request #18 from thomcom/python-initial-code
Browse files Browse the repository at this point in the history
[WIP] Python Initial Unit Testing and Bindings
  • Loading branch information
harrism authored Sep 5, 2019
2 parents 8bcbff3 + 25f8fdd commit 46f7c21
Show file tree
Hide file tree
Showing 20 changed files with 681 additions and 211 deletions.
4 changes: 2 additions & 2 deletions python/cuspatial/cuspatial/bindings/soa_readers.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from cudf.dataframe.column import Column
from cudf.bindings.cudf_cpp import *
from cudf.core.column import Column
from cudf.core._lib import *
from libc.stdlib cimport calloc, malloc, free
from libcpp.pair cimport pair

Expand Down
50 changes: 35 additions & 15 deletions python/cuspatial/cuspatial/bindings/spatial.pyx
Original file line number Diff line number Diff line change
@@ -1,20 +1,28 @@
from cudf.bindings.cudf_cpp import *
from cudf.dataframe.column import Column
from cudf._lib.cudf import *
from cudf._lib.cudf cimport *
from cudf.core.column import Column
from cudf import Series
from libcpp.pair cimport pair

from libc.stdlib cimport calloc, malloc, free

cpdef cpp_point_in_polygon_bitmap(points_x, points_y,
poly_fpos, poly_rpos,
poly_x, poly_y):
poly_x, poly_y):
points_x = points_x.astype('float64')._column
points_y = points_y.astype('float64')._column
poly_fpos = poly_fpos.astype('int32')._column
poly_rpos = poly_rpos.astype('int32')._column
poly_x = poly_x.astype('float64')._column
poly_y = poly_y.astype('float64')._column
cdef gdf_column* c_points_x = column_view_from_column(points_x)
cdef gdf_column* c_points_y = column_view_from_column(points_y)

cdef gdf_column* c_poly_fpos = column_view_from_column(poly_fpos)
cdef gdf_column* c_poly_rpos = column_view_from_column(poly_rpos)

cdef gdf_column* c_poly_x = column_view_from_column(poly_x)
cdef gdf_column* c_poly_y = column_view_from_column(poly_y)
cdef gdf_column* c_poly_y = column_view_from_column(poly_y)
cdef gdf_column* result_bitmap = <gdf_column*>malloc(sizeof(gdf_column))

with nogil:
Expand All @@ -28,13 +36,18 @@ cpdef cpp_point_in_polygon_bitmap(points_x, points_y,
free(c_poly_fpos)
free(c_poly_rpos)
free(c_poly_x)
free(c_poly_y)
free(c_poly_y)
free(result_bitmap)
bitmap = Column.from_mem_views(data, mask)
bitmap = Column.from_mem_views(data, mask)

return bitmap

cpdef cpp_haversine_distance(x1,y1,x2,y2):
cpdef cpp_haversine_distance(x1,y1,x2,y2):
x1 = x1.astype('float64')._column
y1 = y1.astype('float64')._column
x2 = x2.astype('float64')._column
y2 = y2.astype('float64')._column

cdef gdf_column* c_x1= column_view_from_column(x1)
cdef gdf_column* c_y1 = column_view_from_column(y1)
cdef gdf_column* c_x2= column_view_from_column(x2)
Expand All @@ -51,11 +64,15 @@ cpdef cpp_haversine_distance(x1,y1,x2,y2):
free(c_x2)
free(c_y2)
free(c_h_dist)
h_dist=Column.from_mem_views(data, mask)
h_dist=Column.from_mem_views(data, mask)

return h_dist
return Series(h_dist)

cpdef cpp_lonlat2coord(cam_lon, cam_lat, in_lon, in_lat):
cam_lon = np.float64(cam_lon)
cam_lat = np.float64(cam_lat)
in_lon = in_lon.astype('float64')._column
in_lat = in_lat.astype('float64')._column
cdef gdf_scalar* c_cam_lon = gdf_scalar_from_scalar(cam_lon)
cdef gdf_scalar* c_cam_lat = gdf_scalar_from_scalar(cam_lat)
cdef gdf_column* c_in_lon = column_view_from_column(in_lon)
Expand All @@ -76,17 +93,20 @@ cpdef cpp_lonlat2coord(cam_lon, cam_lat, in_lon, in_lat):
x=Column.from_mem_views(x_data, x_mask)
y=Column.from_mem_views(y_data, y_mask)

return x,y
return Series(x), Series(y)

cpdef cpp_directed_hausdorff_distance(coor_x,coor_y,cnt):
    """
    Compute directed Hausdorff distances between trajectories on the GPU.

    Parameters
    ----------
    coor_x, coor_y : objects exposing ``.astype(dtype)._column``
        Point coordinates; cast to float64 before being handed to the
        native ``directed_hausdorff_distance`` routine.
    cnt : object exposing ``.astype(dtype)._column``
        Cast to int32.  Presumably the number of points in each
        trajectory -- TODO confirm against the C++ header.

    Returns
    -------
    cudf.Series
        Wraps the column produced by the native call.
    """
    coor_x = coor_x.astype('float64')._column
    coor_y = coor_y.astype('float64')._column
    cnt = cnt.astype('int32')._column
    cdef gdf_column* c_coor_x = column_view_from_column(coor_x)
    cdef gdf_column* c_coor_y = column_view_from_column(coor_y)
    cdef gdf_column* c_cnt = column_view_from_column(cnt)
    cdef gdf_column* c_dist = <gdf_column*>malloc(sizeof(gdf_column))
    with nogil:
        c_dist[0]=directed_hausdorff_distance(c_coor_x[0],c_coor_y[0],c_cnt[0])

    dist_data, dist_mask = gdf_column_to_column_mem(c_dist)
    # Release the gdf_column descriptor structs once the result buffers have
    # been extracted, mirroring the other bindings in this module; previously
    # all four structs were leaked on every call.
    free(c_coor_x)
    free(c_coor_y)
    free(c_cnt)
    free(c_dist)
    dist=Column.from_mem_views(dist_data,dist_mask)

    return Series(dist)
12 changes: 8 additions & 4 deletions python/cuspatial/cuspatial/bindings/trajectory.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,21 @@ from libcpp.pair cimport pair

cdef extern from "trajectory.hpp" namespace "cuspatial" nogil:

cdef int derive_trajectories(gdf_column& coor_x, gdf_column& coor_y,
gdf_column& pid, gdf_column& ts,
gdf_column& tid, gdf_column& len,
cdef int derive_trajectories(gdf_column& coor_x,
gdf_column& coor_y,
gdf_column& pid,
gdf_column& ts,
gdf_column& tid,
gdf_column& len,
gdf_column& pos) except +

cdef pair[gdf_column, gdf_column] trajectory_distance_and_speed(
const gdf_column& x,
const gdf_column& y,
const gdf_column& ts,
const gdf_column& len,
const gdf_column& pos) except +
const gdf_column& pos
) except +

cdef void trajectory_spatial_bounds(const gdf_column& x,
const gdf_column& y,
Expand Down
4 changes: 2 additions & 2 deletions python/cuspatial/cuspatial/bindings/trajectory.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from cudf.dataframe.column import Column
from cudf.bindings.cudf_cpp import *
from cudf.core.column import Column
from cudf._lib.cudf import *

from libc.stdlib cimport calloc, malloc, free
from libcpp.pair cimport pair
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import numpy as np
import time
from cudf.dataframe import columnops
from cudf.core import column
import cuspatial.bindings.spatial as gis
from scipy.spatial.distance import directed_hausdorff
from sklearn.cluster import AgglomerativeClustering,DBSCAN
Expand All @@ -26,9 +26,9 @@
py_cnt = []
for traj in in_trajs:
py_cnt.append(len(traj))
pnt_x=columnops.as_column(py_x,dtype=np.float64)
pnt_y=columnops.as_column(py_y,dtype=np.float64)
cnt=columnops.as_column(py_cnt,dtype=np.int32)
pnt_x=column.as_column(py_x,dtype=np.float64)
pnt_y=column.as_column(py_y,dtype=np.float64)
cnt=column.as_column(py_cnt,dtype=np.int32)
distance=gis.cpp_directed_hausdorff_distance(pnt_x,pnt_y,cnt)

num_set=len(cnt)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,94 +16,101 @@
import cuspatial.bindings.spatial as gis
import cuspatial.bindings.soa_readers as readers

data_dir="/home/jianting/trajcode/"
data_set="locust256"
data_dir = "/home/jianting/trajcode/"
data_set = "locust256"

#scipy_res='scipyres.mat'
#cuspatial_res='cuspatialres.mat'
#if(len(sys.argv)>=2):
# scipy_res='scipyres.mat'
# cuspatial_res='cuspatialres.mat'
# if(len(sys.argv)>=2):
# scipy_res=sys.argv[1]
#if(len(sys.argv)>=3):
# if(len(sys.argv)>=3):
# cuspatial_res=sys.argv[2]

if(len(sys.argv)>=2):
data_set=sys.argv[1]
if len(sys.argv) >= 2:
data_set = sys.argv[1]

#reading point xy coordinate data (relative to a camera origin)
pnt_x,pnt_y=readers.cpp_read_pnt_xy_soa(data_dir+data_set+".coor");
#reading numbers of points in trajectories
cnt=readers.cpp_read_uint_soa(data_dir+data_set+".objcnt")
#reading object(vehicle) id
id=readers.cpp_read_uint_soa(data_dir+data_set+".objectid")
# reading point xy coordinate data (relative to a camera origin)
pnt_x, pnt_y = readers.cpp_read_pnt_xy_soa(data_dir + data_set + ".coor")
# reading numbers of points in trajectories
cnt = readers.cpp_read_uint_soa(data_dir + data_set + ".objcnt")
# reading object(vehicle) id
id = readers.cpp_read_uint_soa(data_dir + data_set + ".objectid")

num_traj=cnt.data.size
dist0=gis.cpp_directed_hausdorff_distance(pnt_x,pnt_y,cnt)
cuspatial_dist0=dist0.data.to_array().reshape((num_traj,num_traj))
num_traj = cnt.data.size
dist0 = gis.cpp_directed_hausdorff_distance(pnt_x, pnt_y, cnt)
cuspatial_dist0 = dist0.data.to_array().reshape((num_traj, num_traj))

start = time.time()
dist=gis.cpp_directed_hausdorff_distance(pnt_x,pnt_y,cnt)
print("dis.size={} num_traj*num_traj={}".format(dist.data.size,num_traj*num_traj))
dist = gis.cpp_directed_hausdorff_distance(pnt_x, pnt_y, cnt)
print("dis.size={} num_traj*num_traj={}".format(dist.data.size, num_traj * num_traj))
end = time.time()
print(end - start)
print("python Directed Hausdorff distance GPU end-to-end time in ms (end-to-end)={}".format((end - start)*1000))
print(end - start)
print(
"python Directed Hausdorff distance GPU end-to-end time in ms (end-to-end)={}".format(
(end - start) * 1000
)
)

start = time.time()
cuspatial_dist=dist.data.to_array().reshape((num_traj,num_traj))
cuspatial_dist = dist.data.to_array().reshape((num_traj, num_traj))
print("num_traj={}".format(num_traj))
print("cuspatial_dist[0[1]={}".format(cuspatial_dist[0][1]))

#with open(cuspatial_res, 'wb') as f:
# with open(cuspatial_res, 'wb') as f:
# pickle.dump(cuspatial_dist, f)

mis_match=0
mis_match = 0
for i in range(num_traj):
for j in range(num_traj):
if(abs(cuspatial_dist0[i][j]-cuspatial_dist[i][j])>0.00001):
mis_match=mis_match+1
print('mis_match between two rounds ={}'.format(mis_match))
for j in range(num_traj):
if abs(cuspatial_dist0[i][j] - cuspatial_dist[i][j]) > 0.00001:
mis_match = mis_match + 1
print("mis_match between two rounds ={}".format(mis_match))


x=pnt_x.data.to_array()
y=pnt_y.data.to_array()
n=cnt.data.to_array()
end = time.time()
print("data conversion time={}".format((end - start)*1000))
x = pnt_x.data.to_array()
y = pnt_y.data.to_array()
n = cnt.data.to_array()
end = time.time()
print("data conversion time={}".format((end - start) * 1000))

start = time.time()
trajs=[]
c=0
trajs = []
c = 0
for i in range(num_traj):
traj=np.zeros((n[i],2),dtype=np.float64)
for j in range(n[i]):
traj[j][0]=x[c+j]
traj[j][1]=y[c+j]
trajs.append(traj.reshape(-1,2))
c=c+n[i]
#print('c={}'.format(c))
end=time.time()
print("CPU traj prep time={}".format((end - start)*1000))
#print("trajs[0]")
#print(trajs[0])

mis_match=0
d=np.zeros((num_traj,num_traj), dtype=np.float64)
traj = np.zeros((n[i], 2), dtype=np.float64)
for j in range(n[i]):
traj[j][0] = x[c + j]
traj[j][1] = y[c + j]
trajs.append(traj.reshape(-1, 2))
c = c + n[i]
# print('c={}'.format(c))
end = time.time()
print("CPU traj prep time={}".format((end - start) * 1000))
# print("trajs[0]")
# print(trajs[0])

mis_match = 0
d = np.zeros((num_traj, num_traj), dtype=np.float64)
for i in range(num_traj):
if(i%100==99):
print("i={}".format(i))
for j in range(num_traj):
dij=directed_hausdorff(trajs[i],trajs[j])
d[i][j]=dij[0]
if(abs(d[i][j]-cuspatial_dist[i][j])>0.00001):
print('{} {} {} {}'.format(i,j,d[i][j],cuspatial_dist[i][j]))
mis_match=mis_match+1
print('mis_match={}'.format(mis_match))
end = time.time()
print("python Directed Hausdorff distance cpu end-to-end time in ms (end-to-end)={}".format((end - start)*1000))

#for val in d[0]:
if i % 100 == 99:
print("i={}".format(i))
for j in range(num_traj):
dij = directed_hausdorff(trajs[i], trajs[j])
d[i][j] = dij[0]
if abs(d[i][j] - cuspatial_dist[i][j]) > 0.00001:
print("{} {} {} {}".format(i, j, d[i][j], cuspatial_dist[i][j]))
mis_match = mis_match + 1
print("mis_match={}".format(mis_match))
end = time.time()
print(
"python Directed Hausdorff distance cpu end-to-end time in ms (end-to-end)={}".format(
(end - start) * 1000
)
)

# for val in d[0]:
# print('{}'.format(val))
#print
# print

#with open(scipy_res, 'wb') as f:
# with open(scipy_res, 'wb') as f:
# pickle.dump(d, f)

22 changes: 11 additions & 11 deletions python/cuspatial/cuspatial/demos/haversine_distance_test_nyctaxi.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
"""Demo: time the GPU haversine-distance binding on NYC yellow-taxi trips.

Reads the January 2009 yellow-taxi CSV with cudf, converts the four
pickup/drop-off coordinate columns, calls the cuspatial haversine binding,
and prints the wall-clock time of each stage in milliseconds.
"""
import time

import cudf
from cudf.core import column
import cuspatial.bindings.spatial as gis

start = time.time()
# data downloaded from https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-01.csv
df = cudf.read_csv("/home/jianting/hardbd19/data/nyctaxi/yellow_tripdata_2009-01.csv")
end = time.time()
print("data ingesting time (from SSD) in ms={}".format((end - start) * 1000))
# Leftover from interactive use: the value is discarded when run as a script.
df.head().to_pandas().columns

start = time.time()
x1 = column.as_column(df["Start_Lon"])
y1 = column.as_column(df["Start_Lat"])
x2 = column.as_column(df["End_Lon"])
y2 = column.as_column(df["End_Lat"])
end = time.time()
print("data frame to gdf column conversion time in ms={}".format((end - start) * 1000))

start = time.time()
# The end point is (x2, y2).  The call previously passed y1 twice, so the
# drop-off latitude was ignored and the pick-up latitude was used for both
# ends of every trip.
# NOTE(review): the binding calls `.astype('float64')._column` on each
# argument, which looks like a Series interface -- confirm that the Column
# objects produced by `as_column` are still accepted.
h_dist = gis.cpp_haversine_distance(x1, y1, x2, y2)
end = time.time()
print("python computing distance time in ms={}".format((end - start) * 1000))
# h_dist.data.to_array()
Loading

0 comments on commit 46f7c21

Please sign in to comment.