-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathproplabel.py
80 lines (63 loc) · 1.69 KB
/
proplabel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import numpy as np
import pandas as pd
# def propagate_label(df, n):
# labels = df["label"].copy()
# for i in range(len(df)):
# if labels[i] == 1:
# start = max(0, i - n)
# labels[start:i] = 1
# df["label"] = labels
# return df
def propagate_label(df, n):
    """Back-fill positive labels over the ``n`` rows preceding each positive.

    Reference implementation using a plain Python loop. For every row whose
    ``"label"`` equals 1 in the input, the up-to-``n`` rows immediately
    before it are also set to 1. Rows after a positive are never touched.

    Args:
        df: DataFrame with a ``"label"`` column. The column is overwritten
            on this same object with the propagated labels.
        n: Number of preceding rows to mark for each positive. Values
            ``<= 0`` produce empty windows and leave the labels unchanged.

    Returns:
        The same ``df`` object that was passed in, with ``"label"`` updated.
    """
    # Copy the extracted array rather than the Series: under pandas
    # Copy-on-Write, ``.values`` hands back a read-only view, whereas the
    # array's own ``copy()`` is writable and detached from ``df``.
    labels = df["label"].values.copy()
    for i, value in enumerate(labels):
        if value == 1:
            # Only positions before ``i`` are written, so labels still to be
            # visited keep their original values (no cascading).
            labels[max(0, i - n):i] = 1
    df["label"] = labels
    return df
def propagate_label_np(df, n):
    """Vectorized variant of ``propagate_label``.

    For every row whose ``"label"`` equals 1 in the input, the up-to-``n``
    rows immediately before it are also set to 1. The windows are merged
    with a difference array and a cumulative sum, so there is no
    Python-level loop over the rows or over the positives.

    Args:
        df: DataFrame with a ``"label"`` column. The column is overwritten
            on this same object with the propagated labels.
        n: Integer number of preceding rows to mark for each positive.
            Values ``<= 0`` produce empty windows and leave the labels
            unchanged.

    Returns:
        The same ``df`` object that was passed in, with ``"label"`` updated.
    """
    # Copy the extracted array rather than the Series: under pandas
    # Copy-on-Write, ``.values`` hands back a read-only view, whereas the
    # array's own ``copy()`` is writable and detached from ``df``.
    labels = df["label"].values.copy()
    size = len(labels)

    ones = np.flatnonzero(labels == 1)
    # The window for a positive at index i is [i - n, i). Clamp the start to
    # 0 (array boundary) and to i itself, so a non-positive n gives an empty
    # window instead of a reversed one.
    starts = np.clip(ones - n, 0, ones)

    # Difference array: +1 where a window opens, -1 where it closes. The
    # running sum is positive exactly on rows covered by at least one window.
    opened = np.bincount(starts, minlength=size + 1)
    closed = np.bincount(ones, minlength=size + 1)
    covered = np.cumsum(opened - closed)[:size] > 0

    labels[covered] = 1
    df["label"] = labels
    return df
def test_propagate_label():
    """Check ``propagate_label_np`` on a small frame with a window of 2.

    Prints the input frame, the result, and the expected frame, then asserts
    that result and expected are equal.
    """
    window = 2
    frame = pd.DataFrame({"label": [0, 1, 0, 0, 0, 1, 0, 0, 0, 1]})
    # Show the input before the call; the function updates the frame itself.
    print(frame)

    # Each positive at index i also marks rows i-2 and i-1 (where they exist).
    expected = pd.DataFrame({"label": [1, 1, 0, 1, 1, 1, 0, 1, 1, 1]})

    result = propagate_label_np(frame, window)
    for item in (result, "expect", expected):
        print(item)

    pd.testing.assert_frame_equal(result, expected)
def test_speed():
    """Compare the mean runtime of the loop and vectorized implementations.

    Times ``propagate_label`` and ``propagate_label_np`` over 1000 runs each
    on the same 10-row frame with a window of 2, then prints ``True`` if the
    vectorized version had the lower mean runtime and ``False`` otherwise.
    """
    import time

    def _mean_runtime(func, frame, window, repeats):
        """Return the mean wall-clock seconds of ``func(frame, window)``."""
        elapsed = 0.0
        for _ in range(repeats):
            # Both implementations overwrite the frame they are given, so
            # hand each run a fresh copy (made outside the timed region) to
            # keep every measurement on identical input.
            fresh = frame.copy()
            # perf_counter is the high-resolution monotonic clock intended
            # for measuring short durations.
            began = time.perf_counter()
            func(fresh, window)
            elapsed += time.perf_counter() - began
        return elapsed / repeats

    df = pd.DataFrame(
        {"label": [0, 1, 0, 0, 0, 1, 0, 0, 0, 1]},
    )
    n = 1000

    pytime = _mean_runtime(propagate_label, df, 2, n)
    nptime = _mean_runtime(propagate_label_np, df, 2, n)

    print(nptime < pytime)
if __name__ == "__main__":
    # Run only when executed as a script, so importing this module for its
    # functions does not trigger the benchmark.
    # Swap in test_propagate_label() to run the correctness check instead.
    test_speed()