-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
28 lines (22 loc) · 805 Bytes
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Copyright (c) Microsoft Corporation.
# Copyright (c) 2021 HongChien Yu
# Licensed under the MIT license.
import json
# Read a JSON Lines file (one JSON document per line) into a list.
def load_data(data_file, isTrain):
    """Load a JSON Lines file into a list of parsed objects.

    Each non-blank line of ``data_file`` is parsed independently with
    ``json.loads`` and the results are returned in file order.

    Args:
        data_file: Path of the file to read; it is opened as UTF-8 text.
        isTrain: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        A list with one parsed JSON value per non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(data_file, encoding="utf-8") as f:
        # Iterate the file lazily instead of materializing every line first,
        # and skip blank/whitespace-only lines, which json.loads rejects.
        return [json.loads(line) for line in f if line.strip()]
def truncate_input_sequence(tokens_a, tokens_b, max_num_tokens):
    """Trim token lists in place until their combined length fits a budget.

    One token is removed per iteration from the longer list (``tokens_b`` on
    a tie, ``tokens_a`` when ``tokens_b`` is ``None``). If the list being
    trimmed ends with ``"[SEP]"``, the token just before it is removed so the
    trailing separator is kept; otherwise the last token is removed.

    Args:
        tokens_a: Mutable list of tokens; modified in place.
        tokens_b: Second mutable list of tokens, or ``None`` when there is
            only one sequence; modified in place when given.
        max_num_tokens: Maximum allowed value of
            ``len(tokens_a) + len(tokens_b)``.

    Returns:
        None. The input lists are mutated.
    """
    while True:
        total_length = len(tokens_a) if tokens_b is None else len(tokens_a) + len(tokens_b)
        if total_length <= max_num_tokens:
            break

        trunc_tokens = tokens_a if (tokens_b is None or len(tokens_a) > len(tokens_b)) else tokens_b
        if not trunc_tokens:
            # The longer list is empty, so everything is already empty; this
            # is only reachable with a negative budget. Stop instead of
            # indexing into an empty list.
            break

        if len(trunc_tokens) > 1 and trunc_tokens[-1] == "[SEP]":
            # Keep the trailing separator; drop the token before it.
            del trunc_tokens[-2]
        else:
            # Plain tail token, or a lone "[SEP]" with nothing before it
            # (where ``del trunc_tokens[-2]`` would raise IndexError).
            trunc_tokens.pop()