-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathparsers.js
74 lines (74 loc) · 2.46 KB
/
parsers.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import { parseDocument } from 'htmlparser2';
import { getWSIBClassificationFromNAICSCode } from './wsibClassifications.js';
/**
 * Extracts the leading text from a value that may be wrapped in an HTML tag.
 *
 * The input is trimmed first. If the trimmed value does not start with `<`
 * it is treated as plain text and returned unchanged. Otherwise it is parsed
 * and the `data` of the first child of the document's first node is returned.
 *
 * @param {string | null | undefined} rawHTMLString - Raw HTML or plain text;
 *   `null` and `undefined` are treated as an empty string.
 * @returns {string} The extracted text, or `''` when the parsed markup has no
 *   leading text to extract.
 */
export function stripHTML(rawHTMLString) {
  const cleanString = (rawHTMLString ?? '').trim();
  if (!cleanString.startsWith('<')) {
    return cleanString;
  }
  const firstNode = parseDocument(cleanString).firstChild;
  // The first node may have no children at all, or its first child may not
  // carry text data; fall back to an empty string instead of throwing or
  // leaking `undefined` to callers that expect a string.
  const text = firstNode?.children?.[0]?.data;
  return typeof text === 'string' ? text : '';
}
/**
 * Parses NAICS entries out of an HTML fragment.
 *
 * Every top-level tag whose leading text has the form `code: description`
 * produces one entry. The text is split at the first colon and both halves
 * are trimmed. When `getWSIBClassificationFromNAICSCode` returns a value for
 * the code, its properties are merged into the entry.
 *
 * @param {string | null | undefined} rawHTMLString - HTML fragment to parse;
 *   `null` and `undefined` are treated as an empty string.
 * @returns {Array<{code: string, codeDescription: string}>} Parsed entries in
 *   document order, each possibly extended with classification properties.
 */
export function parseNAICS(rawHTMLString) {
  const naicsCodes = [];
  const rawNode = parseDocument((rawHTMLString ?? '').trim());
  for (const child of rawNode.childNodes) {
    if (child.type !== 'tag') {
      continue;
    }
    // Tags with no children, or whose first child carries no text data,
    // hold no NAICS entry; skip them rather than throw.
    const rawData = child.children?.[0]?.data;
    if (typeof rawData !== 'string') {
      continue;
    }
    const rawText = rawData.trim();
    const separatorIndex = rawText.indexOf(':');
    if (separatorIndex === -1) {
      continue;
    }
    const naicsCode = {
      code: rawText.slice(0, separatorIndex).trim(),
      codeDescription: rawText.slice(separatorIndex + 1).trim()
    };
    const classification = getWSIBClassificationFromNAICSCode(naicsCode.code);
    if (classification !== undefined) {
      Object.assign(naicsCode, classification);
    }
    naicsCodes.push(naicsCode);
  }
  return naicsCodes;
}
// Month abbreviations used in validity period dates. The position of each
// entry is the zero-based month index expected by the Date constructor.
const validityPeriodMonthStrings = [
  'Jan',
  'Feb',
  'Mar',
  'Apr',
  'May',
  'Jun',
  'Jul',
  'Aug',
  'Sep',
  'Oct',
  'Nov',
  'Dec'
];

// Matches a whole `D-Mon-YYYY` / `DD-Mon-YYYY` token. The month alternation is
// built from the list above so that only abbreviations the parser can map to
// a month index are accepted.
const validityPeriodDateRegexp = new RegExp(
  `^\\d{1,2}-(?:${validityPeriodMonthStrings.join('|')})-\\d{4}$`
);

/**
 * Converts a `DD-Mon-YYYY` string into a Date at local midnight.
 *
 * @param {string} rawDateString - Date text such as `15-Mar-2021`.
 * @returns {Date} The parsed date, or an Invalid Date when the month
 *   abbreviation is not one of `validityPeriodMonthStrings`.
 */
function parseValidityPeriodDate(rawDateString) {
  const [dayString, monthString, yearString] = rawDateString.split('-');
  const monthIndex = validityPeriodMonthStrings.indexOf(monthString);
  if (monthIndex === -1) {
    // A month index of -1 would silently roll back to December of the
    // previous year; signal the failure explicitly instead.
    return new Date(Number.NaN);
  }
  return new Date(
    Number.parseInt(yearString, 10),
    monthIndex,
    Number.parseInt(dayString, 10)
  );
}
/**
 * Extracts the start and end dates of a validity period from free text.
 *
 * The text is split on runs of whitespace. The first token matching
 * `validityPeriodDateRegexp` becomes `start` and the second becomes `end`;
 * scanning stops after the second match and all other tokens are ignored.
 *
 * @param {string | null | undefined} rawHTMLString - Text containing the
 *   dates; `null` and `undefined` are treated as an empty string.
 * @returns {{start?: Date, end?: Date}} An object holding only the dates that
 *   were found, so it is empty when no token matches.
 */
export function parseValidityPeriod(rawHTMLString) {
  const validityPeriod = {};
  // Split on any whitespace run so dates separated by tabs, newlines or
  // repeated spaces are still isolated into their own tokens.
  const tokens = (rawHTMLString ?? '').split(/\s+/);
  for (const token of tokens) {
    if (!validityPeriodDateRegexp.test(token)) {
      continue;
    }
    const periodDate = parseValidityPeriodDate(token);
    if (validityPeriod.start === undefined) {
      validityPeriod.start = periodDate;
    } else {
      validityPeriod.end = periodDate;
      break;
    }
  }
  return validityPeriod;
}