This repository has been archived by the owner on Aug 24, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
mapTransformations.js
117 lines (95 loc) · 3.25 KB
/
mapTransformations.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/*
This file conforms to ESLint linting rules: http://eslint.org/docs/user-guide/command-line-interface.
ESLint configuration is below. Here is what the numbers mean:
0 - turn the rule off
1 - turn the rule on as a warning (doesn't affect exit code)
2 - turn the rule on as an error (exit code is 1 when triggered)
*/
/* eslint-env node */
/* eslint new-cap: 0 */
/* eslint no-trailing-spaces: [2, { "skipBlankLines": true }] */
'use strict'
module.exports = (function createTransformations () {
// Date library used by parseDate to validate and format the parsed date.
var moment = require('moment')
// Matches any run of two or more consecutive whitespace characters;
// removeExtraWhitespace replaces each such run with a single space.
var findExtraWhitespacesRegex = new RegExp('\\s\\s+', 'gm')
// Public API of the module: maps each transformation name to the function
// declared further down in this closure (function declarations are hoisted,
// so they can be referenced here before their definitions).
return {
TrimTransformation: trim,
ParseDateTransformation: parseDate,
RemoveExtraWhitespaceTransformation: removeExtraWhitespace,
TextExtractionBetterWhitespaceTransformation: textExtractionBetterWhitespace
}
/**
 * Strips leading and trailing whitespace from a string or from a node's text.
 *
 * @param {string|Object} node - Raw string, or an object whose `textContent`
 *   property holds the text to trim.
 * @param {Object} [config] - Not read by this transformation.
 * @returns {string} The text with surrounding whitespace removed.
 */
function trim (node, config) {
  var rawText = typeof node === 'string' ? node : node.textContent
  return rawText.trim()
}
/**
 * Parses the text of a string or node as a date and returns it formatted.
 *
 * The text is first handed to the native `Date` constructor and the result is
 * wrapped with moment, so which inputs are accepted depends on what `Date`
 * can parse.
 *
 * @param {string|Object} node - Raw string, or an object whose `textContent`
 *   property holds the date text.
 * @param {Object} [config] - Optional settings; when `config._format` is
 *   truthy it is passed to moment's `format`, otherwise `format()` is called
 *   with no arguments.
 * @returns {string|undefined} The formatted date, or `undefined` when the
 *   text does not produce a valid date.
 */
function parseDate (node, config) {
  var rawText = typeof node === 'string' ? node : node.textContent
  var parsed = moment(new Date(rawText))

  // Bail out early on unparseable input.
  if (!parsed.isValid()) {
    return undefined
  }

  var hasCustomFormat = Boolean(config && config._format)
  return hasCustomFormat ? parsed.format(config._format) : parsed.format()
}
/**
 * Collapses every run of two or more whitespace characters into one space.
 *
 * A single whitespace character on its own (including a lone newline or tab)
 * is left untouched because the shared regex only matches runs of length >= 2.
 *
 * @param {string|Object} node - Raw string, or an object whose `textContent`
 *   property holds the text to clean.
 * @param {Object} [config] - Not read by this transformation.
 * @returns {string} The text with whitespace runs collapsed.
 */
function removeExtraWhitespace (node, config) {
  var rawText = typeof node === 'string' ? node : node.textContent
  return rawText.replace(findExtraWhitespacesRegex, ' ')
}
/**
 * Returns the text of a node, using extractTextContent to walk the node so
 * that text from neighbouring children is separated by a space.
 *
 * @param {string|Object} node - Raw string (returned unchanged) or a DOM-like
 *   node to extract text from.
 * @param {Object} [config] - Not read by this transformation.
 * @returns {string} The string itself, or whatever extractTextContent returns
 *   for the node.
 */
function textExtractionBetterWhitespace (node, config) {
  // Strings are already text; pass them straight through.
  if (typeof node === 'string') {
    return node
  }
  return extractTextContent(node)
}
// Code modified from getTextContent in xmldom/dom.js - https://github.com/jindw/xmldom
// Node number documentation from https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeType
/**
 * Recursively collects the text beneath a node, inserting a single space
 * between adjacent pieces of text when neither side already has a space at
 * the boundary.
 *
 * Element (nodeType 1) and DocumentFragment (nodeType 11) nodes are walked
 * child by child; ProcessingInstruction (7) and Comment (8) children are
 * skipped. Any other node simply yields its `nodeValue`.
 *
 * Children that contribute no text (empty string or a null `nodeValue`) are
 * ignored entirely, so they neither break the walk nor hide the fact that
 * earlier text still needs a separator.
 *
 * @param {Object} node - DOM-like node exposing `nodeType`, `nodeValue`,
 *   `firstChild` and `nextSibling`.
 * @returns {string|null} Joined text for element/fragment nodes; the node's
 *   own `nodeValue` (which may be null) for every other node type.
 */
function extractTextContent (node) {
  if (node.nodeType !== 1 && node.nodeType !== 11) {
    return node.nodeValue
  }

  var pieces = []
  // Last character emitted so far; '' means nothing has been emitted yet,
  // which prevents a leading separator.
  var lastChar = ''
  for (var child = node.firstChild; child; child = child.nextSibling) {
    // Type 7 = a ProcessingInstruction such as <?xml-stylesheet ... ?>
    // Type 8 = a Comment node
    if (child.nodeType === 7 || child.nodeType === 8) {
      continue
    }

    var childText = extractTextContent(child)
    // Nodes without text (null nodeValue, empty elements) add nothing and
    // must not reset the separator bookkeeping.
    if (typeof childText !== 'string' || childText.length === 0) {
      continue
    }

    if (lastChar !== '' && lastChar !== ' ' && childText[0] !== ' ') {
      pieces.push(' ')
    }
    pieces.push(childText)
    lastChar = childText[childText.length - 1]
  }
  return pieces.join('')
}
}())