-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdocument.go
144 lines (119 loc) · 2.65 KB
/
document.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
package goxtag
import (
"bytes"
"github.com/antchfx/htmlquery"
"golang.org/x/net/html"
)
// Integer limits derived from the platform's word size.
const (
	// maxUint is the largest uint value: every bit set.
	maxUint = ^uint(0)
	// maxInt is the largest int value: maxUint with the top bit cleared.
	// Slice treats an end index equal to maxInt as "up to the last node".
	maxInt = int(maxUint >> 1)
)
// Document is an ordered set of HTML nodes on which the query and
// extraction methods of this package operate.
type Document struct {
	// Nodes holds the nodes of the set, in order. It may be nil or empty.
	Nodes []*html.Node
}
// NewDocumentWithNode returns a Document whose node set contains exactly the
// given node. The node is stored as-is; no nil check is performed.
func NewDocumentWithNode(node *html.Node) *Document {
	doc := new(Document)
	doc.Nodes = []*html.Node{node}
	return doc
}
// NewDocumentWithNodes returns a Document that wraps the given slice of
// nodes. The slice is stored directly, not copied, so the Document shares
// its backing array with the caller.
func NewDocumentWithNodes(nodes []*html.Node) *Document {
	doc := Document{Nodes: nodes}
	return &doc
}
// Length reports how many nodes the document holds.
func (doc *Document) Length() int {
	count := len(doc.Nodes)
	return count
}
// IsEmpty reports whether the document holds no nodes. A nil Nodes slice and
// an empty one are treated the same, since len of a nil slice is 0.
func (doc *Document) IsEmpty() bool {
	return len(doc.Nodes) == 0
}
// Html renders every node of the document, in order, with html.Render and
// returns the concatenated markup.
//
// An empty document yields "" and a nil error. If rendering any node fails,
// the error is returned together with an empty string; output rendered so
// far is discarded.
func (doc *Document) Html() (ret string, e error) {
	// There is no .innerHtml equivalent on html.Node, so the markup has to
	// be regenerated from the node tree.
	var out bytes.Buffer
	for i := range doc.Nodes {
		if err := html.Render(&out, doc.Nodes[i]); err != nil {
			return "", err
		}
	}
	return out.String(), nil
}
// Text returns the concatenated contents of all text nodes found in the
// document's nodes and their descendants, visited depth-first in document
// order. Whitespace and newlines are kept verbatim, like jQuery's text().
func (doc *Document) Text() string {
	var out bytes.Buffer

	// A recursive closure walks each subtree directly, so no intermediate
	// Document is allocated per visited node.
	var walk func(node *html.Node)
	walk = func(node *html.Node) {
		if node.Type == html.TextNode {
			out.WriteString(node.Data)
		}
		// The loop condition already covers the "no children" case.
		for child := node.FirstChild; child != nil; child = child.NextSibling {
			walk(child)
		}
	}

	for i := range doc.Nodes {
		walk(doc.Nodes[i])
	}
	return out.String()
}
// Attr looks up the attribute named attrName on the first node of the
// document. It returns the attribute's value and true when the attribute is
// present, or "" and false when it is absent or the document has no nodes.
func (doc *Document) Attr(attrName string) (val string, exists bool) {
	if len(doc.Nodes) > 0 {
		return getAttributeValue(attrName, doc.Nodes[0])
	}
	return "", false
}
// Find evaluates the XPath expression selector against the first node of the
// document and returns a new Document holding every match.
//
// Only the first node is queried; any further nodes in the document are
// ignored. An empty document yields an empty result instead of panicking on
// the missing first node.
//
// NOTE(review): htmlquery.Find is documented to panic when the expression
// cannot be compiled; this method has no error result to report that, so a
// malformed selector still panics here. Use FindOne, or validate selectors
// up front, if they come from untrusted input.
func (doc *Document) Find(selector string) *Document {
	if len(doc.Nodes) == 0 {
		return NewDocumentWithNodes(nil)
	}
	return NewDocumentWithNodes(htmlquery.Find(doc.Nodes[0], selector))
}
// FindOne evaluates the XPath expression selector against the first node of
// the document and returns a new Document holding the first match.
//
// Only the first node is queried. The returned Document is empty when the
// receiver has no nodes or when nothing matches. If the selector cannot be
// evaluated, the error from htmlquery.Query is returned together with a nil
// Document, rather than panicking as htmlquery.FindOne would.
func (doc *Document) FindOne(selector string) (*Document, error) {
	if len(doc.Nodes) == 0 {
		return NewDocumentWithNodes(nil), nil
	}

	node, err := htmlquery.Query(doc.Nodes[0], selector)
	if err != nil {
		return nil, err
	}

	// Never wrap a nil node: a "no match" result must be an empty Document.
	var nodes []*html.Node
	if node != nil {
		nodes = []*html.Node{node}
	}
	return NewDocumentWithNodes(nodes), nil
}
// Eq returns a Document holding only the node at the given index. A negative
// index counts back from the end (-1 is the last node). When the resolved
// index falls outside the node set, an empty Document is returned.
func (doc *Document) Eq(index int) *Document {
	count := len(doc.Nodes)
	if index < 0 {
		index += count
	}
	if index < 0 || index >= count {
		return &Document{}
	}
	return doc.Slice(index, index+1)
}
// Slice returns a Document holding the nodes in the half-open index range
// [start, end).
//
// Negative indices count back from the end of the node set. Passing maxInt
// as end selects everything from start to the last node. Indices that fall
// outside the node set are clamped to it, and a range whose start lies at or
// past its end yields an empty Document, so no combination of arguments
// panics.
//
// The result shares its backing array with the receiver, but its capacity is
// capped at its length so that appending to the returned Nodes cannot
// overwrite nodes of the original document.
func (doc *Document) Slice(start, end int) *Document {
	count := len(doc.Nodes)

	if start < 0 {
		start += count
	}
	if end == maxInt {
		end = count
	} else if end < 0 {
		end += count
	}

	// Clamp both bounds into [0, count] and keep start <= end; the slice
	// expression below is then always valid, even for a nil Nodes slice.
	if start < 0 {
		start = 0
	}
	if end < 0 {
		end = 0
	}
	if end > count {
		end = count
	}
	if start > end {
		start = end
	}

	return NewDocumentWithNodes(doc.Nodes[start:end:end])
}
// getAttributeValue returns the value of the attribute named attrName on
// node n and true, or "" and false when n is nil or has no such attribute.
func getAttributeValue(attrName string, n *html.Node) (val string, exists bool) {
	attr := getAttributePtr(attrName, n)
	if attr == nil {
		return "", false
	}
	return attr.Val, true
}
// getAttributePtr returns a pointer to the first attribute of n whose Key
// equals attrName exactly, or nil when n is nil or no attribute matches.
// The pointer refers to the element inside n.Attr, so writes through it
// modify the node itself.
func getAttributePtr(attrName string, n *html.Node) *html.Attribute {
	if n == nil {
		return nil
	}
	for idx := range n.Attr {
		if n.Attr[idx].Key == attrName {
			return &n.Attr[idx]
		}
	}
	return nil
}