-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
113 lines (92 loc) · 2.55 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
package main
import (
"fmt"
"net/http"
"io/ioutil"
"strings"
"strconv"
"regexp"
)
// Letter pairs one alphabet letter with the number of index pages found for it.
type Letter struct {
// letter is the letter text that gets appended to the per-letter index URL.
letter string
// pageCount is filled in by setPageCounts; NewAlphabet initialises it to 0.
pageCount int
}
// Alphabet is the list of letters that main iterates over when crawling.
type Alphabet struct {
// letters holds one Letter per entry, in the order NewAlphabet lists them.
letters []Letter
}
// main crawls the per-letter index pages and prints every word found, one per
// line, to standard output.
//
// It first determines how many index pages each letter has, then fetches pages
// 1..pageCount for each letter and prints the words parsed from each page.
func main() {
	// NewAlphabet already returns *Alphabet, so the method is called on it
	// directly; taking its address again would yield **Alphabet, which has no
	// methods.
	alphabet := NewAlphabet()
	alphabet.setPageCounts()

	for _, l := range alphabet.letters {
		for page := 1; page <= l.pageCount; page++ {
			body := queryByLetterAndPage(l.letter, page)
			for _, word := range parseWords(body) {
				fmt.Println(word)
			}
		}
	}
}
// NewAlphabet returns an Alphabet containing one Letter for each lowercase
// letter of the Turkmen Latin alphabet, in alphabetical order. Every Letter
// starts with pageCount 0.
func NewAlphabet() *Alphabet {
	alphabet := [...]string{
		"a", "b", "ç", "d", "e", "ä",
		"f", "g", "h", "i", "j", "ž",
		"k", "l", "m", "n", "ň", "o",
		"ö", "p", "r", "s", "ş", "t",
		"u", "ü", "w", "y", "ý", "z", // "z" is the 30th and final letter
	}

	letters := make([]Letter, len(alphabet))
	for i, l := range alphabet {
		letters[i] = Letter{letter: l}
	}
	return &Alphabet{letters: letters}
}
// setPageCounts downloads the index page of every letter in alph and stores
// the page count parsed from it in that letter's pageCount field.
func (alph *Alphabet) setPageCounts() {
	for i := range alph.letters {
		// Work through a pointer so the write lands in the slice element.
		entry := &alph.letters[i]
		indexPage := queryByLetter(entry.letter)
		entry.pageCount = parsePagination(entry.letter, indexPage)
	}
}
// parsePagination reports how many index pages exist for letter, judging by
// the pagination links found in body.
//
// It looks for URLs of the form
// "http://enedilim.com/sozluk/harp/<letter>/<page>" and returns the highest
// page number in the range 1..20 whose URL occurs in body, or 0 when none
// does. Matching is by substring, so a link to page 10 also counts as a hit
// for page 1; searching from the highest page downwards makes that harmless.
func parsePagination(letter string, body []byte) int {
	// Upper bound on the page numbers that are probed.
	const maxPages = 20

	s := string(body)
	prefix := "http://enedilim.com/sozluk/harp/" + letter + "/"
	for page := maxPages; page >= 1; page-- {
		if strings.Contains(s, prefix+strconv.Itoa(page)) {
			return page
		}
	}
	return 0
}
// wordURLPrefix is the URL prefix that precedes every word in a word link.
const wordURLPrefix = "http://enedilim.com/sozluk/soz/"

// wordURLPattern matches a word link: the literal prefix followed by one or
// more ASCII letters, Turkmen letters or hyphens. It is compiled once at
// package initialisation instead of on every parseWords call, and the prefix
// is quoted so the "." in the host name only matches a literal dot.
var wordURLPattern = regexp.MustCompile(
	regexp.QuoteMeta(wordURLPrefix) + `[a-zA-ZçäžşýüöÇÄŽŞÝÜÖňŇ-]+`)

// parseWords extracts the words referenced by word links in body.
//
// Every occurrence of a word URL is reported, in document order and without
// de-duplication, with the URL prefix removed so only the word remains. The
// result is empty when body contains no word links.
func parseWords(body []byte) []string {
	words := wordURLPattern.FindAllString(string(body), -1)
	for i, link := range words {
		words[i] = strings.TrimPrefix(link, wordURLPrefix)
	}
	return words
}
// queryByLetter fetches the index page for letter and returns whatever query
// returns for that URL.
func queryByLetter(letter string) []byte {
	const indexURL = "http://enedilim.com/sozluk/harp"
	target := indexURL + "/" + letter
	return query(target)
}
// queryByLetterAndPage fetches the given page of the index for letter and
// returns whatever query returns for that URL.
//
// The URL has the form "http://enedilim.com/sozluk/harp/<letter>/<page>",
// the same shape parsePagination looks for in pagination links.
func queryByLetterAndPage(letter string, page int) []byte {
	// No trailing slash here: the separators are added below, so the path
	// does not end up with an empty "//" segment.
	const baseURL = "http://enedilim.com/sozluk/harp"
	return query(baseURL + "/" + letter + "/" + strconv.Itoa(page))
}
// query performs an HTTP GET on url and returns the response body.
//
// It returns nil when the request fails, when the response status is not
// 200 OK, or when the body cannot be read in full. Request and read errors
// are printed to standard output.
func query(url string) []byte {
	resp, err := http.Get(url)
	if err != nil {
		// resp is nil on error, so it must not be touched before this check.
		fmt.Println(err)
		return nil
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println(err)
		return nil
	}
	return body
}