-
Notifications
You must be signed in to change notification settings - Fork 40
/
Copy pathgithubsearch.ado
351 lines (312 loc) · 10.6 KB
/
githubsearch.ado
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
*cap prog drop githubsearch
program githubsearch
    // Search GitHub repositories through the REST search API, parse the JSON
    // reply into a dataset with one row per repository, optionally save it,
    // and hand the result to githuboutput for display.
    //
    // Options
    //   language()  language qualifier; defaults to Stata, "all" disables it
    //   in()        search domain: name, description, readme, all, or one of
    //               the accepted comma-separated combinations checked below
    //   created()   passed through as a created: qualifier
    //   pushed()    passed through as a pushed: qualifier
    //   perpage()   results per page, default 50, capped at 100
    //   save()      file to save the parsed results to
    //   append      merge the results into an existing save() file
    //   replace     passed to the save command when append is not used
    //   debug       print the request URL and tokens, keep a copy in base.json
    //   quiet       skip the output table
    //   number()    forwarded to githuboutput
    //   all         forwarded to githuboutput
    //   scoreless   do not compute the score variable nor sort by it
    //
    // The user's data are preserved on entry and restored on exit.
    syntax [anything] , [language(str) save(str) in(str) all created(str)      ///
        pushed(str) perpage(numlist max=1) debug append replace quiet          ///
        Number(numlist max=1) scoreless]

    // defaults language is Stata
    // --------------------------
    if missing("`perpage'") local perpage 50
    if `perpage' > 100 {
        display as err "the maximum {bf:perpage} value is 100"
        local perpage 100
    }

    // savelang keeps the user-facing language name for githuboutput, while
    // language becomes the query-string fragment (empty when "all")
    if "`language'" == "all" {
        local savelang `language'
        local language //nothing!
    }
    else if missing("`language'") {
        local savelang Stata
        local language "+language:stata"
    }
    else {
        local savelang `language'
        local language "+language:`language'"
    }

    // date qualifiers
    if !missing("`created'") {
        local created "+created:`created'"
    }
    if !missing("`pushed'") {
        local pushed "+pushed:`pushed'"
    }

    // search domain
    local inoriginal "`in'"
    local in : subinstr local in " " "", all //remove spaces
    if missing("`in'") {
        local in "name,description"
    }
    else if "`in'" == "all" {
        local in "name,description,readme"
    }
    else if "`in'" != "name" & "`in'" != "description" & "`in'" != "readme" &  ///
        "`in'" != "all" & "`in'" != "name,description" & "`in'" !=             ///
        "description,name" & "`in'" != "name,description,readme"               ///
        & "`in'" != "description,name,readme" & !missing("`in'") {
        di as err `"option in("`inoriginal'") is unacceptable"'
        err 198
    }

    // build an empty dataset that the parser fills row by row
    preserve
    quietly clear
    qui generate str address = ""
    qui generate str name = ""
    qui generate int installable = .
    qui generate str language = ""
    qui generate int star = .
    qui generate int fork = .
    qui generate str created = ""
    qui generate str updated = ""
    qui generate str pushed = ""
    qui generate int kb = .
    qui generate int watchers = .
    qui generate str description = ""
    qui generate str homepage = ""

    // the request URL is built once and reused for debugging and download
    local url "https://api.github.com/search/repositories?q=`anything'`language'`created'`pushed'+in:`in'&sort=stars&order=desc&per_page=`perpage'"

    if !missing("`debug'") {
        di as err "{p}`url'" _n
        copy "`url'" "base.json", replace
    }

    tempfile apifile
    tempname hitch
    capture qui copy "`url'" `apifile', replace
    if _rc != 0 {
        di as err "{p}the GitHub API is not responsive right now. Try again in " ///
            "10 or 20 seconds. this can happen if you search GitHub very frequent..."
        di as txt ""
        if "`c(checksum)'" == "on" {
            display as txt "this might be caused by your {help checksum} operation. " ///
                "try again after turning checksum off"
        }
        // NOTE(review): returns with code 0, so callers cannot detect the
        // failure through the return code; kept as is for compatibility
        exit
    }

    file open `hitch' using "`apifile'", read
    file read `hitch' line
    if !missing("`debug'") {
        di as txt `"Line:`line'"' _n(3)
    }

    // apostrophes in the text (for example in a description) can break the
    // macro processing below, so they are removed from the line
    local line : subinstr local line "'" "", all

    // check the number of results
    // --------------------------------
    // the count is read from character 16 onward of the first
    // comma-delimited token of the reply
    tokenize `"`macval(line)'"' , parse(",")
    local 1 : display substr(`"`macval(1)'"',16,.)
    local found `1'

    // if the results are more than 100
    // --------------------------------
    if `1' > 100 {
        di as txt "{p}(your search has yielded {bf:`1'} results. "             ///
            "GitHub API returns the top 100 only. "                            ///
            "Narrow your search using {bf:language}, {bf:in}, {bf:created}, and {bf:pushed} options...)" _n
    }

    // drop the JSON structural characters so that only key and value tokens
    // separated by commas remain
    local line : subinstr local line "[" "", all
    local line : subinstr local line "]" "", all
    local line : subinstr local line "{" "", all
    local line : subinstr local line "}" "", all

    // -----------------------------------------------------------------------
    // Parse the reply
    // =======================================================================
    // Token 1 is inspected as a key and token 2 is treated as its value,
    // which arrives with a leading colon. Every branch ends with one macro
    // shift, so the value just processed becomes token 1 on the next pass.
    // A "name" key starts a new observation; the other keys fill that row.
    local n = 0
    while r(eof) == 0 {
        tokenize `"`macval(line)'"' , parse(",")
        while !missing(`"`macval(1)'"') | !missing(`"`macval(2)'"') | !missing(`"`macval(3)'"') {
            if !missing("`debug'") {
                di as err `"1:`macval(1)'"'`"2:`macval(2)'"' `"3:`macval(3)'"' "4>>" `"4:`macval(4)'"' `"5:`macval(5)'"'
            }
            if `"`macval(1)'"' == `","' {
                macro shift
            }
            else if `"`macval(1)'"' == "name" {
                // strip the leading colon, then the surrounding quotes
                local 2 : di substr(`"`macval(2)'"', 2,.)
                local l : di length(`"`macval(2)'"')
                local 2 : di substr(`"`macval(2)'"', 2,`l'-2)
                local n `++n'
                quietly set obs `n'
                quietly replace name = `"`macval(2)'"' in `n'
                macro shift
            }
            else if `"`macval(1)'"' == "full_name" {
                local 2 : di substr(`"`macval(2)'"', 2,.)
                local l : di length(`"`macval(2)'"')
                local 2 : di substr(`"`macval(2)'"', 2,`l'-2)
                quietly replace address = `"`macval(2)'"' in `n'

                // confirm that it is installable
                // ------------------------------
                capture quietly githubconfirm `2'

                // progress indicator for longer result lists: the row number
                // on the first and every tenth row, a dot otherwise
                local cal : di mod(`n',10)
                if `found' > 10 {
                    if "`cal'" == "0" & missing("`quiet'") |                   ///
                        "`n'" == "1" & missing("`quiet'") {
                        di as txt "`n'" _c
                    }
                    else {
                        di as txt "." _c
                    }
                }
                if "`r(installable)'" == "1" {
                    quietly replace installable = 1 in `n'
                }
                else {
                    quietly replace installable = 0 in `n'
                }
                macro shift
            }
            else if `"`macval(1)'"' == "description" {
                local l : di length(`"`macval(2)'"')
                if `"`macval(2)'"' != ":null" & `l' > 3 {
                    local 2 : di substr(`"`macval(2)'"', 2,.)
                    local l : di length(`"`macval(2)'"')
                    local 2 : di substr(`"`macval(2)'"', 2,`l'-2)
                    quietly replace description = `"`macval(2)'"' in `n'
                }
                else {
                    quietly replace description = "" in `n'
                }
                macro shift
            }
            else if `"`macval(1)'"' == "created_at" {
                // timestamps are stored as text here and converted to
                // datetime values after the loop
                local 2 : di substr(`"`macval(2)'"', 2,.)
                local 2 : subinstr local 2 "T" " at "
                local 2 : subinstr local 2 "Z" ""
                quietly replace created = `"`macval(2)'"' in `n'
                macro shift
            }
            else if `"`macval(1)'"' == "updated_at" {
                local 2 : di substr(`"`macval(2)'"', 2,.)
                local 2 : subinstr local 2 "T" " at "
                local 2 : subinstr local 2 "Z" ""
                quietly replace updated = `"`macval(2)'"' in `n'
                macro shift
            }
            else if `"`macval(1)'"' == "pushed_at" {
                local 2 : di substr(`"`macval(2)'"', 2,.)
                local 2 : subinstr local 2 "T" " at "
                local 2 : subinstr local 2 "Z" ""
                quietly replace pushed = `"`macval(2)'"' in `n'
                macro shift
            }
            else if `"`macval(1)'"' == "homepage" {
                local l : di length(`"`macval(2)'"')
                if `"`macval(2)'"' != ":null" & `l' > 3 {
                    local 2 : di substr(`"`macval(2)'"', 2,.)
                    local l : di length(`"`macval(2)'"')
                    local 2 : di substr(`"`macval(2)'"', 2,`l'-2)
                    quietly replace homepage = `"`macval(2)'"' in `n'
                }
                else {
                    quietly replace homepage = "" in `n'
                }
                macro shift
            }
            else if `"`macval(1)'"' == "size" {
                local 2 : di substr(`"`macval(2)'"', 2,.)
                quietly replace kb = `2' in `n'
                macro shift
            }
            else if `"`macval(1)'"' == "language" {
                local l : di length(`"`macval(2)'"')
                if `"`macval(2)'"' != ":null" & `l' > 3 {
                    local 2 : di substr(`"`macval(2)'"', 2,.)
                    local l : di length(`"`macval(2)'"')
                    local 2 : di substr(`"`macval(2)'"', 2,`l'-2)
                    quietly replace language = `"`macval(2)'"' in `n'
                }
                else {
                    // the colon is still stripped because this token is
                    // inspected as a key on the next pass
                    local 2 : di substr(`"`macval(2)'"', 2,.)
                    quietly replace language = "" in `n'
                }
                macro shift
            }
            else if `"`macval(1)'"' == "stargazers_count" {
                local 2 : di substr(`"`macval(2)'"', 2,.)
                quietly replace star = `2' in `n'
                macro shift
            }
            else if `"`macval(1)'"' == "forks_count" {
                local 2 : di substr(`"`macval(2)'"', 2,.)
                quietly replace fork = `2' in `n'
                macro shift
            }
            else if `"`macval(1)'"' == "watchers" {
                local 2 : di substr(`"`macval(2)'"', 2,.)
                quietly replace watchers = `2' in `n'
                macro shift
            }
            else {
                macro shift
            }
            if !missing("`debug'") {
                di as txt `"::`macval(1)'"' _n(3)
            }
        }
        file read `hitch' line
    }
    file close `hitch'

    // -----------------------------------------------------------------------
    // Variable correction
    // =======================================================================
    // convert the three text timestamps to %tc datetime values, keeping the
    // original variable names
    foreach stamp in created updated pushed {
        qui generate double time_admitted = clock(`stamp', "YMD#hms")
        qui format time_admitted %tc
        qui drop `stamp'
        quietly rename time_admitted `stamp'
    }

    //generate score
    qui gen star2 = 1+star
    qui gen fork2 = 1+fork*4
    if missing("`scoreless'") {
        quietly gen score = (star2 * fork2) - 1
        quietly replace score = 0 if score < 0
    }
    qui drop star2
    qui drop fork2

    // sort the data based on installable and score
    if missing("`scoreless'") {
        quietly gsort - installable - score
    }

    // for very rare API issues
    qui drop if address == ""

    // -----------------------------------------------------------------------
    // Save the results
    // =======================================================================
    if !missing("`save'") {
        if missing("`append'") {
            quietly save `save', `replace'
        }
        else {
            // merge with an existing file when there is one, keeping a
            // single row per repository address
            capture confirm file `"`save'.dta"'
            if _rc == 0 {
                qui append using `"`save'.dta"'
                capture duplicates drop address, force
                qui save `"`save'.dta"', replace
            }
            else {
                qui save `"`save'.dta"', replace
            }
        }
    }

    // -----------------------------------------------------------------------
    // Drawing the output table
    // =======================================================================
    if missing("`quiet'") {
        if !missing("`debug'") {
            display as txt _n(5) "ANYTHING:`anything'" _n "in:`in'" _n "number:`number'"
        }
        githuboutput `anything', in("`in'") `all' number(`number') language(`savelang')
    }
    restore
end