-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfetch_notes_and_populate_db.rb
53 lines (45 loc) · 1.38 KB
/
fetch_notes_and_populate_db.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
require 'rubygems'
require 'sqlite3'
require 'htmlentities'
require 'date'
require 'json'
require 'pp'
# From http://stackoverflow.com/questions/279769/convert-to-from-datetime-and-time-in-ruby.
#
# Adds Time conversions to Date. Both methods read hour/min/sec/sec_fraction
# through an explicit receiver, so they are meant for DateTime receivers.
# NOTE(review): on current Ruby those readers appear to be private on plain
# Date instances, so calling these on a bare Date likely raises - confirm.
class Date
  # Returns a UTC Time for the same instant as the receiver, truncated to
  # whole microseconds.
  def to_gm_time
    # new_offset with no argument re-expresses the receiver at offset 0 (UTC).
    utc = new_offset
    # sec_fraction is a fraction of a *second* here, so scale by 10**6 only.
    usec = (utc.sec_fraction * 1_000_000).to_i
    Time.gm(utc.year, utc.month, utc.day, utc.hour, utc.min, utc.sec, usec)
  end

  # Returns a Time for the same instant as the receiver, expressed in the
  # process's local time zone.
  def to_local_time
    # Let Time work out the local offset (including DST) for that instant,
    # rather than deriving it from DateTime.now and the receiver's offset.
    to_gm_time.getlocal
  end

  # Deliberately no private #to_time(dest, method) helper: defining one here
  # would replace the standard zero-argument Date#to_time.
end
# Entity decoder; the fetch loop below passes each post's "regular-body"
# through coder.decode before storing it.
coder = HTMLEntities.new
# Fetches one page of posts from the blog's JSON read endpoint by shelling out
# to curl, and returns the parsed response.
#
# start - offset of the first post to request (the script starts at 0).
#         Integer, or anything Integer() accepts.
# num   - number of posts to request. Integer, or anything Integer() accepts.
#
# Returns the parsed response as a Hash.
# Raises ArgumentError or TypeError when start/num is not integer-like, and
# JSON::ParserError when the unwrapped body is not valid JSON.
def get_posts(start, num)
  # Coerce first so only an integer can ever be interpolated into the shell
  # command line.
  start = Integer(start)
  num = Integer(num)
  raw = `/usr/bin/curl http://jsomers.tumblr.com/api/read/json -d num=#{num} -d start=#{start} -d debug=1 -s`
  # The body is a JavaScript assignment ("var tumblr_api_read = {...};").
  # Strip the prefix and the final semicolon by position rather than by
  # matching "}]};", which is absent when the posts array is empty ("[]};").
  json = raw.strip.sub(/\Avar tumblr_api_read = /, "").sub(/;\z/, "")
  JSON.parse(json)
end
# Download every post in pages of 50 and collect [date, url, decoded body]
# triples in `posts` for the insert step that follows.
posts = []
# A one-post request is enough to read the overall "posts-total" count.
total = get_posts(0, 1)["posts-total"].to_i
# Start offsets run from 0, so the last useful one is strictly below `total`;
# the exclusive range avoids a final empty request when `total` is 0 or an
# exact multiple of the page size.
(0...total).step(50) do |start|
  batch = get_posts(start, 50)
  batch["posts"].each do |post|
    posts << [post["date"], post["url"], coder.decode(post["regular-body"])]
  end
  # Report what has actually been collected, not the next start offset.
  puts "Fetched #{posts.size} notes total."
end
# Write every collected post into the `notes` table of notes.db.
# The table is expected to exist already; nothing here creates it.
db = SQLite3::Database.new("notes.db")
begin
  # One transaction for the whole batch: it avoids a commit per row, and a
  # failure part-way through rolls back instead of leaving a partial import.
  db.transaction do
    posts.each do |date, permalink, content|
      # Store the post date as integer seconds since the Unix epoch.
      created_at = DateTime.parse(date).to_time.to_i
      # Bind values go in a single array; the varargs form is deprecated in
      # sqlite3 1.x and removed in 2.x.
      db.execute("insert into notes(content, permalink, created_at) values ( ?, ?, ? )",
                 [content, permalink, created_at])
    end
  end
ensure
  # Release the database handle even if parsing or an insert raises.
  db.close
end