-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtasks.py
58 lines (43 loc) · 1.67 KB
/
tasks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# ================
# File contains tasks to be scheduled on Heroku Server
# ================
# SCHEDULED TASKS:
# ---------------
# This file is scheduled to run daily on the Heroku server
# It will first check the day of the week and ONLY RUN its tasks on a MONDAY
# TASK 1: Call the spider to crawl - store the results in the database
# TASK 2: Check for VisitedURLs older than 100 days and delete them from the database
# TASK 3: Add new weekly agree / disagree scores for every player & club
# =============
# IMPORTS
# =============
import datetime
# Local imports
from matchreportscraper import crawl, populate_weekly_agreedisagree_scores
from main.models import VisitedURL
# =======================================
# CHECK DAY OF THE WEEK (MONDAY = 0)
# =======================================
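# Crawler temporarily paused
# NOTE(review): the line above looks stale - crawl.crawl() is still called under TASK 1 below; confirm which is intended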
# The scheduler invokes this file every day; date.weekday() numbers Monday
# as 0, so everything below is skipped on the other six days.
if datetime.date.today().weekday() == 0:
    # -------------------------------------------------------------
    # TASK 1 - run the match report spider
    # (per the file header, the crawl stores its results in the DB)
    # -------------------------------------------------------------
    crawl.crawl()

    # -------------------------------------------------------------
    # TASK 2 - purge VisitedURL rows visited 100 or more days ago
    # -------------------------------------------------------------
    # Date-threshold approach adapted from a Stack Overflow answer
    # by Jonathan on May 13 '14:
    # https://stackoverflow.com/questions/23622501/datetimefield-get-all-objects-older-than-48-hours?lq=1
    current_date = datetime.date.today()
    cutoff_date = current_date - datetime.timedelta(days=100)
    # End of referenced code

    # `__lte` keeps rows whose date_visited is on or before the cutoff.
    stale_urls = VisitedURL.objects.filter(date_visited__lte=cutoff_date)
    stale_urls.delete()

    # -------------------------------------------------------------
    # TASK 3 - add this week's agree / disagree scores
    # Players are processed first, then clubs.
    # -------------------------------------------------------------
    populate_weekly_agreedisagree_scores.add_weekly_agree_disagree_scores_players()
    populate_weekly_agreedisagree_scores.add_weekly_agree_disagree_scores_clubs()