Skip to content

resolved chromedriver issue #22

resolved chromedriver issue

resolved chromedriver issue #22

Workflow file for this run

name: Run Scraper Daily

# Intended production trigger (currently disabled): once every 24 hours.
# on:
#   schedule:
#     - cron: "0 0 * * *"  # midnight UTC every day

# Testing trigger (currently active): pushes to the scraper-tecq branch,
# a five-minute cron, and manual dispatch.
on:
  push:
    branches:
      - scraper-tecq
  schedule:
    - cron: '*/5 * * * *'  # every 5 minutes
  workflow_dispatch:  # lets the workflow be started manually
jobs:
  # Single job: set up Python, install Chrome 94 and a ChromeDriver 94 build,
  # run main.py, then archive output.xlsx under results/ with a timestamped
  # name and push it to the scraper-tecq branch.
  run-scraper:
    runs-on: ubuntu-latest
    # The last step pushes a commit, so the job token needs write access to
    # repository contents (the default token may be read-only).
    permissions:
      contents: write
    # Runs are serialized so that two overlapping scheduled runs never race
    # to push; a queued run waits instead of cancelling the one in progress.
    concurrency:
      group: run-scraper-results
      cancel-in-progress: false
    steps:
      - name: Check out repository
        uses: actions/checkout@v4
        with:
          # Always work on the branch the results are pushed to. Without this,
          # scheduled runs check out the default branch instead.
          ref: scraper-tecq

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.13'  # Use your required Python version

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Install Chrome 94
        run: |
          # Remove any existing versions of Chrome
          sudo apt-get remove -y google-chrome-stable || true
          # Download Chrome 94 .deb package
          wget https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_94.0.4606.81-1_amd64.deb -O /tmp/google-chrome-stable.deb
          # Install Chrome 94
          sudo apt-get update
          sudo apt-get install -y /tmp/google-chrome-stable.deb
          # Verify installation
          google-chrome --version

      - name: Install ChromeDriver
        run: |
          # ChromeDriver build from the same major version (94) as Chrome above
          wget https://chromedriver.storage.googleapis.com/94.0.4606.61/chromedriver_linux64.zip
          unzip chromedriver_linux64.zip
          sudo mv chromedriver /usr/local/bin/
          sudo chmod +x /usr/local/bin/chromedriver

      - name: Run Scraper
        env:
          # Optional, in case your script needs the Chrome path
          CHROME_BIN: "/usr/bin/google-chrome"
          CHROMEDRIVER_PATH: "/usr/local/bin/chromedriver"
        run: |
          python main.py

      - name: Save Excel Output
        # Move output.xlsx into results/ under a name carrying the date and time
        run: |
          # results/ may not exist on a fresh branch; mv would fail without it
          mkdir -p results
          timestamp=$(date +"%Y-%m-%d_%H-%M-%S")
          mv output.xlsx "results/output_$timestamp.xlsx"
          # Each step runs in its own shell, so export the timestamp through
          # GITHUB_ENV to make it visible to the commit step below
          echo "timestamp=$timestamp" >> "$GITHUB_ENV"

      - name: Commit and Push the Excel file
        run: |
          git config --local user.email "nn36@rice.edu"
          git config --local user.name "GitHub Actions"
          git add results/output_*.xlsx
          git commit -m "Add data for $timestamp"
          # Push explicitly to scraper-tecq, without --force, so commits that
          # landed on the branch in the meantime are never overwritten
          git push origin HEAD:scraper-tecq
        env:
          # GitHub Token for permissions to push
          # NOTE(review): git authenticates with the credentials persisted by
          # actions/checkout; confirm whether this variable is still needed
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}