Games ~ Scraping Categories and Mechanics from BGG

Below is code that scrapes the BoardGameGeek (BGG) site for information on categories and mechanics and creates the corresponding fixtures. Running the script produces a file called mechanics_categories_fixtures.json, which should be placed in /src/chigame/games/fixtures.

Relevant Links

Corresponding issue
Corresponding PR (forthcoming)

# For Jupyter Notebook use
# !pip install requests
# !pip install beautifulsoup4

import json
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup  # https://www.crummy.com/software/BeautifulSoup/bs4/doc/


# ============ FUNCTIONS ============


def fetch_and_parse(url, timeout=30):
    """Download *url* and return its HTML parsed as a BeautifulSoup tree.

    Args:
        url: Absolute URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on an unresponsive host.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or a timeout.
    """
    # Every request is sent with a browser-like User-Agent header.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly instead of parsing an error page, which would otherwise
    # end up as empty or misleading fixture data.
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")


def extract_categories_or_mechanics(url):
    """Collect the category or mechanic links found on an index page.

    Every ``<a>`` element whose ``href`` contains ``/boardgamecategory/`` or
    ``/boardgamemechanic/`` is considered a match.

    Args:
        url: URL of the index page to scan.

    Returns:
        A dict mapping the stripped link text (the category or mechanic
        name) to the ``href`` exactly as it appears in the page. When
        several links share the same text, the last one wins.
    """
    soup = fetch_and_parse(url)
    markers = ("/boardgamecategory/", "/boardgamemechanic/")

    categories_or_mechanics = {}
    for link in soup.find_all("a", href=True):
        href = link["href"]
        if not any(marker in href for marker in markers):
            continue

        name = link.text.strip()
        # A matching link without visible text (e.g. one that only wraps an
        # image) would otherwise create an entry with an empty name.
        if name:
            categories_or_mechanics[name] = href

    return categories_or_mechanics


def extract_info(url):
    """Get the image URL and description for a given mechanic or category.

    The values are read from the page's ``og:image`` and ``description``
    meta tags.

    Args:
        url: URL of the mechanic or category page.

    Returns:
        A ``(img_src, description)`` tuple. ``img_src`` falls back to
        ``/static/images/no_picture_available.png`` and ``description`` to
        an empty string when the corresponding meta tag is missing or has
        no usable ``content`` attribute.
    """
    soup = fetch_and_parse(url)

    # Tag.get() returns None for an absent attribute, so a meta tag without
    # ``content`` takes the fallback path instead of raising KeyError.
    img_meta_tag = soup.find("meta", property="og:image")
    img_content = img_meta_tag.get("content") if img_meta_tag else None
    img_src = img_content or "/static/images/no_picture_available.png"

    description_meta_tag = soup.find("meta", {"name": "description"})
    description_content = (
        description_meta_tag.get("content") if description_meta_tag else None
    )
    description = (description_content or "").strip()

    return img_src, description


def create_fixtures(base_url, path, model_name):
    """Build one fixture entry per category or mechanic listed on an index page.

    The index page at ``{base_url}/{path}`` is scanned for links, and each
    linked page is then fetched for its image and description.

    Args:
        base_url: Site root, e.g. ``https://boardgamegeek.com``.
        path: Path of the index page relative to ``base_url`` (no leading
            slash).
        model_name: Value stored under the ``model`` key of every entry.

    Returns:
        A list of dicts with the keys ``model``, ``pk`` and ``fields``.
        ``pk`` values start at 1 and follow the order in which the links
        were found; ``fields`` holds ``name``, ``description`` and ``image``.
    """
    index_url = f"{base_url}/{path}"
    items = extract_categories_or_mechanics(index_url)

    fixtures = []
    for pk, (name, href) in enumerate(items.items(), start=1):
        # urljoin resolves site-relative hrefs against the base and leaves
        # absolute hrefs intact, unlike plain string concatenation.
        full_url = urljoin(base_url, href)
        img_src, description = extract_info(full_url)
        fixtures.append(
            {
                "model": model_name,
                "pk": pk,
                "fields": {
                    "name": name,
                    "description": description,
                    "image": img_src,
                },
            }
        )

    return fixtures


# ============ EXECUTION ============


base_url = "https://boardgamegeek.com"

# Extract categories and mechanics
categories = extract_categories_or_mechanics(base_url + "/browse/boardgamecategory")
mechanics = extract_categories_or_mechanics(base_url + "/browse/boardgamemechanic")

# Print categories and mechanics
print("Categories:", categories)
print("Mechanics:", mechanics)

# Create fixtures for Mechanics
mechanics_fixtures = create_fixtures(
    base_url, "browse/boardgamemechanic", "games.mechanic"
)

# Create fixtures for Categories
categories_fixtures = create_fixtures(
    base_url, "browse/boardgamecategory", "games.category"
)

# Combine mechanics and categories fixtures (mechanics first)
combined_fixtures = mechanics_fixtures + categories_fixtures

# Save to JSON file. The encoding is given explicitly so the output does not
# depend on the platform's default text encoding.
with open("mechanics_categories_fixtures.json", "w", encoding="utf-8") as f:
    json.dump(combined_fixtures, f, indent=4)

Home
Tournaments
Games
Matches
API
- User Stories
- JWT Documentation
Forums
Users & Friending
- Notification Model Documentation

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Games ~ Scraping Categories and Mechanics from BGG

Clone this wiki locally