-
Notifications
You must be signed in to change notification settings - Fork 0
/
MD_Image_Scraper.py
32 lines (26 loc) · 1.21 KB
/
MD_Image_Scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import os
import re
import requests
# Specify the directory containing your notes
notes_dir = os.path.join("C:", os.sep, "Users", "G", "CODE", "CoPilot", "12_Machine_Learning")
# Create a new directory to save downloaded images
download_dir = os.path.join(notes_dir, "downloaded_images")
if not os.path.exists(download_dir):
os.mkdir(download_dir)
# Define the regular expression to match imgur links
imgur_regex = r"!\[\]\((https?://i\.imgur\.com/[^\)]+)\)"
# Loop through all files in the notes directory
for filename in os.listdir(notes_dir):
# Check if the file is a markdown file
if filename.endswith(".md"):
# Read the contents of the file
with open(os.path.join(notes_dir, filename), "r", encoding="utf-8") as f:
contents = f.read()
# Find all imgur links using regular expressions
imgur_links = re.findall(imgur_regex, contents)
# Loop through all imgur links and download the images
for imgur_link in imgur_links:
img_data = requests.get(imgur_link).content
img_filename = os.path.basename(imgur_link)
with open(os.path.join(download_dir, img_filename), "wb") as f:
f.write(img_data)