diff --git a/.ipynb_checkpoints/scrape-checkpoint.ipynb b/.ipynb_checkpoints/scrape-checkpoint.ipynb
new file mode 100644
index 0000000..fea8737
--- /dev/null
+++ b/.ipynb_checkpoints/scrape-checkpoint.ipynb
@@ -0,0 +1,163 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def retrieve_from_web(url, user_agent, fname):\n",
+ "    \"\"\"Fetch url with the given User-Agent header and save the raw bytes as HTML/<fname>.\"\"\"\n",
+ "    request = urllib.request.Request(url, headers={'User-Agent': user_agent})\n",
+ "    with urllib.request.urlopen(request) as response:\n",
+ "        html = response.read()\n",
+ "    fname = '/home/ashutosh/Desktop/WebCrawler/HTML/' + fname\n",
+ "    with open(fname, 'wb') as fp:\n",
+ "        fp.write(html)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def read_html():\n",
+ "    \"\"\"Return the text of the cached page HTML/medium_html (saved as raw bytes, read as UTF-8).\"\"\"\n",
+ "    with open('/home/ashutosh/Desktop/WebCrawler/HTML/medium_html', 'r', encoding='utf-8') as fp:\n",
+ "        return fp.read()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "#from urllib.request import urlopen\n",
+ "import urllib.request\n",
+ "from bs4 import BeautifulSoup\n",
+ "user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'\n",
+ "url = 'https://medium.freecodecamp.org/'\n",
+ "#retrieve_from_web(url, user_agent, 'medium_html')\n",
+ "buff = read_html()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n\n",
+ "The Mobile App Launch Checklist — How to Ship Apps Like a Boss \n",
+ "How To Master Async/Await With This Real World Example \n",
+ "Here are some super secret VS Code hacks to boost your productivity \n",
+ "Removing JavaScript’s “this” keyword makes it a better language. Here’s why. \n",
+ "A chaotic mind leads to chaotic code \n",
+ "I know nothing, but it is okay \n",
+ "Which Programming Language Should You Learn Next? \n",
+ "How to create a Discord bot under 15 minutes \n",
+ "How to go from scratch to Create-React-App on Windows \n",
+ "How I built an async form validation library in ~100 lines of code with React Hooks \n",
+ "Introducing ABS, a programming language for shell scripting \n",
+ "How to write a better CV— the Web Developer edition \n",
+ "The React Handbook \n",
+ "Simple site hosting with Amazon S3 and HTTPS \n",
+ "How to Host a Static Website with S3, CloudFront and Route53 \n",
+ "How to Publish An Application In The Play Store \n",
+ "The Strategy Pattern explained using Java \n",
+ "How to calculate Binary Tree height with the recursive method \n",
+ "I landed an internship at Facebook. Here are some tips I learned. \n",
+ "Essential Gems for Rails Applications \n",
+ "How to secure and manage secrets using Google Cloud KMS \n",
+ "How to Pass Oracle’s Java Certifications — a Practical Guide for Developers \n",
+ "Master the art of looping in JavaScript with these incredible tricks \n",
+ "The art of asking questions \n",
+ "The Definitive Guide to Contributing to Open Source \n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "5748"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Build an HTML page of links to every headline in the cached page (buff) and save it\n",
+ "# under result/ in a file named after the current date.\n",
+ "import time\n",
+ "from html import escape\n",
+ "\n",
+ "soup = BeautifulSoup(buff, \"html.parser\")\n",
+ "# A story is an anchor that wraps an h3 headline; anchors without one are skipped.\n",
+ "rows = []\n",
+ "for news in soup.find_all('a'):\n",
+ "    head = news.find('h3')\n",
+ "    if not head:\n",
+ "        continue\n",
+ "    # href may be missing; escape both parts so &, < or quotes cannot break the markup.\n",
+ "    href = escape(news.get('href') or '', quote=True)\n",
+ "    # Emit a real link: the old \"{1} \" template dropped the href and wrote bare titles.\n",
+ "    rows.append('<a href=\"{0}\">{1}</a><br>'.format(href, escape(head.text)))\n",
+ "\n",
+ "html_links = \"\\n\".join(rows)\n",
+ "print(html_links)\n",
+ "\n",
+ "# Name the result after today's date, e.g. 19-01-16.html. The old \"%y-%m%-d\" pattern\n",
+ "# lost a hyphen and used %-d, which is a glibc extension that fails on other platforms.\n",
+ "fname = '/home/ashutosh/Desktop/WebCrawler/result/' + time.strftime(\"%y-%m-%d\") + \".html\"\n",
+ "# The with-block flushes and closes the file; the original never closed its handle.\n",
+ "with open(fname, 'w', encoding='utf-8') as fp:\n",
+ "    fp.write(html_links)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/.ipynb_checkpoints/test-checkpoint.ipynb b/.ipynb_checkpoints/test-checkpoint.ipynb
new file mode 100644
index 0000000..2fd6442
--- /dev/null
+++ b/.ipynb_checkpoints/test-checkpoint.ipynb
@@ -0,0 +1,6 @@
+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/.ipynb_checkpoints/web_log-checkpoint.ipynb b/.ipynb_checkpoints/web_log-checkpoint.ipynb
new file mode 100644
index 0000000..8c89322
--- /dev/null
+++ b/.ipynb_checkpoints/web_log-checkpoint.ipynb
@@ -0,0 +1,62 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import logging\n",
+ "#for handler in logging.root.handlers[:]:\n",
+ "# logging.root.removeHandler(handler)\n",
+ "\n",
+ "logging.basicConfig(filename = \"wb.log\", format = '%(asctime)s-%(levelname)s - %(message)s', level=logging.INFO, filemode = 'w')\n",
+ "log = logging.getLogger(__name__)\n",
+ "#log.setLevel(20)\n",
+ "log.info(\"logging output\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/HTML/b.txt b/HTML/b.txt
new file mode 100644
index 0000000..12a6eac
--- /dev/null
+++ b/HTML/b.txt
@@ -0,0 +1 @@
+sjsjj
\ No newline at end of file
diff --git a/HTML/medium_html b/HTML/medium_html
new file mode 100644
index 0000000..dd8d5e6
--- /dev/null
+++ b/HTML/medium_html
@@ -0,0 +1,237 @@
+freeCodeCamp.org
Join a community of millions of people who are learning new skills together, building their portfolios, and getting developer jobs.
\ No newline at end of file
diff --git a/app.log b/app.log
new file mode 100644
index 0000000..97dcf1f
--- /dev/null
+++ b/app.log
@@ -0,0 +1 @@
+root - INFO - s
diff --git a/result/19-0116.html b/result/19-0116.html
new file mode 100644
index 0000000..7c70308
--- /dev/null
+++ b/result/19-0116.html
@@ -0,0 +1,26 @@
+
+The Mobile App Launch Checklist — How to Ship Apps Like a Boss
+How To Master Async/Await With This Real World Example
+Here are some super secret VS Code hacks to boost your productivity
+Removing JavaScript’s “this” keyword makes it a better language. Here’s why.
+A chaotic mind leads to chaotic code
+I know nothing, but it is okay
+Which Programming Language Should You Learn Next?
+How to create a Discord bot under 15 minutes
+How to go from scratch to Create-React-App on Windows
+How I built an async form validation library in ~100 lines of code with React Hooks
+Introducing ABS, a programming language for shell scripting
+How to write a better CV— the Web Developer edition
+The React Handbook
+Simple site hosting with Amazon S3 and HTTPS
+How to Host a Static Website with S3, CloudFront and Route53
+How to Publish An Application In The Play Store
+The Strategy Pattern explained using Java
+How to calculate Binary Tree height with the recursive method
+I landed an internship at Facebook. Here are some tips I learned.
+Essential Gems for Rails Applications
+How to secure and manage secrets using Google Cloud KMS
+How to Pass Oracle’s Java Certifications — a Practical Guide for Developers
+Master the art of looping in JavaScript with these incredible tricks
+The art of asking questions
+The Definitive Guide to Contributing to Open Source
diff --git a/scrape.ipynb b/scrape.ipynb
new file mode 100644
index 0000000..1e622b4
--- /dev/null
+++ b/scrape.ipynb
@@ -0,0 +1,163 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def retrieve_from_web(url, user_agent, fname):\n",
+ "    \"\"\"Fetch url with the given User-Agent header and save the raw bytes as HTML/<fname>.\"\"\"\n",
+ "    request = urllib.request.Request(url, headers={'User-Agent': user_agent})\n",
+ "    with urllib.request.urlopen(request) as response:\n",
+ "        html = response.read()\n",
+ "    fname = '/home/ashutosh/Desktop/WebCrawler/HTML/' + fname\n",
+ "    with open(fname, 'wb') as fp:\n",
+ "        fp.write(html)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def read_html():\n",
+ "    \"\"\"Return the text of the cached page HTML/medium_html (saved as raw bytes, read as UTF-8).\"\"\"\n",
+ "    with open('/home/ashutosh/Desktop/WebCrawler/HTML/medium_html', 'r', encoding='utf-8') as fp:\n",
+ "        return fp.read()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "#from urllib.request import urlopen\n",
+ "import urllib.request\n",
+ "from bs4 import BeautifulSoup\n",
+ "user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'\n",
+ "url = 'https://medium.freecodecamp.org/'\n",
+ "#retrieve_from_web(url, user_agent, 'medium_html')\n",
+ "buff = read_html()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "The Mobile App Launch Checklist — How to Ship Apps Like a Boss \n",
+ "How To Master Async/Await With This Real World Example \n",
+ "Here are some super secret VS Code hacks to boost your productivity \n",
+ "Removing JavaScript’s “this” keyword makes it a better language. Here’s why. \n",
+ "A chaotic mind leads to chaotic code \n",
+ "I know nothing, but it is okay \n",
+ "Which Programming Language Should You Learn Next? \n",
+ "How to create a Discord bot under 15 minutes \n",
+ "How to go from scratch to Create-React-App on Windows \n",
+ "How I built an async form validation library in ~100 lines of code with React Hooks \n",
+ "Introducing ABS, a programming language for shell scripting \n",
+ "How to write a better CV— the Web Developer edition \n",
+ "The React Handbook \n",
+ "Simple site hosting with Amazon S3 and HTTPS \n",
+ "How to Host a Static Website with S3, CloudFront and Route53 \n",
+ "How to Publish An Application In The Play Store \n",
+ "The Strategy Pattern explained using Java \n",
+ "How to calculate Binary Tree height with the recursive method \n",
+ "I landed an internship at Facebook. Here are some tips I learned. \n",
+ "Essential Gems for Rails Applications \n",
+ "How to secure and manage secrets using Google Cloud KMS \n",
+ "How to Pass Oracle’s Java Certifications — a Practical Guide for Developers \n",
+ "Master the art of looping in JavaScript with these incredible tricks \n",
+ "The art of asking questions \n",
+ "The Definitive Guide to Contributing to Open Source \n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "5748"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Build an HTML page of links to every headline in the cached page (buff) and save it\n",
+ "# under result/ in a file named after the current date.\n",
+ "import time\n",
+ "from html import escape\n",
+ "\n",
+ "soup = BeautifulSoup(buff, \"html.parser\")\n",
+ "# A story is an anchor that wraps an h3 headline; anchors without one are skipped.\n",
+ "rows = []\n",
+ "for news in soup.find_all('a'):\n",
+ "    head = news.find('h3')\n",
+ "    if not head:\n",
+ "        continue\n",
+ "    # href may be missing; escape both parts so &, < or quotes cannot break the markup.\n",
+ "    href = escape(news.get('href') or '', quote=True)\n",
+ "    # Emit a real link: the old \"{1} \" template dropped the href and wrote bare titles.\n",
+ "    rows.append('<a href=\"{0}\">{1}</a><br>'.format(href, escape(head.text)))\n",
+ "\n",
+ "html_links = \"\\n\".join(rows)\n",
+ "print(html_links)\n",
+ "\n",
+ "# Name the result after today's date, e.g. 19-01-16.html. The old \"%y-%m%-d\" pattern\n",
+ "# lost a hyphen and used %-d, which is a glibc extension that fails on other platforms.\n",
+ "fname = '/home/ashutosh/Desktop/WebCrawler/result/' + time.strftime(\"%y-%m-%d\") + \".html\"\n",
+ "# The with-block flushes and closes the file; the original never closed its handle.\n",
+ "with open(fname, 'w', encoding='utf-8') as fp:\n",
+ "    fp.write(html_links)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/test.ipynb b/test.ipynb
new file mode 100644
index 0000000..84f6b26
--- /dev/null
+++ b/test.ipynb
@@ -0,0 +1,292 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "freeCodeCamp.org
Join a community of millions of people who are learning new skills together, building their portfolios, and getting developer jobs.
\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Scratch check: write a marker file into HTML/, then dump the cached page.\n",
+ "a = '/home/ashutosh/Desktop/WebCrawler/HTML/' + 'b.txt'\n",
+ "with open(a, 'w') as fp:  # closed on exit so the text is actually flushed to disk\n",
+ "    fp.write(\"sjsjj\")\n",
+ "\n",
+ "with open('/home/ashutosh/Desktop/WebCrawler/HTML/medium_html', 'r') as fp:\n",
+ "    buff = fp.read()\n",
+ "print(buff)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/wb.log b/wb.log
new file mode 100644
index 0000000..607816c
--- /dev/null
+++ b/wb.log
@@ -0,0 +1 @@
+2019-01-05 15:36:42,211 - INFO logging outputr
diff --git a/web_log.ipynb b/web_log.ipynb
new file mode 100644
index 0000000..ca7dda2
--- /dev/null
+++ b/web_log.ipynb
@@ -0,0 +1,73 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import logging\n",
+ "#for handler in logging.root.handlers[:]:\n",
+ "# logging.root.removeHandler(handler)\n",
+ "\n",
+ "logging.basicConfig(filename = \"wb.log\", format = '%(asctime)s-%(levelname)s - %(message)s', level=logging.INFO, filemode = 'w')\n",
+ "log = logging.getLogger(__name__)\n",
+ "#log.setLevel(20)\n",
+ "log.info(\"logging output\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "19-01-16\n"
+ ]
+ }
+ ],
+ "source": [
+ "import time\n",
+ "print(time.strftime(\"%y-%m-%d\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}