-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDownload_Dataset.py
46 lines (38 loc) · 1.54 KB
/
Download_Dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
## Download an online dataset with urllib and plot a histogram of one column
## To run this script in Jupyter, uncomment the lines marked "#Jupyter"
import os
import tarfile
import ssl
#%pylab #Jupyter
#%matplotlib inline #Jupyter
import matplotlib.pyplot as plt
import pandas as pd
# When PYTHONHTTPSVERIFY is unset or empty, and this interpreter's ssl module
# offers an unverified-context factory, install it as the default HTTPS
# context factory.
# NOTE(review): per PEP 476 this monkeypatch disables certificate verification
# for the standard-library HTTPS clients in this process -- confirm intended.
if not os.environ.get('PYTHONHTTPSVERIFY', ''):
    if getattr(ssl, '_create_unverified_context', None):
        ssl._create_default_https_context = ssl._create_unverified_context
from six.moves import urllib
# Remote repository root, the dataset directory relative to it (also the
# default local directory used by the functions below), and the archive URL
# assembled from the two.
download_root = "https://github.com/ageron/handson-ml/raw/master/"
housing_path = "datasets/housing"
housing_url = "".join((download_root, housing_path, "/housing.tgz"))
def fetch_housing_data(house_url=housing_url, house_path=housing_path):
    """Download the housing archive and unpack it into ``house_path``.

    The archive is saved as ``housing.tgz`` inside ``house_path`` and then
    extracted into that same directory.

    Args:
        house_url: URL of the ``.tgz`` archive to download.
        house_path: Directory that receives the archive and its extracted
            contents; it is created when it does not exist yet.

    Raises:
        OSError: If ``house_path`` cannot be created and is not already a
            directory.
    """
    # Try to create the directory and only tolerate the failure when the
    # directory is already there; this avoids the check-then-create race.
    try:
        os.makedirs(house_path)
    except OSError:
        if not os.path.isdir(house_path):
            raise
    tgz_path = os.path.join(house_path, "housing.tgz")
    urllib.request.urlretrieve(house_url, tgz_path)
    # The context manager closes the archive even if extraction raises, so
    # the file handle is not leaked.
    with tarfile.open(tgz_path) as housing_tgz:
        # NOTE(review): extractall trusts the member paths stored in the
        # archive; only use this with archives from a trusted source.
        housing_tgz.extractall(path=house_path)
# Download and extract the dataset with the default URL and directory; this
# call is unconditional, so it runs every time the script is executed.
fetch_housing_data()
def load_housing_data(housing_path=housing_path):
    """Read ``housing.csv`` from ``housing_path`` with pandas.

    Args:
        housing_path: Directory that contains ``housing.csv``.

    Returns:
        The object returned by ``pd.read_csv`` for that file.
    """
    return pd.read_csv(os.path.join(housing_path, "housing.csv"))
# Load the CSV from the default dataset directory.
housing = load_housing_data()
# Optional quick inspections of the loaded data; uncomment as needed.
#print (housing.head())
#print (housing.info())
#print (housing["ocean_proximity"].value_counts())
#print (housing.describe())
#housing.hist(housing['longitude'],bins=50)
# Plot a 50-bin histogram of the 'total_rooms' column.
plt.hist(housing['total_rooms'], bins=50)
#plt.hist(x, bins=50)
#plt.hist(x)
plt.show() #Not required by Jupyter