-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcore_learning_algorithms.py
83 lines (50 loc) · 2 KB
/
core_learning_algorithms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# -*- coding: utf-8 -*-
"""Core learning algorithms.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1mf1jkEAK1BtyU-8ozwiho9zO_UMhvK_Q
### **Linear Regression Algorithm**
"""
import matplotlib.pyplot as plt
import numpy as np
"""Creating datapoints"""
x = [1, 2, 2.5, 3, 4, 5]
y = [1, 4, 7, 9, 15, 19]

# Scatter the raw points as red circles and pin the visible axis range.
plt.plot(x, y, 'ro')
plt.axis([0, 6, 0, 20])

# Line of best fit: a degree-1 least-squares polynomial evaluated at the
# sorted, de-duplicated x values.
unique_x = np.unique(x)
best_fit = np.poly1d(np.polyfit(x, y, 1))
plt.plot(unique_x, best_fit(unique_x))
plt.show()
"""## **Setup**"""
# Install third-party dependencies from a shell before running this script.
# The notebook used the IPython shell escape below; a leading "!" is not valid
# Python syntax in a .py file, so the command is kept only as a comment.
# NOTE(review): the scikit-learn project publishes its package on PyPI as
# "scikit-learn" (the importable module is still `sklearn`) - confirm the
# install name used for this environment.
# !pip install -q scikit-learn

# Commented out IPython magic to ensure Python compatibility.
# %tensorflow_version 2.x

# A `from __future__` import may only be preceded by the module docstring,
# comments, blank lines and other future statements. At this point in the file
# it raises SyntaxError. Every feature it names is already standard behaviour
# on Python 3, so the statement is preserved here as a comment only.
# from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib
import tensorflow.compat.v2.feature_column as fc
import tensorflow as tf
"""Load the dataset"""
dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')  # training dataset
dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')  # test dataset

# Move the 'survived' column out of each frame into its own Series.
# .pop() removes the column from the DataFrame and returns it, so the frames
# keep every other column while y_train / y_eval hold the removed one.
y_train = dftrain.pop('survived')
y_eval = dfeval.pop('survived')

# Show the training row labelled 0 next to the 'survived' value with the same label.
print(dftrain.loc[0], y_train.loc[0])

# Outside a notebook the value of a bare expression is silently discarded, so
# each inspection result is printed explicitly.

# .head() returns the first rows of the frame (five by default).
print(dftrain.head())

# .describe() returns summary statistics for the frame's columns.
print(dftrain.describe())

# .shape is the (number of rows, number of columns) tuple.
print(dftrain.shape)
"""### Correlation review
Returning a histogram of the ages
"""
# Each chart is drawn on its own figure/axes pair. Without an explicit `ax`,
# successive pandas plots in a plain script can land on whatever axes are
# current and overlap each other, whereas notebook cells rendered them
# separately.

# Histogram of passenger ages, 20 bins.
_, age_ax = plt.subplots()
dftrain.age.hist(bins=20, ax=age_ax)

# Number of passengers per value of the 'sex' column.
_, sex_ax = plt.subplots()
dftrain.sex.value_counts().plot(kind='barh', ax=sex_ax)

# Number of passengers per value of the 'class' column.
_, class_ax = plt.subplots()
dftrain['class'].value_counts().plot(kind='barh', ax=class_ax)

# Mean of 'survived' for each value of 'sex'. The label column is joined back
# onto the features so it can be grouped.
# NOTE(review): the plotted value is a mean, i.e. a 0-1 fraction if 'survived'
# is coded 0/1, even though the axis label says "percentage" - confirm.
_, survival_ax = plt.subplots()
survival_by_sex = pd.concat([dftrain, y_train], axis=1).groupby('sex').survived.mean()
survival_by_sex.plot(kind='barh', ax=survival_ax).set_xlabel('Survival percentage')

# A script has no inline rendering; display all figures created above.
plt.show()