# core_learning_algorithms.py

# -*- coding: utf-8 -*-
"""Core learning algorithms.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1mf1jkEAK1BtyU-8ozwiho9zO_UMhvK_Q

### **Linear Regression Algorithm**
"""

import matplotlib.pyplot as plt
import numpy as np

"""Creating datapoints"""

# Six sample points: x holds the inputs, y the matching observed outputs.
x = [1, 2, 2.5, 3, 4, 5]
y = [1, 4, 7, 9, 15, 19]

"""Calling the plot function and creating line of best fit"""

# Raw samples as red circles, on axes fixed to x in [0, 6] and y in [0, 20].
plt.plot(x, y, 'ro')
plt.axis([0, 6, 0, 20])

# Least-squares fit of a degree-1 polynomial, evaluated at the sorted,
# de-duplicated x values and drawn as the line of best fit.
plt.plot(
    np.unique(x),
    np.polyval(np.polyfit(x, y, 1), np.unique(x)),
)
plt.show()

"""## **Setup**"""

# Notebook-only shell escape, kept as a comment so this file parses as plain
# Python (same treatment as the IPython magic below). Run it manually if
# scikit-learn is missing; the PyPI name `sklearn` is a deprecated alias.
# !pip install -q scikit-learn

# Commented out IPython magic to ensure Python compatibility.
# %tensorflow_version 2.x

# A `from __future__` import is only legal at the very top of a module, so in
# this position it is a SyntaxError. All four features are always on in
# Python 3, so the line is disabled rather than moved.
# from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib

import tensorflow.compat.v2.feature_column as fc

import tensorflow as tf

"""Load the dataset"""

# Download the two Titanic CSV files: dftrain is the training dataset,
# dfeval is the test (evaluation) dataset.
dftrain, dfeval = (
    pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv'),
    pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv'),
)

"""I am storing the survived column to a new variable for each dataset using the .pop() method

 
"""

# Split the label off each frame: pop() removes the 'survived' column from the
# DataFrame and returns it, so the frames keep only the remaining columns.
y_train, y_eval = dftrain.pop('survived'), dfeval.pop('survived')

# Show the training row labelled 0 together with its 'survived' value.
print(dftrain.loc[0], y_train.loc[0])

"""the .head() function prints the first five elements of the indicated dataset"""

# A notebook displays the value of a cell's last expression automatically; a
# script silently discards it, so each result is printed explicitly here.
print(dftrain.head())

""".describe() computes summary statistics (count, mean, std, min, quartiles, max) for the numeric columns"""

print(dftrain.describe())

""".shape is a (number of rows, number of columns) tuple"""

print(dftrain.shape)

"""### Correlation review

Returning a histogram of the ages
"""

# Each chart gets its own figure and an explicit show(). Outside a notebook,
# pandas draws Series plots onto the current axes, so without this the four
# charts would be stacked on top of each other and never displayed.

# Histogram of the `age` column, split into 20 bins.
plt.figure()
dftrain.age.hist(bins=20)
plt.show()

# Number of rows for each value of the `sex` column.
plt.figure()
dftrain.sex.value_counts().plot(kind='barh')
plt.show()

# Number of rows for each value of the `class` column. `class` is a Python
# keyword, so the column needs bracket lookup rather than attribute access.
plt.figure()
dftrain['class'].value_counts().plot(kind='barh')
plt.show()

# Mean of `survived` per sex. The labels were popped off dftrain earlier, so
# they are joined back on column-wise before grouping.
# NOTE(review): if `survived` is 0/1 the plotted mean is a fraction in [0, 1],
# not a percentage as the axis label says — confirm and rescale or relabel.
plt.figure()
(
    pd.concat([dftrain, y_train], axis=1)
    .groupby('sex')
    .survived.mean()
    .plot(kind='barh')
    .set_xlabel('Survival percentage')
)
plt.show()