-
Notifications
You must be signed in to change notification settings - Fork 91
/
Copy pathAnalysis4.py
83 lines (52 loc) Β· 1.73 KB
/
Analysis4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# coding: utf-8
# In[1]:
# get_ipython().magic('matplotlib inline')
# In[2]:
# importing required libraries
import os
import subprocess
import stat
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
sns.set(style="white")
# In[3]:
# absolute path till parent folder
abs_path = os.getcwd()
path_array = abs_path.split("/")
path_array = path_array[:len(path_array)-1]
homefolder_path = ""
for i in path_array[1:]:
homefolder_path = homefolder_path + "/" + i
# In[4]:
# path to clean data
clean_data_path = homefolder_path + "/CleanData/CleanedDataSet/cleaned_autos.csv"
# reading csv into raw dataframe
df = pd.read_csv(clean_data_path,encoding="latin-1")
# In[7]:
trial = pd.DataFrame()
for b in list(df["brand"].unique()):
for v in list(df["vehicleType"].unique()):
z = df[(df["brand"] == b) & (df["vehicleType"] == v)]["price"].mean()
trial = trial.append(pd.DataFrame({'brand':b , 'vehicleType':v , 'avgPrice':z}, index=[0]))
trial = trial.reset_index()
del trial["index"]
trial["avgPrice"].fillna(0,inplace=True)
trial["avgPrice"].isnull().value_counts()
trial["avgPrice"] = trial["avgPrice"].astype(int)
trial.head(5)
# ## Average price of a vehicle by brand as well as vehicle type
# In[11]:
# HeatMap tp show average prices of vehicles by brand and type together
tri = trial.pivot("brand","vehicleType", "avgPrice")
fig, ax = plt.subplots(figsize=(15,20))
sns.heatmap(tri,linewidths=1,cmap="YlGnBu",annot=True, ax=ax, fmt="d")
ax.set_title("Average price of vehicles by vehicle type and brand",fontdict={'size':18})
plt.show()
# In[12]:
fig.savefig((abs_path + "/Plots/heatmap-price-brand-vehicleType.png"))
# In[13]:
df.head(5)
# In[ ]: