-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path20casedenco.py
161 lines (99 loc) · 3.85 KB
/
20casedenco.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# -*- coding: utf-8 -*-
import pandas as pd
pd.set_option('display.max_columns',15)
pd.set_option('display.max_rows',100)
#others - max_rows, width, precision, height, date_dayfirst, date_yearfirst
import numpy as np
#read data
df = pd.read_csv('20denco.csv')
df.columns
df['region'].value_counts()
df['region'].value_counts().plot(kind='bar')
df1 = df[df['region']=='02-Central']
df1
df2 = df[df['region']=='01-East']
df2
df1
df2
df.head(10)
df.tail(10)
'df.columns
len(df)
df.dtypes
df['partnum'] =(df['partnum']).astype('str')
df.dtypes
df.describe()
df.shape
'''
df['region'].unique()
df['region'] = df['region'].astype('category')
df['region'].unique()
'''
df.region.value_counts()
df.region.value_counts().plot(kind='bar')
#do summary actions
#Who are the most loyal Customers -
#Improve repeated sales, Target customers with low sales Volumes
#count by customers, sort, pick top 5
#numpy method of counts
df.custname.value_counts()
df.custname.value_counts().sort_values(ascending=False)
df.custname.value_counts().sort_values(ascending=False).head(10)
df.custname.value_counts().sort_values(ascending=False).head(5).plot(kind='bar')
#pandas way
df.groupby('custname').size()
df.groupby('custname').size().sort_values(ascending=False)
#pickup first 5
df.groupby('custname').size().sort_values(ascending=False).head(5)
#these are most loyal customers
df.groupby('custname').size().sort_values(ascending=False).head(5).plot(kind='bar')
#extra- ??
'''
df.groupby(['custname'])['margin'].nlargest(3)
df.sort_values(['revenue'], ascending= True).groupby( 'region' ).mean()
'''
#Which customers contribute the most to their revenue - How do I
#retain these customers & target incentives
#find total revenue of each customer, sort in descending.
df.groupby('custname').aggregate({'revenue':np.sum})
df.groupby('custname').aggregate({'revenue':np.sum}).sort_values(by='revenue', ascending=False)
df.groupby('custname').aggregate({'revenue':np.sum}).sort_values(by='revenue', ascending=False).head(5)
#these are top 5 customers who gave revnue to Denco
#extras ???
'''
df.groupby('custname').aggregate({'revenue':[np.sum, 'size'] }).sort_values(by='revenue', ascending=False) #error
df[['revenue','custname']].groupby('custname').agg['size']
.sort_values(by='revenue', ascending=False) #error
'''
#What part numbers bring in to significant portion of revenue -
#Maximise revenue from high value parts
#grouby partnum, find value of revenue, sell these more
'''
df[['partnum','revenue']].sort_values(by='revenue', ascending=False)
df[['partnum','revenue']].sort_values(by='revenue', ascending=False).head(5)
'''
#these are top parts which give max revenue
#if total sales has to be considered
df[['partnum','revenue']].groupby('partnum').sum()
df[['partnum','revenue']].groupby('partnum').sum().sort_values(by='revenue', ascending=False).head(5)
#this total revenue value giving items
df.dtypes
#What parts have the highest profit margin - What parts are driving profits &
#what parts need to build further
#check for margin value, their individual margin and total sales margin like revenue
df[['partnum','margin']].sort_values(by='margin', ascending=False)
df[['partnum','margin']].sort_values(by='margin', ascending=False).head(5)
df[['partnum','margin']].sort_values(by='margin', ascending=False).tail(5)
#these are top parts which give max margins
#if total sales has to be considered
pd.set_option('display.max_columns',1000)
df[['partnum','margin']].groupby('partnum').sum()
df1= df[['partnum','margin']].groupby('partnum').sum().sort_values(by='margin', ascending=False).head(5)
df1
df1.to_csv('aa.csv')
#this total revenue value giving items
#Most sold items
df.groupby('partnum').size().sort_values(ascending=False).head(5)
#which regions gave max revenue
df2 = df[['revenue', 'region']].groupby( 'region').sum().sort_values(by='revenue', ascending=False).head(5)
df2