Skip to content

Commit

Permalink
feat: label-encode non-numeric data columns
Browse files Browse the repository at this point in the history
  • Loading branch information
xTwo56 committed Apr 11, 2024
1 parent 44163a3 commit b9ecbe1
Showing 1 changed file with 59 additions and 60 deletions.
119 changes: 59 additions & 60 deletions Accommodating_Insights.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1263,7 +1263,7 @@
{
"data": {
"text/plain": [
"[<matplotlib.lines.Line2D at 0x7803cdf3bd60>]"
"[<matplotlib.lines.Line2D at 0x75933aa72620>]"
]
},
"execution_count": 21,
Expand Down Expand Up @@ -1371,7 +1371,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_64963/535378691.py:2: FutureWarning: \n",
"/tmp/ipykernel_42243/535378691.py:2: FutureWarning: \n",
"\n",
"`shade` is now deprecated in favor of `fill`; setting `fill=True`.\n",
"This will become an error in seaborn v0.14.0; please update your code.\n",
Expand All @@ -1382,7 +1382,7 @@
{
"data": {
"text/plain": [
"[<matplotlib.lines.Line2D at 0x7803cded2c50>]"
"[<matplotlib.lines.Line2D at 0x75933a485690>]"
]
},
"execution_count": 26,
Expand Down Expand Up @@ -1719,7 +1719,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 33,
"id": "6b63d81a",
"metadata": {},
"outputs": [
Expand All @@ -1729,7 +1729,7 @@
"Text(0.5, 1.0, 'Reviews per month v/s Price')"
]
},
"execution_count": 35,
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
},
Expand Down Expand Up @@ -1757,7 +1757,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 34,
"id": "68e8bfea",
"metadata": {},
"outputs": [
Expand All @@ -1767,7 +1767,7 @@
"numpy.float64"
]
},
"execution_count": 36,
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1780,7 +1780,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 35,
"id": "7730aae1",
"metadata": {},
"outputs": [],
Expand All @@ -1792,7 +1792,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 36,
"id": "0e19d92d",
"metadata": {},
"outputs": [
Expand All @@ -1802,7 +1802,7 @@
"Text(0.5, 1.0, 'Reviews per month v/s Price (after)')"
]
},
"execution_count": 38,
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
},
Expand Down Expand Up @@ -1851,7 +1851,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 37,
"id": "5a8a3389",
"metadata": {},
"outputs": [
Expand All @@ -1860,24 +1860,24 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 48895 entries, 0 to 48894\n",
"Index: 48884 entries, 0 to 48894\n",
"Data columns (total 12 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 neighbourhood_group 48895 non-null object \n",
" 1 neighbourhood 48895 non-null object \n",
" 2 latitude 48895 non-null float64\n",
" 3 longitude 48895 non-null float64\n",
" 4 room_type 48895 non-null object \n",
" 5 price 48895 non-null int64 \n",
" 6 minimum_nights 48895 non-null int64 \n",
" 7 number_of_reviews 48895 non-null int64 \n",
" 8 reviews_per_month 48895 non-null float64\n",
" 9 calculated_host_listings_count 48895 non-null int64 \n",
" 10 availability_365 48895 non-null int64 \n",
" 11 price_log 48895 non-null float64\n",
" 0 neighbourhood_group 48884 non-null object \n",
" 1 neighbourhood 48884 non-null object \n",
" 2 latitude 48884 non-null float64\n",
" 3 longitude 48884 non-null float64\n",
" 4 room_type 48884 non-null object \n",
" 5 price 48884 non-null int64 \n",
" 6 minimum_nights 48884 non-null int64 \n",
" 7 number_of_reviews 48884 non-null int64 \n",
" 8 reviews_per_month 38833 non-null float64\n",
" 9 calculated_host_listings_count 48884 non-null int64 \n",
" 10 availability_365 48884 non-null int64 \n",
" 11 price_log 48884 non-null float64\n",
"dtypes: float64(4), int64(5), object(3)\n",
"memory usage: 4.5+ MB\n"
"memory usage: 4.8+ MB\n"
]
}
],
Expand Down Expand Up @@ -1911,7 +1911,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 38,
"id": "3dd14f34",
"metadata": {},
"outputs": [
Expand All @@ -1937,7 +1937,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 39,
"id": "9dc69ff4",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -2179,10 +2179,20 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 40,
"id": "31e5a104",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Original Value: Entire home/apt | Code: 0\n",
"Original Value: Private room | Code: 1\n",
"Original Value: Shared room | Code: 2\n"
]
}
],
"source": [
"original_categories = dff['room_type'].astype(\"category\").cat.categories\n",
"\n",
Expand All @@ -2193,53 +2203,42 @@
},
{
"cell_type": "code",
"execution_count": 101,
"execution_count": 41,
"id": "100054b4",
"metadata": {},
"outputs": [],
"source": [
"# Change the non-numerical value to numerical value\n",
"# Hint: you can use .cat.codes\n",
"\n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "dbdc9c6c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 48895 entries, 0 to 48894\n",
"Index: 48884 entries, 0 to 48894\n",
"Data columns (total 12 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 neighbourhood_group 48895 non-null int8 \n",
" 1 neighbourhood 48895 non-null int16 \n",
" 2 latitude 48895 non-null float64\n",
" 3 longitude 48895 non-null float64\n",
" 4 room_type 48895 non-null int8 \n",
" 5 price 48895 non-null int64 \n",
" 6 minimum_nights 48895 non-null int64 \n",
" 7 number_of_reviews 48895 non-null int64 \n",
" 8 reviews_per_month 48895 non-null float64\n",
" 9 calculated_host_listings_count 48895 non-null int64 \n",
" 10 availability_365 48895 non-null int64 \n",
" 11 price_log 48895 non-null float64\n",
" 0 neighbourhood_group 48884 non-null int8 \n",
" 1 neighbourhood 48884 non-null int16 \n",
" 2 latitude 48884 non-null float64\n",
" 3 longitude 48884 non-null float64\n",
" 4 room_type 48884 non-null int8 \n",
" 5 price 48884 non-null int64 \n",
" 6 minimum_nights 48884 non-null int64 \n",
" 7 number_of_reviews 48884 non-null int64 \n",
" 8 reviews_per_month 38833 non-null float64\n",
" 9 calculated_host_listings_count 48884 non-null int64 \n",
" 10 availability_365 48884 non-null int64 \n",
" 11 price_log 48884 non-null float64\n",
"dtypes: float64(4), int16(1), int64(5), int8(2)\n",
"memory usage: 3.5 MB\n"
"memory usage: 3.9 MB\n"
]
}
],
"source": [
"# Change the non-numerical value to numerical value\n",
"# Hint: you can use .cat.codes\n",
"dff['neighbourhood_group'] = dff['neighbourhood_group'].astype(\"category\").cat.codes\n",
"dff['neighbourhood'] = dff['neighbourhood'].astype(\"category\").cat.codes\n",
"dff['room_type'] = dff['room_type'].astype(\"category\").cat.codes\n",
"dff.info()"
]
},
Expand Down

0 comments on commit b9ecbe1

Please sign in to comment.