Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature]: Label encoding #14

Merged
merged 1 commit into from
Apr 12, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 59 additions & 60 deletions Accommodating_Insights.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1263,7 +1263,7 @@
{
"data": {
"text/plain": [
"[<matplotlib.lines.Line2D at 0x7803cdf3bd60>]"
"[<matplotlib.lines.Line2D at 0x75933aa72620>]"
]
},
"execution_count": 21,
Expand Down Expand Up @@ -1371,7 +1371,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_64963/535378691.py:2: FutureWarning: \n",
"/tmp/ipykernel_42243/535378691.py:2: FutureWarning: \n",
"\n",
"`shade` is now deprecated in favor of `fill`; setting `fill=True`.\n",
"This will become an error in seaborn v0.14.0; please update your code.\n",
Expand All @@ -1382,7 +1382,7 @@
{
"data": {
"text/plain": [
"[<matplotlib.lines.Line2D at 0x7803cded2c50>]"
"[<matplotlib.lines.Line2D at 0x75933a485690>]"
]
},
"execution_count": 26,
Expand Down Expand Up @@ -1719,7 +1719,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 33,
"id": "6b63d81a",
"metadata": {},
"outputs": [
Expand All @@ -1729,7 +1729,7 @@
"Text(0.5, 1.0, 'Reviews per month v/s Price')"
]
},
"execution_count": 35,
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
},
Expand Down Expand Up @@ -1757,7 +1757,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 34,
"id": "68e8bfea",
"metadata": {},
"outputs": [
Expand All @@ -1767,7 +1767,7 @@
"numpy.float64"
]
},
"execution_count": 36,
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1780,7 +1780,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 35,
"id": "7730aae1",
"metadata": {},
"outputs": [],
Expand All @@ -1792,7 +1792,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 36,
"id": "0e19d92d",
"metadata": {},
"outputs": [
Expand All @@ -1802,7 +1802,7 @@
"Text(0.5, 1.0, 'Reviews per month v/s Price (after)')"
]
},
"execution_count": 38,
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
},
Expand Down Expand Up @@ -1851,7 +1851,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 37,
"id": "5a8a3389",
"metadata": {},
"outputs": [
Expand All @@ -1860,24 +1860,24 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 48895 entries, 0 to 48894\n",
"Index: 48884 entries, 0 to 48894\n",
"Data columns (total 12 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 neighbourhood_group 48895 non-null object \n",
" 1 neighbourhood 48895 non-null object \n",
" 2 latitude 48895 non-null float64\n",
" 3 longitude 48895 non-null float64\n",
" 4 room_type 48895 non-null object \n",
" 5 price 48895 non-null int64 \n",
" 6 minimum_nights 48895 non-null int64 \n",
" 7 number_of_reviews 48895 non-null int64 \n",
" 8 reviews_per_month 48895 non-null float64\n",
" 9 calculated_host_listings_count 48895 non-null int64 \n",
" 10 availability_365 48895 non-null int64 \n",
" 11 price_log 48895 non-null float64\n",
" 0 neighbourhood_group 48884 non-null object \n",
" 1 neighbourhood 48884 non-null object \n",
" 2 latitude 48884 non-null float64\n",
" 3 longitude 48884 non-null float64\n",
" 4 room_type 48884 non-null object \n",
" 5 price 48884 non-null int64 \n",
" 6 minimum_nights 48884 non-null int64 \n",
" 7 number_of_reviews 48884 non-null int64 \n",
" 8 reviews_per_month 38833 non-null float64\n",
" 9 calculated_host_listings_count 48884 non-null int64 \n",
" 10 availability_365 48884 non-null int64 \n",
" 11 price_log 48884 non-null float64\n",
"dtypes: float64(4), int64(5), object(3)\n",
"memory usage: 4.5+ MB\n"
"memory usage: 4.8+ MB\n"
]
}
],
Expand Down Expand Up @@ -1911,7 +1911,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 38,
"id": "3dd14f34",
"metadata": {},
"outputs": [
Expand All @@ -1937,7 +1937,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 39,
"id": "9dc69ff4",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -2179,10 +2179,20 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 40,
"id": "31e5a104",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Original Value: Entire home/apt | Code: 0\n",
"Original Value: Private room | Code: 1\n",
"Original Value: Shared room | Code: 2\n"
]
}
],
"source": [
"original_categories = dff['room_type'].astype(\"category\").cat.categories\n",
"\n",
Expand All @@ -2193,53 +2203,42 @@
},
{
"cell_type": "code",
"execution_count": 101,
"execution_count": 41,
"id": "100054b4",
"metadata": {},
"outputs": [],
"source": [
"# Change the non-numerical value to numerical value\n",
"# hint u can use cat.code\n",
"\n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "dbdc9c6c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 48895 entries, 0 to 48894\n",
"Index: 48884 entries, 0 to 48894\n",
"Data columns (total 12 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 neighbourhood_group 48895 non-null int8 \n",
" 1 neighbourhood 48895 non-null int16 \n",
" 2 latitude 48895 non-null float64\n",
" 3 longitude 48895 non-null float64\n",
" 4 room_type 48895 non-null int8 \n",
" 5 price 48895 non-null int64 \n",
" 6 minimum_nights 48895 non-null int64 \n",
" 7 number_of_reviews 48895 non-null int64 \n",
" 8 reviews_per_month 48895 non-null float64\n",
" 9 calculated_host_listings_count 48895 non-null int64 \n",
" 10 availability_365 48895 non-null int64 \n",
" 11 price_log 48895 non-null float64\n",
" 0 neighbourhood_group 48884 non-null int8 \n",
" 1 neighbourhood 48884 non-null int16 \n",
" 2 latitude 48884 non-null float64\n",
" 3 longitude 48884 non-null float64\n",
" 4 room_type 48884 non-null int8 \n",
" 5 price 48884 non-null int64 \n",
" 6 minimum_nights 48884 non-null int64 \n",
" 7 number_of_reviews 48884 non-null int64 \n",
" 8 reviews_per_month 38833 non-null float64\n",
" 9 calculated_host_listings_count 48884 non-null int64 \n",
" 10 availability_365 48884 non-null int64 \n",
" 11 price_log 48884 non-null float64\n",
"dtypes: float64(4), int16(1), int64(5), int8(2)\n",
"memory usage: 3.5 MB\n"
"memory usage: 3.9 MB\n"
]
}
],
"source": [
"# Change the non-numerical value to numerical value\n",
"# hint u can use cat.code\n",
"dff['neighbourhood_group'] = dff['neighbourhood_group'].astype(\"category\").cat.codes\n",
"dff['neighbourhood'] = dff['neighbourhood'].astype(\"category\").cat.codes\n",
"dff['room_type'] = dff['room_type'].astype(\"category\").cat.codes\n",
"dff.info()"
]
},
Expand Down
Loading