Skip to content

Commit

Permalink
Merge pull request #178 from g0v/2024-591-api-enhance
Browse files: browse the repository at this point in the history
2024 591 api enhance
  • Loading branch information
ddio authored Sep 8, 2024
2 parents 04b3935 + 7d2d88b commit 2513ab0
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 40 deletions.
2 changes: 1 addition & 1 deletion scrapy-tw-rental-house/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "scrapy-tw-rental-house"
version = "1.3.4"
version = "1.3.6"
description = "Scrapy spider for TW Rental House"
readme = "README.md"
authors = ["ddio <ddio@ddio.io>"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,6 @@ def default_parse_detail(self, response):
# parse detail page in best effort
detail_dict = get_detail_raw_attrs(response)

if '車位' in detail_dict['breadcrumb']:
self.logger.info(
'Skip {} as it is parking lot'.format(house_id)
)
return None

# transform to generic house item
detail_dict['house_id'] = house_id

Expand Down Expand Up @@ -220,20 +214,6 @@ def get_shared_basic(self, detail_dict):
# Issue #14, always update deal status since item may be reopened
ret['deal_status'] = enums.DealStatusType.OPENED

# building_type, 公寓 / 電梯大樓 / 透天
if 'building_type' in detail_dict:
building_type = detail_dict['building_type']
if building_type == '別墅' or building_type == '透天厝':
ret['building_type'] = enums.BuildingType.透天
elif building_type == '住宅大樓' or building_type == '電梯大樓':
ret['building_type'] = enums.BuildingType.電梯大樓
else:
ret['building_type'] = self.get_enum(
enums.BuildingType,
detail_dict['house_id'],
building_type
)

# property type
property_type = breadcrumb[2]
if property_type != '__UNKNOWN__':
Expand All @@ -249,6 +229,23 @@ def get_shared_basic(self, detail_dict):
detail_dict['property_type']
)

if ret['property_type'] == enums.PropertyType.車位:
return ret

# building_type, 公寓 / 電梯大樓 / 透天
if 'building_type' in detail_dict:
building_type = detail_dict['building_type']
if building_type == '別墅' or building_type == '透天厝':
ret['building_type'] = enums.BuildingType.透天
elif building_type == '住宅大樓' or building_type == '電梯大樓':
ret['building_type'] = enums.BuildingType.電梯大樓
else:
ret['building_type'] = self.get_enum(
enums.BuildingType,
detail_dict['house_id'],
building_type
)

# is_rooftop, floor, total_floor
# TODO: use title to detect rooftop
if 'floor' in detail_dict:
Expand Down Expand Up @@ -461,22 +458,32 @@ def gen_detail_shared_attrs(self, detail_dict):
price_range = parse_price(detail_dict['price'])
detail_dict['price'] = price_range['monthly_price']
basic_info = self.get_shared_basic(detail_dict)
price_info = self.get_shared_price(detail_dict, basic_info)
env_info = self.get_shared_environment(detail_dict)
boolean_info = self.get_shared_boolean_info(detail_dict)
misc_info = self.get_shared_misc(detail_dict)

ret = {
'vendor': self.vendor,
'vendor_house_id': detail_dict['house_id'],
'monthly_price': detail_dict['price'],
**price_range,
**price_info,
**basic_info,
}

if basic_info['property_type'] == enums.PropertyType.車位:
self.logger.info(
'Skip {} as it is parking lot'.format(detail_dict['house_id'],)
)
return ret

price_info = self.get_shared_price(detail_dict, basic_info)
env_info = self.get_shared_environment(detail_dict)
boolean_info = self.get_shared_boolean_info(detail_dict)
misc_info = self.get_shared_misc(detail_dict)

ret = {
**ret,
**price_info,
**env_info,
**boolean_info,
**misc_info,

}

return ret
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,7 @@ def get_detail_raw_attrs(response):
TODO: photo list
To check:
- has_parking, is_require_parking_fee, monthly_management_fee, is_require_management_fee: https://rent.591.com.tw/17143085
deal_status,
is_rooftop,
no additional_fee, living_functions, transportation
!!vendor_house_url
'''
ret = {
**get_title(response),
Expand Down Expand Up @@ -48,6 +44,13 @@ def get_house_pattern(response):
items = {}
fields_def = ['property_type', 'floor_ping', 'floor', 'building_type']

if len(item_list) > 0 and '坪' in item_list[0]:
# if 整層住家 && 無房無廳無衛(??),坪數在第一個 🥹
fields_def = ['floor_ping', 'floor', 'building_type']
breadcrumb = css(response, '.crumbs a.t5-link::text')
if breadcrumb and '整層住家' in breadcrumb:
items['property_type'] = '整層住家'

for i, field in enumerate(fields_def):
value = item_list[i]
if len(item_list) > i:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from scrapy_twrh.items import RawHouseItem, GenericHouseItem
from scrapy_twrh.spiders.enums import PropertyType, TopRegionType, SubRegionType
from scrapy_twrh.spiders.util import clean_number
from .util import API_URL, ListRequestMeta, DetailRequestMeta, parse_price
from .util import DETAIL_ENDPOINT, ListRequestMeta, DetailRequestMeta, parse_price
from .request_generator import RequestGenerator

def get_list_val(house, regular_attr, top_attr=None, to_number=False):
Expand Down Expand Up @@ -65,7 +65,7 @@ def default_parse_list(self, response):
def gen_shared_attrs(self, house, meta: ListRequestMeta):
house_id = get_list_val(house, 'id', 'post_id')

url = "{}/v1/house/rent/detail?id={}".format(API_URL, house_id)
url = "{}/{}".format(DETAIL_ENDPOINT, house_id)

if 'region_name' in house:
# topData doesn't contain region_name for some reason..
Expand Down
2 changes: 1 addition & 1 deletion scrapy-twrh-example/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions twrh-dataset/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion twrh-dataset/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ readme = "README.md"
python = "^3.10"
# cffi = "==1.13.2"
django = "^5"
scrapy-tw-rental-house = "==1.3.4"
scrapy-tw-rental-house = "==1.3.6"
psycopg2-binary = "^2.9.9"
pylint-django = "^2.5.5"
sentry-sdk = "^1.39.1"
Expand Down

0 comments on commit 2513ab0

Please sign in to comment.