-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
from abc import ABC, abstractmethod | ||
from collections import defaultdict | ||
from typing import Callable, Dict, Iterable, List, Mapping, Optional, Tuple, Type, Union | ||
from warnings import warn | ||
|
||
from scrapy import Request | ||
from scrapy.crawler import Crawler | ||
|
@@ -110,6 +111,26 @@ def add_rule(self, rule: RuleFromUser) -> None: | |
for_patterns=Patterns([pattern]), use=use, instead_of=instead_of | ||
) | ||
self.rules.append(rule) | ||
|
||
# A common case is a PO subclassing another one that has the same URL | ||
# pattern. See the test_item_return_subclass() test case. | ||
matched = self.matcher[rule.to_return] | ||
if [ | ||
pattern | ||
for pattern in matched.patterns.values() | ||
if pattern == rule.for_patterns | ||
]: | ||
# TODO: It would be great to also list the rules that have the | ||
# same URL pattern, but this would require some refactoring. | ||
warn( | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
BurnzZ
Author
Contributor
|
||
f"A similar URL pattern {list(matched.patterns.values())} has been " | ||
f"declared earlier which uses to_return={rule.to_return}. When " | ||
f"matching URLs against rules, the latest declared rule is used. " | ||
f"Consider explicitly updating the priority of the rules containing " | ||
f"the said URL pattern to easily match the expectations when reading " | ||
f"the code." | ||
) | ||
|
||
if rule.instead_of: | ||
self.matcher[rule.instead_of].add_or_update( | ||
len(self.rules) - 1, rule.for_patterns | ||
|
@@ -127,6 +148,7 @@ def overrides_for(self, request: Request) -> Mapping[Callable, Callable]: | |
overrides[instead_of] = self.rules[rule_id].use | ||
return overrides | ||
|
||
# TODO: Refactor later | ||
def rules_overrides_for(self, request: Request) -> Mapping[Callable, Callable]: | ||
overrides: Dict[Callable, Callable] = {} | ||
for instead_of, matcher in self.matcher.items(): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
import attrs | ||
from web_poet import Injectable, ItemPage, WebPage, field, handle_urls, item_from_fields | ||
|
||
from . import URL | ||
|
||
|
||
class POOverridenPage(WebPage): | ||
def to_item(self): | ||
return {"msg": "PO that will be replaced"} | ||
|
||
|
||
@handle_urls(URL, instead_of=POOverridenPage) | ||
class POIntegrationPage(WebPage): | ||
def to_item(self): | ||
return {"msg": "PO replacement"} | ||
|
||
|
||
@attrs.define | ||
class Product: | ||
name: str | ||
|
||
|
||
@attrs.define | ||
class ParentProduct: | ||
name: str | ||
|
||
|
||
@attrs.define | ||
class PriorityParentProduct: | ||
name: str | ||
|
||
|
||
@attrs.define | ||
class ReplacedProduct: | ||
name: str | ||
|
||
|
||
@attrs.define | ||
class ParentReplacedProduct: | ||
name: str | ||
|
||
|
||
@attrs.define | ||
class SubclassReplacedProduct: | ||
name: str | ||
|
||
|
||
@attrs.define | ||
class StandaloneProduct: | ||
name: str | ||
|
||
|
||
@attrs.define | ||
class ProductFromInjectable: | ||
name: str | ||
|
||
|
||
@handle_urls(URL) | ||
class ProductPage(ItemPage[Product]): | ||
@field | ||
def name(self) -> str: | ||
return "product's name" | ||
|
||
|
||
@handle_urls(URL) | ||
class ParentProductPage(ItemPage[ParentProduct]): | ||
@field | ||
def name(self) -> str: | ||
return "parent product's name" | ||
|
||
|
||
@handle_urls(URL) | ||
class PriorityParentProductPage(ItemPage[PriorityParentProduct]): | ||
@field | ||
def name(self) -> str: | ||
return "priority parent product's name" | ||
|
||
|
||
@handle_urls(URL, to_return=ReplacedProduct) | ||
class ReplacedProductPage(ItemPage[Product]): | ||
@field | ||
def name(self) -> str: | ||
return "replaced product's name" | ||
|
||
|
||
@handle_urls(URL) | ||
class ParentReplacedProductPage(ItemPage[ParentReplacedProduct]): | ||
@field | ||
def name(self) -> str: | ||
return "parent replaced product's name" | ||
|
||
|
||
@handle_urls(URL, to_return=StandaloneProduct) | ||
class StandaloneProductPage(ItemPage): | ||
@field | ||
def name(self) -> str: | ||
return "standalone product's name" | ||
|
||
|
||
# TODO: add cases where both `instead_of` and `to_return` are present, | ||
# including permutations of the cases above. | ||
|
||
|
||
@handle_urls(URL, to_return=ProductFromInjectable) | ||
class ProductFromInjectablePage(Injectable): | ||
@field | ||
def name(self) -> str: | ||
return "product from injectable" | ||
|
||
async def to_item(self) -> ProductFromInjectable: | ||
return await item_from_fields(self, item_cls=ProductFromInjectable) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from web_poet import field, handle_urls | ||
|
||
from . import URL | ||
from .main import ( | ||
ParentProductPage, | ||
ParentReplacedProductPage, | ||
PriorityParentProductPage, | ||
SubclassReplacedProduct, | ||
) | ||
|
||
|
||
@handle_urls(URL) | ||
class SubclassProductPage(ParentProductPage): | ||
@field | ||
def name(self) -> str: | ||
return "subclass product's name" | ||
|
||
|
||
@handle_urls(URL, to_return=SubclassReplacedProduct) | ||
class SubclassReplacedProductPage(ParentReplacedProductPage): | ||
@field | ||
def name(self) -> str: | ||
return "subclass replaced product's name" | ||
|
||
|
||
@handle_urls(URL, priority=600) | ||
class PrioritySubclassProductPage(PriorityParentProductPage): | ||
@field | ||
def name(self) -> str: | ||
return "priority subclass product's name" |
Shouldn't we warn about it in web-poet?