From f4d2f408def4270583ac6ee76e7619bfadfba4bc Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevinoxy@gmail.com>
Date: Fri, 6 Jan 2023 15:10:43 +0800
Subject: [PATCH 01/16] move some registry functionalities from scrapy-poet

---
 tests/test_rules.py | 98 ++++++++++++++++++++++++++++++++++++++++++---
 web_poet/rules.py   | 84 ++++++++++++++++++++++++++++++++++----
 2 files changed, 170 insertions(+), 12 deletions(-)
diff --git a/tests/test_rules.py b/tests/test_rules.py
index 6092cc2f..066ad94c 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -1,3 +1,5 @@
+import warnings
+
 import attrs
 import pytest
 from url_matcher import Patterns
@@ -9,8 +11,11 @@
     POTopLevelOverriden2,
 )
 from tests.po_lib.a_module import POModule, POModuleOverriden
-from tests.po_lib.nested_package import PONestedPkg
-from tests.po_lib.nested_package.a_nested_module import PONestedModule
+from tests.po_lib.nested_package import PONestedPkg, PONestedPkgOverriden
+from tests.po_lib.nested_package.a_nested_module import (
+    PONestedModule,
+    PONestedModuleOverriden,
+)
 from tests.po_lib_sub import POLibSub
 from tests.po_lib_to_return import (
     CustomProductPage,
@@ -20,7 +25,10 @@
     LessProductPage,
     MoreProductPage,
     Product,
+    ProductFewerFields,
+    ProductMoreFields,
     ProductPage,
+    ProductSeparate,
     ProductSimilar,
     SeparateProductPage,
     SimilarProductPage,
@@ -292,7 +300,7 @@ def test_registry_search_overrides_deprecation() -> None:
 def test_init_rules() -> None:
     rules = (
         ApplyRule(
-            for_patterns=Patterns(include=["sample.com"]),
+            for_patterns=Patterns(include=["example.com"]),
             use=POTopLevel1,
             instead_of=POTopLevelOverriden2,
         ),
@@ -305,10 +313,90 @@ def test_init_rules() -> None:
     assert default_registry.get_rules() != rules
 
 
+def test_add_rule() -> None:
+    registry = RulesRegistry()
+
+    # Basic case of adding a rule
+    rule_1 = ApplyRule(
+        for_patterns=Patterns(include=["example.com"]),
+        use=POTopLevel1,
+        instead_of=POTopLevelOverriden1,
+        to_return=Product,
+    )
+    registry.add_rule(rule_1)
+    assert registry.get_rules() == [rule_1]
+
+    # Adding a second rule should not emit a warning as long as both the URL
+    # pattern and `.to_return` value is not the same.
+    rule_2 = ApplyRule(
+        for_patterns=Patterns(include=["example.com"]),
+        use=POTopLevel1,
+        instead_of=POTopLevelOverriden2,
+        to_return=ProductSimilar,
+    )
+    with warnings.catch_warnings(record=True) as warnings_emitted:
+        registry.add_rule(rule_2)
+    assert not warnings_emitted
+    assert registry.get_rules() == [rule_1, rule_2]
+
+    # Warnings should be raised for this case since it's the same URL pattern
+    # and `.to_return` value from one of the past rules.
+    rule_3 = ApplyRule(
+        for_patterns=Patterns(include=["example.com"]),
+        use=POTopLevel1,
+        instead_of=POTopLevelOverriden2,
+        to_return=Product,
+    )
+    expected_msg = (
+        r"Similar URL patterns .+? were declared earlier that use "
+        r"to_return=<class 'tests.po_lib_to_return.Product'>."
+    )
+    with pytest.warns(UserWarning, match=expected_msg):
+        registry.add_rule(rule_3)
+    assert registry.get_rules() == [rule_1, rule_2, rule_3]
+
+
+def test_overrides_for() -> None:
+    # FIXME: Flaky
+
+    assert default_registry.overrides_for("https://example.com") == {
+        POTopLevelOverriden1: POTopLevel1,
+        POTopLevelOverriden2: POTopLevel2,
+        POModuleOverriden: POModule,
+        PONestedPkgOverriden: PONestedPkg,
+        PONestedModuleOverriden: PONestedModule,
+        ProductPage: CustomProductPageNoReturns,
+    }
+
+    assert default_registry.overrides_for("https://example.org") == {
+        PONestedModuleOverriden: PONestedModule,
+        PONestedPkgOverriden: PONestedPkg,
+    }
+
+
+def test_page_object_for() -> None:
+    # FIXME: Flaky
+
+    assert default_registry.page_object_for("https://example.com") == {
+        ProductSimilar: SimilarProductPage,
+        Product: CustomProductPageDataTypeOnly,
+        ProductSeparate: SeparateProductPage,
+        ProductFewerFields: LessProductPage,
+        ProductMoreFields: MoreProductPage,
+    }
+
+    assert not default_registry.page_object_for("https://example.org")
+
+
+# TODO: test for updating the hash
+
+# TODO: Add the other new tests from scrapy-poet
+
+
 def test_from_override_rules_deprecation_using_ApplyRule() -> None:
     rules = [
         ApplyRule(
-            for_patterns=Patterns(include=["sample.com"]),
+            for_patterns=Patterns(include=["example.com"]),
             use=POTopLevel1,
             instead_of=POTopLevelOverriden2,
         )
@@ -325,7 +413,7 @@ def test_from_override_rules_deprecation_using_ApplyRule() -> None:
 def test_from_override_rules_deprecation_using_OverrideRule() -> None:
     rules = [
         OverrideRule(
-            for_patterns=Patterns(include=["sample.com"]),
+            for_patterns=Patterns(include=["example.com"]),
             use=POTopLevel1,
             instead_of=POTopLevelOverriden2,
         )
diff --git a/web_poet/rules.py b/web_poet/rules.py
index 7fe42088..41aed8d5 100644
--- a/web_poet/rules.py
+++ b/web_poet/rules.py
@@ -4,14 +4,15 @@
 import importlib.util
 import pkgutil
 import warnings
-from collections import deque
+from collections import defaultdict, deque
 from operator import attrgetter
-from typing import Any, Dict, Iterable, List, Optional, Type, TypeVar, Union
+from typing import Any, Dict, Iterable, List, Mapping, Optional, Type, TypeVar, Union
 
 import attrs
-from url_matcher import Patterns
+from url_matcher import Patterns, URLMatcher
 
 from web_poet._typing import get_item_cls
+from web_poet.page_inputs.url import _Url
 from web_poet.pages import ItemPage
 from web_poet.utils import _create_deprecated_class, as_list, str_to_pattern
 
@@ -115,9 +116,52 @@ class ExampleComProductPage(WebPage[Product]):
     """
 
     def __init__(self, *, rules: Optional[Iterable[ApplyRule]] = None):
-        self._rules: List[ApplyRule] = []
+        self._rules: Dict[int, ApplyRule] = {}
+        self.overrides_matcher: Dict[Type[ItemPage], URLMatcher] = defaultdict(
+            URLMatcher
+        )
+        self.item_matcher: Dict[Optional[Type], URLMatcher] = defaultdict(URLMatcher)
+
         if rules is not None:
-            self._rules = list(rules)
+            for rule in rules:
+                self.add_rule(rule)
+
+    def add_rule(self, rule: ApplyRule) -> None:
+        """Registers an :class:`web_poet.rules.ApplyRule` instance."""
+        rule_id = hash(rule)
+
+        # A common case when a page object subclasses another one with the same
+        # URL pattern.
+        matched = self.item_matcher.get(rule.to_return)
+        if matched:
+            pattern_dupes = [
+                pattern
+                for pattern in matched.patterns.values()
+                if pattern == rule.for_patterns
+            ]
+            if pattern_dupes:
+                rules = [
+                    r
+                    for p in pattern_dupes
+                    for r in self.search(for_patterns=p, to_return=rule.to_return)
+                ]
+                warnings.warn(
+                    f"Similar URL patterns {pattern_dupes} were declared earlier "
+                    f"that use to_return={rule.to_return}. The first, highest-priority "
+                    f"rule added to SCRAPY_POET_REGISTRY will be used when matching "
+                    f"against URLs. Consider updating the priority of these rules: "
+                    f"{rules}."
+                )
+
+        if rule.instead_of:
+            self.overrides_matcher[rule.instead_of].add_or_update(
+                rule_id, rule.for_patterns
+            )
+        if rule.to_return:
+            self.item_matcher[rule.to_return].add_or_update(rule_id, rule.for_patterns)
+
+        # TODO: test removing the rule
+        self._rules[rule_id] = rule
 
     @classmethod
     def from_override_rules(
@@ -199,7 +243,7 @@ def wrapper(cls):
                 to_return=to_return or get_item_cls(cls),
                 meta=kwargs,
             )
-            self._rules.append(rule)
+            self.add_rule(rule)
             return cls
 
         return wrapper
@@ -214,7 +258,7 @@ def get_rules(self) -> List[ApplyRule]:
             beforehand to recursively import all submodules which contains the
             ``@handle_urls`` decorators from external Page Objects.
         """
-        return self._rules[:]
+        return list(self._rules.values())
 
     def get_overrides(self) -> List[ApplyRule]:
         """Deprecated, use :meth:`~.RulesRegistry.get_rules` instead."""
@@ -259,6 +303,32 @@ def search_overrides(self, **kwargs) -> List[ApplyRule]:
         warnings.warn(msg, DeprecationWarning, stacklevel=2)
         return self.search(**kwargs)
 
+    def _rules_for_url(
+        self, url: Union[_Url, str], url_matcher: Dict[Any, URLMatcher]
+    ) -> Mapping[Type, Type[ItemPage]]:
+        result: Dict[Type, Type[ItemPage]] = {}
+
+        for target, matcher in url_matcher.items():
+            rule_id = matcher.match(url)
+            if rule_id is not None:
+                result[target] = self._rules[rule_id].use
+
+        return result
+
+    def overrides_for(self, url: Union[_Url, str]) -> Mapping[Type, Type[ItemPage]]:
+        """Finds all of the page objects associated with the given URL and
+        returns a Mapping where the 'key' represents the page object that is
+        **overridden** by the page object in 'value'.
+        """
+        return self._rules_for_url(url, self.overrides_matcher)
+
+    def page_object_for(self, url: Union[_Url, str]) -> Mapping[Type, Type[ItemPage]]:
+        """Finds all of the page objects associated with the given URL and
+        returns a Mapping where the 'key' represents the item class that is
+        **produced** by the page object in 'value'.
+        """
+        return self._rules_for_url(url, self.item_matcher)
+
 
 def _walk_module(module: str) -> Iterable:
     """Return all modules from a module recursively.

From f2f7177c81b1e9bfdc86de01289755d062d49ede Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevinoxy@gmail.com>
Date: Fri, 6 Jan 2023 16:37:42 +0800
Subject: [PATCH 02/16] fix non-deterministic matching when URL patterns are
 tied

---
 tests/test_rules.py |  6 +++---
 web_poet/rules.py   | 16 ++++++++++++++--
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/tests/test_rules.py b/tests/test_rules.py
index 066ad94c..df2ae7d7 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -357,7 +357,7 @@ def test_add_rule() -> None:
 
 
 def test_overrides_for() -> None:
-    # FIXME: Flaky
+    # TODO: Test with RequestUrl/ResponseUrl as well
 
     assert default_registry.overrides_for("https://example.com") == {
         POTopLevelOverriden1: POTopLevel1,
@@ -375,10 +375,10 @@ def test_overrides_for() -> None:
 
 
 def test_page_object_for() -> None:
-    # FIXME: Flaky
+    # TODO: Test with RequestUrl/ResponseUrl as well
 
     assert default_registry.page_object_for("https://example.com") == {
-        ProductSimilar: SimilarProductPage,
+        ProductSimilar: CustomProductPageNoReturns,
         Product: CustomProductPageDataTypeOnly,
         ProductSeparate: SeparateProductPage,
         ProductFewerFields: LessProductPage,
diff --git a/web_poet/rules.py b/web_poet/rules.py
index 41aed8d5..4153522c 100644
--- a/web_poet/rules.py
+++ b/web_poet/rules.py
@@ -122,14 +122,23 @@ def __init__(self, *, rules: Optional[Iterable[ApplyRule]] = None):
         )
         self.item_matcher: Dict[Optional[Type], URLMatcher] = defaultdict(URLMatcher)
 
+        # Ensures that URLMatcher is deterministic in returning a rule when
+        # matching. Currently, `URLMatcher._sort_domain` has this sorting
+        # criteria:
+        #   * Priority (descending)
+        #   * Sorted list of includes for this domain (descending)
+        #   * Rule identifier (descending)
+        # This means that if the priority and domain are the same, the last tie
+        # breaker woulud be the "Rule identifier", this means we can base it on
+        # when a given rule was added to the registry, i.e. a counter.
+        self._rule_counter = 0
+
         if rules is not None:
             for rule in rules:
                 self.add_rule(rule)
 
     def add_rule(self, rule: ApplyRule) -> None:
         """Registers an :class:`web_poet.rules.ApplyRule` instance."""
-        rule_id = hash(rule)
-
         # A common case when a page object subclasses another one with the same
         # URL pattern.
         matched = self.item_matcher.get(rule.to_return)
@@ -153,6 +162,9 @@ def add_rule(self, rule: ApplyRule) -> None:
                     f"{rules}."
                 )
 
+        self._rule_counter += 1
+        rule_id = self._rule_counter
+
         if rule.instead_of:
             self.overrides_matcher[rule.instead_of].add_or_update(
                 rule_id, rule.for_patterns

From 3891a04fd8e8c579775b82eefed5a076e7fb094c Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevinoxy@gmail.com>
Date: Fri, 6 Jan 2023 16:46:02 +0800
Subject: [PATCH 03/16] ensure RequestUrl/ResponseUrl works

---
 tests/test_rules.py | 56 ++++++++++++++++++++-------------------------
 web_poet/rules.py   |  2 ++
 2 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/tests/test_rules.py b/tests/test_rules.py
index df2ae7d7..5ec76ea5 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -42,6 +42,7 @@
     default_registry,
     handle_urls,
 )
+from web_poet.page_inputs.url import RequestUrl, ResponseUrl
 
 POS = {
     CustomProductPage,
@@ -357,40 +358,33 @@ def test_add_rule() -> None:
 
 
 def test_overrides_for() -> None:
-    # TODO: Test with RequestUrl/ResponseUrl as well
-
-    assert default_registry.overrides_for("https://example.com") == {
-        POTopLevelOverriden1: POTopLevel1,
-        POTopLevelOverriden2: POTopLevel2,
-        POModuleOverriden: POModule,
-        PONestedPkgOverriden: PONestedPkg,
-        PONestedModuleOverriden: PONestedModule,
-        ProductPage: CustomProductPageNoReturns,
-    }
-
-    assert default_registry.overrides_for("https://example.org") == {
-        PONestedModuleOverriden: PONestedModule,
-        PONestedPkgOverriden: PONestedPkg,
-    }
+    for cls in [str, RequestUrl, ResponseUrl]:
+        assert default_registry.overrides_for(cls("https://example.com")) == {
+            POTopLevelOverriden1: POTopLevel1,
+            POTopLevelOverriden2: POTopLevel2,
+            POModuleOverriden: POModule,
+            PONestedPkgOverriden: PONestedPkg,
+            PONestedModuleOverriden: PONestedModule,
+            ProductPage: CustomProductPageNoReturns,
+        }
+
+        assert default_registry.overrides_for(cls("https://example.org")) == {
+            PONestedModuleOverriden: PONestedModule,
+            PONestedPkgOverriden: PONestedPkg,
+        }
 
 
 def test_page_object_for() -> None:
-    # TODO: Test with RequestUrl/ResponseUrl as well
-
-    assert default_registry.page_object_for("https://example.com") == {
-        ProductSimilar: CustomProductPageNoReturns,
-        Product: CustomProductPageDataTypeOnly,
-        ProductSeparate: SeparateProductPage,
-        ProductFewerFields: LessProductPage,
-        ProductMoreFields: MoreProductPage,
-    }
-
-    assert not default_registry.page_object_for("https://example.org")
-
-
-# TODO: test for updating the hash
-
-# TODO: Add the other new tests from scrapy-poet
+    for cls in [str, RequestUrl, ResponseUrl]:
+        assert default_registry.page_object_for(cls("https://example.com")) == {
+            ProductSimilar: CustomProductPageNoReturns,
+            Product: CustomProductPageDataTypeOnly,
+            ProductSeparate: SeparateProductPage,
+            ProductFewerFields: LessProductPage,
+            ProductMoreFields: MoreProductPage,
+        }
+
+        assert not default_registry.page_object_for(cls("https://example.org"))
 
 
 def test_from_override_rules_deprecation_using_ApplyRule() -> None:
diff --git a/web_poet/rules.py b/web_poet/rules.py
index 4153522c..3c4af98a 100644
--- a/web_poet/rules.py
+++ b/web_poet/rules.py
@@ -320,6 +320,8 @@ def _rules_for_url(
     ) -> Mapping[Type, Type[ItemPage]]:
         result: Dict[Type, Type[ItemPage]] = {}
 
+        url = str(url)
+
         for target, matcher in url_matcher.items():
             rule_id = matcher.match(url)
             if rule_id is not None:

From 513235de81052b7cbc7339f6cca6e1e3d5a85204 Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevinoxy@gmail.com>
Date: Fri, 6 Jan 2023 16:52:14 +0800
Subject: [PATCH 04/16] rename variables for clarity

---
 web_poet/rules.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/web_poet/rules.py b/web_poet/rules.py
index 3c4af98a..9fd1f08b 100644
--- a/web_poet/rules.py
+++ b/web_poet/rules.py
@@ -117,10 +117,10 @@ class ExampleComProductPage(WebPage[Product]):
 
     def __init__(self, *, rules: Optional[Iterable[ApplyRule]] = None):
         self._rules: Dict[int, ApplyRule] = {}
-        self.overrides_matcher: Dict[Type[ItemPage], URLMatcher] = defaultdict(
+        self._overrides_matchers: Dict[Type[ItemPage], URLMatcher] = defaultdict(
             URLMatcher
         )
-        self.item_matcher: Dict[Optional[Type], URLMatcher] = defaultdict(URLMatcher)
+        self._item_matchers: Dict[Optional[Type], URLMatcher] = defaultdict(URLMatcher)
 
         # Ensures that URLMatcher is deterministic in returning a rule when
         # matching. Currently, `URLMatcher._sort_domain` has this sorting
@@ -141,7 +141,7 @@ def add_rule(self, rule: ApplyRule) -> None:
         """Registers an :class:`web_poet.rules.ApplyRule` instance."""
         # A common case when a page object subclasses another one with the same
         # URL pattern.
-        matched = self.item_matcher.get(rule.to_return)
+        matched = self._item_matchers.get(rule.to_return)
         if matched:
             pattern_dupes = [
                 pattern
@@ -166,11 +166,13 @@ def add_rule(self, rule: ApplyRule) -> None:
         rule_id = self._rule_counter
 
         if rule.instead_of:
-            self.overrides_matcher[rule.instead_of].add_or_update(
+            self._overrides_matchers[rule.instead_of].add_or_update(
                 rule_id, rule.for_patterns
             )
         if rule.to_return:
-            self.item_matcher[rule.to_return].add_or_update(rule_id, rule.for_patterns)
+            self._item_matchers[rule.to_return].add_or_update(
+                rule_id, rule.for_patterns
+            )
 
         # TODO: test removing the rule
         self._rules[rule_id] = rule
@@ -316,13 +318,13 @@ def search_overrides(self, **kwargs) -> List[ApplyRule]:
         return self.search(**kwargs)
 
     def _rules_for_url(
-        self, url: Union[_Url, str], url_matcher: Dict[Any, URLMatcher]
+        self, url: Union[_Url, str], matchers: Dict[Any, URLMatcher]
     ) -> Mapping[Type, Type[ItemPage]]:
         result: Dict[Type, Type[ItemPage]] = {}
 
         url = str(url)
 
-        for target, matcher in url_matcher.items():
+        for target, matcher in matchers.items():
             rule_id = matcher.match(url)
             if rule_id is not None:
                 result[target] = self._rules[rule_id].use
@@ -334,14 +336,14 @@ def overrides_for(self, url: Union[_Url, str]) -> Mapping[Type, Type[ItemPage]]:
         returns a Mapping where the 'key' represents the page object that is
         **overridden** by the page object in 'value'.
         """
-        return self._rules_for_url(url, self.overrides_matcher)
+        return self._rules_for_url(url, self._overrides_matchers)
 
     def page_object_for(self, url: Union[_Url, str]) -> Mapping[Type, Type[ItemPage]]:
         """Finds all of the page objects associated with the given URL and
         returns a Mapping where the 'key' represents the item class that is
         **produced** by the page object in 'value'.
         """
-        return self._rules_for_url(url, self.item_matcher)
+        return self._rules_for_url(url, self._item_matchers)
 
 
 def _walk_module(module: str) -> Iterable:

From c53249cfb8372dd520e26cb5b40f72dfa2aa0080 Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevinoxy@gmail.com>
Date: Mon, 9 Jan 2023 16:54:56 +0800
Subject: [PATCH 05/16] fix typing and comments

---
 web_poet/rules.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/web_poet/rules.py b/web_poet/rules.py
index 9fd1f08b..b2348538 100644
--- a/web_poet/rules.py
+++ b/web_poet/rules.py
@@ -123,14 +123,14 @@ def __init__(self, *, rules: Optional[Iterable[ApplyRule]] = None):
         self._item_matchers: Dict[Optional[Type], URLMatcher] = defaultdict(URLMatcher)
 
         # Ensures that URLMatcher is deterministic in returning a rule when
-        # matching. Currently, `URLMatcher._sort_domain` has this sorting
-        # criteria:
+        # matching. As of url_macher==0.2.0, `url_matcher.URLMatcher._sort_domain`
+        # has this sorting criteria:
         #   * Priority (descending)
         #   * Sorted list of includes for this domain (descending)
         #   * Rule identifier (descending)
         # This means that if the priority and domain are the same, the last tie
-        # breaker woulud be the "Rule identifier", this means we can base it on
-        # when a given rule was added to the registry, i.e. a counter.
+        # breaker would be the "Rule identifier", this means we can base it on
+        # the order of rule addition to the registry, i.e. a counter.
         self._rule_counter = 0
 
         if rules is not None:
@@ -331,7 +331,9 @@ def _rules_for_url(
 
         return result
 
-    def overrides_for(self, url: Union[_Url, str]) -> Mapping[Type, Type[ItemPage]]:
+    def overrides_for(
+        self, url: Union[_Url, str]
+    ) -> Mapping[Type[ItemPage], Type[ItemPage]]:
         """Finds all of the page objects associated with the given URL and
         returns a Mapping where the 'key' represents the page object that is
         **overridden** by the page object in 'value'.

From a59bd21ab56c3dc16c2987af4cddab3e766d096f Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevinoxy@gmail.com>
Date: Mon, 9 Jan 2023 17:02:54 +0800
Subject: [PATCH 06/16] create page_object_for_item() registry method

---
 tests/test_rules.py | 22 ++++++++++++++++++++++
 web_poet/rules.py   | 17 +++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/tests/test_rules.py b/tests/test_rules.py
index 5ec76ea5..b2ee181a 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -387,6 +387,28 @@ def test_page_object_for() -> None:
         assert not default_registry.page_object_for(cls("https://example.org"))
 
 
+def test_page_object_for_item() -> None:
+    # This is not associated with any rule.
+    class FakeItem:
+        pass
+
+    method = default_registry.page_object_for_item
+
+    for cls in [str, RequestUrl, ResponseUrl]:
+        url = cls("https://example.com")
+        assert method(url, ProductSimilar) == CustomProductPageNoReturns
+        assert method(url, Product) == CustomProductPageDataTypeOnly
+        assert method(url, ProductSeparate) == SeparateProductPage
+        assert method(url, ProductFewerFields) == LessProductPage
+        assert method(url, ProductMoreFields) == MoreProductPage
+
+        # When there's no rule specifying to return this FakeItem
+        assert method(url, FakeItem) is None
+
+        # When the URL itself doesn't have any ``to_return`` in any of its rules
+        assert method(cls("https://example.org"), FakeItem) is None
+
+
 def test_from_override_rules_deprecation_using_ApplyRule() -> None:
     rules = [
         ApplyRule(
diff --git a/web_poet/rules.py b/web_poet/rules.py
index b2348538..a0e52370 100644
--- a/web_poet/rules.py
+++ b/web_poet/rules.py
@@ -347,6 +347,23 @@ def page_object_for(self, url: Union[_Url, str]) -> Mapping[Type, Type[ItemPage]
         """
         return self._rules_for_url(url, self._item_matchers)
 
+    def page_object_for_item(
+        self, url: Union[_Url, str], item_cls: Type
+    ) -> Optional[Type]:
+        """Return the page object class associated with the given URL that's able
+        to produce the given ``item_cls``.
+        """
+
+        url = str(url)
+
+        matcher = self._item_matchers.get(item_cls)
+        if matcher:
+            rule_id = matcher.match(url)
+            if rule_id is not None:
+                return self._rules[rule_id].use
+
+        return None
+
 
 def _walk_module(module: str) -> Iterable:
     """Return all modules from a module recursively.

From a5e8480f9d20bf65f6ecc8b0d6e56e666abb1223 Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevinoxy@gmail.com>
Date: Mon, 9 Jan 2023 17:30:12 +0800
Subject: [PATCH 07/16] drop page_object_for() registry method

https://github.com/scrapinghub/web-poet/pull/112#discussion_r1063367500
---
 tests/test_rules.py | 13 -------------
 web_poet/rules.py   |  7 -------
 2 files changed, 20 deletions(-)

diff --git a/tests/test_rules.py b/tests/test_rules.py
index b2ee181a..1081c373 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -374,19 +374,6 @@ def test_overrides_for() -> None:
         }
 
 
-def test_page_object_for() -> None:
-    for cls in [str, RequestUrl, ResponseUrl]:
-        assert default_registry.page_object_for(cls("https://example.com")) == {
-            ProductSimilar: CustomProductPageNoReturns,
-            Product: CustomProductPageDataTypeOnly,
-            ProductSeparate: SeparateProductPage,
-            ProductFewerFields: LessProductPage,
-            ProductMoreFields: MoreProductPage,
-        }
-
-        assert not default_registry.page_object_for(cls("https://example.org"))
-
-
 def test_page_object_for_item() -> None:
     # This is not associated with any rule.
     class FakeItem:
diff --git a/web_poet/rules.py b/web_poet/rules.py
index a0e52370..4373ae8a 100644
--- a/web_poet/rules.py
+++ b/web_poet/rules.py
@@ -340,13 +340,6 @@ def overrides_for(
         """
         return self._rules_for_url(url, self._overrides_matchers)
 
-    def page_object_for(self, url: Union[_Url, str]) -> Mapping[Type, Type[ItemPage]]:
-        """Finds all of the page objects associated with the given URL and
-        returns a Mapping where the 'key' represents the item class that is
-        **produced** by the page object in 'value'.
-        """
-        return self._rules_for_url(url, self._item_matchers)
-
     def page_object_for_item(
         self, url: Union[_Url, str], item_cls: Type
     ) -> Optional[Type]:

From ef520fac1bae02b47ee829578c2f66005fc8583b Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevinoxy@gmail.com>
Date: Mon, 9 Jan 2023 18:19:18 +0800
Subject: [PATCH 08/16] create docs for overrides_for() and
 page_object_for_item()

---
 CHANGELOG.rst               |  9 +++++++
 docs/page-objects/rules.rst | 52 +++++++++++++++++++++++++++++++++++++
 web_poet/rules.py           |  4 +++
 3 files changed, 65 insertions(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index d39b3ac1..49375b7f 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -2,6 +2,15 @@
 Changelog
 =========
 
+TBR
+---
+
+* Introduce new methods for :class:`web_poet.rules.RulesRegistry`:
+
+    * :meth:`web_poet.rules.RulesRegistry.add_rule`
+    * :meth:`web_poet.rules.RulesRegistry.overrides_for`
+    * :meth:`web_poet.rules.RulesRegistry.page_object_for_item`
+
 0.6.0 (2022-11-08)
 ------------------
 
diff --git a/docs/page-objects/rules.rst b/docs/page-objects/rules.rst
index d19783ed..c9572143 100644
--- a/docs/page-objects/rules.rst
+++ b/docs/page-objects/rules.rst
@@ -390,6 +390,58 @@ in the :class:`~.ApplyRule` to be written into ``web_poet.default_registry``.
 
     The next section explores this caveat further.
 
+Using URLs against the registered rules
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+One of the important aspects of :class:`~.ApplyRule` is dictating which URLs it's
+able to work using its ``for_patterns`` attribute. There are a few methods
+available in :class:`~.RulesRegistry` which accepts a URL value (:class:`str`,
+:class:`~.RequestUrl`, or :class:`~.ResponseUrl`) to find specific information
+from the registered rules.
+
+.. _rules-overrides_for-example:
+
+Find the page object overrides
+""""""""""""""""""""""""""""""
+
+Suppose you want to see what are the :ref:`rules-intro-overrides` that are
+available from a given webpage, you can use :meth:`~.RulesRegistry.overrides_for`
+by passing the webpage URL. For example:
+
+.. code-block:: python
+
+    from web_poet import default_registry
+
+    overrides = default_registry.overrides_for("http://books.toscrape.com/")
+    print(overrides)
+
+    # {
+    #     <class 'OldProductPage'>: <class 'NewProductPage'>,
+    #     <class 'OverriddenPage'>: <class 'UseThisPage'>,
+    # }
+
+It returns a :class:`Mapping` where the *key* represents the page object class
+that is overridden or replaced by the page object class in the *value*.
+
+.. _rules-page_object_for_item-example:
+
+Identify the page object that could create the item
+"""""""""""""""""""""""""""""""""""""""""""""""""""
+
+Suppose you want to retrieve the page object class that is able to create the
+item class that you want from a given webpage, you can use
+:meth:`~.RulesRegistry.page_object_for_item`. For example:
+
+.. code-block:: python
+
+    from web_poet import default_registry
+
+    item_cls = default_registry.page_object_for_item(
+        "http://books.toscrape.com/catalogue/sapiens-a-brief-history-of-humankind_996/index.html",
+        Book
+    )
+    print(item_cls)  # BookPage
+
 
 Using rules from External Packages
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/web_poet/rules.py b/web_poet/rules.py
index 4373ae8a..1d167350 100644
--- a/web_poet/rules.py
+++ b/web_poet/rules.py
@@ -337,6 +337,8 @@ def overrides_for(
         """Finds all of the page objects associated with the given URL and
         returns a Mapping where the 'key' represents the page object that is
         **overridden** by the page object in 'value'.
+
+        See example: :ref:`rules-overrides_for-example`.
         """
         return self._rules_for_url(url, self._overrides_matchers)
 
@@ -345,6 +347,8 @@ def page_object_for_item(
     ) -> Optional[Type]:
         """Return the page object class associated with the given URL that's able
         to produce the given ``item_cls``.
+
+        See example: :ref:`rules-page_object_for_item-example`.
         """
 
         url = str(url)

From 37fb650147dd2d7e751f8644592a7794055b14ef Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevinoxy@gmail.com>
Date: Mon, 9 Jan 2023 21:00:36 +0800
Subject: [PATCH 09/16] fix UserWarning for similar rules

---
 tests/test_rules.py | 13 +++++++------
 web_poet/rules.py   |  9 ++++-----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tests/test_rules.py b/tests/test_rules.py
index 1081c373..fbcae98e 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -163,7 +163,7 @@ def test_apply_rule_kwargs_only() -> None:
             ApplyRule(
                 "example.com",
                 *[params[r] for r in remove],
-                **{k: v for k, v in params.items() if k not in remove}  # type: ignore[arg-type]
+                **{k: v for k, v in params.items() if k not in remove},  # type: ignore[arg-type]
             )
 
 
@@ -348,12 +348,13 @@ def test_add_rule() -> None:
         instead_of=POTopLevelOverriden2,
         to_return=Product,
     )
-    expected_msg = (
-        r"Similar URL patterns .+? were declared earlier that use "
-        r"to_return=<class 'tests.po_lib_to_return.Product'>."
-    )
-    with pytest.warns(UserWarning, match=expected_msg):
+    # Since we're using f-strings to compare the warning emitted, don't use
+    # ``pytest.warns()`` here since it treats the msg as regex which translates
+    # the "(" and ")" characters differently from the expected message.
+    with warnings.catch_warnings(record=True) as warnings_emitted:
         registry.add_rule(rule_3)
+    expected_msg = f"Consider updating the priority of these rules: {[rule_1, rule_3]}."
+    assert any([True for w in warnings_emitted if expected_msg in str(w.message)])
     assert registry.get_rules() == [rule_1, rule_2, rule_3]
 
 
diff --git a/web_poet/rules.py b/web_poet/rules.py
index 1d167350..4e62eec0 100644
--- a/web_poet/rules.py
+++ b/web_poet/rules.py
@@ -149,17 +149,17 @@ def add_rule(self, rule: ApplyRule) -> None:
                 if pattern == rule.for_patterns
             ]
             if pattern_dupes:
-                rules = [
+                rules_to_warn = [
                     r
                     for p in pattern_dupes
                     for r in self.search(for_patterns=p, to_return=rule.to_return)
-                ]
+                ] + [rule]
                 warnings.warn(
                     f"Similar URL patterns {pattern_dupes} were declared earlier "
                     f"that use to_return={rule.to_return}. The first, highest-priority "
-                    f"rule added to SCRAPY_POET_REGISTRY will be used when matching "
+                    f"rule added to the registry will be used when matching "
                     f"against URLs. Consider updating the priority of these rules: "
-                    f"{rules}."
+                    f"{rules_to_warn}."
                 )
 
         self._rule_counter += 1
@@ -174,7 +174,6 @@ def add_rule(self, rule: ApplyRule) -> None:
                 rule_id, rule.for_patterns
             )
 
-        # TODO: test removing the rule
         self._rules[rule_id] = rule
 
     @classmethod

From 299f1652521a8e8da956133250cc2501dc552918 Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevinoxy@gmail.com>
Date: Tue, 10 Jan 2023 16:04:07 +0800
Subject: [PATCH 10/16] refactor overrides_for() and page_object_for_item()

---
 web_poet/rules.py | 38 ++++++++++++++++----------------------
 1 file changed, 16 insertions(+), 22 deletions(-)

diff --git a/web_poet/rules.py b/web_poet/rules.py
index 4e62eec0..11b84fb0 100644
--- a/web_poet/rules.py
+++ b/web_poet/rules.py
@@ -316,19 +316,16 @@ def search_overrides(self, **kwargs) -> List[ApplyRule]:
         warnings.warn(msg, DeprecationWarning, stacklevel=2)
         return self.search(**kwargs)
 
-    def _rules_for_url(
-        self, url: Union[_Url, str], matchers: Dict[Any, URLMatcher]
-    ) -> Mapping[Type, Type[ItemPage]]:
-        result: Dict[Type, Type[ItemPage]] = {}
+    def _match_url_for_po(
+        self, url: Union[_Url, str], matcher: Optional[URLMatcher] = None
+    ) -> Optional[Type[ItemPage]]:
+        """Returns the page object to use based on the URL and URLMatcher."""
+        if not url or matcher is None:
+            return None
 
-        url = str(url)
-
-        for target, matcher in matchers.items():
-            rule_id = matcher.match(url)
-            if rule_id is not None:
-                result[target] = self._rules[rule_id].use
-
-        return result
+        rule_id = matcher.match(str(url))
+        if rule_id is not None:
+            return self._rules[rule_id].use
 
     def overrides_for(
         self, url: Union[_Url, str]
@@ -339,7 +336,12 @@ def overrides_for(
 
         See example: :ref:`rules-overrides_for-example`.
         """
-        return self._rules_for_url(url, self._overrides_matchers)
+        result: Dict[Type[ItemPage], Type[ItemPage]] = {}
+        for target, matcher in self._overrides_matchers.items():
+            po = self._match_url_for_po(url, matcher)
+            if po:
+                result[target] = po
+        return result
 
     def page_object_for_item(
         self, url: Union[_Url, str], item_cls: Type
@@ -349,16 +351,8 @@ def page_object_for_item(
 
         See example: :ref:`rules-page_object_for_item-example`.
         """
-
-        url = str(url)
-
         matcher = self._item_matchers.get(item_cls)
-        if matcher:
-            rule_id = matcher.match(url)
-            if rule_id is not None:
-                return self._rules[rule_id].use
-
-        return None
+        return self._match_url_for_po(url, matcher)
 
 
 def _walk_module(module: str) -> Iterable:

From 05a832b82beae4242a3facc1920992cee91e00d2 Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevinoxy@gmail.com>
Date: Tue, 10 Jan 2023 21:10:43 +0800
Subject: [PATCH 11/16] optimize RulesRegistry.search()

---
 CHANGELOG.rst       |  6 ++++++
 tests/test_rules.py | 12 ++++++------
 web_poet/rules.py   | 38 +++++++++++++++++++++++++++++++-------
 3 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 49375b7f..b343f8ae 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -11,6 +11,12 @@ TBR
     * :meth:`web_poet.rules.RulesRegistry.overrides_for`
     * :meth:`web_poet.rules.RulesRegistry.page_object_for_item`
 
+* Improved the performance of :meth:`web_poet.rules.RulesRegistry.search` where
+  passing a single parameter of either ``instead_of`` or ``to_return`` results
+  in *O(1)* look-up time instead of *O(N)*. Additionally, having either
+  ``instead_of`` or ``to_return`` present in multi-parameter search calls would
+  filter the initial candidate results resulting in a faster search.
+
 0.6.0 (2022-11-08)
 ------------------
 
diff --git a/tests/test_rules.py b/tests/test_rules.py
index fbcae98e..3a9d0ca3 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -257,18 +257,18 @@ def test_registry_search() -> None:
     # param: to_return
     rules = default_registry.search(to_return=Product)
     assert rules == [
-        ApplyRule("example.com", use=ProductPage, to_return=Product),
         ApplyRule(
             "example.com",
-            use=ImprovedProductPage,
-            instead_of=ProductPage,
+            # mypy complains here since it's expecting a container class when
+            # declared, i.e, ``ItemPage[SomeItem]``
+            use=CustomProductPageDataTypeOnly,  # type: ignore[arg-type]
             to_return=Product,
         ),
+        ApplyRule("example.com", use=ProductPage, to_return=Product),
         ApplyRule(
             "example.com",
-            # mypy complains here since it's expecting a container class when
-            # declared, i.e, ``ItemPage[SomeItem]``
-            use=CustomProductPageDataTypeOnly,  # type: ignore[arg-type]
+            use=ImprovedProductPage,
+            instead_of=ProductPage,
             to_return=Product,
         ),
     ]
diff --git a/web_poet/rules.py b/web_poet/rules.py
index 11b84fb0..53458324 100644
--- a/web_poet/rules.py
+++ b/web_poet/rules.py
@@ -120,7 +120,7 @@ def __init__(self, *, rules: Optional[Iterable[ApplyRule]] = None):
         self._overrides_matchers: Dict[Type[ItemPage], URLMatcher] = defaultdict(
             URLMatcher
         )
-        self._item_matchers: Dict[Optional[Type], URLMatcher] = defaultdict(URLMatcher)
+        self._item_matchers: Dict[Type, URLMatcher] = defaultdict(URLMatcher)
 
         # Ensures that URLMatcher is deterministic in returning a rule when
         # matching. As of url_macher==0.2.0, `url_matcher.URLMatcher._sort_domain`
@@ -139,10 +139,12 @@ def __init__(self, *, rules: Optional[Iterable[ApplyRule]] = None):
 
     def add_rule(self, rule: ApplyRule) -> None:
         """Registers an :class:`web_poet.rules.ApplyRule` instance."""
-        # A common case when a page object subclasses another one with the same
-        # URL pattern.
-        matched = self._item_matchers.get(rule.to_return)
+
+        # Ignore the type since ``ApplyRule`` could have an empty ``.to_return``
+        matched = self._item_matchers.get(rule.to_return)  # type: ignore[arg-type]
         if matched:
+            # A common case when a page object subclasses another one with the
+            # same URL pattern.
             pattern_dupes = [
                 pattern
                 for pattern in matched.patterns.values()
@@ -293,10 +295,32 @@ def search(self, **kwargs) -> List[ApplyRule]:
             print(rules[0].instead_of)  # GenericPO
 
         """
+        rule_ids = set()
+
+        # Ignore the types since we're using None value as a quick look-up.
+
+        matcher = self._item_matchers.get(kwargs.get("to_return"))  # type: ignore[arg-type]
+        if matcher:
+            rule_ids.update(matcher.patterns.keys())
+
+        matcher = self._overrides_matchers.get(kwargs.get("instead_of"))  # type: ignore[arg-type]
+        if matcher:
+            if rule_ids:
+                # If both params are used then narrow down the rules.
+                rule_ids.union(matcher.patterns.keys())
+            else:
+                rule_ids.update(matcher.patterns.keys())
+
+        rules = [self._rules[id_] for id_ in rule_ids]
+
+        if len(kwargs) == 1 and rules:
+            return rules
+
+        # Search other parameters as well
 
         getter = attrgetter(*kwargs.keys())
 
-        def matcher(rule: ApplyRule):
+        def finder(rule: ApplyRule):
             attribs = getter(rule)
             if not isinstance(attribs, tuple):
                 attribs = (attribs,)
@@ -305,8 +329,8 @@ def matcher(rule: ApplyRule):
             return False
 
         results = []
-        for rule in self.get_rules():
-            if matcher(rule):
+        for rule in rules or self.get_rules():
+            if finder(rule):
                 results.append(rule)
         return results
 

From dc6181024758aa0c1038a70b03a6f529f479c327 Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevinoxy@gmail.com>
Date: Fri, 13 Jan 2023 15:09:52 +0800
Subject: [PATCH 12/16] further optimize RulesRegistry.search()

---
 tests/test_rules.py | 41 +++++++++++++++++++++++-----
 web_poet/rules.py   | 65 ++++++++++++++++++++++++---------------------
 2 files changed, 69 insertions(+), 37 deletions(-)

diff --git a/tests/test_rules.py b/tests/test_rules.py
index 3a9d0ca3..0e6ca96f 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -254,31 +254,57 @@ def test_registry_search() -> None:
     assert len(rules) == 1
     assert rules[0].instead_of == POTopLevelOverriden2
 
+    rules = default_registry.search(instead_of=None)
+    for rule in rules:
+        assert rule.instead_of is None
+
     # param: to_return
     rules = default_registry.search(to_return=Product)
     assert rules == [
+        ApplyRule("example.com", use=ProductPage, to_return=Product),
         ApplyRule(
             "example.com",
-            # mypy complains here since it's expecting a container class when
-            # declared, i.e, ``ItemPage[SomeItem]``
-            use=CustomProductPageDataTypeOnly,  # type: ignore[arg-type]
+            use=ImprovedProductPage,
+            instead_of=ProductPage,
             to_return=Product,
         ),
-        ApplyRule("example.com", use=ProductPage, to_return=Product),
         ApplyRule(
             "example.com",
-            use=ImprovedProductPage,
-            instead_of=ProductPage,
+            # mypy complains here since it's expecting a container class when
+            # declared, i.e, ``ItemPage[SomeItem]``
+            use=CustomProductPageDataTypeOnly,  # type: ignore[arg-type]
             to_return=Product,
         ),
     ]
 
+    rules = default_registry.search(to_return=None)
+    for rule in rules:
+        assert rule.to_return is None
+
     # params: to_return and use
     rules = default_registry.search(to_return=Product, use=ImprovedProductPage)
     assert len(rules) == 1
     assert rules[0].to_return == Product
     assert rules[0].use == ImprovedProductPage
 
+    # params: to_return and instead_of
+    rules = default_registry.search(to_return=Product, instead_of=None)
+    assert len(rules) == 2
+    assert rules[0].to_return == Product
+    assert rules[0].instead_of is None
+    assert rules[1].to_return == Product
+    assert rules[1].instead_of is None
+
+    rules = default_registry.search(to_return=None, instead_of=ProductPage)
+    for rule in rules:
+        assert rule.to_return is None
+        assert rule.instead_of is None
+
+    rules = default_registry.search(to_return=None, instead_of=None)
+    assert len(rules) == 1
+    assert rules[0].to_return is None
+    assert rules[0].instead_of is None
+
     # Such rules doesn't exist
     rules = default_registry.search(use=POModuleOverriden)
     assert len(rules) == 0
@@ -390,6 +416,9 @@ class FakeItem:
         assert method(url, ProductFewerFields) == LessProductPage
         assert method(url, ProductMoreFields) == MoreProductPage
 
+        # Type is ignored since item_cls shouldn't be None
+        assert method(url, None) is None  # type: ignore[arg-type]
+
         # When there's no rule specifying to return this FakeItem
         assert method(url, FakeItem) is None
 
diff --git a/web_poet/rules.py b/web_poet/rules.py
index 53458324..caa85062 100644
--- a/web_poet/rules.py
+++ b/web_poet/rules.py
@@ -117,10 +117,10 @@ class ExampleComProductPage(WebPage[Product]):
 
     def __init__(self, *, rules: Optional[Iterable[ApplyRule]] = None):
         self._rules: Dict[int, ApplyRule] = {}
-        self._overrides_matchers: Dict[Type[ItemPage], URLMatcher] = defaultdict(
-            URLMatcher
-        )
-        self._item_matchers: Dict[Type, URLMatcher] = defaultdict(URLMatcher)
+        self._overrides_matchers: Dict[
+            Optional[Type[ItemPage]], URLMatcher
+        ] = defaultdict(URLMatcher)
+        self._item_matchers: Dict[Optional[Type], URLMatcher] = defaultdict(URLMatcher)
 
         # Ensures that URLMatcher is deterministic in returning a rule when
         # matching. As of url_macher==0.2.0, `url_matcher.URLMatcher._sort_domain`
@@ -167,14 +167,10 @@ def add_rule(self, rule: ApplyRule) -> None:
         self._rule_counter += 1
         rule_id = self._rule_counter
 
-        if rule.instead_of:
-            self._overrides_matchers[rule.instead_of].add_or_update(
-                rule_id, rule.for_patterns
-            )
-        if rule.to_return:
-            self._item_matchers[rule.to_return].add_or_update(
-                rule_id, rule.for_patterns
-            )
+        self._overrides_matchers[rule.instead_of].add_or_update(
+            rule_id, rule.for_patterns
+        )
+        self._item_matchers[rule.to_return].add_or_update(rule_id, rule.for_patterns)
 
         self._rules[rule_id] = rule
 
@@ -295,25 +291,28 @@ def search(self, **kwargs) -> List[ApplyRule]:
             print(rules[0].instead_of)  # GenericPO
 
         """
-        rule_ids = set()
-
-        # Ignore the types since we're using None value as a quick look-up.
-
-        matcher = self._item_matchers.get(kwargs.get("to_return"))  # type: ignore[arg-type]
-        if matcher:
-            rule_ids.update(matcher.patterns.keys())
-
-        matcher = self._overrides_matchers.get(kwargs.get("instead_of"))  # type: ignore[arg-type]
-        if matcher:
-            if rule_ids:
-                # If both params are used then narrow down the rules.
-                rule_ids.union(matcher.patterns.keys())
-            else:
-                rule_ids.update(matcher.patterns.keys())
+        # Use a dict instead of set() to preserve the order.
+        rule_ids = {}
+
+        if "to_return" in kwargs:
+            matcher = self._item_matchers.get(kwargs["to_return"])
+            if matcher:
+                rule_ids.update(matcher.patterns)
+
+        if "instead_of" in kwargs:
+            matcher = self._overrides_matchers.get(kwargs["instead_of"])
+            if matcher:
+                if rule_ids:
+                    # If both params are used then narrow down the rules.
+                    rule_ids = {
+                        k: v for k, v in matcher.patterns.items() if k in rule_ids
+                    }
+                else:
+                    rule_ids.update(matcher.patterns)
 
         rules = [self._rules[id_] for id_ in rule_ids]
 
-        if len(kwargs) == 1 and rules:
+        if rules and kwargs.keys() <= {"to_return", "instead_of"}:
             return rules
 
         # Search other parameters as well
@@ -340,7 +339,7 @@ def search_overrides(self, **kwargs) -> List[ApplyRule]:
         warnings.warn(msg, DeprecationWarning, stacklevel=2)
         return self.search(**kwargs)
 
-    def _match_url_for_po(
+    def _match_url_for_page_object(
         self, url: Union[_Url, str], matcher: Optional[URLMatcher] = None
     ) -> Optional[Type[ItemPage]]:
         """Returns the page object to use based on the URL and URLMatcher."""
@@ -362,7 +361,9 @@ def overrides_for(
         """
         result: Dict[Type[ItemPage], Type[ItemPage]] = {}
         for target, matcher in self._overrides_matchers.items():
-            po = self._match_url_for_po(url, matcher)
+            if target is None:
+                continue
+            po = self._match_url_for_page_object(url, matcher)
             if po:
                 result[target] = po
         return result
@@ -375,8 +376,10 @@ def page_object_for_item(
 
         See example: :ref:`rules-page_object_for_item-example`.
         """
+        if item_cls is None:
+            return None
         matcher = self._item_matchers.get(item_cls)
-        return self._match_url_for_po(url, matcher)
+        return self._match_url_for_page_object(url, matcher)
 
 
 def _walk_module(module: str) -> Iterable:

From 1668e5fb1cb8e7aae762ce54f01ffce1c6713ce4 Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevinoxy@gmail.com>
Date: Fri, 13 Jan 2023 16:07:49 +0800
Subject: [PATCH 13/16] use DefaultDict and remove type ignores

---
 web_poet/rules.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/web_poet/rules.py b/web_poet/rules.py
index caa85062..746f5f12 100644
--- a/web_poet/rules.py
+++ b/web_poet/rules.py
@@ -6,7 +6,18 @@
 import warnings
 from collections import defaultdict, deque
 from operator import attrgetter
-from typing import Any, Dict, Iterable, List, Mapping, Optional, Type, TypeVar, Union
+from typing import (
+    Any,
+    DefaultDict,
+    Dict,
+    Iterable,
+    List,
+    Mapping,
+    Optional,
+    Type,
+    TypeVar,
+    Union,
+)
 
 import attrs
 from url_matcher import Patterns, URLMatcher
@@ -117,10 +128,12 @@ class ExampleComProductPage(WebPage[Product]):
 
     def __init__(self, *, rules: Optional[Iterable[ApplyRule]] = None):
         self._rules: Dict[int, ApplyRule] = {}
-        self._overrides_matchers: Dict[
+        self._overrides_matchers: DefaultDict[
             Optional[Type[ItemPage]], URLMatcher
         ] = defaultdict(URLMatcher)
-        self._item_matchers: Dict[Optional[Type], URLMatcher] = defaultdict(URLMatcher)
+        self._item_matchers: DefaultDict[Optional[Type], URLMatcher] = defaultdict(
+            URLMatcher
+        )
 
         # Ensures that URLMatcher is deterministic in returning a rule when
         # matching. As of url_macher==0.2.0, `url_matcher.URLMatcher._sort_domain`
@@ -140,8 +153,7 @@ def __init__(self, *, rules: Optional[Iterable[ApplyRule]] = None):
     def add_rule(self, rule: ApplyRule) -> None:
         """Registers an :class:`web_poet.rules.ApplyRule` instance."""
 
-        # Ignore the type since ``ApplyRule`` could have an empty ``.to_return``
-        matched = self._item_matchers.get(rule.to_return)  # type: ignore[arg-type]
+        matched = self._item_matchers.get(rule.to_return)
         if matched:
             # A common case when a page object subclasses another one with the
             # same URL pattern.

From 16f05080bbe45b7e48369b8bd870c24bbecae779 Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevin@zyte.com>
Date: Fri, 13 Jan 2023 16:09:41 +0800
Subject: [PATCH 14/16] use better variable names inside overrides_for()

Co-authored-by: Mikhail Korobov <kmike84@gmail.com>
---
 web_poet/rules.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/web_poet/rules.py b/web_poet/rules.py
index 746f5f12..c368eb13 100644
--- a/web_poet/rules.py
+++ b/web_poet/rules.py
@@ -372,12 +372,12 @@ def overrides_for(
         See example: :ref:`rules-overrides_for-example`.
         """
         result: Dict[Type[ItemPage], Type[ItemPage]] = {}
-        for target, matcher in self._overrides_matchers.items():
-            if target is None:
+        for replaced_page, matcher in self._overrides_matchers.items():
+            if replaced_page is None:
                 continue
-            po = self._match_url_for_page_object(url, matcher)
-            if po:
-                result[target] = po
+            page = self._match_url_for_page_object(url, matcher)
+            if page:
+                result[replaced_page] = page
         return result
 
     def page_object_for_item(

From 1ab1a43abf2a3dc4c490216ebb6bc301bec6522e Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevinoxy@gmail.com>
Date: Mon, 16 Jan 2023 19:51:58 +0800
Subject: [PATCH 15/16] fix misleading variable name in docs

---
 docs/page-objects/rules.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/page-objects/rules.rst b/docs/page-objects/rules.rst
index c9572143..d521717d 100644
--- a/docs/page-objects/rules.rst
+++ b/docs/page-objects/rules.rst
@@ -436,11 +436,11 @@ item class that you want from a given webpage, you can use
 
     from web_poet import default_registry
 
-    item_cls = default_registry.page_object_for_item(
+    page_cls = default_registry.page_object_for_item(
         "http://books.toscrape.com/catalogue/sapiens-a-brief-history-of-humankind_996/index.html",
         Book
     )
-    print(item_cls)  # BookPage
+    print(page_cls)  # BookPage
 
 
 Using rules from External Packages

From e86bf379f8ca1ddd9c6479499a1988e1b47b794a Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevinoxy@gmail.com>
Date: Mon, 16 Jan 2023 20:50:42 +0800
Subject: [PATCH 16/16] =?UTF-8?q?rename=20page=5Fobject=5Ffor=5Fitem()=20?=
 =?UTF-8?q?=E2=86=92=20page=5Fcls=5Ffor=5Fitem()?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CHANGELOG.rst               | 2 +-
 docs/page-objects/rules.rst | 6 +++---
 tests/test_rules.py         | 4 ++--
 web_poet/rules.py           | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index b343f8ae..e4a134e7 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -9,7 +9,7 @@ TBR
 
     * :meth:`web_poet.rules.RulesRegistry.add_rule`
     * :meth:`web_poet.rules.RulesRegistry.overrides_for`
-    * :meth:`web_poet.rules.RulesRegistry.page_object_for_item`
+    * :meth:`web_poet.rules.RulesRegistry.page_cls_for_item`
 
 * Improved the performance of :meth:`web_poet.rules.RulesRegistry.search` where
   passing a single parameter of either ``instead_of`` or ``to_return`` results
diff --git a/docs/page-objects/rules.rst b/docs/page-objects/rules.rst
index d521717d..c5597675 100644
--- a/docs/page-objects/rules.rst
+++ b/docs/page-objects/rules.rst
@@ -423,20 +423,20 @@ by passing the webpage URL. For example:
 It returns a :class:`Mapping` where the *key* represents the page object class
 that is overridden or replaced by the page object class in the *value*.
 
-.. _rules-page_object_for_item-example:
+.. _rules-page_cls_for_item-example:
 
 Identify the page object that could create the item
 """""""""""""""""""""""""""""""""""""""""""""""""""
 
 Suppose you want to retrieve the page object class that is able to create the
 item class that you want from a given webpage, you can use
-:meth:`~.RulesRegistry.page_object_for_item`. For example:
+:meth:`~.RulesRegistry.page_cls_for_item`. For example:
 
 .. code-block:: python
 
     from web_poet import default_registry
 
-    page_cls = default_registry.page_object_for_item(
+    page_cls = default_registry.page_cls_for_item(
         "http://books.toscrape.com/catalogue/sapiens-a-brief-history-of-humankind_996/index.html",
         Book
     )
diff --git a/tests/test_rules.py b/tests/test_rules.py
index 0e6ca96f..88faa457 100644
--- a/tests/test_rules.py
+++ b/tests/test_rules.py
@@ -401,12 +401,12 @@ def test_overrides_for() -> None:
         }
 
 
-def test_page_object_for_item() -> None:
+def test_page_cls_for_item() -> None:
     # This is not associated with any rule.
     class FakeItem:
         pass
 
-    method = default_registry.page_object_for_item
+    method = default_registry.page_cls_for_item
 
     for cls in [str, RequestUrl, ResponseUrl]:
         url = cls("https://example.com")
diff --git a/web_poet/rules.py b/web_poet/rules.py
index c368eb13..232f537c 100644
--- a/web_poet/rules.py
+++ b/web_poet/rules.py
@@ -380,13 +380,13 @@ def overrides_for(
                 result[replaced_page] = page
         return result
 
-    def page_object_for_item(
+    def page_cls_for_item(
         self, url: Union[_Url, str], item_cls: Type
     ) -> Optional[Type]:
         """Return the page object class associated with the given URL that's able
         to produce the given ``item_cls``.
 
-        See example: :ref:`rules-page_object_for_item-example`.
+        See example: :ref:`rules-page_cls_for_item-example`.
         """
         if item_cls is None:
             return None