Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: pymedusa/Medusa
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 5651bb1423f8368a76208fd31c6f110f96232e03
Choose a base ref
..
head repository: pymedusa/Medusa
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: c8a15b3b2c570deb3ecb029548f156a44048789c
Choose a head ref
Showing with 1,602 additions and 715 deletions.
  1. +43 −16 ext/bs4/__init__.py
  2. +24 −1 ext/bs4/builder/_htmlparser.py
  3. +280 −0 ext/bs4/css.py
  4. +0 −15 ext/bs4/diagnose.py
  5. +298 −178 ext/bs4/element.py
  6. +1 −1 ext/bs4/formatter.py
  7. +8 −34 ext/bs4/tests/__init__.py
  8. +1 −0 ext/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-4818336571064320.testcase
  9. +1 −0 ext/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-4999465949331456.testcase
  10. BIN ext/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5167584867909632.testcase
  11. +2 −0 ext/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912.testcase
  12. +1 −0 ext/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5843991618256896.testcase
  13. BIN ext/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440.testcase
  14. +1 −0 ext/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6124268085182464.testcase
  15. +1 −0 ext/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6241471367348224.testcase
  16. BIN ext/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6450958476902400.testcase
  17. BIN ext/bs4/tests/fuzz/clusterfuzz-testcase-minimized-bs4_fuzzer-6600557255327744.testcase
  18. BIN ext/bs4/tests/fuzz/crash-0d306a50c8ed8bcd0785b67000fcd5dea1d33f08.testcase
  19. +487 −0 ext/bs4/tests/test_css.py
  20. +10 −10 ext/bs4/tests/test_formatter.py
  21. +91 −0 ext/bs4/tests/test_fuzz.py
  22. +24 −0 ext/bs4/tests/test_htmlparser.py
  23. +7 −5 ext/bs4/tests/test_lxml.py
  24. +51 −429 ext/bs4/tests/test_pageelement.py
  25. +42 −0 ext/bs4/tests/test_soup.py
  26. +4 −3 ext/cloudscraper/__init__.py
  27. +1 −1 ext/dogpile/__init__.py
  28. +9 −0 ext/dogpile/cache/api.py
  29. +20 −4 ext/dogpile/cache/region.py
  30. 0 ext/dogpile/py.typed
  31. +4 −4 ext/readme.md
  32. +1 −1 ext/urllib3/_version.py
  33. +30 −8 ext/urllib3/connectionpool.py
  34. +155 −0 ext/urllib3/packages/backports/weakref_finalize.py
  35. +1 −1 ext/urllib3/poolmanager.py
  36. +4 −4 requirements.txt
59 changes: 43 additions & 16 deletions ext/bs4/__init__.py
Original file line number Diff line number Diff line change
@@ -15,7 +15,7 @@
"""

__author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.11.2"
__version__ = "4.12.2"
__copyright__ = "Copyright (c) 2004-2023 Leonard Richardson"
# Use of this source code is governed by the MIT license.
__license__ = "MIT"
@@ -38,11 +38,13 @@
builder_registry,
ParserRejectedMarkup,
XMLParsedAsHTMLWarning,
HTMLParserTreeBuilder
)
from .dammit import UnicodeDammit
from .element import (
CData,
Comment,
CSS,
DEFAULT_OUTPUT_ENCODING,
Declaration,
Doctype,
@@ -116,7 +118,7 @@ class BeautifulSoup(Tag):
ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'

NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"

def __init__(self, markup="", features=None, builder=None,
parse_only=None, from_encoding=None, exclude_encodings=None,
element_classes=None, **kwargs):
@@ -348,25 +350,49 @@ def deprecated_argument(old_name, new_name):
self.markup = None
self.builder.soup = None

def __copy__(self):
"""Copy a BeautifulSoup object by converting the document to a string and parsing it again."""
copy = type(self)(
self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
)
def _clone(self):
"""Create a new BeautifulSoup object with the same TreeBuilder,
but not associated with any markup.
# Although we encoded the tree to UTF-8, that may not have
# been the encoding of the original markup. Set the copy's
# .original_encoding to reflect the original object's
# .original_encoding.
copy.original_encoding = self.original_encoding
return copy
This is the first step of the deepcopy process.
"""
clone = type(self)("", None, self.builder)

# Keep track of the encoding of the original document,
# since we won't be parsing it again.
clone.original_encoding = self.original_encoding
return clone

def __getstate__(self):
# Frequently a tree builder can't be pickled.
d = dict(self.__dict__)
if 'builder' in d and d['builder'] is not None and not self.builder.picklable:
d['builder'] = None
d['builder'] = type(self.builder)
# Store the contents as a Unicode string.
d['contents'] = []
d['markup'] = self.decode()

# If _most_recent_element is present, it's a Tag object left
# over from initial parse. It might not be picklable and we
# don't need it.
if '_most_recent_element' in d:
del d['_most_recent_element']
return d

def __setstate__(self, state):
    """Restore this object from pickled ``state`` and rebuild its tree.

    The ``builder`` entry of ``state`` may hold a TreeBuilder class (it is
    instantiated here), a falsy value (a fresh HTMLParserTreeBuilder is
    substituted), or an existing builder object (used as-is).

    :param state: The attribute dictionary to install as ``__dict__``.
    :return: The same ``state`` object that was passed in.
    """
    self.__dict__ = state

    restored_builder = self.builder
    if isinstance(restored_builder, type):
        # Only the builder's class was stored; create an instance of it.
        self.builder = restored_builder()
    elif not restored_builder:
        # There is no record of which builder produced this parse
        # tree, so fall back to one that is always available.
        self.builder = HTMLParserTreeBuilder()

    # Re-attach the builder to this object, then rebuild the tree.
    # NOTE(review): presumably _feed() re-parses the markup string kept
    # in the pickled state -- confirm against _feed().
    self.builder.soup = self
    self.reset()
    self._feed()
    return state


@classmethod
def _decode_markup(cls, markup):
@@ -468,6 +494,7 @@ def reset(self):
self.open_tag_counter = Counter()
self.preserve_whitespace_tag_stack = []
self.string_container_stack = []
self._most_recent_element = None
self.pushTag(self)

def new_tag(self, name, namespace=None, nsprefix=None, attrs={},
@@ -749,7 +776,7 @@ def handle_data(self, data):

def decode(self, pretty_print=False,
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
formatter="minimal"):
formatter="minimal", iterator=None):
"""Returns a string or Unicode representation of the parse tree
as an HTML or XML document.
@@ -776,7 +803,7 @@ def decode(self, pretty_print=False,
else:
indent_level = 0
return prefix + super(BeautifulSoup, self).decode(
indent_level, eventual_encoding, formatter)
indent_level, eventual_encoding, formatter, iterator)

# Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup'
_s = BeautifulSoup
25 changes: 24 additions & 1 deletion ext/bs4/builder/_htmlparser.py
Original file line number Diff line number Diff line change
@@ -24,6 +24,7 @@

from bs4.builder import (
DetectsXMLParsedAsHTML,
ParserRejectedMarkup,
HTML,
HTMLTreeBuilder,
STRICT,
@@ -70,6 +71,22 @@ def __init__(self, *args, **kwargs):

self._initialize_xml_detector()

def error(self, message):
    """Signal that this markup cannot be parsed.

    :param message: Description of the problem that was encountered.
    :raises ParserRejectedMarkup: Always, carrying ``message``.
    """
    # NOTE: Keep this override for as long as Python 3.9 is supported.
    # The corresponding code was removed from HTMLParser in 3.5, but
    # ParserBase kept it until 3.10.
    # https://github.com/python/cpython/issues/76025
    #
    # An earlier implementation downgraded the error to a warning. In
    # every case the author found, HTMLParser then crashed right away
    # with a message less helpful than the warning itself. Raising here
    # makes it clearer that html.parser simply can't handle the markup.
    # Python 3.10 behaves the same way, except that it raises
    # AssertionError instead of calling a method. (That error is caught
    # and wrapped in a ParserRejectedMarkup.)
    raise ParserRejectedMarkup(message)

def handle_startendtag(self, name, attrs):
"""Handle an incoming empty-element tag.
@@ -359,6 +376,12 @@ def feed(self, markup):
args, kwargs = self.parser_args
parser = BeautifulSoupHTMLParser(*args, **kwargs)
parser.soup = self.soup
parser.feed(markup)
try:
parser.feed(markup)
except AssertionError as e:
# html.parser raises AssertionError in rare cases to
# indicate a fatal problem with the markup, especially
# when there's an error in the doctype declaration.
raise ParserRejectedMarkup(e)
parser.close()
parser.already_closed_empty_element = []
Loading