Skip to content

Commit

Permalink
fix: ipv4 address regex
Browse files Browse the repository at this point in the history
  • Loading branch information
praktiskt committed Dec 4, 2024
1 parent 0fb814d commit 67ebcd5
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
4 changes: 2 additions & 2 deletions test_unstructured/cleaners/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from unstructured.cleaners import extract

# Input text passed to extract.extract_ip_address in test_extract_ip_address;
# it embeds several bracketed IP-style addresses.
# NOTE(review): a backslash at the very end of a line inside a triple-quoted
# string is a line continuation, so the leading "n" on the last line is a
# literal letter rather than a "\n" escape -- possibly a typo; confirm intent.
# NOTE(review): this text comes from a rendered commit diff, so the two
# "with mapi id" lines may be the removed and added versions of a single
# line -- confirm against the real file.
EMAIL_META_DATA_INPUT = """from ABC.DEF.local ([ba23::58b5:2236:45g2:88h2]) by
\n ABC.DEF.local ([ba23::58b5:2236:45g2:88h2%25]) with mapi id\
\n ABC.DEF.local ([68.183.71.12]) with mapi id\
n 32.88.5467.123; Fri, 26 Mar 2021 11:04:09 +1200"""


Expand Down Expand Up @@ -37,7 +37,7 @@ def test_extract_email_address():
# Asserts that extract.extract_ip_address, given EMAIL_META_DATA_INPUT, returns
# exactly the addresses listed below, in this order.
# NOTE(review): this is a rendered commit diff -- the body indentation has been
# lost, and the "%25" and "68.183.71.12" entries may be the removed and added
# versions of one expected value. Confirm against the real file.
def test_extract_ip_address():
assert extract.extract_ip_address(EMAIL_META_DATA_INPUT) == [
"ba23::58b5:2236:45g2:88h2",
"ba23::58b5:2236:45g2:88h2%25",
"68.183.71.12",
]


Expand Down
4 changes: 2 additions & 2 deletions unstructured/nlp/patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,9 @@
# Matches text, anchored from start to end, in which no consumed character
# starts a run of "." + optional whitespace + end -- i.e. text that does not
# finish with a period (trailing whitespace ignored). The empty string matches.
# "." does not cross newlines here because no DOTALL flag is passed.
ONE_LINE_BREAK_PARAGRAPH_PATTERN = r"^(?:(?!\.\s*$).)*$"
ONE_LINE_BREAK_PARAGRAPH_PATTERN_RE = re.compile(ONE_LINE_BREAK_PARAGRAPH_PATTERN)

# IP Address examples: ba23::58b5:2236:45g2:88h2, 10.0.2.01 or 68.183.71.12
# NOTE(review): this text comes from a rendered commit diff; the first IPv4
# alternative below looks like the pre-change line and the second like its
# replacement -- confirm against the real file before relying on both.
# None of the alternatives is anchored or wrapped in word boundaries, so each
# can match inside a longer run of characters.
IP_ADDRESS_PATTERN = (
# Dotted quad whose four groups are each one or two digits.
r"[0-9]{1,2}\.[0-9]{1,2}\.[0-9]{1,2}\.[0-9]{1,2}",
# Dotted quad whose four groups are each 0-255 with no leading zero on
# multi-digit values (250-255 | 200-249 | 100-199 | 0-99).
r"(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)){3}",
# IPv6-like form: a 4-character group, "::", then four more colon-separated
# 4-character groups, optionally followed by "%" and digits. The character
# class [a-z0-9] is wider than hexadecimal and is lowercase-only.
"[a-z0-9]{4}::[a-z0-9]{4}:[a-z0-9]{4}:[a-z0-9]{4}:[a-z0-9]{4}%?[0-9]*",
)
# The alternatives are joined with "|" inside a single capturing group, so the
# regex engine tries them left to right at each position.
IP_ADDRESS_PATTERN_RE = re.compile(f"({'|'.join(IP_ADDRESS_PATTERN)})")
Expand Down

0 comments on commit 67ebcd5

Please sign in to comment.