Skip to content

Commit

Permalink
Merge pull request #5 from erichutchins/versionbump
Browse files Browse the repository at this point in the history
Version bump
  • Loading branch information
erichutchins authored Aug 16, 2024
2 parents 789b4ed + d5f358c commit 3834972
Show file tree
Hide file tree
Showing 11 changed files with 244 additions and 349 deletions.
353 changes: 171 additions & 182 deletions Cargo.lock

Large diffs are not rendered by default.

13 changes: 4 additions & 9 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "polars-iptools"
version = "0.1.5"
version = "0.1.6"
edition = "2021"
license = "MIT"
repository = "https://github.com/erichutchins/polars_iptools"
Expand All @@ -11,20 +11,15 @@ name = "polars_iptools"
crate-type = ["cdylib"]

[dependencies]
pyo3 = { version = "0.21.2", features = ["extension-module", "abi3-py38"] }
pyo3-polars = { version = "0.15.0", features = ["derive"] }
pyo3 = { version = "0.22.2", features = ["extension-module", "abi3-py38"] }
pyo3-polars = { version = "0.16.1", features = ["derive", "dtype-struct"] }
serde = { version = "1", features = ["derive"] }
polars = { version = "0.41.3", features = [
"dtype-struct",
], default-features = false }
polars = { version = "0.42.0", default-features = false }
maxminddb = { version = "0.24.0", features = ["mmap"] }
lazy_static = "1.5.0"
iptrie = "0.8.5"
ipnet = "2.9.0"

[target.'cfg(target_os = "linux")'.dependencies]
jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }

[profile.release]
codegen-units = 1
strip = "symbols"
Expand Down
2 changes: 1 addition & 1 deletion polars_iptools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
import polars_iptools.geoip as geoip # noqa: F401
from polars_iptools._internal import __version__ # noqa: F401
from polars_iptools._internal import __version__ as __version__
from polars_iptools.iptools import * # noqa: F403
1 change: 1 addition & 0 deletions polars_iptools/_internal.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__: str
27 changes: 11 additions & 16 deletions polars_iptools/geoip.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,22 @@
from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING

import polars as pl
from polars.plugins import register_plugin_function

if TYPE_CHECKING:
from polars.type_aliases import IntoExpr
from polars_iptools.typing import IntoExpr

LIB = Path(__file__).parent

from polars_iptools.utils import (
get_shared_lib_location,
parse_into_expr,
register_plugin,
)

__all__ = [
"asn",
"full",
]

lib = get_shared_lib_location()


def asn(expr: IntoExpr, reload_mmdb: bool = False) -> pl.Expr:
"""
Expand Down Expand Up @@ -61,15 +58,14 @@ def asn(expr: IntoExpr, reload_mmdb: bool = False) -> pl.Expr:
-----
- Invalid IP address strings or IPs not found in the database will result in an empty string output.
"""
expr = parse_into_expr(expr)
return register_plugin(
return register_plugin_function(
args=[expr],
symbol="pl_get_asn",
plugin_path=LIB,
function_name="pl_get_asn",
kwargs={
"reload_mmdb": reload_mmdb,
},
is_elementwise=True,
lib=lib,
)


Expand Down Expand Up @@ -131,15 +127,14 @@ def full(expr: IntoExpr, reload_mmdb: bool = False) -> pl.Expr:
-----
- IP addresses that are invalid or not found in the database will result in `null` values in the respective fields.
"""
expr = parse_into_expr(expr)
return register_plugin(
return register_plugin_function(
args=[expr],
symbol="pl_full_geoip",
plugin_path=LIB,
function_name="pl_full_geoip",
kwargs={
"reload_mmdb": reload_mmdb,
},
is_elementwise=True,
lib=lib,
)


Expand Down
60 changes: 31 additions & 29 deletions polars_iptools/iptools.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
from __future__ import annotations

from collections.abc import Iterable
from pathlib import Path
from typing import TYPE_CHECKING, Union

import polars as pl
from polars.plugins import register_plugin_function

if TYPE_CHECKING:
from polars.type_aliases import IntoExpr
from polars_iptools.typing import IntoExpr

from polars_iptools.utils import (
get_shared_lib_location,
parse_into_expr,
register_plugin,
)
LIB = Path(__file__).parent

__all__ = [
"is_valid",
Expand All @@ -23,8 +21,6 @@
"extract_all_ips",
]

lib = get_shared_lib_location()


# from https://github.com/erichutchins/geoipsed which also uses rust regex crate
IPV4_PATT = (
Expand All @@ -39,12 +35,11 @@ def is_valid(expr: IntoExpr) -> pl.Expr:
"""
Returns a boolean indicating whether the string is a valid IPv4 or IPv6 address
"""
expr = parse_into_expr(expr)
return register_plugin(
return register_plugin_function(
args=[expr],
symbol="pl_is_valid",
plugin_path=LIB,
function_name="pl_is_valid",
is_elementwise=True,
lib=lib,
)


Expand All @@ -53,40 +48,44 @@ def is_private(expr: IntoExpr) -> pl.Expr:
Returns a boolean indicating whether the string is an IETF RFC 1918 IPv4 address
If the input is an IPv6 address or an invalid IP, this returns False
"""
expr = parse_into_expr(expr)
return register_plugin(
return register_plugin_function(
args=[expr],
symbol="pl_is_private",
plugin_path=LIB,
function_name="pl_is_private",
is_elementwise=True,
lib=lib,
)


def ipv4_to_numeric(expr: IntoExpr) -> pl.Expr:
"""
Returns numeric representation (u32) of IPv4 address string
"""
expr = parse_into_expr(expr)
return register_plugin(
return register_plugin_function(
args=[expr],
symbol="pl_ipv4_to_numeric",
plugin_path=LIB,
function_name="pl_ipv4_to_numeric",
is_elementwise=True,
lib=lib,
)


def numeric_to_ipv4(expr: IntoExpr) -> pl.Expr:
"""
Returns IPv4 address string from its numeric representation
"""
expr = parse_into_expr(expr)
# Convert to a polars expression if not already one
if isinstance(expr, str):
expr = pl.col(expr)
elif isinstance(expr, pl.Series):
expr = pl.lit(expr)

# cast to UInt32 and leave any errors as nulls
expr = expr.cast(pl.UInt32, strict=False)
return register_plugin(

return register_plugin_function(
args=[expr],
symbol="pl_numeric_to_ipv4",
plugin_path=LIB,
function_name="pl_numeric_to_ipv4",
is_elementwise=True,
lib=lib,
)


Expand Down Expand Up @@ -136,8 +135,12 @@ def extract_all_ips(expr: IntoExpr, ipv6: bool = False) -> pl.Expr:
Expr
Expression of data type `List(String)`.
"""
# Convert to a polars expression if not already one
if isinstance(expr, str):
expr = pl.col(expr)
elif isinstance(expr, pl.Series):
expr = pl.lit(expr)

expr = parse_into_expr(expr)
if ipv6:
return expr.str.extract_all(ALL_IP_PATT)
else:
Expand Down Expand Up @@ -183,12 +186,11 @@ def is_in(expr: IntoExpr, networks: Union[pl.Expr, Iterable[str]]) -> pl.Expr:

nets = nets.unique().drop_nulls()

expr = parse_into_expr(expr)
return register_plugin(
return register_plugin_function(
args=[expr, nets],
symbol="pl_is_in",
plugin_path=LIB,
function_name="pl_is_in",
is_elementwise=True,
lib=lib,
)


Expand Down
15 changes: 15 additions & 0 deletions polars_iptools/typing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from typing import TYPE_CHECKING, Union

if TYPE_CHECKING:
import sys

import polars as pl

if sys.version_info >= (3, 10):
from typing import TypeAlias
else:
from typing_extensions import TypeAlias
from polars.datatypes import DataType, DataTypeClass

IntoExpr: TypeAlias = Union[pl.Expr, str, pl.Series]
PolarsDataType: TypeAlias = Union[DataType, DataTypeClass]
98 changes: 0 additions & 98 deletions polars_iptools/utils.py

This file was deleted.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[build-system]
requires = ["maturin>=1.0,<2.0", "polars>=0.20.6"]
requires = ["maturin>=1.0,<2.0", "polars>=1.3.0"]
build-backend = "maturin"

[project]
Expand Down
8 changes: 4 additions & 4 deletions src/geoip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ fn pl_full_geoip(inputs: &[Series], kwargs: GeoIPKwargs) -> PolarsResult<Series>
let longitude_series = longitude_builder.finish().into_series();
let timezone_series = timezone_builder.finish().into_series();

let out = StructChunked::new(
StructChunked::from_series(
"geoip",
&[
asnnum_series,
Expand All @@ -198,8 +198,8 @@ fn pl_full_geoip(inputs: &[Series], kwargs: GeoIPKwargs) -> PolarsResult<Series>
longitude_series,
timezone_series,
],
)?;
Ok(out.into_series())
)
.map(|ca| ca.into_series())
}

// Get ASN and org name for Internet routed IP addresses
Expand All @@ -222,7 +222,7 @@ fn pl_get_asn(inputs: &[Series], kwargs: GeoIPKwargs) -> PolarsResult<Series> {

let ca: &StringChunked = inputs[0].str()?;

let out: StringChunked = ca.apply_to_buffer(|value: &str, output: &mut String| {
let out: StringChunked = ca.apply_into_string_amortized(|value: &str, output: &mut String| {
if let Ok(ip) = value.parse::<IpAddr>() {
// only emit ASN information if we have a) a valid IP and b) it exists
// in the asn mmdb. if it's a valid ip but not in the mmdb (e.g. private IPs),
Expand Down
Loading

0 comments on commit 3834972

Please sign in to comment.