Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

YDB-2321 Add subnet operations in IP UDF #981

Merged
merged 15 commits into from
Jan 25, 2024
19 changes: 19 additions & 0 deletions ydb/docs/en/core/yql/reference/yql-core/udf/list/ip.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,17 @@ The `Ip` module supports both the IPv4 and IPv6 addresses. By default, they are
**List of functions**

* ```Ip::FromString(String{Flags:AutoMap}) -> String?``` - From a human-readable representation to a binary representation.
* ```Ip::SubnetFromString(String{Flags:AutoMap}) -> String?``` - From a human-readable representation of subnet to a binary representation.
* ```Ip::ToString(String{Flags:AutoMap}) -> String?``` - From a binary representation to a human-readable representation.
* ```Ip::SubnetToString(String{Flags:AutoMap}) -> String?``` - From a binary representation of subnet to a human-readable representation.
* ```Ip::IsIPv4(String?) -> Bool```
* ```Ip::IsIPv6(String?) -> Bool```
* ```Ip::IsEmbeddedIPv4(String?) -> Bool```
* ```Ip::ConvertToIPv6(String{Flags:AutoMap}) -> String```: IPv6 remains unchanged, and IPv4 becomes embedded in IPv6
* ```Ip::GetSubnet(String{Flags:AutoMap}, [Uint8?]) -> String```: The second argument is the subnet size, by default it's 24 for IPv4 and 64 for IPv6
* ```Ip::GetSubnetByMask(String{Flags:AutoMap}, String{Flags:AutoMap}) -> String```: The first argument is the base address, the second argument is the bit mask of a desired subnet.
* ```Ip::SubnetMatch(String{Flags:AutoMap}, String{Flags:AutoMap}) -> Bool```: The first argument is a subnet, the second argument is a subnet or an address.


**Examples**
avevad marked this conversation as resolved.
Show resolved Hide resolved

Expand All @@ -25,5 +30,19 @@ SELECT
Ip::FromString("213.180.193.3")
)
); -- "213.180.193.0"

SELECT
Ip::SubnetMatch(
Ip::SubnetFromString("192.168.0.1/16"),
Ip::FromString("192.168.1.14"),
); -- true

SELECT
Ip::ToString(
Ip::GetSubnetByMask(
Ip::FromString("192.168.0.1"),
Ip::FromString("255.255.0.0")
)
); -- "192.168.0.0"
```

18 changes: 18 additions & 0 deletions ydb/docs/ru/core/yql/reference/yql-core/udf/list/ip.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@
**Список функций**

* ```Ip::FromString(String{Flags:AutoMap}) -> String?``` - из человекочитаемого представления в бинарное
* ```Ip::SubnetFromString(String{Flags:AutoMap}) -> String?``` - из человекочитаемого представления подсети в бинарное
* ```Ip::ToString(String{Flags:AutoMap}) -> String?``` - из бинарного представления в человекочитаемое
* ```Ip::SubnetToString(String{Flags:AutoMap}) -> String?``` - из бинарного представления подсети в человекочитаемое
* ```Ip::IsIPv4(String?) -> Bool```
* ```Ip::IsIPv6(String?) -> Bool```
* ```Ip::IsEmbeddedIPv4(String?) -> Bool```
* ```Ip::ConvertToIPv6(String{Flags:AutoMap}) -> String``` - IPv6 остается без изменений, а IPv4 становится embedded в IPv6
* ```Ip::GetSubnet(String{Flags:AutoMap}, [Uint8?]) -> String``` - во втором аргументе размер подсети, по умолчанию 24 для IPv4 и 64 для IPv6
* ```Ip::GetSubnetByMask(String{Flags:AutoMap}, String{Flags:AutoMap}) -> String``` - в первом аргументе базовый адрес, а во втором битовая маска подсети
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
* ```Ip::GetSubnetByMask(String{Flags:AutoMap}, String{Flags:AutoMap}) -> String``` - во втором аргументе битовая маска подсети
* ```Ip::GetSubnetByMask(String{Flags:AutoMap}, String{Flags:AutoMap}) -> String``` - в первом аргументе базовый адрес, а во втором битовая маска подсети

* ```Ip::SubnetMatch(String{Flags:AutoMap}, String{Flags:AutoMap}) -> Bool``` - в первом аргументе подсеть, во втором аргументе подсеть или адрес

**Примеры**

Expand All @@ -24,4 +28,18 @@ SELECT
Ip::FromString("213.180.193.3")
)
); -- "213.180.193.0"

SELECT
Ip::SubnetMatch(
Ip::SubnetFromString("192.168.0.1/16"),
Ip::FromString("192.168.1.14"),
); -- true

SELECT
Ip::ToString(
Ip::GetSubnetByMask(
Ip::FromString("192.168.0.1"),
Ip::FromString("255.255.0.0")
)
); -- "192.168.0.0"
```
247 changes: 217 additions & 30 deletions ydb/library/yql/udfs/common/ip_base/lib/ip_base_udf.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,67 +14,214 @@ namespace {
using TUnboxedValue = NKikimr::NUdf::TUnboxedValue;
using TUnboxedValuePod = NKikimr::NUdf::TUnboxedValuePod;

// Returns the prefix length (number of leading network bits) that corresponds
// to the size of `range`: 32 or 128 minus the index of the most significant
// set bit of the range size, depending on the address family.
ui8 GetAddressRangePrefix(const TIpAddressRange& range) {
    // A range spanning the whole IPv6 space is reported as prefix 0 up front,
    // before its size is inspected.
    const bool coversWholeIp6Space =
        range.Contains(TIpv6Address(ui128(0), TIpv6Address::Ipv6)) &&
        range.Contains(TIpv6Address(ui128(-1), TIpv6Address::Ipv6));
    if (coversWholeIp6Space) {
        return 0;
    }

    ui128 rangeSize = range.Size();
    if (rangeSize == 0) {
        // A zero size is mapped to the full-length prefix of the family.
        return range.Type() == TIpv6Address::Ipv4 ? 32 : 128;
    }

    size_t highestBit = MostSignificantBit(rangeSize);
    return ui8((range.Type() == TIpv6Address::Ipv4 ? 32 : 128) - highestBit);
}

struct TRawIp4 {
ui8 a, b, c, d;

static TRawIp4 FromIpAddress(const TIpv6Address& addr) {
ui128 x = addr;
return {
ui8(x >> 24 & 0xff),
ui8(x >> 16 & 0xff),
ui8(x >> 8 & 0xff),
ui8(x & 0xff)
};
}

static TRawIp4 MaskFromPrefix(ui8 prefix) {
ui128 x = ui128(-1) << int(32 - prefix);
x &= ui128(ui32(-1));
return FromIpAddress({x, TIpv6Address::Ipv4});
}

TIpv6Address ToIpAddress() const {
return {a, b, c, d};
}

std::pair<TRawIp4, TRawIp4> ApplyMask(const TRawIp4& mask) const {
return {{
ui8(a & mask.a),
ui8(b & mask.b),
ui8(c & mask.c),
ui8(d & mask.d)
},{
ui8(a | ~mask.a),
ui8(b | ~mask.b),
ui8(c | ~mask.c),
ui8(d | ~mask.d)
}};
}
};

struct TRawIp4Subnet {
TRawIp4 base, mask;

static TRawIp4Subnet FromIpRange(const TIpAddressRange& range) {
return {TRawIp4::FromIpAddress(*range.Begin()), TRawIp4::MaskFromPrefix(GetAddressRangePrefix(range))};
}

TIpAddressRange ToIpRange() const {
auto range = base.ApplyMask(mask);
return {range.first.ToIpAddress(), range.second.ToIpAddress()};
}
};

// Binary (serialized) form of an IPv6 address: sixteen bytes, where a1 holds
// the most significant byte of the numeric address value and h0 the least
// significant one. Each letter names one 16-bit group; the "1" suffix is the
// high byte of the group and "0" the low byte. Instances are copied to and from
// raw strings with memcpy, so the field order is part of the format.
struct TRawIp6 {
ui8 a1, a0, b1, b0, c1, c0, d1, d0, e1, e0, f1, f0, g1, g0, h1, h0;

// Splits the 128-bit numeric value of `addr` into sixteen bytes, most
// significant byte first.
static TRawIp6 FromIpAddress(const TIpv6Address& addr) {
ui128 x = addr;
return {
ui8(x >> 120 & 0xff), ui8(x >> 112 & 0xff),
ui8(x >> 104 & 0xff), ui8(x >> 96 & 0xff),
ui8(x >> 88 & 0xff), ui8(x >> 80 & 0xff),
ui8(x >> 72 & 0xff), ui8(x >> 64 & 0xff),
ui8(x >> 56 & 0xff), ui8(x >> 48 & 0xff),
ui8(x >> 40 & 0xff), ui8(x >> 32 & 0xff),
ui8(x >> 24 & 0xff), ui8(x >> 16 & 0xff),
ui8(x >> 8 & 0xff), ui8(x & 0xff)
};
}

// Builds a netmask whose `prefix` leading bits (out of 128) are set.
static TRawIp6 MaskFromPrefix(ui8 prefix) {
ui128 x = ui128(-1) << int(128 - prefix);
// A zero prefix is forced to an all-zero mask explicitly.
// NOTE(review): presumably because a shift by the full 128-bit width is not
// relied upon to produce zero - confirm against the ui128 implementation.
if (prefix == 0) x = 0;
return FromIpAddress({x, TIpv6Address::Ipv6});
}

// Reassembles the sixteen bytes into eight 16-bit groups (high byte shifted
// left by 8, OR-ed with the low byte) and builds an address from them.
TIpv6Address ToIpAddress() const {
return {ui16(ui32(a1) << ui32(8) | ui32(a0)),
ui16(ui32(b1) << ui32(8) | ui32(b0)),
ui16(ui32(c1) << ui32(8) | ui32(c0)),
ui16(ui32(d1) << ui32(8) | ui32(d0)),
ui16(ui32(e1) << ui32(8) | ui32(e0)),
ui16(ui32(f1) << ui32(8) | ui32(f0)),
ui16(ui32(g1) << ui32(8) | ui32(g0)),
ui16(ui32(h1) << ui32(8) | ui32(h0)),
};
}

// Returns a pair computed byte by byte:
//   first  = this & mask   (lowest address selected by the mask)
//   second = this | ~mask  (highest address selected by the mask)
std::pair<TRawIp6, TRawIp6> ApplyMask(const TRawIp6& mask) const {
return { {
ui8(a1 & mask.a1),
ui8(a0 & mask.a0),
ui8(b1 & mask.b1),
ui8(b0 & mask.b0),
ui8(c1 & mask.c1),
ui8(c0 & mask.c0),
ui8(d1 & mask.d1),
ui8(d0 & mask.d0),
ui8(e1 & mask.e1),
ui8(e0 & mask.e0),
ui8(f1 & mask.f1),
ui8(f0 & mask.f0),
ui8(g1 & mask.g1),
ui8(g0 & mask.g0),
ui8(h1 & mask.h1),
ui8(h0 & mask.h0)
}, {
ui8(a1 | ~mask.a1),
ui8(a0 | ~mask.a0),
ui8(b1 | ~mask.b1),
ui8(b0 | ~mask.b0),
ui8(c1 | ~mask.c1),
ui8(c0 | ~mask.c0),
ui8(d1 | ~mask.d1),
ui8(d0 | ~mask.d0),
ui8(e1 | ~mask.e1),
ui8(e0 | ~mask.e0),
ui8(f1 | ~mask.f1),
ui8(f0 | ~mask.f0),
ui8(g1 | ~mask.g1),
ui8(g0 | ~mask.g0),
ui8(h1 | ~mask.h1),
ui8(h0 | ~mask.h0)
}};
}
};

struct TRawIp6Subnet {
TRawIp6 base, mask;

static TRawIp6Subnet FromIpRange(const TIpAddressRange& range) {
return {TRawIp6::FromIpAddress(*range.Begin()), TRawIp6::MaskFromPrefix(GetAddressRangePrefix(range))};
}

TIpAddressRange ToIpRange() const {
auto range = base.ApplyMask(mask);
return {range.first.ToIpAddress(), range.second.ToIpAddress()};
}
};

// Decodes the binary representation of an address.
//
// `str` must hold exactly sizeof(TRawIp4) bytes (IPv4) or sizeof(TRawIp6)
// bytes (IPv6); the bytes are copied into the matching raw struct and turned
// into an address object by its ToIpAddress().
//
// Throws yexception when the input has any other size.
TIpv6Address DeserializeAddress(const TStringRef& str) {
    // The error message below spells the sizes out, so keep them in sync.
    static_assert(sizeof(TRawIp4) == 4, "TRawIp4 must be exactly 4 bytes");
    static_assert(sizeof(TRawIp6) == 16, "TRawIp6 must be exactly 16 bytes");

    TIpv6Address addr;
    if (str.Size() == sizeof(TRawIp4)) {
        TRawIp4 addr4;
        memcpy(&addr4, str.Data(), sizeof addr4);
        addr = addr4.ToIpAddress();
    } else if (str.Size() == sizeof(TRawIp6)) {
        TRawIp6 addr6;
        memcpy(&addr6, str.Data(), sizeof addr6);
        addr = addr6.ToIpAddress();
    } else {
        ythrow yexception() << "Incorrect size of input, expected "
                            << "4 or 16, got " << str.Size();
    }
    return addr;
}

// Decodes the binary representation of a subnet.
//
// The input size selects the family: sizeof(TRawIp4Subnet) bytes for IPv4 or
// sizeof(TRawIp6Subnet) bytes for IPv6. Throws yexception for any other size.
TIpAddressRange DeserializeSubnet(const TStringRef& str) {
    const auto inputSize = str.Size();

    if (inputSize == sizeof(TRawIp4Subnet)) {
        TRawIp4Subnet raw4;
        memcpy(&raw4, str.Data(), sizeof raw4);
        return raw4.ToIpRange();
    }

    if (inputSize == sizeof(TRawIp6Subnet)) {
        TRawIp6Subnet raw6;
        memcpy(&raw6, str.Data(), sizeof raw6);
        return raw6.ToIpRange();
    }

    ythrow yexception() << "Invalid binary representation";
}

// Encodes an address into its binary representation: the raw bytes of a
// TRawIp4 for IPv4 or of a TRawIp6 for IPv6.
//
// Y_ENSURE rejects addresses whose type is neither Ipv4 nor Ipv6.
TString SerializeAddress(const TIpv6Address& addr) {
    Y_ENSURE(addr.Type() == TIpv6Address::Ipv4 || addr.Type() == TIpv6Address::Ipv6);
    TString res;
    if (addr.Type() == TIpv6Address::Ipv4) {
        // Byte extraction lives in TRawIp4::FromIpAddress; the struct is then
        // copied out verbatim.
        auto addr4 = TRawIp4::FromIpAddress(addr);
        res = TString(reinterpret_cast<const char *>(&addr4), sizeof addr4);
    } else if (addr.Type() == TIpv6Address::Ipv6) {
        auto addr6 = TRawIp6::FromIpAddress(addr);
        res = TString(reinterpret_cast<const char *>(&addr6), sizeof addr6);
    }
    return res;
}

// Encodes a subnet into its binary representation: the raw bytes of a
// TRawIp4Subnet for IPv4 or of a TRawIp6Subnet for IPv6.
// Returns an empty string when the range is of neither type.
TString SerializeSubnet(const TIpAddressRange& range) {
    const auto family = range.Type();

    if (family == TIpv6Address::Ipv4) {
        auto raw4 = TRawIp4Subnet::FromIpRange(range);
        return TString(reinterpret_cast<const char *>(&raw4), sizeof raw4);
    }

    if (family == TIpv6Address::Ipv6) {
        auto raw6 = TRawIp6Subnet::FromIpRange(range);
        return TString(reinterpret_cast<const char *>(&raw6), sizeof raw6);
    }

    return TString();
}

SIMPLE_STRICT_UDF(TFromString, TOptionalString(TAutoMapString)) {
TIpv6Address addr = TIpv6Address::FromString(args[0].AsStringRef());
if (addr.Type() != TIpv6Address::Ipv4 && addr.Type() != TIpv6Address::Ipv6) {
Expand All @@ -83,10 +230,37 @@ namespace {
return valueBuilder->NewString(SerializeAddress(addr));
}

// Ip::SubnetFromString: parses a human-readable subnet and returns its binary
// representation, or an empty optional when serialization yields nothing.
// NOTE(review): the UDF is declared strict; confirm that
// TIpAddressRange::FromCompactString cannot throw on malformed input.
SIMPLE_STRICT_UDF(TSubnetFromString, TOptionalString(TAutoMapString)) {
    TIpAddressRange parsed = TIpAddressRange::FromCompactString(args[0].AsStringRef());
    auto serialized = SerializeSubnet(parsed);
    if (serialized) {
        return valueBuilder->NewString(serialized);
    }
    return TUnboxedValue(TUnboxedValuePod());
}

// Ip::ToString: converts the binary representation of an address into its
// human-readable form. Invalid input sizes surface as the exception thrown by
// DeserializeAddress.
SIMPLE_UDF(TToString, char*(TAutoMapString)) {
    TIpv6Address address = DeserializeAddress(args[0].AsStringRef());
    return valueBuilder->NewString(address.ToString(false));
}

// Ip::SubnetToString: converts the binary representation of a subnet into
// "<first address>/<prefix length>".
SIMPLE_UDF(TSubnetToString, char*(TAutoMapString)) {
    auto subnet = DeserializeSubnet(args[0].AsStringRef());
    TStringBuilder text;
    text << (*subnet.Begin()).ToString(false)
         << '/'
         << ToString(GetAddressRangePrefix(subnet));
    return valueBuilder->NewString(text);
}

// Ip::SubnetMatch: tells whether the subnet in the first argument contains the
// second argument, which may be either a single address or another subnet.
// The two cases are told apart by the size of the binary value.
SIMPLE_UDF(TSubnetMatch, bool(TAutoMapString, TAutoMapString)) {
    Y_UNUSED(valueBuilder);
    auto subnet = DeserializeSubnet(args[0].AsStringRef());

    const auto candidateSize = args[1].AsStringRef().Size();
    const bool isSingleAddress =
        candidateSize == sizeof(TRawIp4) || candidateSize == sizeof(TRawIp6);

    if (!isSingleAddress) {
        // Anything that is not address-sized is decoded as a whole subnet.
        auto inner = DeserializeSubnet(args[1].AsStringRef());
        return TUnboxedValuePod(subnet.Contains(inner));
    }

    auto address = DeserializeAddress(args[1].AsStringRef());
    return TUnboxedValuePod(subnet.Contains(address));
}

SIMPLE_STRICT_UDF(TIsIPv4, bool(TOptionalString)) {
Y_UNUSED(valueBuilder);
bool result = false;
Expand Down Expand Up @@ -159,14 +333,27 @@ namespace {
return valueBuilder->NewString(SerializeAddress(beg));
}

// Ip::GetSubnetByMask: applies a bit mask (second argument) to a base address
// (first argument) and returns the resulting address in binary form.
// Throws yexception when the base and the mask are of different address types.
SIMPLE_UDF(TGetSubnetByMask, char*(TAutoMapString, TAutoMapString)) {
    const auto baseRef = args[0].AsStringRef();
    const auto maskRef = args[1].AsStringRef();

    TIpv6Address base = DeserializeAddress(baseRef);
    TIpv6Address mask = DeserializeAddress(maskRef);
    if (base.Type() != mask.Type()) {
        ythrow yexception() << "Base and mask differ in length";
    }

    // Bitwise AND of the numeric values keeps only the bits selected by the mask.
    const ui128 maskedBits = ui128(base) & ui128(mask);
    const TIpv6Address subnetAddress(maskedBits, base.Type());
    return valueBuilder->NewString(SerializeAddress(subnetAddress));
}

// Comma-separated list of every UDF class defined in this header.
// Each entry except the last must end with ", \" so the macro stays one list.
#define EXPORTED_IP_BASE_UDF \
    TFromString, \
    TSubnetFromString, \
    TToString, \
    TSubnetToString, \
    TIsIPv4, \
    TIsIPv6, \
    TIsEmbeddedIPv4, \
    TConvertToIPv6, \
    TGetSubnet, \
    TSubnetMatch, \
    TGetSubnetByMask
}


Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,10 @@
{
"uri": "file://test.test_Basic_/results.txt"
}
],
"test.test[Subnets]": [
{
"uri": "file://test.test_Subnets_/results.txt"
}
]
}
Loading
Loading