Skip to content

Commit

Permalink
YDB-2321 Add subnet operations in IP UDF (ydb-platform#981)
Browse files Browse the repository at this point in the history
* Add basic subnet functions in IP UDF

* Minor fixes

* Small fixes

* Post-merge fixes

* Add subnet match

* Add GetSubnetByMask

* Add & canonize tests

* Add RU docs for subnet ops

* Add EN docs for subnet ops

* Revert library changes

* Add usage clarification

* Add usage examples for IP subnet ops

* Add usage examples for IP subnet ops (RU)

* Fix codestyle

* Remove unnecessary string allocation
  • Loading branch information
avevad authored Jan 25, 2024
1 parent 2547396 commit 4eceb69
Show file tree
Hide file tree
Showing 8 changed files with 465 additions and 30 deletions.
19 changes: 19 additions & 0 deletions ydb/docs/en/core/yql/reference/yql-core/udf/list/ip.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,17 @@ The `Ip` module supports both the IPv4 and IPv6 addresses. By default, they are
**List of functions**

* ```Ip::FromString(String{Flags:AutoMap}) -> String?``` - From a human-readable representation to a binary representation.
* ```Ip::SubnetFromString(String{Flags:AutoMap}) -> String?``` - From a human-readable representation of a subnet to a binary representation.
* ```Ip::ToString(String{Flags:AutoMap}) -> String?``` - From a binary representation to a human-readable representation.
* ```Ip::SubnetToString(String{Flags:AutoMap}) -> String?``` - From a binary representation of a subnet to a human-readable representation.
* ```Ip::IsIPv4(String?) -> Bool```
* ```Ip::IsIPv6(String?) -> Bool```
* ```Ip::IsEmbeddedIPv4(String?) -> Bool```
* ```Ip::ConvertToIPv6(String{Flags:AutoMap}) -> String```: IPv6 remains unchanged, and IPv4 becomes embedded in IPv6
* ```Ip::GetSubnet(String{Flags:AutoMap}, [Uint8?]) -> String```: The second argument is the subnet size, by default it's 24 for IPv4 and 64 for IPv6
* ```Ip::GetSubnetByMask(String{Flags:AutoMap}, String{Flags:AutoMap}) -> String```: The first argument is the base address, the second argument is the bit mask of a desired subnet.
* ```Ip::SubnetMatch(String{Flags:AutoMap}, String{Flags:AutoMap}) -> Bool```: The first argument is a subnet, the second argument is a subnet or an address.


**Examples**

Expand All @@ -25,5 +30,19 @@ SELECT
Ip::FromString("213.180.193.3")
)
); -- "213.180.193.0"

SELECT
Ip::SubnetMatch(
Ip::SubnetFromString("192.168.0.1/16"),
Ip::FromString("192.168.1.14")
); -- true

SELECT
Ip::ToString(
Ip::GetSubnetByMask(
Ip::FromString("192.168.0.1"),
Ip::FromString("255.255.0.0")
)
); -- "192.168.0.0"
```

18 changes: 18 additions & 0 deletions ydb/docs/ru/core/yql/reference/yql-core/udf/list/ip.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@
**Список функций**

* ```Ip::FromString(String{Flags:AutoMap}) -> String?``` - из человекочитаемого представления в бинарное
* ```Ip::SubnetFromString(String{Flags:AutoMap}) -> String?``` - из человекочитаемого представления подсети в бинарное
* ```Ip::ToString(String{Flags:AutoMap}) -> String?``` - из бинарного представления в человекочитаемое
* ```Ip::SubnetToString(String{Flags:AutoMap}) -> String?``` - из бинарного представления подсети в человекочитаемое
* ```Ip::IsIPv4(String?) -> Bool```
* ```Ip::IsIPv6(String?) -> Bool```
* ```Ip::IsEmbeddedIPv4(String?) -> Bool```
* ```Ip::ConvertToIPv6(String{Flags:AutoMap}) -> String``` - IPv6 остается без изменений, а IPv4 становится embedded в IPv6
* ```Ip::GetSubnet(String{Flags:AutoMap}, [Uint8?]) -> String``` - во втором аргументе размер подсети, по умолчанию 24 для IPv4 и 64 для IPv6
* ```Ip::GetSubnetByMask(String{Flags:AutoMap}, String{Flags:AutoMap}) -> String``` - во втором аргументе битовая маска подсети
* ```Ip::SubnetMatch(String{Flags:AutoMap}, String{Flags:AutoMap}) -> Bool``` - в первом аргументе подсеть, во втором аргументе подсеть или адрес

**Примеры**

Expand All @@ -24,4 +28,18 @@ SELECT
Ip::FromString("213.180.193.3")
)
); -- "213.180.193.0"

SELECT
Ip::SubnetMatch(
Ip::SubnetFromString("192.168.0.1/16"),
Ip::FromString("192.168.1.14")
); -- true

SELECT
Ip::ToString(
Ip::GetSubnetByMask(
Ip::FromString("192.168.0.1"),
Ip::FromString("255.255.0.0")
)
); -- "192.168.0.0"
```
247 changes: 217 additions & 30 deletions ydb/library/yql/udfs/common/ip_base/lib/ip_base_udf.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,67 +14,214 @@ namespace {
using TUnboxedValue = NKikimr::NUdf::TUnboxedValue;
using TUnboxedValuePod = NKikimr::NUdf::TUnboxedValuePod;

// Derives a prefix length from an address range: the address width in bits
// (32 for IPv4, otherwise 128) minus the index of the most significant set
// bit of the range size.
// NOTE(review): the result is only a meaningful prefix when the range size is
// a power of two — confirm callers only pass CIDR-shaped ranges.
ui8 GetAddressRangePrefix(const TIpAddressRange& range) {
    // A range holding both the all-zero and the all-one IPv6 address covers
    // everything; it is answered up front without consulting Size().
    // NOTE(review): presumably because 2^128 does not fit into ui128 — confirm.
    const bool coversEverything =
        range.Contains(TIpv6Address(ui128(0), TIpv6Address::Ipv6)) &&
        range.Contains(TIpv6Address(ui128(-1), TIpv6Address::Ipv6));
    if (coversEverything) {
        return 0;
    }

    ui128 count = range.Size();
    if (count == 0) {
        return range.Type() == TIpv6Address::Ipv4 ? 32 : 128;
    }

    size_t highestBit = MostSignificantBit(count);
    return ui8((range.Type() == TIpv6Address::Ipv4 ? 32 : 128) - highestBit);
}

struct TRawIp4 {
ui8 a, b, c, d;

static TRawIp4 FromIpAddress(const TIpv6Address& addr) {
ui128 x = addr;
return {
ui8(x >> 24 & 0xff),
ui8(x >> 16 & 0xff),
ui8(x >> 8 & 0xff),
ui8(x & 0xff)
};
}

static TRawIp4 MaskFromPrefix(ui8 prefix) {
ui128 x = ui128(-1) << int(32 - prefix);
x &= ui128(ui32(-1));
return FromIpAddress({x, TIpv6Address::Ipv4});
}

TIpv6Address ToIpAddress() const {
return {a, b, c, d};
}

std::pair<TRawIp4, TRawIp4> ApplyMask(const TRawIp4& mask) const {
return {{
ui8(a & mask.a),
ui8(b & mask.b),
ui8(c & mask.c),
ui8(d & mask.d)
},{
ui8(a | ~mask.a),
ui8(b | ~mask.b),
ui8(c | ~mask.c),
ui8(d | ~mask.d)
}};
}
};

struct TRawIp4Subnet {
TRawIp4 base, mask;

static TRawIp4Subnet FromIpRange(const TIpAddressRange& range) {
return {TRawIp4::FromIpAddress(*range.Begin()), TRawIp4::MaskFromPrefix(GetAddressRangePrefix(range))};
}

TIpAddressRange ToIpRange() const {
auto range = base.ApplyMask(mask);
return {range.first.ToIpAddress(), range.second.ToIpAddress()};
}
};

// Sixteen bytes of an IPv6 address, most significant byte first. Each letter
// is one 16-bit group; the `1` suffix is its high byte, `0` its low byte.
// The struct is copied byte-for-byte to and from the serialized string form,
// so the field order is the binary layout.
struct TRawIp6 {
    ui8 a1, a0, b1, b0, c1, c0, d1, d0, e1, e0, f1, f0, g1, g0, h1, h0;

    // Splits the 128-bit numeric address value into bytes.
    static TRawIp6 FromIpAddress(const TIpv6Address& addr) {
        ui128 x = addr;
        return {
            ui8(x >> 120 & 0xff), ui8(x >> 112 & 0xff),
            ui8(x >> 104 & 0xff), ui8(x >> 96 & 0xff),
            ui8(x >> 88 & 0xff), ui8(x >> 80 & 0xff),
            ui8(x >> 72 & 0xff), ui8(x >> 64 & 0xff),
            ui8(x >> 56 & 0xff), ui8(x >> 48 & 0xff),
            ui8(x >> 40 & 0xff), ui8(x >> 32 & 0xff),
            ui8(x >> 24 & 0xff), ui8(x >> 16 & 0xff),
            ui8(x >> 8 & 0xff), ui8(x & 0xff)
        };
    }

    // Builds a netmask whose `prefix` leading bits (out of 128) are set.
    static TRawIp6 MaskFromPrefix(ui8 prefix) {
        // A zero prefix would require shifting by the full 128-bit width,
        // which is not guaranteed to produce zero (it is undefined for
        // built-in integers), so that case is answered without shifting.
        ui128 x = ui128(0);
        if (prefix != 0) {
            x = ui128(-1) << int(128 - prefix);
        }
        return FromIpAddress({x, TIpv6Address::Ipv6});
    }

    // Rebuilds the library address object from eight 16-bit groups.
    TIpv6Address ToIpAddress() const {
        return {ui16(ui32(a1) << ui32(8) | ui32(a0)),
                ui16(ui32(b1) << ui32(8) | ui32(b0)),
                ui16(ui32(c1) << ui32(8) | ui32(c0)),
                ui16(ui32(d1) << ui32(8) | ui32(d0)),
                ui16(ui32(e1) << ui32(8) | ui32(e0)),
                ui16(ui32(f1) << ui32(8) | ui32(f0)),
                ui16(ui32(g1) << ui32(8) | ui32(g0)),
                ui16(ui32(h1) << ui32(8) | ui32(h0)),
        };
    }

    // Returns {this & mask, this | ~mask}; TRawIp6Subnet::ToIpRange uses the
    // pair as the first and last address of the subnet.
    std::pair<TRawIp6, TRawIp6> ApplyMask(const TRawIp6& mask) const {
        return { {
            ui8(a1 & mask.a1),
            ui8(a0 & mask.a0),
            ui8(b1 & mask.b1),
            ui8(b0 & mask.b0),
            ui8(c1 & mask.c1),
            ui8(c0 & mask.c0),
            ui8(d1 & mask.d1),
            ui8(d0 & mask.d0),
            ui8(e1 & mask.e1),
            ui8(e0 & mask.e0),
            ui8(f1 & mask.f1),
            ui8(f0 & mask.f0),
            ui8(g1 & mask.g1),
            ui8(g0 & mask.g0),
            ui8(h1 & mask.h1),
            ui8(h0 & mask.h0)
        }, {
            ui8(a1 | ~mask.a1),
            ui8(a0 | ~mask.a0),
            ui8(b1 | ~mask.b1),
            ui8(b0 | ~mask.b0),
            ui8(c1 | ~mask.c1),
            ui8(c0 | ~mask.c0),
            ui8(d1 | ~mask.d1),
            ui8(d0 | ~mask.d0),
            ui8(e1 | ~mask.e1),
            ui8(e0 | ~mask.e0),
            ui8(f1 | ~mask.f1),
            ui8(f0 | ~mask.f0),
            ui8(g1 | ~mask.g1),
            ui8(g0 | ~mask.g0),
            ui8(h1 | ~mask.h1),
            ui8(h0 | ~mask.h0)
        }};
    }
};

struct TRawIp6Subnet {
TRawIp6 base, mask;

static TRawIp6Subnet FromIpRange(const TIpAddressRange& range) {
return {TRawIp6::FromIpAddress(*range.Begin()), TRawIp6::MaskFromPrefix(GetAddressRangePrefix(range))};
}

TIpAddressRange ToIpRange() const {
auto range = base.ApplyMask(mask);
return {range.first.ToIpAddress(), range.second.ToIpAddress()};
}
};

// Decodes a binary address produced by SerializeAddress.
// A 4-byte input is read as IPv4 and a 16-byte input as IPv6; any other
// length raises yexception.
TIpv6Address DeserializeAddress(const TStringRef& str) {
    TIpv6Address addr;
    if (str.Size() == 4) {
        TRawIp4 addr4;
        // Copy into a properly typed object instead of aliasing the string buffer.
        memcpy(&addr4, str.Data(), sizeof addr4);
        addr = addr4.ToIpAddress();
    } else if (str.Size() == 16) {
        TRawIp6 addr6;
        memcpy(&addr6, str.Data(), sizeof addr6);
        addr = addr6.ToIpAddress();
    } else {
        ythrow yexception() << "Incorrect size of input, expected "
                            << "4 or 16, got " << str.Size();
    }
    return addr;
}

// Decodes a binary subnet produced by SerializeSubnet into an address range.
// The input length selects the family (sizeof(TRawIp4Subnet) or
// sizeof(TRawIp6Subnet)); any other length raises yexception.
TIpAddressRange DeserializeSubnet(const TStringRef& str) {
    TIpAddressRange decoded;
    const auto length = str.Size();
    if (length == sizeof(TRawIp4Subnet)) {
        TRawIp4Subnet raw4;
        memcpy(&raw4, str.Data(), sizeof raw4);
        decoded = raw4.ToIpRange();
        return decoded;
    }
    if (length == sizeof(TRawIp6Subnet)) {
        TRawIp6Subnet raw6;
        memcpy(&raw6, str.Data(), sizeof raw6);
        decoded = raw6.ToIpRange();
        return decoded;
    }
    ythrow yexception() << "Invalid binary representation";
}

// Encodes an address as raw bytes: 4 bytes for IPv4, 16 bytes for IPv6,
// most significant byte first. Any other address type fails the Y_ENSURE check.
TString SerializeAddress(const TIpv6Address& addr) {
    Y_ENSURE(addr.Type() == TIpv6Address::Ipv4 || addr.Type() == TIpv6Address::Ipv6);
    TString res;
    if (addr.Type() == TIpv6Address::Ipv4) {
        auto addr4 = TRawIp4::FromIpAddress(addr);
        // The raw struct is the wire format, so its bytes are copied verbatim.
        res = TString(reinterpret_cast<const char *>(&addr4), sizeof addr4);
    } else if (addr.Type() == TIpv6Address::Ipv6) {
        auto addr6 = TRawIp6::FromIpAddress(addr);
        res = TString(reinterpret_cast<const char *>(&addr6), sizeof addr6);
    }
    return res;
}

// Encodes a range as a raw subnet (base address followed by netmask).
// Returns an empty string when the range is neither IPv4 nor IPv6.
TString SerializeSubnet(const TIpAddressRange& range) {
    TString encoded;
    if (range.Type() == TIpv6Address::Ipv4) {
        auto raw4 = TRawIp4Subnet::FromIpRange(range);
        const char* bytes = reinterpret_cast<const char *>(&raw4);
        encoded = TString(bytes, sizeof raw4);
        return encoded;
    }
    if (range.Type() == TIpv6Address::Ipv6) {
        auto raw6 = TRawIp6Subnet::FromIpRange(range);
        const char* bytes = reinterpret_cast<const char *>(&raw6);
        encoded = TString(bytes, sizeof raw6);
    }
    return encoded;
}

SIMPLE_STRICT_UDF(TFromString, TOptionalString(TAutoMapString)) {
TIpv6Address addr = TIpv6Address::FromString(args[0].AsStringRef());
if (addr.Type() != TIpv6Address::Ipv4 && addr.Type() != TIpv6Address::Ipv6) {
Expand All @@ -83,10 +230,37 @@ namespace {
return valueBuilder->NewString(SerializeAddress(addr));
}

// Ip::SubnetFromString: parses a textual subnet and returns its binary form,
// or an empty optional when serialization yields an empty string.
// NOTE(review): FromCompactString looks like a throwing parser — confirm that
// malformed input cannot raise out of this strict UDF.
SIMPLE_STRICT_UDF(TSubnetFromString, TOptionalString(TAutoMapString)) {
    TIpAddressRange parsed = TIpAddressRange::FromCompactString(args[0].AsStringRef());
    auto encoded = SerializeSubnet(parsed);
    if (encoded) {
        return valueBuilder->NewString(encoded);
    }
    return TUnboxedValue(TUnboxedValuePod());
}

// Ip::ToString: converts a binary address into its textual form.
// Propagates the exception from DeserializeAddress on a bad input length.
SIMPLE_UDF(TToString, char*(TAutoMapString)) {
    TIpv6Address decoded = DeserializeAddress(args[0].AsStringRef());
    return valueBuilder->NewString(decoded.ToString(false));
}

// Ip::SubnetToString: renders a binary subnet as "<first address>/<prefix>".
// Propagates the exception from DeserializeSubnet on a bad input length.
SIMPLE_UDF(TSubnetToString, char*(TAutoMapString)) {
    TStringBuilder text;
    auto subnet = DeserializeSubnet(args[0].AsStringRef());
    text << (*subnet.Begin()).ToString(false)
         << '/'
         << ToString(GetAddressRangePrefix(subnet));
    return valueBuilder->NewString(text);
}

// Ip::SubnetMatch: tells whether the subnet in the first argument contains
// the second argument, which may be a single address or another subnet.
SIMPLE_UDF(TSubnetMatch, bool(TAutoMapString, TAutoMapString)) {
    Y_UNUSED(valueBuilder);
    auto subnet = DeserializeSubnet(args[0].AsStringRef());

    // The second argument is told apart by length: address-sized input is a
    // single address, anything else is decoded as a whole subnet.
    const auto candidateSize = args[1].AsStringRef().Size();
    const bool isSingleAddress = candidateSize == sizeof(TRawIp4) || candidateSize == sizeof(TRawIp6);
    if (!isSingleAddress) {
        auto inner = DeserializeSubnet(args[1].AsStringRef());
        return TUnboxedValuePod(subnet.Contains(inner));
    }

    auto address = DeserializeAddress(args[1].AsStringRef());
    return TUnboxedValuePod(subnet.Contains(address));
}

SIMPLE_STRICT_UDF(TIsIPv4, bool(TOptionalString)) {
Y_UNUSED(valueBuilder);
bool result = false;
Expand Down Expand Up @@ -159,14 +333,27 @@ namespace {
return valueBuilder->NewString(SerializeAddress(beg));
}

// Ip::GetSubnetByMask: returns the base address with the bit mask applied
// (bitwise AND), serialized in the same family as the base.
// Throws when the base and the mask are of different address types.
SIMPLE_UDF(TGetSubnetByMask, char*(TAutoMapString, TAutoMapString)) {
    TIpv6Address base = DeserializeAddress(args[0].AsStringRef());
    TIpv6Address mask = DeserializeAddress(args[1].AsStringRef());
    if (base.Type() != mask.Type()) {
        ythrow yexception() << "Base and mask differ in length";
    }
    TIpv6Address masked(ui128(base) & ui128(mask), base.Type());
    return valueBuilder->NewString(SerializeAddress(masked));
}

// Comma-separated list of every UDF class defined in this header.
// Each entry except the last needs a trailing comma and a line continuation.
#define EXPORTED_IP_BASE_UDF \
    TFromString, \
    TSubnetFromString, \
    TToString, \
    TSubnetToString, \
    TIsIPv4, \
    TIsIPv6, \
    TIsEmbeddedIPv4, \
    TConvertToIPv6, \
    TGetSubnet, \
    TSubnetMatch, \
    TGetSubnetByMask
}


Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,10 @@
{
"uri": "file://test.test_Basic_/results.txt"
}
],
"test.test[Subnets]": [
{
"uri": "file://test.test_Subnets_/results.txt"
}
]
}
Loading

0 comments on commit 4eceb69

Please sign in to comment.