Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Subpartitioning Python Cosmos DB SDK #31121

Merged
merged 30 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
80cb361
sub partitioning
bambriz Jul 4, 2023
59d3d9d
Additional Sub Partitioning Updates
bambriz Jul 13, 2023
398bf88
Merge branch 'main' into subpartitioning
bambriz Jul 13, 2023
0bea4a7
remove uneeded line
bambriz Jul 13, 2023
4aa2da4
Merge branch 'subpartitioning' of https://github.com/bambriz/azure-sd…
bambriz Jul 13, 2023
fa7225c
update changelog
bambriz Jul 13, 2023
c61942e
pylint fixes
bambriz Jul 13, 2023
8529239
remove debug code on subpartition test
bambriz Jul 13, 2023
6b6fc2a
Merge remote-tracking branch 'upstream/main' into subpartitioning
bambriz Aug 22, 2023
542793e
Adding support for prefix partition queries
bambriz Sep 12, 2023
6ddc172
pylint and cspell fixes
bambriz Sep 13, 2023
1e7e8d4
Merge branch 'Azure:main' into subpartitioning
bambriz Sep 14, 2023
bf0b519
Additional Updates and fixes
bambriz Sep 14, 2023
9f8a930
removing uneeded lines from test config
bambriz Sep 14, 2023
f8c1346
Test fix
bambriz Sep 14, 2023
1349513
update test crud subpartition
bambriz Sep 14, 2023
5c0ce6d
Update test_config.py
bambriz Sep 15, 2023
e224bae
additional feedback fixes
bambriz Sep 15, 2023
373c171
Merge branch 'subpartitioning' of https://github.com/bambriz/azure-sd…
bambriz Sep 15, 2023
572f154
Fixed Python Version Compatibility
bambriz Sep 15, 2023
78d23e4
Fixed small issue causing tests to fail
bambriz Sep 15, 2023
50c9bbc
Testing fix for subpartitioning
bambriz Sep 15, 2023
209ab93
Update test_crud_subpartition_async.py
simorenoh Sep 15, 2023
ebd097f
Update test_crud_subpartition_async.py
simorenoh Sep 15, 2023
b9cc291
Update dev_requirements.txt
simorenoh Sep 18, 2023
710d48e
Update async test and samples
bambriz Oct 3, 2023
1990bed
Change public method to be private
bambriz Oct 3, 2023
763b021
Added support for prefix query involving multiple over lapping ranges
bambriz Oct 6, 2023
1de4181
Better over lapping support and new over lapping range tests
bambriz Oct 9, 2023
fef66b7
Clarified information in some comments
bambriz Oct 10, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sdk/cosmos/azure-cosmos/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
### 4.5.2 (Unreleased)

#### Features Added
* Added Support for Subpartitioning in Python SDK. See [PR 31121](https://github.com/Azure/azure-sdk-for-python/pull/31121)

#### Breaking Changes

Expand Down
100 changes: 51 additions & 49 deletions sdk/cosmos/azure-cosmos/azure/cosmos/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,12 @@ def GetHeaders( # pylint: disable=too-many-statements,too-many-branches
headers[http_constants.HttpHeaders.PartitionKey] = []
# else serialize using json dumps method which apart from regular values will serialize None into null
else:
headers[http_constants.HttpHeaders.PartitionKey] = json.dumps([options["partitionKey"]])
# single partitioning uses a string and needs to be turned into a list
if isinstance(options["partitionKey"], list) and options["partitionKey"]:
bambriz marked this conversation as resolved.
Show resolved Hide resolved
pk_val = json.dumps(options["partitionKey"], separators=(',', ':'))
else:
pk_val = json.dumps([options["partitionKey"]])
headers[http_constants.HttpHeaders.PartitionKey] = pk_val

if options.get("enableCrossPartitionQuery"):
headers[http_constants.HttpHeaders.EnableCrossPartitionQuery] = options["enableCrossPartitionQuery"]
Expand All @@ -224,7 +229,7 @@ def GetHeaders( # pylint: disable=too-many-statements,too-many-branches
headers[http_constants.HttpHeaders.PopulateQueryMetrics] = options["populateQueryMetrics"]

if options.get("responseContinuationTokenLimitInKb"):
headers[http_constants.HttpHeaders.ResponseContinuationTokenLimitInKb] = options["responseContinuationTokenLimitInKb"] # pylint: disable=line-too-long
headers[http_constants.HttpHeaders.ResponseContinuationTokenLimitInKb] = options["responseContinuationTokenLimitInKb"] # pylint: disable=line-too-long

if cosmos_client_connection.master_key:
#formatedate guarantees RFC 1123 date format regardless of current locale
Expand Down Expand Up @@ -606,56 +611,53 @@ def TrimBeginningAndEndingSlashes(path):

# Parses the paths into a list of tokens, each representing a property.
def ParsePaths(paths):
    """Parse one or more partition key paths (e.g. ``"/a/b"``) into property tokens.

    Each path must start with ``/`` and is split on ``/`` into individual
    property names.  Segments wrapped in single or double quotes may contain
    the ``/`` separator (and quotes escaped by a preceding backslash), in
    which case the quoted content is emitted as a single token.  Multiple
    paths are supported for hierarchical ("MultiHash") partition keys; their
    tokens are appended in path order.

    :param list[str] paths: the partition key paths to parse.
    :return: flat list of property-name tokens.
    :rtype: list[str]
    :raises ValueError: if a segment does not follow a separator or a quoted
        segment is unterminated.
    """
    segmentSeparator = "/"
    tokens = []
    for path in paths:
        currentIndex = 0

        while currentIndex < len(path):
            if path[currentIndex] != segmentSeparator:
                # BUGFIX: str(...) is required here — concatenating the int
                # index to a str raised TypeError instead of the intended
                # ValueError.
                raise ValueError("Invalid path character at index " + str(currentIndex))

            currentIndex += 1
            if currentIndex == len(path):
                break

            # " and ' are treated specially in the sense that they can have
            # the / (segment separator) between them, which is then
            # considered part of the token.
            if path[currentIndex] == '"' or path[currentIndex] == "'":
                quote = path[currentIndex]
                newIndex = currentIndex + 1

                while True:
                    newIndex = path.find(quote, newIndex)
                    if newIndex == -1:
                        # Unterminated quoted segment.  BUGFIX: str(...) as above.
                        raise ValueError("Invalid path character at index " + str(currentIndex))

                    # Check if the quote itself is escaped by a preceding \,
                    # in which case it's part of the token.
                    if path[newIndex - 1] != "\\":
                        break
                    newIndex += 1

                # This will extract the token excluding the quote chars.
                token = path[currentIndex + 1: newIndex]
                tokens.append(token)
                currentIndex = newIndex + 1
            else:
                newIndex = path.find(segmentSeparator, currentIndex)
                if newIndex == -1:
                    # Last segment: extract from currentIndex to end of string.
                    token = path[currentIndex:]
                    currentIndex = len(path)
                else:
                    # Extract from currentIndex up to (excluding) the next separator.
                    token = path[currentIndex:newIndex]
                    currentIndex = newIndex

                tokens.append(token.strip())

    return tokens

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@
from . import _request_object
from . import _synchronized_request as synchronized_request
from . import _global_endpoint_manager as global_endpoint_manager
from ._routing import routing_map_provider
from ._routing import routing_map_provider, routing_range
from ._retry_utility import ConnectionRetryPolicy
from . import _session
from . import _utils
from .partition_key import _Undefined, _Empty
from .partition_key import _Undefined, _Empty, PartitionKey
from ._auth_policy import CosmosBearerTokenCredentialPolicy
from ._cosmos_http_logging_policy import CosmosHttpLoggingPolicy

Expand Down Expand Up @@ -2539,6 +2539,46 @@ def __GetBodiesFromQueryResult(result):
# Query operations will use ReadEndpoint even though it uses POST(for regular query operations)
request_params = _request_object.RequestObject(typ, documents._OperationType.SqlQuery)
req_headers = base.GetHeaders(self, initial_headers, "post", path, id_, typ, options, partition_key_range_id)

#check if query has prefix partition key
isPrefixPartitionQuery = kwargs.pop("isPrefixPartitionQuery", None)
if isPrefixPartitionQuery:
# here get the over lapping ranges
partition_key_definition = kwargs.pop("partitionKeyDefinition", None)
pk_properties = partition_key_definition
partition_key_definition = PartitionKey(path=pk_properties["paths"], kind=pk_properties["kind"])
partition_key_value = pk_properties["partition_key"]
feedrangeEPK = partition_key_definition._get_epk_range_for_prefix_partition_key(partition_key_value) # cspell:disable-line # pylint: disable=line-too-long
over_lapping_ranges = self._routing_map_provider.get_overlapping_ranges(id_, [feedrangeEPK])
# It is possible to get more than one over lapping range. We need to get the query results for each one
results = None
for over_lapping_range in over_lapping_ranges:
single_range = routing_range.Range.PartitionKeyRangeToRange(over_lapping_range)
if single_range.min == feedrangeEPK.min and single_range.max == feedrangeEPK.max:
# The EpkRange spans exactly one physical partition
# In this case we can route to the physical pk range id
req_headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_range["id"]
else:
# The EpkRange spans less than single physical partition
# In this case we route to the physical partition and
# pass the epk range headers to filter within partition
req_headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_range["id"]
req_headers[http_constants.HttpHeaders.StartEpkString] = feedrangeEPK.min
req_headers[http_constants.HttpHeaders.EndEpkString] = feedrangeEPK.max
req_headers[http_constants.HttpHeaders.ReadFeedKeyType] = "EffectivePartitionKeyRange"
r, self.last_response_headers = self.__Post(path, request_params, query, req_headers, **kwargs)
if results:
# add up all the query results from all over lapping ranges
results["Documents"].extend(r["Documents"])
results["_count"] += r["_count"]
else:
results = r
if response_hook:
response_hook(self.last_response_headers, results)
# if the prefix partition query has results lets return it
if results:
return __GetBodiesFromQueryResult(results)

result, self.last_response_headers = self.__Post(path, request_params, query, req_headers, **kwargs)

if response_hook:
Expand Down Expand Up @@ -2576,6 +2616,8 @@ def _GetQueryPlanThroughGateway(self, query, resource_link, **kwargs):
is_query_plan=True,
**kwargs)



def __CheckAndUnifyQueryFormat(self, query_body):
"""Checks and unifies the format of the query body.

Expand Down Expand Up @@ -2650,21 +2692,36 @@ def _AddPartitionKey(self, collection_link, document, options):

# Extracts the partition key from the document using the partitionKey definition
def _ExtractPartitionKey(self, partitionKeyDefinition, document):
    """Extract the partition key value(s) for *document*.

    :param dict partitionKeyDefinition: the container's partition key
        definition, with at least "kind" and "paths"; may carry "systemKey".
    :param dict document: the document to read the partition key from.
    :return: a list of values (one per path level) for hierarchical
        ("MultiHash") definitions, otherwise the single extracted value.
    """
    # Whether the partition key is system generated.  This is invariant for
    # the whole definition, so compute it once instead of per path level.
    is_system_key = partitionKeyDefinition.get("systemKey", False)

    if partitionKeyDefinition["kind"] == "MultiHash":
        ret = []
        for partition_key_level in partitionKeyDefinition.get("paths"):
            # Parses the path into a list of tokens, each representing a property
            partition_key_parts = base.ParsePaths([partition_key_level])
            # Navigates the document to retrieve this level's value
            val = self._retrieve_partition_key(partition_key_parts, document, is_system_key)
            # NOTE(review): this identity check only fires when
            # _retrieve_partition_key returns the _Undefined class itself, not
            # an _Undefined() instance — confirm which one it returns.
            if val is _Undefined:
                # Stop at the first undefined level; lower levels are omitted.
                break
            ret.append(val)
        return ret

    # Single (non-hierarchical) partition key:
    # parses the paths into a list of tokens, each representing a property
    partition_key_parts = base.ParsePaths(partitionKeyDefinition.get("paths"))
    # Navigates the document to retrieve the partitionKey specified in the paths
    return self._retrieve_partition_key(partition_key_parts, document, is_system_key)

# Navigates the document to retrieve the partitionKey specified in the partition key parts
def _retrieve_partition_key(self, partition_key_parts, document, is_system_key):
expected_matchCount = len(partition_key_parts)
matchCount = 0
partitionKey = document

for part in partition_key_parts:
# At any point if we don't find the value of a sub-property in the document, we return as Undefined
if part not in partitionKey:
Expand Down
3 changes: 2 additions & 1 deletion sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from ..http_constants import StatusCodes
from ..offer import ThroughputProperties
from ._scripts import ScriptsProxy
from ..partition_key import NonePartitionKeyValue
from ..partition_key import NonePartitionKeyValue, PartitionKey

__all__ = ("ContainerProxy",)

Expand Down Expand Up @@ -361,6 +361,7 @@ def query_items(
partition_key = kwargs.pop('partition_key', None)
if partition_key is not None:
feed_options["partitionKey"] = self._set_partition_key(partition_key)
kwargs["containerProperties"] = self._get_properties
annatisch marked this conversation as resolved.
Show resolved Hide resolved
else:
feed_options["enableCrossPartitionQuery"] = True
max_integrated_cache_staleness_in_ms = kwargs.pop('max_integrated_cache_staleness_in_ms', None)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

"""Document client class for the Azure Cosmos database service.
"""
import json
# https://github.com/PyCQA/pylint/issues/3112
# Currently pylint is locked to 2.3.3 and this is fixed in 2.4.4
from typing import Dict, Any, Optional, TypeVar # pylint: disable=unused-import
Expand All @@ -44,6 +45,7 @@

from .. import _base as base
from .. import documents
from .._routing import routing_range
from ..documents import ConnectionPolicy
from .. import _constants as constants
from .. import http_constants
Expand All @@ -56,7 +58,7 @@
from ._retry_utility_async import _ConnectionRetryPolicy
from .. import _session
from .. import _utils
from ..partition_key import _Undefined, _Empty
from ..partition_key import _Undefined, _Empty, PartitionKey
from ._auth_policy_async import AsyncCosmosBearerTokenCredentialPolicy
from .._cosmos_http_logging_policy import CosmosHttpLoggingPolicy

Expand Down Expand Up @@ -2349,6 +2351,55 @@ def __GetBodiesFromQueryResult(result):
# Query operations will use ReadEndpoint even though it uses POST(for regular query operations)
request_params = _request_object.RequestObject(typ, documents._OperationType.SqlQuery)
req_headers = base.GetHeaders(self, initial_headers, "post", path, id_, typ, options, partition_key_range_id)

# check if query has prefix partition key
cont_prop = kwargs.pop("containerProperties", None)
partition_key = options.get("partitionKey", None)
isPrefixPartitionQuery = False
partition_key_definition = None
if cont_prop:
cont_prop = await cont_prop()
pk_properties = cont_prop["partitionKey"]
partition_key_definition = PartitionKey(path=pk_properties["paths"], kind=pk_properties["kind"])
if partition_key_definition.kind == "MultiHash" and\
(type(partition_key) == list and len(partition_key_definition['paths']) != len(partition_key)):
isPrefixPartitionQuery = True

if isPrefixPartitionQuery:
# here get the overlapping ranges
req_headers.pop(http_constants.HttpHeaders.PartitionKey, None)
feedrangeEPK = partition_key_definition._get_epk_range_for_prefix_partition_key(partition_key) # cspell:disable-line # pylint: disable=line-too-long
over_lapping_ranges = await self._routing_map_provider.get_overlapping_ranges(id_, [feedrangeEPK])
results = None
for over_lapping_range in over_lapping_ranges:
# It is possible for the over lapping range to include multiple physical partitions
# we should return query results for all the partitions that are overlapped.
single_range = routing_range.Range.PartitionKeyRangeToRange(over_lapping_range)
if single_range.min == feedrangeEPK.min and single_range.max == feedrangeEPK.max:
# The EpkRange spans exactly one physical partition
# In this case we can route to the physical pk range id
req_headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_range["id"]
else:
# The EpkRange spans less than single physical partition
# In this case we route to the physical partition and
# pass the epk range headers to filter within partition
req_headers[http_constants.HttpHeaders.PartitionKeyRangeID] = over_lapping_range["id"]
req_headers[http_constants.HttpHeaders.StartEpkString] = feedrangeEPK.min
ealsur marked this conversation as resolved.
Show resolved Hide resolved
req_headers[http_constants.HttpHeaders.EndEpkString] = feedrangeEPK.max
req_headers[http_constants.HttpHeaders.ReadFeedKeyType] = "EffectivePartitionKeyRange"
r, self.last_response_headers = await self.__Post(path, request_params, query, req_headers, **kwargs)
if results:
# add up all the query results from all over lapping ranges
results["Documents"].extend(r["Documents"])
results["_count"] += r["_count"]
else:
results = r
if response_hook:
response_hook(self.last_response_headers, results)
# if the prefix partition query has results lets return it
if results:
return __GetBodiesFromQueryResult(results)

result, self.last_response_headers = await self.__Post(path, request_params, query, req_headers, **kwargs)

if response_hook:
Expand Down Expand Up @@ -2516,13 +2567,29 @@ async def _AddPartitionKey(self, collection_link, document, options):

# Extracts the partition key from the document using the partitionKey definition
def _ExtractPartitionKey(self, partitionKeyDefinition, document):
    """Extract the partition key value(s) for *document*.

    :param dict partitionKeyDefinition: the container's partition key
        definition, with at least "kind" and "paths"; may carry "systemKey".
    :param dict document: the document to read the partition key from.
    :return: a list of values (one per path level) for hierarchical
        ("MultiHash") definitions, otherwise the single extracted value.
    """
    # Whether the partition key is system generated.  This is invariant for
    # the whole definition, so compute it once instead of per path level.
    is_system_key = partitionKeyDefinition.get("systemKey", False)

    if partitionKeyDefinition["kind"] == "MultiHash":
        ret = []
        for partition_key_level in partitionKeyDefinition.get("paths"):
            # Parses the path into a list of tokens, each representing a property
            partition_key_parts = base.ParsePaths([partition_key_level])
            # Navigates the document to retrieve this level's value
            val = self._retrieve_partition_key(partition_key_parts, document, is_system_key)
            # NOTE(review): this identity check only fires when
            # _retrieve_partition_key returns the _Undefined class itself, not
            # an _Undefined() instance — confirm which one it returns.
            if val is _Undefined:
                # Stop at the first undefined level; lower levels are omitted.
                break
            ret.append(val)
        return ret

    # Single (non-hierarchical) partition key:
    # parses the paths into a list of tokens, each representing a property
    partition_key_parts = base.ParsePaths(partitionKeyDefinition.get("paths"))
    # Navigates the document to retrieve the partitionKey specified in the paths
    return self._retrieve_partition_key(partition_key_parts, document, is_system_key)

# Navigates the document to retrieve the partitionKey specified in the partition key parts
Expand Down
Loading