Skip to content

Commit

Permalink
Added authentication with service principals for Doc Int and CosmosDB
Browse files Browse the repository at this point in the history
  • Loading branch information
kmavrodis committed Sep 25, 2024
1 parent 69ca75a commit becf5fc
Show file tree
Hide file tree
Showing 8 changed files with 167 additions and 33 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,12 @@ To run the Streamlit app `app.py` located in the `frontend` folder, follow these

3. Populate the `.env` file with the necessary environment variables. Open the `.env` file in a text editor and provide the required values for each variable.

4. Start the Streamlit app by running the following command in your terminal:
4. Run the `frontend_prep.sh` script. You will be prompted to log in to the Azure tenant where the deployment is hosted:
```sh
./frontend/frontend_prep.sh
```

5. Start the Streamlit app by running the following command in your terminal:
```sh
streamlit run frontend/app.py
```
Expand Down
2 changes: 2 additions & 0 deletions frontend/.env.temp
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
RESOURCE_GROUP_NAME=""
BLOB_CONN_STR=""
CONTAINER_NAME="datasets"
COSMOS_URL=""
COSMOS_KEY=""
COSMOS_ACCOUNT_NAME=""
COSMOS_DB_NAME="doc-extracts"
COSMOS_DOCUMENTS_CONTAINER_NAME="documents"
COSMOS_CONFIG_CONTAINER_NAME="configuration"
66 changes: 66 additions & 0 deletions frontend/frontend_prep.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/bin/bash
#
# frontend_prep.sh - grant the signed-in Azure user data-plane access to the
# Cosmos DB account used by the frontend.
#
# Steps:
#   1. Load settings from a .env file (current directory first, then the
#      directory that contains this script).
#   2. Verify that every required setting is non-empty.
#   3. Run `az login`, then look up the signed-in user's ID and the Cosmos DB
#      account's resource ID.
#   4. Create a "Cosmos DB Built-in Data Contributor" SQL role assignment for
#      that user, scoped to the Cosmos DB account.
#
# Exits with status 1 on the first failure.

# Locate the .env file. The current directory is checked first (original
# behaviour); falling back to the script's own directory lets the script be
# started from elsewhere, e.g. `./frontend/frontend_prep.sh` from the repo root.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
if [ -f .env ]; then
    env_file=".env"
elif [ -f "$script_dir/.env" ]; then
    env_file="$script_dir/.env"
else
    echo "Error: .env file not found"
    exit 1
fi

# Load environment variables.
# The file is parsed line by line instead of `export $(cat .env | xargs)` so
# that comment lines, blank lines and values containing whitespace do not
# break the export, and nothing in the file is executed as shell code.
while IFS= read -r line || [ -n "$line" ]; do
    line="${line%$'\r'}"                        # tolerate CRLF line endings
    line="${line#"${line%%[![:space:]]*}"}"     # trim leading whitespace
    case "$line" in
        ''|'#'*) continue ;;                    # skip blank lines and comments
    esac
    [[ "$line" == *=* ]] || continue            # not a KEY=VALUE line

    key="${line%%=*}"
    value="${line#*=}"
    [[ "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue

    value="${value%"${value##*[![:space:]]}"}"  # trim trailing whitespace

    # Strip one pair of matching surrounding quotes, if present.
    if [ "${#value}" -ge 2 ]; then
        first="${value:0:1}"
        last="${value: -1}"
        if [ "$first" = "$last" ] && { [ "$first" = '"' ] || [ "$first" = "'" ]; }; then
            value="${value:1:${#value}-2}"
        fi
    fi

    export "$key=$value"
done < "$env_file"

# Check if required environment variables are set
required_vars=(
    "BLOB_CONN_STR" "CONTAINER_NAME" "COSMOS_URL" "COSMOS_DB_NAME"
    "COSMOS_DOCUMENTS_CONTAINER_NAME" "COSMOS_CONFIG_CONTAINER_NAME"
    "RESOURCE_GROUP_NAME" "COSMOS_ACCOUNT_NAME"
)
for var in "${required_vars[@]}"; do
    # ${!var} is indirect expansion: the value of the variable named by $var.
    if [ -z "${!var}" ]; then
        echo "Error: $var is not set in the .env file"
        exit 1
    fi
done

# Use the values from .env file
resourceGroupName="$RESOURCE_GROUP_NAME"
accountName="$COSMOS_ACCOUNT_NAME"

# Stop early if the login fails; every later az call depends on it.
if ! az login; then
    echo "Error: az login failed"
    exit 1
fi

# Get the principal ID of the signed-in user
principalId="$(az ad signed-in-user show --query id -o tsv)"

if [ -z "$principalId" ]; then
    echo "Error: Failed to get the principal ID of the signed-in user"
    exit 1
fi

# Get the scope of the Cosmos DB account
scope="$(
    az cosmosdb show \
        --resource-group "$resourceGroupName" \
        --name "$accountName" \
        --query id \
        --output tsv
)"

if [ -z "$scope" ]; then
    echo "Error: Failed to get the scope of the Cosmos DB account"
    exit 1
fi

# Create role assignment
echo "Creating role assignment for the signed-in user..."
if az cosmosdb sql role assignment create \
    --resource-group "$resourceGroupName" \
    --account-name "$accountName" \
    --role-definition-name "Cosmos DB Built-in Data Contributor" \
    --principal-id "$principalId" \
    --scope "$scope"; then
    echo "Role assignment created successfully for the signed-in user"
else
    echo "Error: Failed to create role assignment"
    exit 1
fi
49 changes: 39 additions & 10 deletions infra/main.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ resource documentIntelligence 'Microsoft.CognitiveServices/accounts@2021-04-30'
properties: {
apiProperties: {}
customSubDomainName: documentIntelligenceName
publicNetworkAccess: 'Enabled'
}
tags: commonTags
}
Expand All @@ -247,8 +248,8 @@ resource functionApp 'Microsoft.Web/sites@2021-03-01' = {
alwaysOn: true
appSettings: [
{
name: 'AzureWebJobsStorage'
value: 'DefaultEndpointsProtocol=https;AccountName=${storageAccount.name};AccountKey=${listKeys(storageAccount.id, storageAccount.apiVersion).keys[0].value};EndpointSuffix=core.windows.net'
name: 'AzureWebJobsStorage__accountName'
value: storageAccount.name
}
{
name: 'WEBSITE_CONTENTAZUREFILECONNECTIONSTRING'
Expand Down Expand Up @@ -282,10 +283,6 @@ resource functionApp 'Microsoft.Web/sites@2021-03-01' = {
name: 'COSMOS_DB_ENDPOINT'
value: cosmosDbAccount.properties.documentEndpoint
}
{
name: 'COSMOS_DB_KEY'
value: cosmosDbAccount.listKeys().primaryMasterKey
}
{
name: 'COSMOS_DB_DATABASE_NAME'
value: cosmosDbDatabaseName
Expand All @@ -298,10 +295,6 @@ resource functionApp 'Microsoft.Web/sites@2021-03-01' = {
name: 'DOCUMENT_INTELLIGENCE_ENDPOINT'
value: documentIntelligence.properties.endpoint
}
{
name: 'DOCUMENT_INTELLIGENCE_KEY'
value: documentIntelligence.listKeys().key1
}
{
name: 'AZURE_OPENAI_ENDPOINT'
value: azureOpenaiEndpoint
Expand All @@ -327,6 +320,42 @@ resource functionApp 'Microsoft.Web/sites@2021-03-01' = {
}
}

// Role assignments for the Function App's managed identity
resource functionAppStorageBlobDataContributorRole 'Microsoft.Authorization/roleAssignments@2020-04-01-preview' = {
name: guid(functionApp.id, storageAccount.id, 'StorageBlobDataContributor')
scope: storageAccount
properties: {
roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'ba92f5b4-2d11-453d-a403-e96b0029c9fe') // Storage Blob Data Contributor
principalId: functionApp.identity.principalId
principalType: 'ServicePrincipal'
}
}

// Cosmos DB role assignment
resource cosmosDBDataContributorRoleDefinition 'Microsoft.DocumentDB/databaseAccounts/sqlRoleDefinitions@2021-04-15' existing = {
parent: cosmosDbAccount
name: '00000000-0000-0000-0000-000000000002' // Built-in Data Contributor Role
}

resource cosmosDBRoleAssignment 'Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2021-04-15' = {
parent: cosmosDbAccount
name: guid(cosmosDbAccount.id, functionApp.id, cosmosDBDataContributorRoleDefinition.id)
properties: {
roleDefinitionId: cosmosDBDataContributorRoleDefinition.id
principalId: functionApp.identity.principalId
scope: cosmosDbAccount.id
}
}

resource functionAppDocumentIntelligenceContributorRole 'Microsoft.Authorization/roleAssignments@2020-04-01-preview' = {
name: guid(functionApp.id, documentIntelligence.id, 'CognitiveServicesUser')
scope: documentIntelligence
properties: {
roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'a97b65f3-24c7-4388-baec-2e87135dc908') // Cognitive Services User
principalId: functionApp.identity.principalId
principalType: 'ServicePrincipal'
}
}

param roleDefinitionId string = 'ba92f5b4-2d11-453d-a403-e96b0029c9fe' //Default as Storage Blob Data Contributor role

Expand Down
63 changes: 50 additions & 13 deletions infra/main.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"_generator": {
"name": "bicep",
"version": "0.30.3.12046",
"templateHash": "10680848260313514895"
"templateHash": "11125348922574127635"
}
},
"parameters": {
Expand Down Expand Up @@ -68,7 +68,7 @@
}
},
"variables": {
"$fxv#0": "{\r\n \"definition\": {\r\n \"$schema\": \"https://schema.management.azure.com/providers/Microsoft.Logic/schemas/2016-06-01/workflowdefinition.json#\",\r\n \"contentVersion\": \"1.0.0.0\",\r\n \"triggers\": {},\r\n \"actions\": {\r\n \"If_email_has_attachments_and_key_subject_phrase\": {\r\n \"type\": \"If\",\r\n \"expression\": {\r\n \"and\": [\r\n {\r\n \"equals\": [\r\n \"@triggerBody()?['hasAttachments']\",\r\n true\r\n ]\r\n }\r\n ]\r\n },\r\n \"actions\": {\r\n \"For_each\": {\r\n \"type\": \"Foreach\",\r\n \"foreach\": \"@triggerBody()?['attachments']\",\r\n \"actions\": {\r\n \"Create_blob_(V2)_1\": {\r\n \"type\": \"ApiConnection\",\r\n \"inputs\": {\r\n \"host\": {\r\n \"connection\": {\r\n \"name\": \"@parameters('$connections')['azureblob']['connectionId']\"\r\n }\r\n },\r\n \"method\": \"post\",\r\n \"body\": \"@base64ToBinary(item()?['contentBytes'])\",\r\n \"headers\": {\r\n \"ReadFileMetadataFromServer\": true\r\n },\r\n \"path\": \"/v2/datasets/@{encodeURIComponent(encodeURIComponent(parameters('storageAccount')))}/files\",\r\n \"queries\": {\r\n \"folderPath\": \"datasets/default-dataset\",\r\n \"name\": \"@item()?['name']\",\r\n \"queryParametersSingleEncoded\": true\r\n }\r\n },\r\n \"runtimeConfiguration\": {\r\n \"contentTransfer\": {\r\n \"transferMode\": \"Chunked\"\r\n }\r\n }\r\n }\r\n }\r\n }\r\n },\r\n \"else\": {\r\n \"actions\": {}\r\n },\r\n \"runAfter\": {}\r\n }\r\n },\r\n \"outputs\": {},\r\n \"parameters\": {\r\n \"storageAccount\": {\r\n \"defaultValue\": \"\",\r\n \"type\": \"String\"\r\n },\r\n \"$connections\": {\r\n \"type\": \"Object\",\r\n \"defaultValue\": {}\r\n }\r\n }\r\n }\r\n }",
"$fxv#0": "{\n \"definition\": {\n \"$schema\": \"https://schema.management.azure.com/providers/Microsoft.Logic/schemas/2016-06-01/workflowdefinition.json#\",\n \"contentVersion\": \"1.0.0.0\",\n \"triggers\": {},\n \"actions\": {\n \"If_email_has_attachments_and_key_subject_phrase\": {\n \"type\": \"If\",\n \"expression\": {\n \"and\": [\n {\n \"equals\": [\n \"@triggerBody()?['hasAttachments']\",\n true\n ]\n }\n ]\n },\n \"actions\": {\n \"For_each\": {\n \"type\": \"Foreach\",\n \"foreach\": \"@triggerBody()?['attachments']\",\n \"actions\": {\n \"Create_blob_(V2)_1\": {\n \"type\": \"ApiConnection\",\n \"inputs\": {\n \"host\": {\n \"connection\": {\n \"name\": \"@parameters('$connections')['azureblob']['connectionId']\"\n }\n },\n \"method\": \"post\",\n \"body\": \"@base64ToBinary(item()?['contentBytes'])\",\n \"headers\": {\n \"ReadFileMetadataFromServer\": true\n },\n \"path\": \"/v2/datasets/@{encodeURIComponent(encodeURIComponent(parameters('storageAccount')))}/files\",\n \"queries\": {\n \"folderPath\": \"datasets/default-dataset\",\n \"name\": \"@item()?['name']\",\n \"queryParametersSingleEncoded\": true\n }\n },\n \"runtimeConfiguration\": {\n \"contentTransfer\": {\n \"transferMode\": \"Chunked\"\n }\n }\n }\n }\n }\n },\n \"else\": {\n \"actions\": {}\n },\n \"runAfter\": {}\n }\n },\n \"outputs\": {},\n \"parameters\": {\n \"storageAccount\": {\n \"defaultValue\": \"\",\n \"type\": \"String\"\n },\n \"$connections\": {\n \"type\": \"Object\",\n \"defaultValue\": {}\n }\n }\n }\n }",
"sanitizedTimestamp": "[replace(replace(parameters('timestamp'), '-', ''), ':', '')]",
"roleAssignmentName": "[guid(format('ra-uniqueString-{0}-{1}-{2}', resourceGroup().id, parameters('roleDefinitionId'), variables('sanitizedTimestamp')))]",
"commonTags": {
Expand Down Expand Up @@ -278,7 +278,8 @@
"kind": "FormRecognizer",
"properties": {
"apiProperties": {},
"customSubDomainName": "[parameters('documentIntelligenceName')]"
"customSubDomainName": "[parameters('documentIntelligenceName')]",
"publicNetworkAccess": "Enabled"
},
"tags": "[variables('commonTags')]"
},
Expand All @@ -301,8 +302,8 @@
"alwaysOn": true,
"appSettings": [
{
"name": "AzureWebJobsStorage",
"value": "[format('DefaultEndpointsProtocol=https;AccountName={0};AccountKey={1};EndpointSuffix=core.windows.net', parameters('storageAccountName'), listKeys(resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2022-05-01').keys[0].value)]"
"name": "AzureWebJobsStorage__accountName",
"value": "[parameters('storageAccountName')]"
},
{
"name": "WEBSITE_CONTENTAZUREFILECONNECTIONSTRING",
Expand Down Expand Up @@ -336,10 +337,6 @@
"name": "COSMOS_DB_ENDPOINT",
"value": "[reference(resourceId('Microsoft.DocumentDB/databaseAccounts', parameters('cosmosDbAccountName')), '2021-04-15').documentEndpoint]"
},
{
"name": "COSMOS_DB_KEY",
"value": "[listKeys(resourceId('Microsoft.DocumentDB/databaseAccounts', parameters('cosmosDbAccountName')), '2021-04-15').primaryMasterKey]"
},
{
"name": "COSMOS_DB_DATABASE_NAME",
"value": "[parameters('cosmosDbDatabaseName')]"
Expand All @@ -352,10 +349,6 @@
"name": "DOCUMENT_INTELLIGENCE_ENDPOINT",
"value": "[reference(resourceId('Microsoft.CognitiveServices/accounts', parameters('documentIntelligenceName')), '2021-04-30').endpoint]"
},
{
"name": "DOCUMENT_INTELLIGENCE_KEY",
"value": "[listKeys(resourceId('Microsoft.CognitiveServices/accounts', parameters('documentIntelligenceName')), '2021-04-30').key1]"
},
{
"name": "AZURE_OPENAI_ENDPOINT",
"value": "[parameters('azureOpenaiEndpoint')]"
Expand Down Expand Up @@ -388,6 +381,50 @@
"[resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName'))]"
]
},
{
"type": "Microsoft.Authorization/roleAssignments",
"apiVersion": "2020-04-01-preview",
"scope": "[format('Microsoft.Storage/storageAccounts/{0}', parameters('storageAccountName'))]",
"name": "[guid(resourceId('Microsoft.Web/sites', parameters('functionAppName')), resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName')), 'StorageBlobDataContributor')]",
"properties": {
"roleDefinitionId": "[subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'ba92f5b4-2d11-453d-a403-e96b0029c9fe')]",
"principalId": "[reference(resourceId('Microsoft.Web/sites', parameters('functionAppName')), '2021-03-01', 'full').identity.principalId]",
"principalType": "ServicePrincipal"
},
"dependsOn": [
"[resourceId('Microsoft.Web/sites', parameters('functionAppName'))]",
"[resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName'))]"
]
},
{
"type": "Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments",
"apiVersion": "2021-04-15",
"name": "[format('{0}/{1}', parameters('cosmosDbAccountName'), guid(resourceId('Microsoft.DocumentDB/databaseAccounts', parameters('cosmosDbAccountName')), resourceId('Microsoft.Web/sites', parameters('functionAppName')), resourceId('Microsoft.DocumentDB/databaseAccounts/sqlRoleDefinitions', parameters('cosmosDbAccountName'), '00000000-0000-0000-0000-000000000002')))]",
"properties": {
"roleDefinitionId": "[resourceId('Microsoft.DocumentDB/databaseAccounts/sqlRoleDefinitions', parameters('cosmosDbAccountName'), '00000000-0000-0000-0000-000000000002')]",
"principalId": "[reference(resourceId('Microsoft.Web/sites', parameters('functionAppName')), '2021-03-01', 'full').identity.principalId]",
"scope": "[resourceId('Microsoft.DocumentDB/databaseAccounts', parameters('cosmosDbAccountName'))]"
},
"dependsOn": [
"[resourceId('Microsoft.DocumentDB/databaseAccounts', parameters('cosmosDbAccountName'))]",
"[resourceId('Microsoft.Web/sites', parameters('functionAppName'))]"
]
},
{
"type": "Microsoft.Authorization/roleAssignments",
"apiVersion": "2020-04-01-preview",
"scope": "[format('Microsoft.CognitiveServices/accounts/{0}', parameters('documentIntelligenceName'))]",
"name": "[guid(resourceId('Microsoft.Web/sites', parameters('functionAppName')), resourceId('Microsoft.CognitiveServices/accounts', parameters('documentIntelligenceName')), 'CognitiveServicesUser')]",
"properties": {
"roleDefinitionId": "[subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'a97b65f3-24c7-4388-baec-2e87135dc908')]",
"principalId": "[reference(resourceId('Microsoft.Web/sites', parameters('functionAppName')), '2021-03-01', 'full').identity.principalId]",
"principalType": "ServicePrincipal"
},
"dependsOn": [
"[resourceId('Microsoft.CognitiveServices/accounts', parameters('documentIntelligenceName'))]",
"[resourceId('Microsoft.Web/sites', parameters('functionAppName'))]"
]
},
{
"type": "Microsoft.Web/connections",
"apiVersion": "2018-07-01-preview",
Expand Down
3 changes: 1 addition & 2 deletions src/functionapp/ai_ocr/azure/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@ def get_config():
load_dotenv()
return {
"doc_intelligence_endpoint": os.getenv("DOCUMENT_INTELLIGENCE_ENDPOINT", None),
"doc_intelligence_key": os.getenv("DOCUMENT_INTELLIGENCE_KEY", None),
"openai_api_key": os.getenv("AZURE_OPENAI_KEY", None),
"openai_api_endpoint": os.getenv("AZURE_OPENAI_ENDPOINT", None),
"openai_api_version": "2023-12-01-preview",
"openai_api_version": "2024-08-01-preview",
"openai_model_deployment": os.getenv("AZURE_OPENAI_MODEL_DEPLOYMENT_NAME", None),
"temp_images_outdir" : os.getenv("TEMP_IMAGES_OUTDIR", "/tmp/")
}
4 changes: 2 additions & 2 deletions src/functionapp/ai_ocr/azure/doc_intelligence.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.formrecognizer import DocumentAnalysisClient, AnalysisFeature
from azure.ai.documentintelligence.models import AnalyzeResult
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.identity import DefaultAzureCredential

from ai_ocr.azure.config import get_config

config = get_config()

# kwargs = {"api_version": doc_int_api_version}
client = document_analysis_client = DocumentAnalysisClient(endpoint=config["doc_intelligence_endpoint"],
credential=AzureKeyCredential(config["doc_intelligence_key"]),
credential=DefaultAzureCredential(),
headers={"solution":"ARGUS-1.0"})
# **kwargs)

Expand Down
6 changes: 1 addition & 5 deletions src/functionapp/ai_ocr/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,9 @@

def connect_to_cosmos():
endpoint = os.environ['COSMOS_DB_ENDPOINT']
key = os.environ['COSMOS_DB_KEY']
database_name = os.environ['COSMOS_DB_DATABASE_NAME']
container_name = os.environ['COSMOS_DB_CONTAINER_NAME']
try:
client = CosmosClient(endpoint, key)
except:
client = CosmosClient(endpoint, DefaultAzureCredential())
client = CosmosClient(endpoint, DefaultAzureCredential())
database = client.get_database_client(database_name)
docs_container = database.get_container_client(container_name)
conf_container = database.get_container_client('configuration')
Expand Down

0 comments on commit becf5fc

Please sign in to comment.