-
Notifications
You must be signed in to change notification settings - Fork 58
/
Copy pathIngestAndVectorize.cs
96 lines (73 loc) · 3.46 KB
/
IngestAndVectorize.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
using Microsoft.Azure.Functions.Worker;
using Microsoft.Extensions.Logging;
using System.Net;
using Microsoft.Azure.Functions.Worker.Http;
using Azure.Storage.Blobs;
using Azure.Storage.Blobs.Models;
using SharedLib.Services;
namespace Vectorize
{
    /// <summary>
    /// HTTP-triggered Azure Function that downloads JSON blobs from a blob container
    /// and hands each one to <see cref="MongoDbService.ImportAndVectorizeAsync"/>.
    /// </summary>
    public class IngestAndVectorize
    {
        // Hard-coded here. In a real-world scenario, you would want to dynamically get the list of
        // blobs in the container and iterate through them, as well as drive all of the schema and
        // meta-data from a configuration file.
        private const string SourceContainerUri = "https://cosmosdbcosmicworks.blob.core.windows.net/cosmic-works-mongo-vcore/";
        private static readonly string[] s_blobIds = { "products", "customers", "salesOrders" };

        private const string PlainTextContentType = "text/plain; charset=utf-8";

        private readonly MongoDbService _mongo;
        private readonly ILogger _logger;

        /// <summary>
        /// Creates the function class with its injected dependencies.
        /// </summary>
        /// <param name="mongo">Service that receives each blob's JSON payload.</param>
        /// <param name="loggerFactory">Factory used to create this class's logger.</param>
        public IngestAndVectorize(MongoDbService mongo, ILoggerFactory loggerFactory)
        {
            _mongo = mongo;
            _logger = loggerFactory.CreateLogger<IngestAndVectorize>();
        }

        /// <summary>
        /// HTTP entry point (GET or POST). Runs the ingestion and reports the outcome as plain text.
        /// </summary>
        /// <param name="req">The incoming HTTP request; only used to create the response.</param>
        /// <returns>
        /// 200 OK when ingestion completed; 500 Internal Server Error when it threw.
        /// </returns>
        [Function("IngestAndVectorize")]
        public async Task<HttpResponseData> Run(
            [HttpTrigger(AuthorizationLevel.Anonymous, "post", "get", Route = null)] HttpRequestData req)
        {
            _logger.LogInformation("Ingest and Vectorize HTTP trigger function is processing a request.");

            try
            {
                // Ingest json data into MongoDB collections
                await IngestDataFromBlobStorageAsync();

                return await CreatePlainTextResponseAsync(
                    req,
                    HttpStatusCode.OK,
                    "Ingest and Vectorize HTTP trigger function executed successfully.");
            }
            catch (Exception ex)
            {
                // The endpoint is anonymous, so the exception (with its stack trace) goes to the
                // log only; the caller receives a generic message. A failure here is a server-side
                // problem, hence 500 rather than 400.
                _logger.LogError(ex, "Ingest and Vectorize HTTP trigger function failed.");

                return await CreatePlainTextResponseAsync(
                    req,
                    HttpStatusCode.InternalServerError,
                    "Ingest and Vectorize HTTP trigger function failed. See the function logs for details.");
            }
        }

        /// <summary>
        /// Downloads each known blob (<c>{id}.json</c>) from the source container and passes its
        /// content to <see cref="MongoDbService.ImportAndVectorizeAsync"/> under the same id.
        /// Blobs that do not exist are skipped with a warning.
        /// </summary>
        /// <exception cref="Exception">
        /// Any exception raised while downloading or importing is logged and rethrown unchanged.
        /// </exception>
        public async Task IngestDataFromBlobStorageAsync()
        {
            try
            {
                var containerClient = new BlobContainerClient(new Uri(SourceContainerUri));

                foreach (string blobId in s_blobIds)
                {
                    BlobClient blobClient = containerClient.GetBlobClient($"{blobId}.json");

                    bool exists = await blobClient.ExistsAsync();
                    if (!exists)
                    {
                        _logger.LogWarning("Blob {BlobId}.json was not found in blob storage; skipping.", blobId);
                        continue;
                    }

                    _logger.LogInformation("Ingesting {BlobId} data from blob storage.", blobId);

                    // Dispose the download result as well as the reader so the underlying
                    // network stream is released even if reading or importing throws.
                    using BlobDownloadStreamingResult download = await blobClient.DownloadStreamingAsync();
                    using var reader = new StreamReader(download.Content);

                    string json = await reader.ReadToEndAsync();
                    await _mongo.ImportAndVectorizeAsync(blobId, json);

                    _logger.LogInformation("{BlobId} data ingestion complete.", blobId);
                }
            }
            catch (Exception ex)
            {
                // Pass the exception object so the stack trace is captured, not just the message.
                _logger.LogError(ex, "Exception: IngestDataFromBlobStorageAsync(): {Message}", ex.Message);
                throw;
            }
        }

        // Builds a plain-text response with the given status code and body.
        private static async Task<HttpResponseData> CreatePlainTextResponseAsync(
            HttpRequestData req, HttpStatusCode statusCode, string body)
        {
            HttpResponseData response = req.CreateResponse(statusCode);
            response.Headers.Add("Content-Type", PlainTextContentType);
            await response.WriteStringAsync(body);
            return response;
        }
    }
}