-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathProgram.cs
117 lines (103 loc) · 3.97 KB
/
Program.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
using System.Net.Http.Json;
using System.Text;
using System.Text.Json;
/// <summary>
/// Shape of the JSON body returned by the embedding endpoint, as read by
/// <c>Program.FetchEmbeddings</c> via <c>ReadFromJsonAsync</c>.
/// </summary>
internal class ApiResponse
{
    /// <summary>
    /// The embedding vectors from the response. The caller pairs
    /// <c>embeddings[i]</c> with the i-th input string it sent.
    /// The property name is what the JSON deserializer matches against the
    /// response body, so it is intentionally left lowercase.
    /// </summary>
    public required float[][] embeddings { get; set; }
}
/// <summary>
/// Hybrid search example: stores a few documents with their embeddings in
/// PostgreSQL, then combines a vector-distance ranking with a full-text
/// ranking into a single score (printed as the "RRF score").
/// </summary>
class Program
{
    // Embedding endpoint and model name sent with every request.
    // NOTE(review): port 11434 and the /api/embed path look like a local
    // Ollama server - confirm before relying on that.
    private const string EmbeddingUrl = "http://localhost:11434/api/embed";
    private const string EmbeddingModel = "nomic-embed-text";

    // One HttpClient for the lifetime of the process; creating a new client
    // per request leaks sockets and is discouraged by the HttpClient guidelines.
    private static readonly HttpClient s_httpClient = new();

    /// <summary>
    /// Recreates the <c>documents</c> table, inserts three sample documents
    /// with their embeddings, runs the hybrid query for "growling bear", and
    /// writes each resulting document id and score to the console.
    /// </summary>
    static async Task Main()
    {
        var connString = "Host=localhost;Database=pgvector_example";
        var dataSourceBuilder = new NpgsqlDataSourceBuilder(connString);
        dataSourceBuilder.UseVector();
        await using var dataSource = dataSourceBuilder.Build();

        // Open asynchronously and dispose the connection when Main exits
        // (declared after dataSource, so it is disposed before it).
        await using var conn = await dataSource.OpenConnectionAsync();

        await ExecuteNonQueryAsync(conn, "CREATE EXTENSION IF NOT EXISTS vector");

        // The extension may have just been created on this connection, so
        // reload the type catalog before the vector type is used.
        conn.ReloadTypes();

        await ExecuteNonQueryAsync(conn, "DROP TABLE IF EXISTS documents");
        await ExecuteNonQueryAsync(conn, "CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding vector(768))");
        await ExecuteNonQueryAsync(conn, "CREATE INDEX ON documents USING GIN (to_tsvector('english', content))");

        string[] input = {
            "The dog is barking",
            "The cat is purring",
            "The bear is growling"
        };
        var embeddings = await FetchEmbeddings(input);

        // Insert each document with the embedding at the same index.
        for (int i = 0; i < input.Length; i++)
        {
            await using var insert = new NpgsqlCommand("INSERT INTO documents (content, embedding) VALUES ($1, $2)", conn);
            insert.Parameters.AddWithValue(input[i]);
            insert.Parameters.AddWithValue(new Vector(embeddings[i]));
            await insert.ExecuteNonQueryAsync();
        }

        // $1 = query text, $2 = query embedding, $3 = constant added to each rank.
        // Each CTE ranks up to 20 candidates; the outer query sums
        // 1 / ($3 + rank) across both rankings, treating a missing rank as 0.
        var sql = @"
            WITH semantic_search AS (
                SELECT id, RANK () OVER (ORDER BY embedding <=> $2) AS rank
                FROM documents
                ORDER BY embedding <=> $2
                LIMIT 20
            ),
            keyword_search AS (
                SELECT id, RANK () OVER (ORDER BY ts_rank_cd(to_tsvector('english', content), query) DESC)
                FROM documents, plainto_tsquery('english', $1) query
                WHERE to_tsvector('english', content) @@ query
                ORDER BY ts_rank_cd(to_tsvector('english', content), query) DESC
                LIMIT 20
            )
            SELECT
                COALESCE(semantic_search.id, keyword_search.id) AS id,
                COALESCE(1.0 / ($3 + semantic_search.rank), 0.0) +
                COALESCE(1.0 / ($3 + keyword_search.rank), 0.0) AS score
            FROM semantic_search
            FULL OUTER JOIN keyword_search ON semantic_search.id = keyword_search.id
            ORDER BY score DESC
            LIMIT 5
        ";

        var query = "growling bear";
        var queryEmbedding = await FetchEmbeddings(new string[] { query });
        var k = 60;

        await using var search = new NpgsqlCommand(sql, conn);
        search.Parameters.AddWithValue(query);
        search.Parameters.AddWithValue(new Vector(queryEmbedding[0]));
        search.Parameters.AddWithValue(k);

        await using var reader = await search.ExecuteReaderAsync();
        while (await reader.ReadAsync())
        {
            // Typed getters avoid the box/unbox round trip of GetValue + cast.
            Console.WriteLine("document: {0}, RRF score: {1}", reader.GetInt64(0), reader.GetDecimal(1));
        }
    }

    /// <summary>
    /// Runs a single parameterless SQL statement on <paramref name="conn"/>
    /// and disposes the command afterwards.
    /// </summary>
    /// <param name="conn">An open connection to execute on.</param>
    /// <param name="sql">The statement text.</param>
    private static async Task ExecuteNonQueryAsync(NpgsqlConnection conn, string sql)
    {
        await using var cmd = new NpgsqlCommand(sql, conn);
        await cmd.ExecuteNonQueryAsync();
    }

    /// <summary>
    /// Posts <paramref name="input"/> to the embedding endpoint and returns
    /// the embeddings from the response.
    /// </summary>
    /// <param name="input">The texts to embed.</param>
    /// <returns>The embeddings from the response; there is exactly one per input.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="HttpRequestException">The endpoint returned a non-success status.</exception>
    /// <exception cref="InvalidOperationException">
    /// The response body had no embeddings, or their count does not match
    /// the number of inputs.
    /// </exception>
    private static async Task<float[][]> FetchEmbeddings(string[] input)
    {
        ArgumentNullException.ThrowIfNull(input);

        var data = new
        {
            input,
            model = EmbeddingModel
        };

        using HttpResponseMessage response = await s_httpClient.PostAsJsonAsync(EmbeddingUrl, data);
        response.EnsureSuccessStatusCode();

        var apiResponse = await response.Content.ReadFromJsonAsync<ApiResponse>();
        if (apiResponse?.embeddings is not { } embeddings)
        {
            throw new InvalidOperationException("The embedding service returned no embeddings.");
        }

        // Callers index the result by input position, so a mismatch would
        // otherwise surface later as an IndexOutOfRangeException.
        if (embeddings.Length != input.Length)
        {
            throw new InvalidOperationException(
                $"Expected {input.Length} embeddings but the service returned {embeddings.Length}.");
        }

        return embeddings;
    }
}