-
Notifications
You must be signed in to change notification settings - Fork 2k
/
Copy pathextract-redis.ts
118 lines (105 loc) · 3.3 KB
/
extract-redis.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import { redisConnection } from "../../services/queue-service";
import { logger as _logger } from "../logger";
/**
 * Step identifiers recorded in `ExtractedStep.step`.
 *
 * This is a string enum, so the literal on the right-hand side is what ends
 * up in the JSON that `saveExtract` / `updateExtract` write to Redis.
 * NOTE(review): the order of the members presumably mirrors the order in
 * which the steps run — confirm against the code that emits them.
 */
export enum ExtractStep {
INITIAL = "initial",
MAP = "map",
MAP_RERANK = "map-rerank",
MULTI_ENTITY = "multi-entity",
MULTI_ENTITY_SCRAPE = "multi-entity-scrape",
MULTI_ENTITY_EXTRACT = "multi-entity-extract",
SCRAPE = "scrape",
EXTRACT = "extract",
COMPLETE = "complete",
}
/**
 * One entry of `StoredExtract.steps`.
 *
 * These five fields are exactly the ones that are copied into the persisted
 * record; anything else attached to a step object is dropped on save.
 */
export type ExtractedStep = {
// Which step this entry describes.
step: ExtractStep;
// Start / finish markers for the step.
// NOTE(review): presumably epoch milliseconds — confirm with the producers.
startedAt: number;
finishedAt: number;
// Optional error payload; untyped, stored as-is (must be JSON-serializable to survive the round trip).
error?: any;
// Optional list of links attached to the step. When persisted, only the
// first STEPS_MAX_DISCOVERED_LINKS entries are kept.
discoveredLinks?: string[];
};
/**
 * Shape of the record stored as JSON under the Redis key `extract:<id>`.
 */
export type StoredExtract = {
// Identifier of the extract.
id: string;
// Identifier of the owning team.
team_id: string;
// Optional plan name. NOTE(review): presumably the team's billing plan — confirm.
plan?: string;
// Creation marker. NOTE(review): presumably epoch milliseconds — confirm.
createdAt: number;
// Lifecycle state of the extract.
status: "processing" | "completed" | "failed" | "cancelled";
// Optional error payload; untyped, stored as-is.
error?: any;
// Flags. NOTE(review): presumably control which optional sections are exposed
// to the caller (steps / LLM usage / sources) — confirm against the readers.
showSteps?: boolean;
// Recorded steps; see ExtractedStep.
steps?: ExtractedStep[];
showLLMUsage?: boolean;
showSources?: boolean;
// NOTE(review): unit is not visible here (tokens? cost?) — confirm.
llmUsage?: number;
// Map from a string key to a list of strings.
// NOTE(review): key/value meaning is not visible in this file — confirm.
sources?: {
[key: string]: string[];
};
};
// Expiry applied to every "extract:<id>" key, in seconds: 6 hours
// (per the original note, reduced from 24 hours).
const EXTRACT_TTL = 6 * 60 * 60;
// Maximum number of discoveredLinks kept per step when an extract is persisted.
const STEPS_MAX_DISCOVERED_LINKS = 100;
/**
 * Persists an extract record in Redis under the key `extract:<id>`.
 *
 * Each step is reduced to its essential fields and its `discoveredLinks`
 * list is capped at `STEPS_MAX_DISCOVERED_LINKS` entries before the record
 * is serialized to JSON.
 *
 * The value and its expiry (`EXTRACT_TTL` seconds) are written with a single
 * `SET key value EX ttl` command. Issuing SET and EXPIRE separately could
 * leave a key without any TTL if the second command never ran.
 *
 * @param id - Extract identifier used to build the Redis key.
 * @param extract - Record to store.
 */
export async function saveExtract(id: string, extract: StoredExtract) {
  _logger.debug("Saving extract " + id + " to Redis...");

  // Only store essential data
  const minimalExtract = {
    ...extract,
    steps: extract.steps?.map((step) => ({
      step: step.step,
      startedAt: step.startedAt,
      finishedAt: step.finishedAt,
      error: step.error,
      // Cap the number of discovered links stored per step.
      discoveredLinks: step.discoveredLinks?.slice(0, STEPS_MAX_DISCOVERED_LINKS),
    })),
  };

  // Atomic write: value and TTL are set together.
  await redisConnection.set(
    "extract:" + id,
    JSON.stringify(minimalExtract),
    "EX",
    EXTRACT_TTL,
  );
}
/**
 * Loads the extract record stored under the Redis key `extract:<id>`.
 *
 * @param id - Extract identifier used to build the Redis key.
 * @returns The JSON-parsed record, or `null` when Redis returns nothing
 *   (or an empty string) for the key. The parsed value is not validated
 *   against `StoredExtract`.
 */
export async function getExtract(id: string): Promise<StoredExtract | null> {
  const raw = await redisConnection.get("extract:" + id);
  if (!raw) {
    return null;
  }
  return JSON.parse(raw);
}
/**
 * Merges a partial update into the extract stored under `extract:<id>` and
 * writes the result back with a fresh TTL of `EXTRACT_TTL` seconds.
 *
 * Does nothing when no record exists for `id`.
 *
 * Step handling:
 * - If the patch carries `steps` and the stored record already has steps,
 *   the new steps are appended and only the last 5 entries are kept, to
 *   prevent unbounded growth.
 * - If the patch carries `steps` and the stored record has none, the patch's
 *   steps are stored as given.
 * - If the patch carries no `steps`, the stored steps are preserved.
 *   (Previously they were overwritten with `undefined`, so e.g. a
 *   status-only update silently erased the step history.)
 *
 * Every stored step is reduced to its essential fields, and its
 * `discoveredLinks` list is capped at `STEPS_MAX_DISCOVERED_LINKS` entries.
 *
 * The caller's `extract` object is not modified.
 *
 * NOTE(review): this is a read-modify-write sequence without locking, so
 * concurrent updates to the same id can overwrite each other.
 *
 * @param id - Extract identifier used to build the Redis key.
 * @param extract - Fields to overlay on the stored record.
 */
export async function updateExtract(
  id: string,
  extract: Partial<StoredExtract>,
) {
  const current = await getExtract(id);
  if (!current) return;

  // Number of most recent steps retained when appending to existing steps.
  const maxStoredSteps = 5;

  let steps = current.steps;
  if (extract.steps) {
    const incoming = current.steps
      ? [...current.steps, ...extract.steps].slice(-maxStoredSteps)
      : extract.steps;

    // Keep only the essential fields and cap the links stored per step.
    steps = incoming.map((step) => ({
      step: step.step,
      startedAt: step.startedAt,
      finishedAt: step.finishedAt,
      error: step.error,
      discoveredLinks: step.discoveredLinks?.slice(0, STEPS_MAX_DISCOVERED_LINKS),
    }));
  }

  const minimalExtract = {
    ...current,
    ...extract,
    steps,
  };

  // Atomic write: value and TTL are set together, so the key can never be
  // left without an expiry.
  await redisConnection.set(
    "extract:" + id,
    JSON.stringify(minimalExtract),
    "EX",
    EXTRACT_TTL,
  );
}
/**
 * Computes the point in time at which the key `extract:<id>` will expire.
 *
 * The remaining lifetime is read with PTTL (milliseconds) and added to the
 * current time as a plain epoch offset. Doing the addition through
 * `setMilliseconds` would operate on local-time fields and could be off by
 * the DST shift when the TTL spans a daylight-saving transition.
 *
 * PTTL reports negative sentinel values when the key does not exist or has
 * no expiry; these are treated as a remaining lifetime of 0, so the returned
 * date is "now" in those cases.
 *
 * @param id - Extract identifier used to build the Redis key.
 * @returns The expiry time, truncated to whole seconds.
 */
export async function getExtractExpiry(id: string): Promise<Date> {
  const now = Date.now();
  const ttl = await redisConnection.pttl("extract:" + id);

  // Negative values are sentinels, not durations.
  const expiresAtMs = now + Math.max(ttl, 0);

  // Drop the sub-second part.
  return new Date(expiresAtMs - (expiresAtMs % 1000));
}