-
Notifications
You must be signed in to change notification settings - Fork 0
/
archive_org_search_v2.js
171 lines (159 loc) · 7.2 KB
/
archive_org_search_v2.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
// Field pickers shown in the settings panel, as [settings key, label, description].
// Every picker is bound to the table chosen under the "table" setting.
const fieldPickerSpecs = [
    ["identifierField", "📄 Identifier Field", "Select the field that stores the identifier in your Airtable table"],
    ["titleField", "📄 Title Field", "Select the field to store the title"],
    ["creatorField", "📄 Creator Field", "Select the field to store the creator"],
    ["languageField", "📄 Language Field", "Select the field to store the language"],
    ["publicDateField", "📄 Public Date Field", "Select the field to store the public date"],
    ["uploaderField", "📄 Uploader Field", "Select the field to store the uploader"],
    ["descriptionField", "📄 Description Field", "Select the field to store the description"],
    ["itemUrlField", "📄 Item URL Field", "Select the field to store the item URL"],
];

// Script settings: two free-text inputs, the target table, and one picker per destination field.
const settings = input.config({
    title: "Search Archive.org",
    description: "This script searches Archive.org for Japanese texts based on a user-defined search string and number of records, and displays the results in an output table.",
    items: [
        input.config.text("searchString", {
            label: "🔍 Search String",
            description: "Enter the search string to use for Archive.org",
        }),
        input.config.text("maxRecords", {
            label: "🔢 Max Records",
            description: "Enter the maximum number of records to fetch (1-1000)",
        }),
        input.config.table("table", {
            label: "📄 Table",
            description: "Select the table that contains the identifier field",
        }),
        ...fieldPickerSpecs.map(([key, label, description]) =>
            input.config.field(key, { parentTable: "table", label, description })
        ),
    ],
});

// Unpack the configured values into the module-level names used by the rest of the script.
const {
    searchString,
    maxRecords,
    table,
    identifierField,
    titleField,
    creatorField,
    languageField,
    publicDateField,
    uploaderField,
    descriptionField,
    itemUrlField,
} = settings;

// Fixed filters added to every Archive.org query (not user-configurable).
const searchLanguage = "jpn";
const mediaType = "texts";
// Validate the "Max Records" setting.
// The text must be a plain whole number: parseInt alone would silently accept
// malformed input such as "50abc" (-> 50), "12.9" (-> 12) or "1e3" (-> 1).
const maxRecordsText = maxRecords == null ? '' : String(maxRecords).trim();
let maxRecordsNum = /^\+?\d+$/.test(maxRecordsText) ? Number.parseInt(maxRecordsText, 10) : NaN;
if (Number.isNaN(maxRecordsNum) || maxRecordsNum < 1 || maxRecordsNum > 1000) {
    throw new Error('Invalid number of records. Please enter a number between 1 and 1000.');
}
/**
 * Query the Archive.org advanced-search endpoint and return the matching documents.
 *
 * The user's search text is wrapped in parentheses before the language and
 * media-type filters are AND-ed on, so boolean operators inside the user's text
 * (e.g. "a OR b") cannot escape those filters. The whole query is URL-encoded.
 *
 * @param {string} searchString - Free-text query entered by the user; blank means "filters only".
 * @param {number} maxRecordsNum - Maximum number of rows to request.
 * @param {string} searchLanguage - Value for the `language:` filter.
 * @param {string} mediaType - Value for the `mediatype:` filter.
 * @returns {Promise<Array<Object>>} The `response.docs` array from the JSON reply.
 * @throws {Error} If the HTTP request is not OK or the reply has no `response.docs` array.
 */
async function fetchArchiveResults(searchString, maxRecordsNum, searchLanguage, mediaType) {
    // Metadata fields requested for every document (each one listed once).
    const returnedFields = ['identifier', 'title', 'creator', 'language', 'publicdate', 'uploader', 'description'];

    const userQuery = typeof searchString === 'string' ? searchString.trim() : '';
    const clauses = [];
    if (userQuery !== '') {
        clauses.push(`(${userQuery})`);
    }
    clauses.push(`language:${searchLanguage}`, `mediatype:${mediaType}`);

    const queryParts = [
        `q=${encodeURIComponent(clauses.join(' AND '))}`,
        ...returnedFields.map(fieldName => `fl[]=${encodeURIComponent(fieldName)}`),
        'output=json',
        `rows=${encodeURIComponent(maxRecordsNum)}`,
    ];
    const searchURL = `https://archive.org/advancedsearch.php?${queryParts.join('&')}`;

    const response = await fetch(searchURL);
    if (!response.ok) throw new Error('Failed to fetch search results from Archive.org.');

    const data = await response.json();
    const docs = data && data.response && data.response.docs;
    if (!Array.isArray(docs)) {
        // Fail with a readable message instead of a TypeError on a missing property.
        throw new Error('Unexpected response format from Archive.org.');
    }
    return docs;
}
/**
 * Collect the identifiers already stored in the Airtable table.
 *
 * Only the identifier field is requested from Airtable, so no other cell data
 * is loaded for the lookup.
 *
 * @param {Object} table - Airtable table to read from.
 * @param {Object} identifierField - Field whose cell values hold the identifiers.
 * @returns {Promise<Set<*>>} Every non-empty identifier cell value found in the table.
 */
async function fetchExistingIdentifiers(table, identifierField) {
    const query = await table.selectRecordsAsync({ fields: [identifierField] });
    const identifiers = new Set();
    for (const record of query.records) {
        const identifier = record.getCellValue(identifierField);
        // Empty cells (null / '') are skipped so they never count as a known identifier.
        if (identifier) {
            identifiers.add(identifier);
        }
    }
    return identifiers;
}
/**
 * Coerce a metadata value into a string that is safe to display or store.
 *
 * - Strings are returned unchanged.
 * - Arrays are flattened into a comma-separated list of their non-empty items.
 *   NOTE(review): Archive.org is understood to return multi-valued metadata
 *   (e.g. several creators) as arrays; previously such values were dropped.
 * - Finite numbers are converted with String().
 * - Anything else (null, undefined, objects, booleans, NaN) becomes ''.
 *
 * @param {*} value - Raw value to normalise.
 * @returns {string} The normalised string, or '' when there is nothing usable.
 */
function ensureString(value) {
    if (typeof value === 'string') {
        return value;
    }
    if (Array.isArray(value)) {
        return value
            .map(item => ensureString(item))
            .filter(part => part !== '')
            .join(', ');
    }
    if (typeof value === 'number' && Number.isFinite(value)) {
        return String(value);
    }
    return '';
}
/**
 * Create Airtable records for the given Archive.org items, 50 at a time.
 *
 * Each item is mapped onto the configured destination fields (looked up by
 * field name); every value goes through ensureString, and the item URL is
 * derived from the identifier.
 *
 * @param {Object} table - Airtable table that receives the new records.
 * @param {Array<Object>} newRecords - Items with Identifier, Title, Creator,
 *   Language, PublicDate, Uploader and Description properties.
 * @returns {Promise<void>} Resolves once every batch has been created.
 */
async function addNewRecords(table, newRecords) {
    const batchSize = 50;

    // Translate one collected item into the payload shape createRecordsAsync expects.
    const toCreatePayload = (item) => {
        const fields = {};
        fields[identifierField.name] = ensureString(item.Identifier);
        fields[titleField.name] = ensureString(item.Title);
        fields[creatorField.name] = ensureString(item.Creator);
        fields[languageField.name] = ensureString(item.Language);
        fields[publicDateField.name] = ensureString(item.PublicDate);
        fields[uploaderField.name] = ensureString(item.Uploader);
        fields[descriptionField.name] = ensureString(item.Description);
        fields[itemUrlField.name] = `https://archive.org/details/${ensureString(item.Identifier)}`;
        return { fields };
    };

    // Batches are sent one after another; each is mapped just before it is written.
    let offset = 0;
    while (offset < newRecords.length) {
        const payload = newRecords.slice(offset, offset + batchSize).map(toCreatePayload);
        await table.createRecordsAsync(payload);
        offset += batchSize;
    }
}
/**
 * Run the whole workflow: load the identifiers already in Airtable, search
 * Archive.org, show every result in an output table (flagging the ones that
 * already exist), then create records for the results not yet stored.
 *
 * Any error raised along the way is reported through output.text instead of
 * being rethrown.
 *
 * @returns {Promise<void>}
 */
async function main() {
    try {
        const knownIdentifiers = await fetchExistingIdentifiers(table, identifierField);
        const docs = await fetchArchiveResults(searchString, maxRecordsNum, searchLanguage, mediaType);
        output.text(`Fetched ${docs.length} results from Archive.org`);

        // One pass over the results builds both the display rows and the list of items to insert.
        const displayRows = [];
        const pendingItems = [];
        for (const doc of docs) {
            const alreadyStored = knownIdentifiers.has(doc.identifier);

            if (!alreadyStored) {
                // Raw values are kept here; they are normalised when the records are written.
                pendingItems.push({
                    Identifier: doc.identifier,
                    Title: doc.title,
                    Creator: doc.creator,
                    Language: doc.language,
                    PublicDate: doc.publicdate,
                    Uploader: doc.uploader,
                    Description: doc.description,
                });
            }

            displayRows.push({
                Identifier: doc.identifier,
                Title: ensureString(doc.title),
                Creator: ensureString(doc.creator),
                Language: ensureString(doc.language),
                PublicDate: ensureString(doc.publicdate),
                Uploader: ensureString(doc.uploader),
                Description: ensureString(doc.description),
                ItemURL: `https://archive.org/details/${doc.identifier}`,
                ExistsInAirtable: alreadyStored ? "Yes" : "No"
            });
        }

        output.table(displayRows);

        if (pendingItems.length === 0) {
            output.text('No new records to add to Airtable.');
            return;
        }

        await addNewRecords(table, pendingItems);
        output.text(`Added ${pendingItems.length} new records to Airtable.`);
    } catch (error) {
        output.text(`Error: ${error.message}`);
    }
}
// Entry point: run the workflow and wait for it to finish (main reports its own errors via output.text).
await main();