feat: Add support for GCS storage and stream writing to local filesystem #4

Open · wants to merge 1 commit into base: main
33 changes: 20 additions & 13 deletions README.md
@@ -6,10 +6,18 @@ Logs are output as a JSON file.

## Usage

Local storage:

```
DD_API_KEY=... DD_APP_KEY=... npx github:wegift/datadog-downloader --query '"Redeem failed"'
```

GCS storage:

```
DD_API_KEY=... DD_APP_KEY=... npx github:wegift/datadog-downloader --query '"Redeem failed"' --storage gcs --gcs-bucket-name 'test' --gcs-credential-file ./key.json
```
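
Flags can be combined; for example, to restrict the date range and choose the output filename (illustrative values):

```
DD_API_KEY=... DD_APP_KEY=... npx github:wegift/datadog-downloader --query '"Redeem failed"' --from 2023-01-01 --to 2023-02-01 --output january.json
```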

## Authentication

You will need an API key and an app key to access the DataDog API.
@@ -21,16 +29,22 @@ App keys are personal to your profile and can be generated in personal settings.
## Options

```
--query                  The filter query (aka search term). Take care when quoting on the command line; single-quote the entire query for best results.

--index                  Which index to read from, default 'main'

--from                   Start date/time, defaults to 1y ago
--to                     End date/time, omit for results up to the current time

--pageSize               How many results to download at a time, default 1000, limit 5000

--output                 Path of the JSON file to write results to, default 'fromDate-toDate.json'

--storage                Which storage to use, available options: local, gcs

--gcs-bucket-name        The destination bucket name for the gcs storage

--gcs-credential-file    Path to the credentials JSON file used to authenticate with GCS
```

Note: Date/times are parsed by the JS `Date` constructor, e.g. 2022-01-01
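
Both date-only and date-time values are accepted, since anything the `Date` constructor can parse will work; for example (illustrative values):

```
--from 2022-01-01 --to '2022-06-30T23:59:59'
```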
@@ -47,13 +61,6 @@ Copy `.env.example` to `.env` and add a valid DataDog API key and app key.
node index.mjs --query '"Redeem token failure" -@redeem_failure_reason:"Invalid token"'
```
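
The `.env` file only needs the two keys (placeholder values shown):

```
DD_API_KEY=xxxxxxxx
DD_APP_KEY=xxxxxxxx
```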

## Caveats

Logs are not streamed; they are all stored in memory and stringified / written as a single action.
I have tested with 25k logs and there were no issues; the resulting JSON file was only 100 MB, so it seems likely that
you could download 100k+ without running into memory or performance limits, but YMMV.

## Contributing

**Pull requests welcome!**
30 changes: 30 additions & 0 deletions gcsStorage.mjs
@@ -0,0 +1,30 @@
import { Storage } from "@google-cloud/storage";

// Streams log output to an object in a Google Cloud Storage bucket.
export default class GCSStorage {
    bucketName = null;
    filename = null;
    stream = null;
    storage = null;

    constructor(gcsCredentialFile, bucketName, filename) {
        this.bucketName = bucketName;
        this.filename = filename;
        this.storage = new Storage({ keyFilename: gcsCredentialFile });
    }

    init() {
        if (this.bucketName === null || this.bucketName === '') throw new Error('Missing bucket name');
        if (this.filename === null || this.filename === '') throw new Error('Missing filename');

        // Open an upload stream for the destination object in the bucket.
        const destBucket = this.storage.bucket(this.bucketName);
        const file = destBucket.file(this.filename);
        this.stream = file.createWriteStream();
    }

    write(chunk) {
        this.stream.write(chunk);
    }

    end() {
        if (this.stream) this.stream.end();
    }
}
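
A minimal sketch of how this class is driven (illustrative, not part of the PR; `./key.json`, `my-bucket`, and `logs.json` are placeholder values):

```
import GCSStorage from "./gcsStorage.mjs";

const storage = new GCSStorage("./key.json", "my-bucket", "logs.json");
storage.init();                            // opens the GCS upload stream
storage.write(JSON.stringify({ ok: 1 }));  // chunks are uploaded in order
storage.end();                             // closes the stream, finishing the upload
```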
61 changes: 46 additions & 15 deletions index.mjs
@@ -3,30 +3,39 @@
import { v2 } from "@datadog/datadog-api-client";
import chalk from "chalk";
import * as dotenv from "dotenv";
import yargs from "yargs";
import GCSStorage from "./gcsStorage.mjs";
import LocalStorage from "./localStorage.mjs";

const argv = yargs(process.argv).argv;

dotenv.config();

const configuration = v2.createConfiguration();
const apiInstance = new v2.LogsApi(configuration);

async function getLogs(apiInstance, storageClass, params) {
    let nextPage = null;
    let total = 0;
    let n = 0;

    // Open the JSON array; log items are streamed to storage as they arrive
    // instead of being accumulated in memory.
    storageClass.write('[');

    do {
        console.log(`Requesting page ${n + 1} ${nextPage ? `with cursor ${nextPage} ` : ``}`);
        const query = nextPage ? { ...params, pageCursor: nextPage } : params;
        const result = await apiInstance.listLogsGet(query);
        // Comma-separate pages; checking total (items already written) rather than
        // the page count avoids a stray comma when the first page is empty.
        if (total > 0 && result.data.length > 0) storageClass.write(',');
        for (const [logLine, logItem] of result.data.entries()) {
            storageClass.write((logLine >= 1 ? ',' : '') + JSON.stringify(logItem));
        }
        total += result.data.length;
        nextPage = result?.meta?.page?.after;
        n++;
        console.log(`${result.data.length} results (${total} total)`);
    } while (nextPage);

    // Close the JSON array.
    storageClass.write(']');
}

function oneYearAgo() {
@@ -43,23 +52,45 @@ const initialParams = {

if (!initialParams.filterQuery) {
    console.log(chalk.red("Error: No query supplied, use --query"));
    process.exit(1);
}

let storage = null;
// Default output name is built from the date range, e.g. 2022-01-01T00_00_00-2023-01-01T00_00_00.json
let filename = argv.output ? argv.output : `${initialParams.filterFrom.toJSON().slice(0, 19).replaceAll(':', '_')}-${initialParams.filterTo.toJSON().slice(0, 19).replaceAll(':', '_')}.json`;

if (argv.storage == 'gcs') {
    // yargs exposes --gcs-credential-file and --gcs-bucket-name in camelCase.
    const gcsCredentialFile = argv.gcsCredentialFile;
    const gcsBucketName = argv.gcsBucketName;

    if (!gcsBucketName) {
        console.log(chalk.red("Error: No bucket name supplied, use --gcs-bucket-name"));
        process.exit(1);
    }

    if (!gcsCredentialFile) {
        console.log(chalk.red("Error: No gcs credential file supplied, use --gcs-credential-file"));
        process.exit(1);
    }

    storage = new GCSStorage(gcsCredentialFile, gcsBucketName, filename);
} else {
    storage = new LocalStorage(filename);
}

console.log(chalk.cyan("Downloading logs:\n" + JSON.stringify(initialParams, null, 2) + "\n"));

(async function () {
    try {
        storage.init();
        try {
            await getLogs(apiInstance, storage, initialParams);
        } finally {
            // Always close the stream, even if the download fails partway through.
            storage.end();
        }
    } catch (e) {
        console.log(chalk.red(e.message));
        process.exit(1);
    }

    console.log(chalk.green("Done!"));
})();
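
Because the log items are streamed out as a single JSON array, the finished file can be read back with a plain `JSON.parse`. A sketch (assumes the file fits in memory; the filename is a placeholder):

```
import { readFileSync } from "fs";

const logs = JSON.parse(readFileSync("./my-logs.json", "utf8"));
console.log(`loaded ${logs.length} log items`);
```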
23 changes: 23 additions & 0 deletions localStorage.mjs
@@ -0,0 +1,23 @@
import * as fs from 'fs';

// Streams log output to a file on the local filesystem.
export default class LocalStorage {
    filename = null;
    stream = null;

    constructor(filename) {
        this.filename = filename;
    }

    init() {
        if (this.filename === null || this.filename === '') throw new Error('Missing filename');
        this.stream = fs.createWriteStream(this.filename, { flags: 'w' });
    }

    write(chunk) {
        this.stream.write(chunk);
    }

    end() {
        if (this.stream) this.stream.end();
    }
}
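
Both backends share the same implicit interface: `init()`, `write(chunk)`, `end()`, so a new destination only needs those three methods. A hypothetical third backend (not part of this PR), for illustration:

```
// Hypothetical sketch: stream chunks to stdout instead of a file.
export default class StdoutStorage {
    init() {}                                      // nothing to open
    write(chunk) { process.stdout.write(chunk); }  // forward each chunk
    end() {}                                       // stdout closes with the process
}
```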