Skip to content

Commit

Permalink
Merge pull request #620 from yashkohli88/yk/add-default-headers-centr…
Browse files Browse the repository at this point in the history
…ally

Update fetch file to centralize default headers
  • Loading branch information
qtomlinson authored Nov 5, 2024
2 parents a1d12ac + 178b4a3 commit 59e652b
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 23 deletions.
6 changes: 5 additions & 1 deletion lib/fetch.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@

const axios = require('axios')

const defaultHeaders = Object.freeze({ 'User-Agent': 'clearlydefined.io crawler (clearlydefined@outlook.com)' })

axios.defaults.headers = defaultHeaders

function buildRequestOptions(request) {
let responseType = 'text'
if (request.json) {
Expand Down Expand Up @@ -45,4 +49,4 @@ function withDefaults(opts) {
return request => callFetch(request, axiosInstance)
}

module.exports = { callFetch, withDefaults }
module.exports = { callFetch, withDefaults, defaultHeaders }
4 changes: 1 addition & 3 deletions providers/fetch/cratesioFetch.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ class CratesioFetch extends AbstractFetch {
try {
registryData = await request({
url: `https://crates.io/api/v1/crates/${spec.name}`,
json: true,
headers: { 'User-Agent': 'clearlydefined.io crawler (clearlydefined@outlook.com)' }
json: true
})
} catch (exception) {
if (exception.statusCode !== 404) throw exception
Expand All @@ -72,7 +71,6 @@ class CratesioFetch extends AbstractFetch {
url: `https://crates.io${version.dl_path}`,
encoding: null,
headers: {
'User-Agent': 'clearlydefined.io crawler (clearlydefined@outlook.com)',
Accept: 'text/html'
}
})
Expand Down
8 changes: 3 additions & 5 deletions providers/fetch/mavenBasedFetch.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// SPDX-License-Identifier: MIT

const AbstractFetch = require('./abstractFetch')
const { withDefaults } = require('../../lib/fetch')
const { callFetch, defaultHeaders } = require('../../lib/fetch')
const nodeRequest = require('request')
const { clone, get } = require('lodash')
const { promisify } = require('util')
Expand All @@ -23,14 +23,12 @@ const extensionMap = {
jar: '.jar'
}

const defaultHeaders = { headers: { 'User-Agent': 'clearlydefined.io crawler (clearlydefined@outlook.com)' } }

class MavenBasedFetch extends AbstractFetch {
constructor(providerMap, options) {
super(options)
this._providerMap = { ...providerMap }
this._handleRequestPromise = options.requestPromise || withDefaults(defaultHeaders)
this._handleRequestStream = options.requestStream || nodeRequest.defaults(defaultHeaders).get
this._handleRequestPromise = options.requestPromise || callFetch
this._handleRequestStream = options.requestStream || nodeRequest.defaults({ headers: defaultHeaders }).get
}

canHandle(request) {
Expand Down
5 changes: 2 additions & 3 deletions providers/fetch/packagistFetch.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const nodeRequest = require('request')
const { promisify } = require('util')
const readdir = promisify(fs.readdir)
const FetchResult = require('../../lib/fetchResult')
const { defaultHeaders } = require('../../lib/fetch')

const providerMap = {
packagist: 'https://repo.packagist.org/'
Expand Down Expand Up @@ -62,9 +63,7 @@ class PackagistFetch extends AbstractFetch {
return new Promise((resolve, reject) => {
const options = {
url: distUrl,
headers: {
'User-Agent': 'clearlydefined.io crawler (clearlydefined@outlook.com)'
}
headers: defaultHeaders
}
nodeRequest
.get(options, (error, response) => {
Expand Down
46 changes: 42 additions & 4 deletions test/unit/lib/fetchTests.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
const { fail } = require('assert')
const { callFetch, withDefaults } = require('../../../lib/fetch')
const { callFetch, withDefaults, defaultHeaders } = require('../../../lib/fetch')
const { expect } = require('chai')
const fs = require('fs')
const mockttp = require('mockttp')

function checkDefaultHeaders(headers) {
for (const [key, value] of Object.entries(defaultHeaders)) {
expect(headers).to.have.property(key.toLowerCase()).that.equals(value)
}
}
describe('CallFetch', () => {
describe('with mock server', () => {
const mockServer = mockttp.getLocal()
Expand All @@ -23,6 +28,37 @@ describe('CallFetch', () => {
expect(response).to.be.deep.equal(JSON.parse(expected))
})

it('checks if the default header user-agent and other header is present in crate components', async () => {
const path = '/crates.io/api/v1/crates/name/1.0.0/download'
const endpointMock = await mockServer.forGet(path).thenReply(200, 'success')

await callFetch({
url: mockServer.urlFor(path),
method: 'GET',
json: true,
encoding: null,
headers: {
Accept: 'text/html'
}
})
const requests = await endpointMock.getSeenRequests()
checkDefaultHeaders(requests[0].headers)
expect(requests[0].headers).to.include({ accept: 'text/html' })
})

it('checks if the default header user-agent is present in crate components', async () => {
const path = '/crates.io/api/v1/crates/name'
const endpointMock = await mockServer.forGet(path).thenReply(200, 'success')

await callFetch({
url: mockServer.urlFor(path),
method: 'GET',
json: true
})
const requests = await endpointMock.getSeenRequests()
checkDefaultHeaders(requests[0].headers)
})

it('checks if the full response is fetched', async () => {
const path = '/registry.npmjs.com/redis/0.1.0'
const expected = fs.readFileSync('test/fixtures/fetch/redis-0.1.0.json')
Expand Down Expand Up @@ -87,17 +123,17 @@ describe('CallFetch', () => {
const url = mockServer.urlFor(path)
const endpointMock = await mockServer.forGet(path).thenReply(200)

const defaultOptions = { headers: { 'user-agent': 'clearlydefined.io crawler (clearlydefined@outlook.com)' } }
const defaultOptions = { headers: defaultHeaders }
const requestWithDefaults = withDefaults(defaultOptions)
await requestWithDefaults({ url })
await requestWithDefaults({ url })

const requests = await endpointMock.getSeenRequests()
expect(requests.length).to.equal(2)
expect(requests[0].url).to.equal(url)
expect(requests[0].headers).to.include(defaultOptions.headers)
checkDefaultHeaders(requests[0].headers)
expect(requests[1].url).to.equal(url)
expect(requests[1].headers).to.include(defaultOptions.headers)
checkDefaultHeaders(requests[1].headers)
})

it('checks if the response is text with uri option in GET request', async () => {
Expand Down Expand Up @@ -129,6 +165,8 @@ describe('CallFetch', () => {
const json = await requests[0].body.getJson()
expect(json).to.deep.equal({ test: 'test' })
expect(requests[0].headers).to.include({ 'x-crawler': 'secret' })
//Check for the default header value
checkDefaultHeaders(requests[0].headers)
})

describe('test simple', () => {
Expand Down
31 changes: 24 additions & 7 deletions test/unit/providers/fetch/mavenBasedFetchTests.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,15 @@ const { expect } = require('chai')
const MavenBasedFetch = require('../../../../providers/fetch/mavenBasedFetch')
const mockttp = require('mockttp')
const sinon = require('sinon')
const { defaultHeaders } = require('../../../../lib/fetch')
const Request = require('../../../../ghcrawler').request

function checkDefaultHeaders(headers) {
for (const [key, value] of Object.entries(defaultHeaders)) {
expect(headers).to.have.property(key.toLowerCase()).that.equals(value)
}
}

describe('MavenBasedFetch', () => {
describe('find contained file stat', () => {
it('file contained in root dir', async () => {
Expand All @@ -20,24 +27,34 @@ describe('MavenBasedFetch', () => {
})
})

describe('Integration test for component not found', function () {
describe('Integration test', function () {
const path = '/remotecontent?filepath='
const mockServer = mockttp.getLocal()
beforeEach(async () => await mockServer.start())
afterEach(async () => await mockServer.stop())

it('should handle maven components not found', async () => {
const handler = new MavenBasedFetch(
let endpointMock
let handler
beforeEach(async () => {
await mockServer.start()
handler = new MavenBasedFetch(
{
mavencentral: mockServer.urlFor(path)
},
{ logger: { log: sinon.stub() } }
)
await mockServer.forAnyRequest().thenReply(404)
endpointMock = await mockServer.forAnyRequest().thenReply(404)
})
afterEach(async () => await mockServer.stop())

it('should handle maven components not found', async () => {
const request = await handler.handle(
new Request('test', 'cd:/maven/mavencentral/org.apache.httpcomponents/httpcore/4.')
)
expect(request.processControl).to.be.equal('skip')
})

it('should check for default header in any request', async () => {
await handler.handle(new Request('test', 'cd:/maven/mavencentral/org.apache.httpcomponents/httpcore/4.4.16'))
const requests = await endpointMock.getSeenRequests()
checkDefaultHeaders(requests[0].headers)
})
})
})

0 comments on commit 59e652b

Please sign in to comment.