Skip to content

Commit

Permalink
Merge pull request #1 from Answers-AI/bt/contentful-doc-loader
Browse files Browse the repository at this point in the history
Added Support for Contentful Delivery API as a Document loader
  • Loading branch information
bradtaylorsf authored Jan 11, 2024
2 parents e7a58f6 + 4197741 commit 73637ae
Show file tree
Hide file tree
Showing 5 changed files with 350 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import { INodeParams, INodeCredential } from '../src/Interface'

/**
 * Credential definition for the Contentful Delivery API.
 *
 * Declares the three values a user supplies: an access token (delivery or
 * preview), the space id, and a CDN host name.
 *
 * NOTE(review): `name` is spelled 'contetnfulDeliveryApi'. The Contentful
 * document loader node lists this exact spelling in its `credentialNames`,
 * so the two must be renamed together if the typo is ever corrected.
 */
class ContentfulDeliveryApi implements INodeCredential {
    label: string
    name: string
    version: number
    description: string
    inputs: INodeParams[]

    constructor() {
        // Describe each form field up front, then wire everything onto the instance.
        const tokenField: INodeParams = {
            label: 'Delivery or Preview Token',
            name: 'accessToken',
            type: 'password',
            placeholder: '<CONTENTFUL_DELIVERY_TOKEN>'
        }
        const spaceField: INodeParams = {
            label: 'Space Id',
            name: 'spaceId',
            type: 'string',
            placeholder: 'asdf1234'
        }
        const cdnField: INodeParams = {
            label: 'CDN Location',
            name: 'cdn',
            type: 'string',
            placeholder: 'cdn.contentful.com'
        }
        const helpText =
            'Refer to <a target="_blank" href="https://www.contentful.com/developers/docs/references/content-delivery-api/">official guide</a> on how to get your delivery keys in Contentful'

        this.label = 'Contentful Delivery API'
        this.name = 'contetnfulDeliveryApi'
        this.version = 1.0
        this.description = helpText
        this.inputs = [tokenField, spaceField, cdnField]
    }
}

// Expose the credential class under the `credClass` key
// (presumably the key the credential registry looks up — confirm against the loader).
module.exports = { credClass: ContentfulDeliveryApi }
306 changes: 306 additions & 0 deletions packages/components/nodes/documentloaders/Contentful/Contentful.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,306 @@
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { BaseDocumentLoader } from 'langchain/document_loaders/base'
import { Document } from 'langchain/document'
import * as contentful from 'contentful'
import { documentToPlainTextString } from '@contentful/rich-text-plain-text-renderer'
import { getCredentialData, getCredentialParam } from '../../../src/utils'

/**
 * Flow node that loads entries from a Contentful space as documents.
 *
 * The constructor declares the node's descriptor (label, credential binding
 * and input parameters); `init` reads the configured inputs, runs a
 * `ContentfulLoader`, optionally splits the result, and returns the documents.
 */
class Contentful_DocumentLoaders implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    credential: INodeParams
    inputs?: INodeParams[]

    constructor() {
        this.label = 'Contentful'
        this.name = 'contentful'
        this.version = 1.0
        this.type = 'Document'
        this.icon = 'contentful.png'
        this.category = 'Document Loaders'
        this.description = `Load data from a Contentful Space`
        this.baseClasses = [this.type]
        this.credential = {
            label: 'Connect Credential',
            name: 'credential',
            type: 'credential',
            // NOTE(review): misspelled, but it has to match the `name` declared by
            // the Contentful Delivery API credential class; rename both together.
            credentialNames: ['contetnfulDeliveryApi']
        }
        this.inputs = [
            {
                label: 'Text Splitter',
                name: 'textSplitter',
                type: 'TextSplitter',
                optional: true
            },
            {
                label: 'Content Type',
                name: 'contentType',
                type: 'string',
                placeholder: 'pageBlog',
                default: 'pageBlog',
                description: 'The content type to query'
            },
            {
                label: 'Environment Id',
                name: 'environmentId',
                type: 'string',
                placeholder: 'master',
                default: 'master',
                additionalParams: true,
                description:
                    'If your space URL looks like: https://app.contentful.com/spaces/abjv67t9l34s/environments/master-starter-v2/views/entries, master-starter-v2 is the environment id'
            },
            {
                label: 'Include Levels',
                name: 'include',
                type: 'number',
                optional: true,
                additionalParams: true,
                description: 'The number of levels to include in the response'
            },
            {
                label: 'Include All',
                name: 'includeAll',
                type: 'boolean',
                optional: true,
                additionalParams: true,
                description: 'Include all entries in the response'
            },
            {
                label: 'Limit',
                name: 'limit',
                type: 'number',
                optional: true,
                additionalParams: true,
                description: 'The maximum number of items to return; when unset, the Contentful API default applies'
            },
            {
                label: 'Search Query',
                name: 'metadata',
                type: 'json',
                optional: true,
                additionalParams: true
            }
        ]
    }

    /**
     * Loads documents from Contentful using the node's configured inputs.
     *
     * @param nodeData - Node configuration; `inputs` and `credential` are read from it.
     * @param _ - Unused.
     * @param options - Passed through to `getCredentialData`.
     * @returns The loaded (and, when a text splitter is connected, split) documents.
     *   When a "Search Query" value is configured, its keys are also merged into
     *   each document's metadata.
     * @throws SyntaxError when the "Search Query" input is a string that is not valid JSON.
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        const credentialData = await getCredentialData(nodeData.credential ?? '', options)

        const environmentId = nodeData.inputs?.environmentId as string
        const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
        const metadata = nodeData.inputs?.metadata
        const include = nodeData.inputs?.include as number
        const limit = nodeData.inputs?.limit as number
        const contentType = nodeData.inputs?.contentType as string
        const includeAll = nodeData.inputs?.includeAll as boolean

        const accessToken = getCredentialParam('accessToken', credentialData, nodeData)
        const spaceId = getCredentialParam('spaceId', credentialData, nodeData)

        // Parse the JSON input once, before any network call, so malformed input
        // fails fast instead of after the entries have already been fetched.
        let parsedMetadata: ICommonObject | undefined
        if (metadata) {
            parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
        }

        const contentfulOptions: ContentfulLoaderParams = {
            spaceId,
            environmentId,
            accessToken,
            include,
            includeAll,
            limit,
            // Give the loader its own copy: it adds query keys (content_type, limit,
            // include, skip) to the object it receives, and those must not end up
            // in the document metadata merged below.
            metadata: parsedMetadata ? { ...parsedMetadata } : undefined,
            contentType
        }

        const loader = new ContentfulLoader(contentfulOptions)

        const docs = textSplitter ? await loader.loadAndSplit(textSplitter) : await loader.load()

        if (!parsedMetadata) {
            return docs
        }

        // Overlay the user-supplied keys on top of each document's own metadata.
        return docs.map((doc) => ({
            ...doc,
            metadata: {
                ...doc.metadata,
                ...parsedMetadata
            }
        }))
    }
}

/**
 * Constructor options for ContentfulLoader.
 */
interface ContentfulLoaderParams {
// Id of the space; passed to the Contentful client and used in each entry's source URL.
spaceId: string
// Environment within the space; passed to the Contentful client and used in each entry's source URL.
environmentId: string
// Token handed to the Contentful client.
accessToken: string
// Copied to the query's `include` key when truthy.
include?: number
// Copied to the query's `limit` key when truthy and `includeAll` is not set.
limit?: number
// Copied to the query's `content_type` key and recorded in each document's metadata.
contentType: string
// When truthy, `limit` is not applied and the loader requests further pages after the first.
includeAll?: boolean
// Base query: either an object or a JSON string (a string that fails to parse is replaced by an empty object).
metadata?: any
}

/**
 * The parts of a `client.getEntries` result that the loader reads.
 */
interface ContentfulLoaderResponse {
// Entries returned for this request.
items: ContentfulEntry[]
// Offset reported by the response; used with `limit` to compute the next request's `skip`.
skip?: number
// Page size reported by the response.
limit?: number
// Total reported by the response (presumably the count of all matching entries — confirm against the API docs).
total?: number
}

/**
 * A single entry as consumed by the loader.
 */
interface ContentfulEntry {
// System properties; the loader reads `sys.id` for the source URL and `entryId`.
sys: ICommonObject
// Field name → field value; string and rich-text values are rendered into the page content.
fields: ICommonObject
}

/**
 * Open-ended map of field name to field value.
 */
interface IField {
[key: string]: any
}

/**
 * Shape accepted by `ContentfulLoader.processContentObject`; only `fields` is read there.
 */
interface IContentObject {
fields: IField
sys: any // Adjust this type according to your sys object structure
}

/**
 * Document loader that queries a Contentful space and turns each returned
 * entry into one document.
 *
 * The query is built from the `metadata` option plus `limit`, `include` and
 * `contentType`. With `includeAll`, further pages are requested until the
 * result set is exhausted.
 */
class ContentfulLoader extends BaseDocumentLoader {
    public readonly spaceId: string

    public readonly environmentId: string

    public readonly accessToken: string

    public readonly textField?: string

    public readonly include?: number

    public readonly limit?: number

    public readonly contentType?: string

    public readonly includeAll?: boolean

    public readonly metadata?: ICommonObject

    /**
     * @param params - Loader options. `metadata` may be an object or a JSON
     *   string; a string that cannot be parsed is logged and replaced by `{}`.
     */
    constructor({ spaceId, environmentId, accessToken, metadata = {}, include, limit, contentType, includeAll }: ContentfulLoaderParams) {
        super()
        this.spaceId = spaceId
        this.environmentId = environmentId
        this.accessToken = accessToken
        this.contentType = contentType
        this.includeAll = includeAll
        this.include = include
        this.limit = limit

        // Accept a non-empty JSON string or an object; anything else
        // (including null and unparsable strings) falls back to an empty query.
        if (typeof metadata === 'string' && metadata.trim() !== '') {
            try {
                this.metadata = JSON.parse(metadata)
            } catch (error) {
                console.warn('Failed to parse metadata:', error)
                this.metadata = {}
            }
        } else if (metadata !== null && typeof metadata === 'object') {
            this.metadata = metadata
        } else {
            this.metadata = {}
        }
    }

    /**
     * Fetches the entries and converts them to documents.
     *
     * @returns One document per entry returned by the query.
     */
    public async load(): Promise<Document[]> {
        return this.runQuery()
    }

    /**
     * Renders an entry's fields as `name: value` paragraphs.
     *
     * Only plain strings and rich-text documents are rendered; every other
     * field type (numbers, references, assets, null, ...) contributes nothing.
     */
    private processContentObject(contentObject: IContentObject): string {
        const { fields } = contentObject

        return Object.entries(fields)
            .map(([fieldName, fieldValue]) => {
                // Rich text field. The null check matters: typeof null is 'object',
                // and reading `.nodeType` from null would throw.
                if (fieldValue !== null && typeof fieldValue === 'object' && fieldValue.nodeType === 'document') {
                    const plainText = documentToPlainTextString(fieldValue) // TODO: add support for embedded assets and entries
                    return `${fieldName}: ${plainText}\n\n`
                }
                // Plain string field
                if (typeof fieldValue === 'string') {
                    return `${fieldName}: ${fieldValue}\n\n`
                }

                // TODO: Handle references to other entries and assets
                // TODO: Return empty for now, handle other types as needed
                return ``
            })
            .join('')
    }

    /**
     * Builds a document for one entry: rendered fields as content, plus
     * metadata pointing back to the entry in the Contentful web app.
     */
    private createDocumentFromEntry(entry: ContentfulEntry): Document {
        const textContent = this.processContentObject(entry)
        const entryUrl = `https://app.contentful.com/spaces/${this.spaceId}/environments/${this.environmentId}/entries/${entry.sys.id}`

        return new Document({
            pageContent: textContent,
            metadata: {
                contentType: this.contentType,
                source: entryUrl,
                entryId: entry.sys.id,
                doctype: 'contentfulEntry'
            }
        })
    }

    /**
     * Executes the query, following pagination when `includeAll` is set.
     *
     * @returns Documents for every entry collected.
     */
    private async runQuery(): Promise<Document[]> {
        // Build the query on a copy so neither `this.metadata` nor the object the
        // caller passed in picks up `limit`/`include`/`content_type`/`skip`.
        const query: ICommonObject = { ...(this.metadata ?? {}) }

        if (this.limit && !this.includeAll) {
            query.limit = this.limit
        }
        if (this.include) {
            query.include = this.include
        }
        if (this.contentType) {
            query.content_type = this.contentType
        }

        const client = contentful.createClient({
            space: this.spaceId,
            environment: this.environmentId,
            accessToken: this.accessToken
        })

        const entries: ContentfulEntry[] = []
        let hasMore = false

        do {
            const page: ContentfulLoaderResponse = await client.getEntries(query)
            entries.push(...page.items)

            const nextSkip = (page.skip || 0) + (page.limit || page.items.length || 1)
            query.skip = nextSkip

            // `total` is the size of the whole result set, not what is left, so it
            // never drops to 0. Stop on an empty page or once the next offset has
            // moved past the total.
            hasMore = page.items.length > 0 && (page.total === undefined || nextSkip < page.total)
        } while (this.includeAll && hasMore)

        return entries.map((entry) => this.createDocumentFromEntry(entry))
    }
}

// Expose the node class under the `nodeClass` key
// (presumably the key the node registry looks up — confirm against the loader).
module.exports = {
nodeClass: Contentful_DocumentLoaders
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 2 additions & 1 deletion packages/components/nodes/vectorstores/Pinecone/Pinecone.ts
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,9 @@ class Pinecone_VectorStores implements INode {
}

if (pineconeNamespace) obj.namespace = pineconeNamespace
let metadatafilter = {}
if (pineconeMetadataFilter) {
const metadatafilter = typeof pineconeMetadataFilter === 'object' ? pineconeMetadataFilter : JSON.parse(pineconeMetadataFilter)
metadatafilter = typeof pineconeMetadataFilter === 'object' ? pineconeMetadataFilter : JSON.parse(pineconeMetadataFilter)
obj.filter = metadatafilter
}

Expand Down
3 changes: 3 additions & 0 deletions packages/components/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"@aws-sdk/client-bedrock-runtime": "3.422.0",
"@aws-sdk/client-dynamodb": "^3.360.0",
"@aws-sdk/client-s3": "^3.427.0",
"@contentful/rich-text-plain-text-renderer": "^16.0.7",
"@dqbd/tiktoken": "^1.0.7",
"@elastic/elasticsearch": "^8.9.0",
"@getzep/zep-js": "^0.9.0",
Expand All @@ -41,6 +42,8 @@
"cheerio": "^1.0.0-rc.12",
"chromadb": "^1.5.11",
"cohere-ai": "^6.2.0",
"contentful": "^10.6.14",
"contentful-management": "^11.7.3",
"d3-dsv": "2",
"dotenv": "^16.0.0",
"express": "^4.17.3",
Expand Down

0 comments on commit 73637ae

Please sign in to comment.