Skip to content

Commit

Permalink
feat: add trigram matching for the search pre-query
Browse files Browse the repository at this point in the history
  • Loading branch information
juanmahidalgo committed Sep 27, 2023
1 parent e2e2ca6 commit 9f520b9
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 30 deletions.
4 changes: 0 additions & 4 deletions src/ports/catalog/component.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ export function createCatalogComponent(options: {
filters.category
)
)
console.log('filteredItemsById: ', filteredItemsById)
filters.ids = [
...(filters.ids ?? []),
...filteredItemsById.rows.map(({ id }) => id),
Expand All @@ -67,10 +66,7 @@ export function createCatalogComponent(options: {
}
}
const query = getCatalogQuery(reducedSchemas, filters)
console.log('query: ', query.text)
console.log('query: ', query.values)
const results = await client.query<CollectionsItemDBResult>(query)
console.log('results: ', results)
catalogItems = results.rows.map((res) =>
fromCollectionsItemDbResultToCatalogItem(res, network)
)
Expand Down
9 changes: 3 additions & 6 deletions src/ports/catalog/queries.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ const WEARABLE_ITEM_TYPES = [
const MAX_ORDER_TIMESTAMP = 253378408747000 // some orders have a timestamp that can't be cast by Postgres, this is the max possible value

export function getOrderBy(filters: CatalogFilters) {
const { sortBy, sortDirection, isOnSale } = filters
const { sortBy, sortDirection, isOnSale, search } = filters
const sortByParam = sortBy ?? CatalogSortBy.NEWEST
const sortDirectionParam = sortDirection ?? CatalogSortDirection.DESC

Expand All @@ -30,7 +30,8 @@ export function getOrderBy(filters: CatalogFilters) {
return ''
}

if (filters.search) {
if (search) {
// If the filters have a search term, we need to order by the position of the item in the search results that is pre-computed and passed in the ids filter.
return SQL`ORDER BY array_position(${filters.ids}::text[], id) `
}

Expand Down Expand Up @@ -125,7 +126,6 @@ export const getEmotePlayModeWhere = (filters: CatalogFilters) => {
}

export const getSearchWhere = (filters: CatalogFilters) => {
console.log('filters.category: ', filters.category)
if (filters.category === NFTCategory.EMOTE) {
return SQL`word % ${filters.search}`
} else if (filters.category === NFTCategory.WEARABLE) {
Expand Down Expand Up @@ -310,7 +310,6 @@ const getWhereWordsJoin = (category: CatalogQueryFilters['category']) => {
return SQL` LEFT JOIN LATERAL unnest(string_to_array(metadata_wearable.name, ' ')) AS word_wearable ON TRUE
LEFT JOIN LATERAL unnest(string_to_array(metadata_emote.name, ' ')) AS word_emote ON TRUE
`
// return SQL`JOIN LATERAL unnest(string_to_array(metadata_wearable.name, ' ')) AS word ON TRUE `
}

const getMetadataJoins = (schemaVersion: string) => {
Expand Down Expand Up @@ -472,7 +471,5 @@ export const getItemIdsBySearchTextQuery = (
: SQL` ORDER BY GREATEST(similarity(word_wearable, ${search}), similarity(word_emote, ${search})) DESC;`
)

console.log('query: ', query.text)
console.log('query: ', query.values)
return query
}
30 changes: 30 additions & 0 deletions src/ports/nfts/component.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,24 @@
import nodeFetch from 'node-fetch'

Check failure on line 1 in src/ports/nfts/component.ts

View workflow job for this annotation

GitHub Actions / test

Module '"../../subgraphUtils"' has no exported member 'getMarketplaceSubgraphNameChain'.
import { NFTCategory, NFTFilters, NFTSortBy } from '@dcl/schemas'
import { IPgComponent } from '@well-known-components/pg-component'
import { ISubgraphComponent } from '@well-known-components/thegraph-component'
import { INFTsComponent, NFTResult } from './types'
import {
getByTokenIdQuery,
getFetchOneQuery,
getFetchQuery,
getFuzzySearchQueryForENS,
getQueryVariables,
} from './utils'
import { getMarketplaceChainId } from '../../logic/chainIds'
import {
getLatestSubgraphSchema,
getMarketplaceSubgraphNameChain,
} from '../../subgraphUtils'

export function createNFTComponent<T extends { id: string }>(options: {
subgraph: ISubgraphComponent
db?: IPgComponent
listsServer?: string
fragmentName: string
getFragment: () => string
Expand All @@ -22,6 +30,7 @@ export function createNFTComponent<T extends { id: string }>(options: {
}): INFTsComponent {
const {
subgraph,
db,
fragmentName,
getFragment,
getSortByProp,
Expand Down Expand Up @@ -89,6 +98,27 @@ export function createNFTComponent<T extends { id: string }>(options: {
)
}

if (options.category === NFTCategory.ENS && options.search && db) {
  // ENS search pre-query: resolve the search term to a list of ids in the
  // database, then hand those ids to the regular fetch in place of the raw
  // search text. This is best-effort: on any failure `options` is left
  // untouched, so the fetch below falls back to using `options.search` as is.
  try {
    const client = await db.getPool().connect()
    try {
      const schemaName = await client.query<{
        entity_schema: string
      }>(
        getLatestSubgraphSchema(
          getMarketplaceSubgraphNameChain(getMarketplaceChainId())
        )
      )
      // No schema row means there is nothing to query against; keep the
      // original search filter instead of failing on `rows[0]`.
      const schema = schemaName.rows[0]?.entity_schema
      if (schema) {
        const ids = await client.query<{ id: string }>(
          getFuzzySearchQueryForENS(schema, options.search)
        )
        options.ids = ids.rows.map(({ id }) => id)
        options.search = undefined
      }
    } finally {
      // Always hand the connection back to the pool; without this every ENS
      // search (successful or not) would leak a pooled client.
      client.release()
    }
  } catch (error) {
    // Intentionally swallowed: the pre-query is an optimisation and must not
    // break the request. The unmodified `options` are used below.
  }
}

const fetchFragments = getFragmentFetcher(options)
const fragments = await fetchFragments()
const nfts = fragments.map((fragment) =>
Expand Down
8 changes: 7 additions & 1 deletion src/ports/nfts/utils.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import SQL from 'sql-template-strings'
import { EmotePlayMode, NFTCategory, NFTFilters, NFTSortBy } from '@dcl/schemas'
import { getGenderFilterQuery } from '../utils'
import { QueryVariables } from './types'
Expand Down Expand Up @@ -201,7 +202,6 @@ export function getFetchQuery(
let wrapWhere = false

if (bannedNames.length) {
console.log('bannedNames inside getFetchQuery: ', bannedNames);
where.push(
`name_not_in: [${bannedNames.map((name) => `"${name}"`).join(', ')}]`
)
Expand Down Expand Up @@ -355,3 +355,9 @@ export function getByTokenIdQuery(
/**
 * Builds the identifier of an NFT from its contract address and token id.
 *
 * @param contractAddress - Address of the contract the token belongs to.
 * @param tokenId - Id of the token inside that contract.
 * @returns Both values joined by a single dash: `<contractAddress>-<tokenId>`.
 */
export function getId(contractAddress: string, tokenId: string) {
  const separator = '-'
  return contractAddress + separator + tokenId
}

/**
 * Builds the query that selects the ids of the rows in `<schema>.ens_active`
 * whose `subdomain` matches the search term through the `%` operator
 * (presumably the pg_trgm similarity operator — confirm the extension is
 * enabled on the target database).
 *
 * @param schema - Name of the schema that holds the `ens_active` relation. It
 * is appended to the statement as raw SQL text, NOT as a bound parameter, so
 * it must come from a trusted source.
 * @param searchTerm - Text to match against; sent as a bound query parameter.
 * @returns The SQL statement, ready to be passed to the database client.
 */
export function getFuzzySearchQueryForENS(schema: string, searchTerm: string) {
  const statement = SQL`SELECT id from `
  // `append` mutates the statement in place: first the raw schema name, then
  // the parameterised remainder of the query.
  statement.append(schema)
  statement.append(SQL`.ens_active WHERE subdomain % ${searchTerm}`)
  return statement
}
73 changes: 66 additions & 7 deletions src/tests/ports/catalog-queries.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,21 @@ test('catalog utils', () => {
)
})
})
describe('when the `search` filter is passed in the filter object', () => {
let ids: string[]
let search: string
beforeEach(() => {
ids = ['anId', 'anotherId']
search = 'a search string'
})
it('should ORDER BY the ids order in the array', () => {
const orderBy = getOrderBy({ search, ids }) as SQLStatement
expect(orderBy.text).toContain(
`ORDER BY array_position($1::text[], id) `
)
expect(orderBy.values).toContain(ids)
})
})
})

describe('when adding the catalog "LIMIT" and "OFFSET" statements to the query', () => {
Expand Down Expand Up @@ -487,17 +502,61 @@ test('catalog utils', () => {
describe('and passing the "search" filter', () => {
let search: string
let schema: string
let category: NFTCategory
beforeEach(() => {
schema = 'aSchema'
search = 'a search string'
})
it('should add the is search definition to the WHERE', () => {
expect(getItemIdsBySearchTextQuery(schema, search).text).toContain(
`items.search_text ILIKE '%' || $1 || '%'`
)
expect(
getItemIdsBySearchTextQuery(schema, search).values
).toStrictEqual([search])

describe('and there is no category', () => {
it('should both JOINs with metadata_wearable and metadata_emote and trigram matching operator', () => {
const query = getItemIdsBySearchTextQuery(schema, search, category)
expect(query.text).toContain(
`LEFT JOIN LATERAL unnest(string_to_array(metadata_wearable.name, ' ')) AS word_wearable ON TRUE `
)
expect(query.text).toContain(
`LEFT JOIN LATERAL unnest(string_to_array(metadata_emote.name, ' ')) AS word_emote ON TRUE `
)
expect(query.text).toContain(
`word_wearable % $1 OR word_emote % $2 `
)
// it appears four times `JOIN LATERAL unnest(string_to_array(metadata_emote.name, ' ')) AS word ON TRUE WHERE word % $1 ORDER BY GREATEST(similarity(word, $2))`
expect(query.values).toStrictEqual(Array(4).fill(search))
})
})

describe('and the category is Wearable', () => {
beforeEach(() => {
category = NFTCategory.WEARABLE
})
it('should add JOIN with the metadata_wearable and trigram matching operator', () => {
const query = getItemIdsBySearchTextQuery(schema, search, category)
expect(query.text).toContain(
`JOIN LATERAL unnest(string_to_array(metadata_wearable.name, ' ')) AS word ON TRUE `
)
expect(query.text).toContain(
`word % $1 `
)
// it appears twice `JOIN LATERAL unnest(string_to_array(metadata_wearable.name, ' ')) AS word ON TRUE WHERE word % $1 ORDER BY GREATEST(similarity(word, $2))`
expect(query.values).toStrictEqual(Array(2).fill(search))
})
})

describe('and the category is Emote', () => {
beforeEach(() => {
category = NFTCategory.EMOTE
})
it('should add JOIN with the metadata_emote', () => {
const query = getItemIdsBySearchTextQuery(schema, search, category)
expect(query.text).toContain(
`JOIN LATERAL unnest(string_to_array(metadata_emote.name, ' ')) AS word ON TRUE `
)
expect(query.text).toContain(
`word % $1 `
)
// it appears twice `JOIN LATERAL unnest(string_to_array(metadata_emote.name, ' ')) AS word ON TRUE WHERE word % $1 ORDER BY GREATEST(similarity(word, $2))`
expect(query.values).toStrictEqual(Array(2).fill(search))
})
})
})
})
Expand Down
43 changes: 31 additions & 12 deletions src/tests/ports/catalog.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@ import { CatalogFilters, ChainId, Network } from '@dcl/schemas'
import { IPgComponent } from '@well-known-components/pg-component'
import { createTestDbComponent, test } from '../../../src/tests/components'
import { createCatalogComponent } from '../../ports/catalog/component'
import {
getItemIdsBySearchTextQuery,
} from '../../ports/catalog/queries'
import { getItemIdsBySearchTextQuery } from '../../ports/catalog/queries'
import {
CollectionsItemDBResult,
ICatalogComponent,
Expand All @@ -14,7 +12,10 @@ import {
getCatalogQuery,
} from '../../ports/catalog/utils'
import { IFavoritesComponent, PickStats } from '../../ports/favorites/types'
import { getLatestSubgraphSchema, getSubgraphNameForNetwork } from '../../subgraphUtils'
import {
getLatestSubgraphSchema,
getSubgraphNameForNetwork,
} from '../../subgraphUtils'

const mockedDBItemResponse: CollectionsItemDBResult = {
id: '0xe42257bb4aada439179d736a64a736be0693a4ec-2',
Expand All @@ -28,7 +29,7 @@ const mockedDBItemResponse: CollectionsItemDBResult = {
name: 'Descension',
loop: false,
has_geometry: false,
has_sound: false
has_sound: false,
},
image:
'https://peer-lb.decentraland.org/lambdas/collections/contents/urn:decentraland:matic:collections-v2:0xe42257bb4aada439179d736a64a736be0693a4ec:2/thumbnail',
Expand Down Expand Up @@ -133,7 +134,7 @@ test('catalog component', function () {
name: 'Descension',
loop: false,
has_geometry: false,
has_sound: false
has_sound: false,
},
image:
'https://peer-lb.decentraland.org/lambdas/collections/contents/urn:decentraland:matic:collections-v2:0xe42257bb4aada439179d736a64a736be0693a4ec:2/thumbnail',
Expand Down Expand Up @@ -254,7 +255,10 @@ test('catalog component', function () {
expect(dbClientQueryMock.mock.calls.length).toEqual(3) // 2 for the schema name and 1 for the catalog query
expect(dbClientQueryMock.mock.calls[0][0]).toEqual(
getLatestSubgraphSchema(
getSubgraphNameForNetwork(Network.ETHEREUM, ChainId.ETHEREUM_SEPOLIA)
getSubgraphNameForNetwork(
Network.ETHEREUM,
ChainId.ETHEREUM_SEPOLIA
)
)
)
expect(dbClientQueryMock.mock.calls[1][0]).toEqual(
Expand Down Expand Up @@ -289,6 +293,7 @@ test('catalog component', function () {
filters = {
network,
search,
// ids: ['id1', 'id2'],
}

latestSubgraphSchemaResponse = {
Expand Down Expand Up @@ -321,9 +326,16 @@ test('catalog component', function () {
)
)
expect(dbClientQueryMock.mock.calls[1][0]).toEqual(
getItemIdsBySearchTextQuery(latestSchema, filters.search)
getItemIdsBySearchTextQuery(
latestSchema,
filters.search,
filters.category
)
)
// It's repeated 4 times due to this WHERE statement: `WHERE word_wearable % $1 OR word_emote % $2 ORDER BY GREATEST(similarity(word_wearable, $3), similarity(word_emote, $4)) DESC;`
expect(dbClientQueryMock.mock.calls[1][0].values).toEqual(
Array(4).fill(search)
)
expect(dbClientQueryMock.mock.calls[1][0].values).toEqual([search])
})
})

Expand All @@ -348,7 +360,7 @@ test('catalog component', function () {
rowCount: 1,
})
})
it('should use the ids returned by the search query in the main catalog query', async () => {
it('should use the ids returned by the search query in the main catalog query and be sorted by them', async () => {
expect(await catalogComponent.fetch(filters)).toEqual({
data: [
{
Expand All @@ -367,9 +379,16 @@ test('catalog component', function () {
)
)
expect(dbClientQueryMock.mock.calls[1][0]).toEqual(
getItemIdsBySearchTextQuery(latestSchema, filters.search)
getItemIdsBySearchTextQuery(
latestSchema,
filters.search,
filters.category
)
)
// It's repeated 4 times due to this WHERE statement: `WHERE word_wearable % $1 OR word_emote % $2 ORDER BY GREATEST(similarity(word_wearable, $3), similarity(word_emote, $4)) DESC;`
expect(dbClientQueryMock.mock.calls[1][0].values).toEqual(
Array(4).fill(search)
)
expect(dbClientQueryMock.mock.calls[1][0].values).toEqual([search])
expect(dbClientQueryMock.mock.calls[2][0]).toEqual(
getCatalogQuery({ [network]: latestSchema }, filters)
)
Expand Down

0 comments on commit 9f520b9

Please sign in to comment.