From 9f520b94aaebdbf9ac3310d5e0764e1539f46f5a Mon Sep 17 00:00:00 2001 From: Juanma Hidalgo Date: Wed, 27 Sep 2023 19:09:38 +0200 Subject: [PATCH] feat: add trigram matching for the search pre-query --- src/ports/catalog/component.ts | 4 -- src/ports/catalog/queries.ts | 9 +-- src/ports/nfts/component.ts | 30 ++++++++++ src/ports/nfts/utils.ts | 8 ++- src/tests/ports/catalog-queries.spec.ts | 73 ++++++++++++++++++++++--- src/tests/ports/catalog.spec.ts | 43 +++++++++++---- 6 files changed, 137 insertions(+), 30 deletions(-) diff --git a/src/ports/catalog/component.ts b/src/ports/catalog/component.ts index 4d05d09..8ee2b7f 100644 --- a/src/ports/catalog/component.ts +++ b/src/ports/catalog/component.ts @@ -54,7 +54,6 @@ export function createCatalogComponent(options: { filters.category ) ) - console.log('filteredItemsById: ', filteredItemsById) filters.ids = [ ...(filters.ids ?? []), ...filteredItemsById.rows.map(({ id }) => id), @@ -67,10 +66,7 @@ export function createCatalogComponent(options: { } } const query = getCatalogQuery(reducedSchemas, filters) - console.log('query: ', query.text) - console.log('query: ', query.values) const results = await client.query(query) - console.log('results: ', results) catalogItems = results.rows.map((res) => fromCollectionsItemDbResultToCatalogItem(res, network) ) diff --git a/src/ports/catalog/queries.ts b/src/ports/catalog/queries.ts index 4162e6a..c6d37aa 100644 --- a/src/ports/catalog/queries.ts +++ b/src/ports/catalog/queries.ts @@ -21,7 +21,7 @@ const WEARABLE_ITEM_TYPES = [ const MAX_ORDER_TIMESTAMP = 253378408747000 // some orders have a timestmap that can't be cast by Postgres, this is the max possible value export function getOrderBy(filters: CatalogFilters) { - const { sortBy, sortDirection, isOnSale } = filters + const { sortBy, sortDirection, isOnSale, search } = filters const sortByParam = sortBy ?? CatalogSortBy.NEWEST const sortDirectionParam = sortDirection ?? CatalogSortDirection.DESC @@ -30,7 +30,8 @@ export function getOrderBy(filters: CatalogFilters) { return '' } - if (filters.search) { + if (search) { + // If the filters have a search term, we need to order by the position of the item in the search results that is pre-computed and passed in the ids filter. return SQL`ORDER BY array_position(${filters.ids}::text[], id) ` } @@ -125,7 +126,6 @@ export const getEmotePlayModeWhere = (filters: CatalogFilters) => { } export const getSearchWhere = (filters: CatalogFilters) => { - console.log('filters.category: ', filters.category) if (filters.category === NFTCategory.EMOTE) { return SQL`word % ${filters.search}` } else if (filters.category === NFTCategory.WEARABLE) { @@ -310,7 +310,6 @@ const getWhereWordsJoin = (category: CatalogQueryFilters['category']) => { return SQL` LEFT JOIN LATERAL unnest(string_to_array(metadata_wearable.name, ' ')) AS word_wearable ON TRUE LEFT JOIN LATERAL unnest(string_to_array(metadata_emote.name, ' ')) AS word_emote ON TRUE ` - // return SQL`JOIN LATERAL unnest(string_to_array(metadata_wearable.name, ' ')) AS word ON TRUE ` } const getMetadataJoins = (schemaVersion: string) => { @@ -472,7 +471,5 @@ export const getItemIdsBySearchTextQuery = ( : SQL` ORDER BY GREATEST(similarity(word_wearable, ${search}), similarity(word_emote, ${search})) DESC;` ) - console.log('query: ', query.text) - console.log('query: ', query.values) return query } diff --git a/src/ports/nfts/component.ts b/src/ports/nfts/component.ts index 4e92efb..8d8dbe2 100644 --- a/src/ports/nfts/component.ts +++ b/src/ports/nfts/component.ts @@ -1,16 +1,24 @@ import nodeFetch from 'node-fetch' import { NFTCategory, NFTFilters, NFTSortBy } from '@dcl/schemas' +import { IPgComponent } from '@well-known-components/pg-component' import { ISubgraphComponent } from '@well-known-components/thegraph-component' import { INFTsComponent, NFTResult } from './types' import { getByTokenIdQuery, getFetchOneQuery, getFetchQuery, + getFuzzySearchQueryForENS, getQueryVariables, } from './utils' +import { getMarketplaceChainId } from '../../logic/chainIds' +import { + getLatestSubgraphSchema, + getMarketplaceSubgraphNameChain, +} from '../../subgraphUtils' export function createNFTComponent(options: { subgraph: ISubgraphComponent + db?: IPgComponent listsServer?: string fragmentName: string getFragment: () => string @@ -22,6 +30,7 @@ export function createNFTComponent(options: { }): INFTsComponent { const { subgraph, + db, fragmentName, getFragment, getSortByProp, @@ -89,6 +98,27 @@ export function createNFTComponent(options: { ) } + if (options.category === NFTCategory.ENS && options.search && db) { + try { + const client = await db.getPool().connect() + const schemaName = await client.query<{ + entity_schema: string + }>( + getLatestSubgraphSchema( + getMarketplaceSubgraphNameChain(getMarketplaceChainId()) + ) + ) + const ids = await client.query<{ id: string }>( + getFuzzySearchQueryForENS( + schemaName.rows[0].entity_schema, + options.search + ) + ) + options.ids = ids.rows.map(({ id }) => id) + options.search = undefined + } catch (error) {} + } + const fetchFragments = getFragmentFetcher(options) const fragments = await fetchFragments() const nfts = fragments.map((fragment) => diff --git a/src/ports/nfts/utils.ts b/src/ports/nfts/utils.ts index 4221d60..25ecabd 100644 --- a/src/ports/nfts/utils.ts +++ b/src/ports/nfts/utils.ts @@ -1,3 +1,4 @@ +import SQL from 'sql-template-strings' import { EmotePlayMode, NFTCategory, NFTFilters, NFTSortBy } from '@dcl/schemas' import { getGenderFilterQuery } from '../utils' import { QueryVariables } from './types' @@ -201,7 +202,6 @@ export function getFetchQuery( let wrapWhere = false if (bannedNames.length) { - console.log('bannedNames inside getFetchQuery: ', bannedNames); where.push( `name_not_in: [${bannedNames.map((name) => `"${name}"`).join(', ')}]` ) @@ -355,3 +355,9 @@ export function getByTokenIdQuery( export function getId(contractAddress: string, tokenId: string) { return `${contractAddress}-${tokenId}` } + +export function getFuzzySearchQueryForENS(schema: string, searchTerm: string) { + return SQL`SELECT id from ` + .append(schema) + .append(SQL`.ens_active WHERE subdomain % ${searchTerm}`) +} diff --git a/src/tests/ports/catalog-queries.spec.ts b/src/tests/ports/catalog-queries.spec.ts index cdcad91..ce5de7a 100644 --- a/src/tests/ports/catalog-queries.spec.ts +++ b/src/tests/ports/catalog-queries.spec.ts @@ -405,6 +405,21 @@ test('catalog utils', () => { ) }) }) + describe('when the `search` filter is passed in the filter object', () => { + let ids: string[] + let search: string + beforeEach(() => { + ids = ['anId', 'anotherId'] + search = 'a search string' + }) + it('should ORDER BY the ids order in the array', () => { + const orderBy = getOrderBy({ search, ids }) as SQLStatement + expect(orderBy.text).toContain( + `ORDER BY array_position($1::text[], id) ` + ) + expect(orderBy.values).toContain(ids) + }) + }) }) describe('when adding the catalog "LIMIT" and "OFFSET" statements to the query', () => { @@ -487,17 +502,61 @@ test('catalog utils', () => { describe('and passing the "search" filter', () => { let search: string let schema: string + let category: NFTCategory beforeEach(() => { schema = 'aSchema' search = 'a search string' }) - it('should add the is search definition to the WHERE', () => { - expect(getItemIdsBySearchTextQuery(schema, search).text).toContain( - `items.search_text ILIKE '%' || $1 || '%'` - ) - expect( - getItemIdsBySearchTextQuery(schema, search).values - ).toStrictEqual([search]) + + describe('and there is no category', () => { + it('should both JOINs with metadata_wearable and metadata_emote and trigram matching operator', () => { + const query = getItemIdsBySearchTextQuery(schema, search, category) + expect(query.text).toContain( + `LEFT JOIN LATERAL unnest(string_to_array(metadata_wearable.name, ' ')) AS word_wearable ON TRUE ` + ) + expect(query.text).toContain( + `LEFT JOIN LATERAL unnest(string_to_array(metadata_emote.name, ' ')) AS word_emote ON TRUE ` + ) + expect(query.text).toContain( + `word_wearable % $1 OR word_emote % $2 ` + ) + // it appears four times `JOIN LATERAL unnest(string_to_array(metadata_emote.name, ' ')) AS word ON TRUE WHERE word % $1 ORDER BY GREATEST(similarity(word, $2))` + expect(query.values).toStrictEqual(Array(4).fill(search)) + }) + }) + + describe('and the category is Wearable', () => { + beforeEach(() => { + category = NFTCategory.WEARABLE + }) + it('should add JOIN with the metadata_wearable and trigram matching operator', () => { + const query = getItemIdsBySearchTextQuery(schema, search, category) + expect(query.text).toContain( + `JOIN LATERAL unnest(string_to_array(metadata_wearable.name, ' ')) AS word ON TRUE ` + ) + expect(query.text).toContain( + `word % $1 ` + ) + // it appears twice `JOIN LATERAL unnest(string_to_array(metadata_wearable.name, ' ')) AS word ON TRUE WHERE word % $1 ORDER BY GREATEST(similarity(word, $2))` + expect(query.values).toStrictEqual(Array(2).fill(search)) + }) + }) + + describe('and the category is Emote', () => { + beforeEach(() => { + category = NFTCategory.EMOTE + }) + it('should add JOIN with the metadata_emote', () => { + const query = getItemIdsBySearchTextQuery(schema, search, category) + expect(query.text).toContain( + `JOIN LATERAL unnest(string_to_array(metadata_emote.name, ' ')) AS word ON TRUE ` + ) + expect(query.text).toContain( + `word % $1 ` + ) + // it appears twice `JOIN LATERAL unnest(string_to_array(metadata_emote.name, ' ')) AS word ON TRUE WHERE word % $1 ORDER BY GREATEST(similarity(word, $2))` + expect(query.values).toStrictEqual(Array(2).fill(search)) + }) }) }) }) diff --git a/src/tests/ports/catalog.spec.ts b/src/tests/ports/catalog.spec.ts index 81f6c4e..cf5647c 100644 --- a/src/tests/ports/catalog.spec.ts +++ b/src/tests/ports/catalog.spec.ts @@ -2,9 +2,7 @@ import { CatalogFilters, ChainId, Network } from '@dcl/schemas' import { IPgComponent } from '@well-known-components/pg-component' import { createTestDbComponent, test } from '../../../src/tests/components' import { createCatalogComponent } from '../../ports/catalog/component' -import { - getItemIdsBySearchTextQuery, -} from '../../ports/catalog/queries' +import { getItemIdsBySearchTextQuery } from '../../ports/catalog/queries' import { CollectionsItemDBResult, ICatalogComponent, @@ -14,7 +12,10 @@ import { getCatalogQuery, } from '../../ports/catalog/utils' import { IFavoritesComponent, PickStats } from '../../ports/favorites/types' -import { getLatestSubgraphSchema, getSubgraphNameForNetwork } from '../../subgraphUtils' +import { + getLatestSubgraphSchema, + getSubgraphNameForNetwork, +} from '../../subgraphUtils' const mockedDBItemResponse: CollectionsItemDBResult = { id: '0xe42257bb4aada439179d736a64a736be0693a4ec-2', @@ -28,7 +29,7 @@ const mockedDBItemResponse: CollectionsItemDBResult = { name: 'Descension', loop: false, has_geometry: false, - has_sound: false + has_sound: false, }, image: 'https://peer-lb.decentraland.org/lambdas/collections/contents/urn:decentraland:matic:collections-v2:0xe42257bb4aada439179d736a64a736be0693a4ec:2/thumbnail', @@ -133,7 +134,7 @@ test('catalog component', function () { name: 'Descension', loop: false, has_geometry: false, - has_sound: false + has_sound: false, }, image: 'https://peer-lb.decentraland.org/lambdas/collections/contents/urn:decentraland:matic:collections-v2:0xe42257bb4aada439179d736a64a736be0693a4ec:2/thumbnail', @@ -254,7 +255,10 @@ test('catalog component', function () { expect(dbClientQueryMock.mock.calls.length).toEqual(3) // 2 for the schema name and 1 for the catalog query expect(dbClientQueryMock.mock.calls[0][0]).toEqual( getLatestSubgraphSchema( - getSubgraphNameForNetwork(Network.ETHEREUM, ChainId.ETHEREUM_SEPOLIA) + getSubgraphNameForNetwork( + Network.ETHEREUM, + ChainId.ETHEREUM_SEPOLIA + ) ) ) expect(dbClientQueryMock.mock.calls[1][0]).toEqual( @@ -289,6 +293,7 @@ test('catalog component', function () { filters = { network, search, + // ids: ['id1', 'id2'], } latestSubgraphSchemaResponse = { @@ -321,9 +326,16 @@ test('catalog component', function () { ) ) expect(dbClientQueryMock.mock.calls[1][0]).toEqual( - getItemIdsBySearchTextQuery(latestSchema, filters.search) + getItemIdsBySearchTextQuery( + latestSchema, + filters.search, + filters.category + ) + ) + // It's repeated 4 times due to this WHERE statement: `WHERE word_wearable % $1 OR word_emote % $2 ORDER BY GREATEST(similarity(word_wearable, $3), similarity(word_emote, $4)) DESC;` + expect(dbClientQueryMock.mock.calls[1][0].values).toEqual( + Array(4).fill(search) ) - expect(dbClientQueryMock.mock.calls[1][0].values).toEqual([search]) }) }) @@ -348,7 +360,7 @@ test('catalog component', function () { rowCount: 1, }) }) - it('should use the ids returned by the search query in the main catalog query', async () => { + it('should use the ids returned by the search query in the main catalog query and be sorted by them', async () => { expect(await catalogComponent.fetch(filters)).toEqual({ data: [ { @@ -367,9 +379,16 @@ test('catalog component', function () { ) ) expect(dbClientQueryMock.mock.calls[1][0]).toEqual( - getItemIdsBySearchTextQuery(latestSchema, filters.search) + getItemIdsBySearchTextQuery( + latestSchema, + filters.search, + filters.category + ) + ) + // It's repeated 4 times due to this WHERE statement: `WHERE word_wearable % $1 OR word_emote % $2 ORDER BY GREATEST(similarity(word_wearable, $3), similarity(word_emote, $4)) DESC;` + expect(dbClientQueryMock.mock.calls[1][0].values).toEqual( + Array(4).fill(search) ) - expect(dbClientQueryMock.mock.calls[1][0].values).toEqual([search]) expect(dbClientQueryMock.mock.calls[2][0]).toEqual( getCatalogQuery({ [network]: latestSchema }, filters) )