Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TSK-1237: Improve full text indexer #3025

Merged
merged 2 commits into from
Apr 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,4 @@ tsdoc-metadata.json
pods/front/dist
*.cpuprofile
*.pyc
metrics.txt
3 changes: 2 additions & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@
"ELASTIC_URL": "http://localhost:9200",
"MONGO_URL": "mongodb://localhost:27017",
"APM_SERVER_URL2": "http://localhost:8200",
"METRICS_CONSOLE": "true", // Show metrics in console evert 30 seconds.,
"METRICS_CONSOLE": "false",
"METRICS_FILE": "${workspaceRoot}/metrics.txt", // Show metrics in console evert 30 seconds.,
"MINIO_ENDPOINT": "localhost",
"MINIO_ACCESS_KEY": "minioadmin",
"MINIO_SECRET_KEY": "minioadmin",
Expand Down
2 changes: 1 addition & 1 deletion dev/generator/src/issues.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ export async function generateIssues (
await connection.close()
ctx.end()

console.info(metricsToString(ctx.metrics, 'Client'))
console.info(metricsToString(ctx.metrics, 'Client', 70))
}

async function genIssue (client: TxOperations, statuses: Ref<IssueStatus>[]): Promise<void> {
Expand Down
2 changes: 1 addition & 1 deletion dev/generator/src/recruit.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ export async function generateContacts (
await connection.close()
ctx.end()

console.info(metricsToString(ctx.metrics, 'Client'))
console.info(metricsToString(ctx.metrics, 'Client', 70))
}

async function genVacansyApplicants (
Expand Down
2 changes: 1 addition & 1 deletion dev/tool/src/clean.ts
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ export async function cleanRemovedTransactions (workspaceId: WorkspaceId, transa
)

count += toRemove.length
console.log('processed', count, removedDocs.total)
console.log('processed', count)
}

console.log('total docs with remove', count)
Expand Down
20 changes: 15 additions & 5 deletions models/core/src/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,13 @@ import {
Prop,
TypeBoolean,
TypeIntlString,
TypeRecord,
TypeRef,
TypeString,
TypeTimestamp,
UX
} from '@hcengineering/model'
import type { IntlString } from '@hcengineering/platform'
import { getEmbeddedLabel, IntlString } from '@hcengineering/platform'
import core from './component'

// C O R E
Expand Down Expand Up @@ -256,25 +257,34 @@ export class TFulltextData extends TDoc implements FullTextData {

@Model(core.class.DocIndexState, core.class.Doc, DOMAIN_DOC_INDEX_STATE)
export class TDocIndexState extends TDoc implements DocIndexState {
objectClass!: Ref<Class<Doc>>
@Prop(TypeRef(core.class.Class), core.string.Class)
@Index(IndexKind.Indexed)
@Hidden()
objectClass!: Ref<Class<Doc>>

@Prop(TypeRef(core.class.Doc), core.string.AttachedTo)
@Index(IndexKind.Indexed)
@Hidden()
attachedTo?: Ref<Doc>

@Prop(TypeRef(core.class.Doc), core.string.AttachedToClass)
@Prop(TypeRef(core.class.Class), core.string.AttachedToClass)
@Index(IndexKind.Indexed)
@Hidden()
attachedToClass?: Ref<Class<Doc>>

// Indexable attributes of document.
attributes!: Record<string, any>

removed!: boolean
@Prop(TypeBoolean(), getEmbeddedLabel('Removed'))
@Index(IndexKind.Indexed)
@Hidden()
removed!: boolean

// States for different stages
stages!: Record<string, boolean | string>
@Prop(TypeRecord(), getEmbeddedLabel('Stages'))
@Index(IndexKind.Indexed)
@Hidden()
stages!: Record<string, boolean | string>
}

@Model(core.class.IndexStageState, core.class.Doc, DOMAIN_DOC_INDEX_STATE)
Expand Down
29 changes: 28 additions & 1 deletion models/core/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ import {
AttachedDoc,
IndexingConfiguration,
Class,
systemAccountEmail
systemAccountEmail,
DocIndexState
} from '@hcengineering/core'
import { Builder } from '@hcengineering/model'
import core from './component'
Expand Down Expand Up @@ -157,4 +158,30 @@ export function createModel (builder: Builder): void {
]
}
)

builder.mixin<Class<DocIndexState>, IndexingConfiguration<TxCollectionCUD<Doc, AttachedDoc>>>(
core.class.DocIndexState,
core.class.Class,
core.mixin.IndexConfiguration,
{
indexes: [
{
_class: 1,
stages: 1,
_id: 1,
modifiedOn: 1
},
{
_class: 1,
_id: 1,
modifiedOn: 1
},
{
_class: 1,
_id: 1,
objectClass: 1
}
]
}
)
}
4 changes: 4 additions & 0 deletions models/gmail/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,10 @@ export function createModel (builder: Builder): void {
},
gmail.action.WriteEmail
)

builder.mixin(gmail.class.Message, core.class.Class, core.mixin.FullTextSearchContext, {
parentPropagate: false
})
}

export { gmailOperation } from './migration'
6 changes: 3 additions & 3 deletions models/recruit/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1117,19 +1117,19 @@ export function createModel (builder: Builder): void {
// Allow to use fuzzy search for mixins
builder.mixin(recruit.class.Vacancy, core.class.Class, core.mixin.FullTextSearchContext, {
fullTextSummary: true,
propogate: []
propagate: []
})

builder.mixin(recruit.mixin.Candidate, core.class.Class, core.mixin.FullTextSearchContext, {
fullTextSummary: true,
propogate: [recruit.class.Applicant]
propagate: [recruit.class.Applicant]
})

// Allow to use fuzzy search for mixins
builder.mixin(recruit.class.Applicant, core.class.Class, core.mixin.FullTextSearchContext, {
fullTextSummary: true,
forceIndex: true,
propogate: []
propagate: []
})

createAction(builder, {
Expand Down
4 changes: 4 additions & 0 deletions models/telegram/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,10 @@ export function createModel (builder: Builder): void {
},
telegram.ids.TxSharedCreate
)

builder.mixin(telegram.class.Message, core.class.Class, core.mixin.FullTextSearchContext, {
parentPropagate: false
})
}

export { telegramOperation } from './migration'
5 changes: 4 additions & 1 deletion packages/core/src/classes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,10 @@ export interface FullTextSearchContext extends Class<Doc> {
forceIndex?: boolean

// If defined, changes will be propagated to children of the listed classes
propogate?: Ref<Class<Doc>>[]
propagate?: Ref<Class<Doc>>[]

// Whether child values should be propagated to the parent. Defaults to true.
parentPropagate?: boolean
}

/**
Expand Down
19 changes: 15 additions & 4 deletions packages/core/src/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import { Tx, TxCUD, TxCollectionCUD, TxCreateDoc, TxProcessor, TxUpdateDoc } fro
import { toFindResult } from './utils'

const transactionThreshold = 500
const modelTransactionThreshold = 50

/**
* @public
Expand Down Expand Up @@ -194,7 +195,11 @@ export async function createClient (
const oldOnConnect: ((apply: boolean) => void) | undefined = conn.onConnect
conn.onConnect = async () => {
// Find all new transactions and apply
await loadModel(conn, loadedTxIds, allowedPlugins, configs, hierarchy, model)
if (!(await loadModel(conn, loadedTxIds, allowedPlugins, configs, hierarchy, model, true))) {
// We need full refresh
await oldOnConnect?.(false)
return
}

// Look at the last {transactionThreshold} transactions; if more than that have arrived since the last transaction we received, we need to perform a full refresh.
const atxes = await conn.findAll(
Expand All @@ -216,7 +221,7 @@ export async function createClient (
}
}

if (atxes.total < transactionThreshold && !needFullRefresh) {
if (atxes.length < transactionThreshold && !needFullRefresh) {
console.log('applying input transactions', atxes.length)
for (const tx of atxes) {
txHandler(tx)
Expand All @@ -236,8 +241,9 @@ async function loadModel (
allowedPlugins: Plugin[] | undefined,
configs: Map<Ref<PluginConfiguration>, PluginConfiguration>,
hierarchy: Hierarchy,
model: ModelDb
): Promise<void> {
model: ModelDb,
reload = false
): Promise<boolean> {
const t = Date.now()

const atxes = await conn.findAll(
Expand All @@ -246,6 +252,10 @@ async function loadModel (
{ sort: { modifiedOn: SortingOrder.Ascending, _id: SortingOrder.Ascending } }
)

if (reload && atxes.length > modelTransactionThreshold) {
return true
}

let systemTx: Tx[] = []
const userTx: Tx[] = []
console.log('find' + (processedTx.size === 0 ? 'full model' : 'model diff'), atxes.length, Date.now() - t)
Expand Down Expand Up @@ -289,6 +299,7 @@ async function loadModel (
console.error('failed to apply model transaction, skipping', JSON.stringify(tx), err)
}
}
return false
}

function fillConfiguration (systemTx: Tx[], configs: Map<Ref<PluginConfiguration>, PluginConfiguration>): void {
Expand Down
2 changes: 2 additions & 0 deletions packages/core/src/component.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ export default plugin(coreId, {
TypeHyperlink: '' as Ref<Class<Type<Hyperlink>>>,
TypeNumber: '' as Ref<Class<Type<number>>>,
TypeMarkup: '' as Ref<Class<Type<string>>>,
TypeRecord: '' as Ref<Class<Type<Record<any, any>>>>,
TypeBoolean: '' as Ref<Class<Type<boolean>>>,
TypeTimestamp: '' as Ref<Class<Type<Timestamp>>>,
TypeDate: '' as Ref<Class<Type<Timestamp | Date>>>,
Expand Down Expand Up @@ -151,6 +152,7 @@ export default plugin(coreId, {
AttachedTo: '' as IntlString,
AttachedToClass: '' as IntlString,
String: '' as IntlString,
Record: '' as IntlString,
Markup: '' as IntlString,
Number: '' as IntlString,
Boolean: '' as IntlString,
Expand Down
77 changes: 64 additions & 13 deletions packages/core/src/measurements/metrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,10 @@ export function childMetrics (root: Metrics, path: string[]): Metrics {
return oop
}

function aggregate (m: Metrics): Metrics {
/**
* @public
*/
export function metricsAggregate (m: Metrics): Metrics {
const ms = aggregateMetrics(m.measurements)

// Use child overage, if there is no top level value specified.
Expand Down Expand Up @@ -105,34 +108,38 @@ function aggregate (m: Metrics): Metrics {
function aggregateMetrics (m: Record<string, Metrics>): Record<string, Metrics> {
const result: Record<string, Metrics> = {}
for (const [k, v] of Object.entries(m).sort((a, b) => b[1].time - a[1].time)) {
result[k] = aggregate(v)
result[k] = metricsAggregate(v)
}
return result
}

function toLen (val: string, sep: string, len = 50): string {
function toLen (val: string, sep: string, len: number): string {
while (val.length < len) {
val += sep
}
return val
}

function printMetricsChildren (params: Record<string, Metrics>, offset: number): string {
function printMetricsChildren (params: Record<string, Metrics>, offset: number, length: number): string {
let r = ''
if (Object.keys(params).length > 0) {
r += '\n' + toLen('', ' ', offset)
r += Object.entries(params)
.map(([k, vv]) => toString(k, vv, offset))
.map(([k, vv]) => toString(k, vv, offset, length))
.join('\n' + toLen('', ' ', offset))
}
return r
}

function printMetricsParams (params: Record<string, Record<string, MetricsData>>, offset: number): string {
function printMetricsParams (
params: Record<string, Record<string, MetricsData>>,
offset: number,
length: number
): string {
let r = ''
const joinP = (key: string, data: Record<string, MetricsData>): string[] => {
return Object.entries(data).map(([k, vv]) =>
`${toLen('', ' ', offset)}${toLen(key + '=' + k, '-', 70 - offset)}: avg ${
`${toLen('', ' ', offset)}${toLen(key + '=' + k, '-', length - offset)}: avg ${
vv.time / (vv.operations > 0 ? vv.operations : 1)
} total: ${vv.time} ops: ${vv.operations}`.trim()
)
Expand All @@ -145,18 +152,62 @@ function printMetricsParams (params: Record<string, Record<string, MetricsData>>
return r
}

function toString (name: string, m: Metrics, offset: number): string {
let r = `${toLen('', ' ', offset)}${toLen(name, '-', 70 - offset)}: avg ${
function toString (name: string, m: Metrics, offset: number, length: number): string {
let r = `${toLen('', ' ', offset)}${toLen(name, '-', length - offset)}: avg ${
m.time / (m.operations > 0 ? m.operations : 1)
} total: ${m.time} ops: ${m.operations}`.trim()
r += printMetricsParams(m.params, offset + 4)
r += printMetricsChildren(m.measurements, offset + 4)
r += printMetricsParams(m.params, offset + 4, length)
r += printMetricsChildren(m.measurements, offset + 4, length)
return r
}

/**
 * Render a metrics tree as an indented, human-readable text report.
 *
 * The tree is aggregated with `metricsAggregate` first, then printed starting
 * at offset 0.
 *
 * @param metrics - root of the metrics tree to print
 * @param name - label used for the root row
 * @param length - width that row labels are padded to (using `-`) before the
 *   values are printed. Defaults to 70, the width that was hard-coded before
 *   this parameter existed, so two-argument callers keep working.
 * @returns the formatted report
 * @public
 */
export function metricsToString (metrics: Metrics, name = 'System', length = 70): string {
  return toString(name, metricsAggregate(metrics), 0, length)
}

/**
 * Flatten a two-level parameter map into table rows.
 *
 * Every `(parameter name, parameter value)` pair yields one row shaped as
 * `[offset, "name=value", average time, total time, operations]`.
 * The average divides by 1 when no operations were recorded, so it never
 * divides by zero.
 */
function printMetricsParamsRows (
  params: Record<string, Record<string, MetricsData>>,
  offset: number
): (string | number)[][] {
  const rows: (string | number)[][] = []
  for (const [paramName, byValue] of Object.entries(params)) {
    for (const [paramValue, data] of Object.entries(byValue)) {
      const divisor = data.operations > 0 ? data.operations : 1
      rows.push([offset, `${paramName}=${paramValue}`, data.time / divisor, data.time, data.operations])
    }
  }
  return rows
}

/**
 * Build table rows for every child measurement, in the iteration order of
 * `params`. Each child contributes whatever `toStringRows` returns for it at
 * the given offset. An empty map yields an empty array.
 */
function printMetricsChildrenRows (params: Record<string, Metrics>, offset: number): (string | number)[][] {
  const rows: (string | number)[][] = []
  for (const [childName, child] of Object.entries(params)) {
    rows.push(...toStringRows(childName, child, offset))
  }
  return rows
}

/**
 * Convert one metrics node and everything below it into table rows.
 *
 * The first row describes the node itself:
 * `[offset, name, average time, total time, operations]`. The average divides
 * by 1 when no operations were recorded. It is followed by the rows for the
 * node's params and then its child measurements, both emitted one offset
 * level deeper.
 */
function toStringRows (name: string, m: Metrics, offset: number): (number | string)[][] {
  const average = m.time / (m.operations > 0 ? m.operations : 1)
  const nestedOffset = offset + 1
  return [
    [offset, name, average, m.time, m.operations],
    ...printMetricsParamsRows(m.params, nestedOffset),
    ...printMetricsChildrenRows(m.measurements, nestedOffset)
  ]
}

/**
* @public
*/
export function metricsToString (metrics: Metrics, name = 'System'): string {
return toString(name, aggregate(metrics), 0)
export function metricsToRows (metrics: Metrics, name = 'System'): (number | string)[][] {
return toStringRows(name, metricsAggregate(metrics), 0)
}
Loading