From 8e57ee24b0e4727d1d511bb3cd7b810af76e38b2 Mon Sep 17 00:00:00 2001 From: Joabesv Date: Fri, 20 Sep 2024 12:39:11 -0300 Subject: [PATCH] chore: better teacher name comparison --- apps/core/package.json | 2 + apps/core/src/models/Teacher.ts | 49 +++++++++ .../sync/handlers/componentsTeachers.ts | 102 ++++++++++-------- pnpm-lock.yaml | 17 +++ 4 files changed, 126 insertions(+), 44 deletions(-) diff --git a/apps/core/package.json b/apps/core/package.json index 949a2b02..38b5dc5a 100644 --- a/apps/core/package.json +++ b/apps/core/package.json @@ -40,6 +40,7 @@ "mongoose-lean-virtuals": "^0.9.1", "ms": "^2.1.3", "ofetch": "^1.3.4", + "string-similarity": "^4.0.4", "ua-parser-js": "^1.0.38", "unstorage": "^1.10.2", "xlsx": "https://cdn.sheetjs.com/xlsx-0.20.1/xlsx-0.20.1.tgz", @@ -52,6 +53,7 @@ "@types/lodash-es": "^4.17.12", "@types/ms": "^0.7.34", "@types/node": "^20.14.7", + "@types/string-similarity": "^4.0.2", "@types/ua-parser-js": "^0.7.39", "esbuild": "^0.23.0", "esbuild-plugin-pino": "^2.2.0", diff --git a/apps/core/src/models/Teacher.ts b/apps/core/src/models/Teacher.ts index 99dd9895..8eee2551 100644 --- a/apps/core/src/models/Teacher.ts +++ b/apps/core/src/models/Teacher.ts @@ -1,5 +1,6 @@ import { type InferSchemaType, Schema, model } from 'mongoose'; import { mongooseLeanVirtuals } from 'mongoose-lean-virtuals'; +import stringSimilarity from 'string-similarity'; const teacherSchema = new Schema( { @@ -8,11 +9,59 @@ const teacherSchema = new Schema( }, { timestamps: true, + statics: { + findByFuzzName: async function (name: string) { + const exactMatch = await this.findOne({ + $or: [ + { + name, + }, + { alias: name }, + ], + }); + + if (exactMatch) { + return exactMatch; + } + + const teachers = await this.find({}); + const bestMatch = teachers.reduce( + (best, teacher) => { + const similarity = Math.max( + stringSimilarity.compareTwoStrings(name, teacher.name), + ...teacher.alias.map((alias) => + stringSimilarity.compareTwoStrings( + name, 
alias.toLowerCase().replace(/[^a-z]/g, ''), + ), + ), + ); + return similarity > best.similarity + ? { teacher, similarity } + : best; + }, + { teacher: null, similarity: 0 }, + ); + + if (bestMatch.similarity > 0.8) { + return bestMatch.teacher; + } + + return null; + }, + }, }, ); teacherSchema.plugin(mongooseLeanVirtuals); +teacherSchema.pre('save', function (next) { + if (this.isNew) { + this.name = this.name.toLowerCase(); + } + next(); +}); + export type Teacher = InferSchemaType; export type TeacherDocument = ReturnType<(typeof TeacherModel)['hydrate']>; export const TeacherModel = model('teachers', teacherSchema); diff --git a/apps/core/src/modules/sync/handlers/componentsTeachers.ts b/apps/core/src/modules/sync/handlers/componentsTeachers.ts index 2e9e811a..e5aa5cf9 100644 --- a/apps/core/src/modules/sync/handlers/componentsTeachers.ts +++ b/apps/core/src/modules/sync/handlers/componentsTeachers.ts @@ -1,11 +1,10 @@ import { createHash } from 'node:crypto'; -import { batchInsertItems, generateIdentifier } from '@next/common'; +import { batchInsertItems, generateIdentifier, logger } from '@next/common'; import { TeacherModel } from '@/models/Teacher.js'; import { ComponentModel } from '@/models/Component.js'; import { z } from 'zod'; import { ufProcessor } from '@/services/ufprocessor.js'; import type { FastifyReply, FastifyRequest } from 'fastify'; -import type { Types } from 'mongoose'; const validateComponentTeachersBody = z.object({ hash: z.string().optional(), @@ -23,62 +22,77 @@ export async function componentsTeachers( ) { const { season, hash, link, ignoreErrors } = validateComponentTeachersBody.parse(request.body); - const teachers = await TeacherModel.find({}).lean(true); - const teacherMap = new Map(); - for (const teacher of teachers) { - teacherMap.set(teacher.name.toLocaleLowerCase(), teacher._id); - for (const alias of teacher?.alias || []) { - teacherMap.set(alias, teacher._id); - } - } const componentsWithTeachers = await 
ufProcessor.getComponentsFile(link); const errors: string[] = []; - const nextComponentWithTeachers = componentsWithTeachers.map((component) => { - if (!component.name) { - errors.push( - `Missing required field for component: ${component.UFComponentCode || 'Unknown'}`, - ); + + const teacherCache = new Map(); + + const findTeacher = async (name: string | null) => { + if (!name) { + return null; } + const caseSafeName = name.toLowerCase(); - if ( - component.teachers?.professor && - !teacherMap.has(component.teachers.professor) - ) { - errors.push(component.teachers.professor); + if (teacherCache.has(caseSafeName)) { + return teacherCache.get(caseSafeName); } - if ( - component.teachers?.practice && - !teacherMap.has(component.teachers.practice) - ) { - errors.push(component.teachers.practice); + + const teacher = await TeacherModel.findByFuzzName(caseSafeName); + + if (!teacher) { + errors.push(caseSafeName); + teacherCache.set(caseSafeName, null); + return null; } - const findTeacher = (name: string | null) => { - if (!name) { - return null; + if (!teacher.alias.includes(caseSafeName)) { + await TeacherModel.findByIdAndUpdate(teacher._id, { + $addToSet: { alias: caseSafeName }, + }); + } + + teacherCache.set(caseSafeName, teacher._id); + return teacher._id; + }; + + const nextComponentWithTeachersPromises = componentsWithTeachers.map( + async (component) => { + if (!component.name) { + errors.push( + `Missing required field for component: ${component.UFComponentCode || 'Unknown'}`, + ); } - return teacherMap.get(name) || null; - }; + const [teoria, pratica] = await Promise.all([ + findTeacher(component.teachers?.professor), + findTeacher(component.teachers?.practice), + ]); - return { - disciplina_id: component.UFComponentId, - codigo: component.UFComponentCode, - disciplina: component.name, - campus: component.campus, - turma: component.turma, - turno: component.turno, - vagas: component.vacancies, - teoria: findTeacher(component.teachers?.professor), - pratica: 
findTeacher(component.teachers?.practice), - season, - }; - }); + return { + disciplina_id: component.UFComponentId, + codigo: component.UFComponentCode, + disciplina: component.name, + campus: component.campus, + turma: component.turma, + turno: component.turno, + vagas: component.vacancies, + teoria, + pratica, + season, + }; + }, + ); + + const nextComponentWithTeachers = await Promise.all( + nextComponentWithTeachersPromises, + ); if (!ignoreErrors && errors.length > 0) { + const errorsSet = [...new Set(errors)]; return reply.status(403).send({ msg: 'Missing professors while parsing', - names: [...new Set(errors)], + names: errorsSet, + size: errorsSet.length, }); } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 32b3711c..48cf14dc 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -83,6 +83,9 @@ importers: ofetch: specifier: ^1.3.4 version: 1.3.4 + string-similarity: + specifier: ^4.0.4 + version: 4.0.4 ua-parser-js: specifier: ^1.0.38 version: 1.0.38 @@ -114,6 +117,9 @@ importers: '@types/node': specifier: ^20.14.7 version: 20.14.14 + '@types/string-similarity': + specifier: ^4.0.2 + version: 4.0.2 '@types/ua-parser-js': specifier: ^0.7.39 version: 0.7.39 @@ -1221,6 +1227,9 @@ packages: '@types/ssh2@1.15.0': resolution: {integrity: sha512-YcT8jP5F8NzWeevWvcyrrLB3zcneVjzYY9ZDSMAMboI+2zR1qYWFhwsyOFVzT7Jorn67vqxC0FRiw8YyG9P1ww==} + '@types/string-similarity@4.0.2': + resolution: {integrity: sha512-LkJQ/jsXtCVMK+sKYAmX/8zEq+/46f1PTQw7YtmQwb74jemS1SlNLmARM2Zml9DgdDTWKAtc5L13WorpHPDjDA==} + '@types/ua-parser-js@0.7.39': resolution: {integrity: sha512-P/oDfpofrdtF5xw433SPALpdSchtJmY7nsJItf8h3KXqOslkbySh8zq4dSWXH2oTjRvJ5PczVEoCZPow6GicLg==} @@ -2787,6 +2796,10 @@ packages: streamx@2.18.0: resolution: {integrity: sha512-LLUC1TWdjVdn1weXGcSxyTR3T4+acB6tVGXT95y0nGbca4t4o/ng1wKAGTljm9VicuCVLvRlqFYXYy5GwgM7sQ==} + string-similarity@4.0.4: + resolution: {integrity: sha512-/q/8Q4Bl4ZKAPjj8WerIBJWALKkaPRfrvhfF8k/B23i4nzrlRj2/go1m90In7nG/3XDSbOo0+pu6RvCTM9RGMQ==} + 
deprecated: Package no longer supported. Contact Support at https://www.npmjs.com/support for more info. + string-width@4.2.3: resolution: {integrity: sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==} engines: {node: '>=8'} @@ -4406,6 +4419,8 @@ snapshots: dependencies: '@types/node': 18.19.43 + '@types/string-similarity@4.0.2': {} + '@types/ua-parser-js@0.7.39': {} '@types/webidl-conversions@7.0.3': {} @@ -6163,6 +6178,8 @@ snapshots: optionalDependencies: bare-events: 2.4.2 + string-similarity@4.0.4: {} + string-width@4.2.3: dependencies: emoji-regex: 8.0.0