Skip to content

Commit

Permalink
chore: better teacher name comparison
Browse files Browse the repository at this point in the history
  • Loading branch information
Joabesv committed Sep 20, 2024
1 parent f821df7 commit 8e57ee2
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 44 deletions.
2 changes: 2 additions & 0 deletions apps/core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
"mongoose-lean-virtuals": "^0.9.1",
"ms": "^2.1.3",
"ofetch": "^1.3.4",
"string-similarity": "^4.0.4",
"ua-parser-js": "^1.0.38",
"unstorage": "^1.10.2",
"xlsx": "https://cdn.sheetjs.com/xlsx-0.20.1/xlsx-0.20.1.tgz",
Expand All @@ -52,6 +53,7 @@
"@types/lodash-es": "^4.17.12",
"@types/ms": "^0.7.34",
"@types/node": "^20.14.7",
"@types/string-similarity": "^4.0.2",
"@types/ua-parser-js": "^0.7.39",
"esbuild": "^0.23.0",
"esbuild-plugin-pino": "^2.2.0",
Expand Down
49 changes: 49 additions & 0 deletions apps/core/src/models/Teacher.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { type InferSchemaType, Schema, model } from 'mongoose';
import { mongooseLeanVirtuals } from 'mongoose-lean-virtuals';
import stringSimilarity from 'string-similarity';

const teacherSchema = new Schema(
{
Expand All @@ -8,11 +9,59 @@ const teacherSchema = new Schema(
},
{
timestamps: true,
statics: {
/**
 * Looks up a teacher by name, falling back to fuzzy matching.
 *
 * An exact match on `name` or on any entry of `alias` wins. Otherwise every
 * teacher is scored with `stringSimilarity.compareTwoStrings` against the
 * query (using the teacher's name and each alias), and the best-scoring
 * teacher is returned only when its score is above 0.8.
 *
 * @param name - Teacher name to search for.
 * @returns The matching teacher document, or `null` when nothing is close
 * enough.
 */
findByFuzzName: async function (name: string) {
  const exactMatch = await this.findOne({
    $or: [{ name }, { alias: name }],
  });

  if (exactMatch) {
    return exactMatch;
  }

  // Fold the query, the stored name and every alias the same way before
  // scoring. Previously only aliases were folded, so accents or punctuation
  // on one side of the comparison dragged the score down.
  const normalize = (value: string) =>
    value
      .normalize('NFD')
      .replace(/[\u0300-\u036f]/g, '') // drop combining diacritics
      .toLowerCase()
      .replace(/[^a-z]/g, '');

  const target = normalize(name);
  const teachers = await this.find({});

  let bestTeacher: (typeof teachers)[number] | null = null;
  let bestSimilarity = 0;

  for (const teacher of teachers) {
    // Guard against documents that have no `alias` array.
    const candidates = [teacher.name, ...(teacher.alias ?? [])];
    const similarity = Math.max(
      ...candidates.map((candidate) =>
        stringSimilarity.compareTwoStrings(target, normalize(candidate)),
      ),
    );

    // Strict comparison keeps the first teacher on ties.
    if (similarity > bestSimilarity) {
      bestTeacher = teacher;
      bestSimilarity = similarity;
    }
  }

  return bestSimilarity > 0.8 ? bestTeacher : null;
},
},
},
);

// Register the lean-virtuals plugin on the schema.
teacherSchema.plugin(mongooseLeanVirtuals);

// Lower-case the teacher name the first time a document is saved; saves of
// already-persisted documents leave `name` untouched.
teacherSchema.pre('save', function (done) {
  if (!this.isNew) {
    done();
    return;
  }

  const lowered = this.name.toLowerCase();
  this.name = lowered;
  done();
});

/** Teacher shape inferred from `teacherSchema` via `InferSchemaType`. */
export type Teacher = InferSchemaType<typeof teacherSchema>;
/** The type returned by `TeacherModel.hydrate`. */
export type TeacherDocument = ReturnType<(typeof TeacherModel)['hydrate']>;
/** Mongoose model created from `teacherSchema` under the name 'teachers'. */
export const TeacherModel = model('teachers', teacherSchema);
102 changes: 58 additions & 44 deletions apps/core/src/modules/sync/handlers/componentsTeachers.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import { createHash } from 'node:crypto';
import { batchInsertItems, generateIdentifier } from '@next/common';
import { batchInsertItems, generateIdentifier, logger } from '@next/common';
import { TeacherModel } from '@/models/Teacher.js';
import { ComponentModel } from '@/models/Component.js';
import { z } from 'zod';
import { ufProcessor } from '@/services/ufprocessor.js';
import type { FastifyReply, FastifyRequest } from 'fastify';
import type { Types } from 'mongoose';

const validateComponentTeachersBody = z.object({
hash: z.string().optional(),
Expand All @@ -23,62 +22,77 @@ export async function componentsTeachers(
) {
const { season, hash, link, ignoreErrors } =
validateComponentTeachersBody.parse(request.body);
const teachers = await TeacherModel.find({}).lean(true);
const teacherMap = new Map<string, Types.ObjectId>();
for (const teacher of teachers) {
teacherMap.set(teacher.name.toLocaleLowerCase(), teacher._id);
for (const alias of teacher?.alias || []) {
teacherMap.set(alias, teacher._id);
}
}
const componentsWithTeachers = await ufProcessor.getComponentsFile(link);
const errors: string[] = [];
const nextComponentWithTeachers = componentsWithTeachers.map((component) => {
if (!component.name) {
errors.push(
`Missing required field for component: ${component.UFComponentCode || 'Unknown'}`,
);

const teacherCache = new Map();

const findTeacher = async (name: string | null) => {
if (!name) {
return null;
}
const caseSafeName = name.toLowerCase();

if (
component.teachers?.professor &&
!teacherMap.has(component.teachers.professor)
) {
errors.push(component.teachers.professor);
if (teacherCache.has(caseSafeName)) {
return teacherCache.get(caseSafeName);
}
if (
component.teachers?.practice &&
!teacherMap.has(component.teachers.practice)
) {
errors.push(component.teachers.practice);

const teacher = await TeacherModel.findByFuzzName(caseSafeName);

if (!teacher) {
errors.push(caseSafeName);
teacherCache.set(caseSafeName, null);
return null;
}

const findTeacher = (name: string | null) => {
if (!name) {
return null;
if (!teacher.alias.includes(caseSafeName)) {
await TeacherModel.findByIdAndUpdate(teacher._id, {
$addToSet: { alias: caseSafeName },
});
}

teacherCache.set(caseSafeName, teacher._id);
return teacher._id;
};

const nextComponentWithTeachersPromises = componentsWithTeachers.map(
async (component) => {
if (!component.name) {
errors.push(
`Missing required field for component: ${component.UFComponentCode || 'Unknown'}`,
);
}

return teacherMap.get(name) || null;
};
const [teoria, pratica] = await Promise.all([
findTeacher(component.teachers?.professor),
findTeacher(component.teachers?.practice),
]);

return {
disciplina_id: component.UFComponentId,
codigo: component.UFComponentCode,
disciplina: component.name,
campus: component.campus,
turma: component.turma,
turno: component.turno,
vagas: component.vacancies,
teoria: findTeacher(component.teachers?.professor),
pratica: findTeacher(component.teachers?.practice),
season,
};
});
return {
disciplina_id: component.UFComponentId,
codigo: component.UFComponentCode,
disciplina: component.name,
campus: component.campus,
turma: component.turma,
turno: component.turno,
vagas: component.vacancies,
teoria,
pratica,
season,
};
},
);

const nextComponentWithTeachers = await Promise.all(
nextComponentWithTeachersPromises,
);

if (!ignoreErrors && errors.length > 0) {
const errorsSet = [...new Set(errors)];
return reply.status(403).send({
msg: 'Missing professors while parsing',
names: [...new Set(errors)],
names: errorsSet,
size: errorsSet.length,
});
}

Expand Down
17 changes: 17 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 8e57ee2

Please sign in to comment.