Skip to content

Commit

Permalink
Merge pull request #96 from SubramanyamChalla24/main
Browse files Browse the repository at this point in the history
added resume matcher ipynb
  • Loading branch information
srbhr authored Aug 5, 2023
2 parents e496814 + a7f3f67 commit e40f37b
Show file tree
Hide file tree
Showing 3 changed files with 171 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,5 @@ dmypy.json
# Vscode's Files
.vscode
.vscode/*

scripts/similarity/config.yml
167 changes: 167 additions & 0 deletions archive/resume_matcher.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "aHoRFk4LpFSZ",
"outputId": "0a950106-ea2a-498a-9dcc-e99458b1f139"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.5/44.5 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.7/2.7 MB\u001b[0m \u001b[31m30.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m132.5/132.5 kB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.4/75.4 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m304.5/304.5 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m74.5/74.5 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.5/57.5 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h"
]
}
],
"source": [
"!pip install cohere --quiet\n",
"!pip install qdrant-client --quiet"
]
},
{
"cell_type": "code",
"source": [
"import yaml\n",
"from qdrant_client import QdrantClient, models\n",
"from qdrant_client.http.models import Batch\n",
"import cohere\n",
"def read_config(filepath):\n",
" with open(filepath) as f:\n",
" config = yaml.safe_load(f)\n",
" return config\n",
"\n",
"class QdrantSearch:\n",
" def __init__(self, resumes, jd):\n",
" config = read_config(\"config.yml\")\n",
" self.cohere_key = config['cohere']['api_key']\n",
" self.qdrant_key = config['qdrant']['api_key']\n",
" self.qdrant_url = config['qdrant']['url']\n",
" self.resumes = resumes\n",
" self.jd = jd\n",
"\n",
" self.cohere = cohere.Client(self.cohere_key)\n",
"\n",
" self.qdrant = QdrantClient(\n",
" url=self.qdrant_url,\n",
" api_key=self.qdrant_key,\n",
" )\n",
"\n",
" vector_size = 4096\n",
" self.qdrant.recreate_collection(\n",
" collection_name=\"resume_matcher\",\n",
" vectors_config=models.VectorParams(\n",
" size=vector_size,\n",
" distance=models.Distance.COSINE\n",
" )\n",
" )\n",
"\n",
" def get_embedding(self, text):\n",
" embeddings = self.cohere.embed([text], \"large\").embeddings\n",
" return list(map(float, embeddings[0])), len(embeddings[0])\n",
"\n",
" def update_qdrant(self):\n",
" vectors = []\n",
" ids = []\n",
" for i, resume in enumerate(self.resumes):\n",
" vector, size = self.get_embedding(resume)\n",
" vectors.append(vector)\n",
" ids.append(i)\n",
"\n",
" self.qdrant.upsert(\n",
" collection_name=\"resume_matcher\",\n",
" points=Batch(\n",
" ids=ids,\n",
" vectors=vectors,\n",
" payloads=[{\"text\": resume} for resume in self.resumes]\n",
"\n",
" )\n",
" )\n",
"\n",
" def search(self):\n",
" vector, _ = self.get_embedding(self.jd)\n",
"\n",
" hits = self.qdrant.search(\n",
" collection_name=\"resume_matcher\",\n",
" query_vector=vector,\n",
" limit=30\n",
" )\n",
" results = []\n",
" for hit in hits:\n",
" result = {\n",
" 'text': str(hit.payload)[:30],\n",
" 'score': hit.score\n",
" }\n",
" results.append(result)\n",
"\n",
" return results\n"
],
"metadata": {
"id": "SXOgwcCATtww"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"resumes = [\"Professional Summary Highly skilled MERN Stack Developer with over 10 years of experience specializing in designing building and maintaining complex web applications Proficient in MongoDB Expressjs React and Nodejs Currently contributing to the development of AI technologies at OpenAI with a primary focus on the ChatGPT project Skills JavaScript and TypeScript MongoDB Expressjs React Nodejs MERN stack RESTful APIs Git and GitHub Docker and Kubernetes Agile and Scrum Python and Machine Learning basics Experience June 2020 PresentMERN Stack Developer OpenAI San Francisco USA Working on the development of the ChatGPT project using Nodejs Expressjs and React Implementing RESTful services for communication between frontend and backend Utilizing Docker and Kubernetes for deployment and management of applications Working in an Agile environment delivering highquality software every sprint Contributing to the design and implementation of machine learning algorithms for natural language processing tasks July 2015 May 2020Full Stack Developer Uber San Francisco USA Developed and maintained scalable web applications using MERN stack Ensured the performance quality and responsiveness of applications Successfully deployed solutions using Docker and Kubernetes Collaborated with a team of engineers product managers and UX designers Led a team of junior developers conducted code reviews and ensured adherence to best coding practices Worked closely with the data science team to optimize recommendation algorithms and enhance user experience June 2012 June 2015Software Developer Facebook Menlo Park USA Developed features for the Facebook web application using React Ensured the performance of the MongoDB databases Utilized RESTful APIs for communication between different parts of the application Worked in a fastpaced testdriven development environment Assisted in migrating the legacy system to a modern MERN stack architecture Education 2009 2012 PhD in Computer Science CalTech Pasadena USA 2007 2009 Master of Science in Computer Science MIT Cambridge USA 2003 2007 Bachelor of Science in Computer Science UC San Diego San Diego USA 1/2 Projects 2019 PresentPersonal Project Gotham Event Planner Created a fullfeatured web application to plan and organize events in Gotham city Used MERN stack for development and Docker for deployment The application allows users to create manage and share events and integrates with Google Maps API to display event locations 2/2\"]\n",
"job_description = \"Job Description Java Developer 3 Years of Experience Tech Solutions San Francisco CA USA About Us At Tech Solutions we believe in the power of technology to solve complex problems We are a dynamic forwardthinking tech company specializing in custom software solutions for various industries We are seeking a talented and experienced Java Developer to join our team Job Description We are seeking a skilled Java Developer with at least 3 years of experience in building highperforming scal able enterprisegrade applications You will be part of a talented software team that works on missioncritical applications Your roles and responsibilities will include managing Java/Java EE application development while providing expertise in the full software development lifecycle Responsibilities •Designing implementing and maintaining Java applications that are often highvolume and low latency required for missioncritical systems •Delivering high availability and performance •Contributing to all phases of the development lifecycle •Writing welldesigned efficient and testable code •Conducting software analysis programming testing and debugging •Ensuring designs comply with specifications •Preparing and producing releases of software components •Supporting continuous improvement by investigating alternatives and technologies and presenting these for architectural review Requirements •BS/MS degree in Computer Science Engineering or a related subject •Proven handson Software Development experience •Proven working experience in Java development •Handson experience in designing and developing applications using Java EE platforms •ObjectOriented Analysis and design using common design patterns •Profound insight of Java and JEE internals Classloading Memory Management Transaction man agement etc 1 •Excellent knowledge of Relational Databases SQL and ORM technologies JPA2 Hibernate •Experience in developing web applications using at least one popular web framework JSF Wicket GWT Spring MVC •Experience with testdriven development Benefits •Competitive salary package •Health dental and vision insurance •Retirement savings plan •Professional development opportunities •Flexible work hours Tech Solutions is proud to be an equal opportunity employer We celebrate diversity and are committed to creating an inclusive environment for all employees How to Apply To apply please submit your resume and a brief explanation of your relevant experience to 2\"\n",
"\n",
"qdrant_search = QdrantSearch(resumes, job_description)\n",
"\n",
"qdrant_search.update_qdrant()\n",
"\n",
"results = qdrant_search.search()\n",
"for r in results:\n",
" print(r)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rlP3s5euo435",
"outputId": "3f4f15b6-d446-4491-d4d5-d9ba14a2a145"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{'text': \"{'text': 'Professional Summary\", 'score': 0.54787695}\n"
]
}
]
}
]
}
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,5 @@ validators==0.20.0
wasabi==1.1.2
watchdog==3.0.0
zipp==3.16.2

cohere~=4.19.2

0 comments on commit e40f37b

Please sign in to comment.