-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathnl_codi_recommend.py
212 lines (189 loc) · 7.32 KB
/
nl_codi_recommend.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
import json
import re
import os
from typing import List
from dotenv import load_dotenv
from flask import Blueprint, request, Response, jsonify
from vertexai.generative_models import GenerativeModel
from config import load_prompt
from vertexai.language_models import TextEmbeddingModel, TextEmbeddingInput
from google.cloud import bigquery, aiplatform
# Load environment variables from the .env file (if present) before reading settings.
load_dotenv()
# Create the Flask Blueprint that the routes in this module are registered on.
nl_codi_recommend_bp = Blueprint('nl_codi_recommend', __name__)
# GCP settings.
# PROJECT_ID comes from the environment; os.getenv returns None when it is unset.
PROJECT_ID = os.getenv("PROJECT_ID")
# Region passed to aiplatform.init() by embed_texts().
LOCATION = "us-central1"
# Name of the Vertex AI text-embedding model to load.
MODEL_NAME = "textembedding-gecko@003"
# BigQuery dataset and table queried by query_similar_embeddings().
DATASET_ID = "vector_search"
TABLE_ID = "vector_test_table"
# 1. Embed text with a Vertex AI embedding model
def embed_texts(texts: List[str], project_id: str, location: str, model_name: str = "textembedding-gecko@003") -> List[
    List[float]]:
    """
    Convert texts into embedding vectors with a Vertex AI text-embedding model.

    Args:
        texts: Texts to embed.
        project_id: GCP project passed to ``aiplatform.init``.
        location: GCP region passed to ``aiplatform.init``.
        model_name: Name of the pretrained embedding model to load.

    Returns:
        The ``values`` of every embedding returned by the model, as a list
        of float lists. An empty ``texts`` yields ``[]`` without contacting
        Vertex AI.
    """
    # Nothing to embed: skip SDK initialisation, model loading and the
    # remote call instead of sending an empty request.
    if not texts:
        return []

    aiplatform.init(project=project_id, location=location)
    model = TextEmbeddingModel.from_pretrained(model_name)
    inputs = [TextEmbeddingInput(text) for text in texts]
    embeddings = model.get_embeddings(inputs)
    return [embedding.values for embedding in embeddings]
# 2. Run the cosine-similarity query in BigQuery
def query_similar_embeddings(project_id: str, dataset_id: str, table_id: str, user_embedding: List[float],
                             top_n: int = 5):
    """
    Rank the rows of a BigQuery table by cosine similarity to an embedding.

    The query selects ``codi_json`` and the cosine similarity between
    ``user_embedding`` and each row's ``embedding`` column, ordered by
    similarity (highest first) and limited to ``top_n`` rows.

    Args:
        project_id: GCP project used for the client and the table path.
        dataset_id: Dataset containing the table.
        table_id: Table to search.
        user_embedding: Query vector to compare against the stored vectors.
        top_n: Maximum number of rows to return.

    Returns:
        The result of the query job (``query_job.result()``); each row
        exposes ``codi_json`` and ``similarity``.
    """
    client = bigquery.Client(project=project_id)

    # The vector and the limit are sent as typed query parameters rather
    # than being formatted into the SQL text. This keeps the statement
    # small, gives the array an explicit FLOAT64 element type (even when it
    # is empty), and avoids building SQL from values.
    # The table path still has to be interpolated because identifiers
    # cannot be parameterised; it comes from module configuration.
    query = f"""
    CREATE TEMP FUNCTION cosine_similarity(vec1 ARRAY<FLOAT64>, vec2 ARRAY<FLOAT64>) AS (
      (
        SELECT SUM(v1 * v2)
        FROM UNNEST(vec1) AS v1 WITH OFFSET i
        JOIN UNNEST(vec2) AS v2 WITH OFFSET j ON i = j
      ) /
      (
        SQRT(
          (SELECT SUM(POW(v, 2)) FROM UNNEST(vec1) AS v)
        ) *
        SQRT(
          (SELECT SUM(POW(v, 2)) FROM UNNEST(vec2) AS v)
        )
      )
    );
    WITH user_embedding AS (
      SELECT @user_embedding AS embedding
    )
    SELECT
      codi_json,
      cosine_similarity(user_embedding.embedding, table_embedding.embedding) AS similarity
    FROM `{project_id}.{dataset_id}.{table_id}` AS table_embedding
    CROSS JOIN user_embedding
    ORDER BY similarity DESC
    LIMIT @top_n;
    """
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ArrayQueryParameter("user_embedding", "FLOAT64", list(user_embedding)),
            bigquery.ScalarQueryParameter("top_n", "INT64", int(top_n)),
        ]
    )
    query_job = client.query(query, job_config=job_config)
    return query_job.result()
def recommend_codi_to_gemini(user_codi, rag_data, natural_language):
    """
    Ask Gemini for an outfit recommendation and return its JSON answer.

    The prompt template is loaded from ``prompt/nl_codi_recommend_prompt.txt``
    and its ``{{USER_CLOTHES}}``, ``{{RECOMMENDED_OUTFITS}}`` and
    ``{{USER_REQUIREMENT}}`` placeholders are replaced with the arguments.

    Args:
        user_codi: Text substituted for ``{{USER_CLOTHES}}``.
        rag_data: Text substituted for ``{{RECOMMENDED_OUTFITS}}``.
        natural_language: Text substituted for ``{{USER_REQUIREMENT}}``.

    Returns:
        The parsed JSON object found in the model output; ``{}`` when the
        output contains no ``{...}`` span or no text at all; or
        ``{"error": "Invalid JSON format received"}`` when the span is not
        valid JSON.
    """
    multimodal_model = GenerativeModel(model_name="gemini-1.5-flash-002")
    prompt = load_prompt("prompt/nl_codi_recommend_prompt.txt")
    prompt = (
        prompt.replace("{{USER_CLOTHES}}", user_codi)
        .replace("{{RECOMMENDED_OUTFITS}}", rag_data)
        .replace("{{USER_REQUIREMENT}}", natural_language)
    )
    print("***** prompt = ", prompt)

    # Send the prompt as the only content part.
    response = multimodal_model.generate_content(
        [prompt],
        generation_config={
            "temperature": 0.8,  # sampling temperature
        },
    )

    # Accessing `.text` raises ValueError in the Vertex AI SDK when the
    # response carries no text (for example a blocked candidate), so a plain
    # truthiness check on `response` is not enough. Treat that case like an
    # empty answer, which yields the empty object below.
    try:
        codis = (response.text if response else "") or ""
    except ValueError:
        codis = ""

    # The model may wrap its answer in ```json fences or extra prose; keep
    # only the span from the first "{" to the last "}".
    json_match = re.search(r"\{.*\}", codis, re.DOTALL)
    if json_match is None:
        return {}

    try:
        return json.loads(json_match.group(0))
    except json.JSONDecodeError:
        return {"error": "Invalid JSON format received"}
# Natural-language outfit ("codi") recommendation endpoint.
#
# Flow: embed the request text and the natural-language requirement, fetch the
# most similar stored outfits from BigQuery for each, then ask Gemini for one
# recommendation using those outfits as context.
#
# NOTE(review): the function docstring looks like a Swagger/flasgger YAML spec
# (summary line, `---`, then YAML). Such tooling reads the docstring at
# runtime, so its wording is kept exactly as published — confirm before
# translating or editing it.
@nl_codi_recommend_bp.route("/get_nl_codi", methods=["POST"])
def get_codi():
    """
    자연어(NL) 코디 추천 API
    ---
    tags:
      - Recommendation
    consumes:
      - application/json
    produces:
      - application/json
    parameters:
      - in: body
        name: body
        required: true
        description: 사용자가 선택한 데이터..? 사실 코디 추천 API랑 똑같음. 자연어가 추가됐음
        schema:
          type: object
          properties:
            natural_language:
              type: string
              example: "무슨무슨 옷을 추천해줘"
            clothing:
              type: array
              items:
                type: object
                properties:
                  baseColor:
                    type: string
                  clothing_id:
                    type: integer
                  description:
                    type: string
                  mainCategory:
                    type: string
                  name:
                    type: string
                  pattern:
                    type: string
                  pointColor:
                    type: string
                  season:
                    type: string
                  style:
                    type: string
                  subCategory:
                    type: string
                  textile:
                    type: string
    responses:
      200:
        description: 자연어 생각해서 생성한 코디 1개
        schema:
          type: object
          properties:
            clothing_ids:
              type: array
              items:
                type: integer
              example: [1, 2, 8]
            description:
              type: string
              example: "부드러운 울 니트와 블랙 와이드 데님의 편안한 가을 남친룩."
            hashtags:
              type: array
              items:
                type: string
              example: ["남친룩", "가을코디", "캐주얼", "데일리룩", "편안함"]
            name:
              type: string
              example: "가을 남친룩"
    """
    # Parse the body without raising so malformed input gets a JSON error
    # response instead of an AttributeError further down.
    nl_codi_request = request.get_json(silent=True)
    if not isinstance(nl_codi_request, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    # The requirement text is embedded and substituted into the prompt, so it
    # has to be a real, non-empty string.
    natural_language = nl_codi_request.get('natural_language')
    if not isinstance(natural_language, str) or not natural_language.strip():
        return jsonify({"error": "'natural_language' must be a non-empty string"}), 400

    # NOTE(review): this is the whole raw request body (including
    # natural_language), not just the `clothing` field. Behaviour is kept
    # as-is; confirm whether only the clothing list was intended.
    clothing = request.get_data(as_text=True)

    # Every step below calls an external service (Vertex AI embeddings,
    # BigQuery, Gemini), so the whole pipeline is covered by the handler
    # rather than only the final jsonify call.
    try:
        # Embed the user's data and the natural-language requirement.
        clothing_embedding = embed_texts([clothing], PROJECT_ID, LOCATION, MODEL_NAME)
        natural_language_embedding = embed_texts([natural_language], PROJECT_ID, LOCATION, MODEL_NAME)

        # Fetch the top matches from BigQuery for each embedding.
        a_result = query_similar_embeddings(PROJECT_ID, DATASET_ID, TABLE_ID, clothing_embedding[0], top_n=3)
        b_result = query_similar_embeddings(PROJECT_ID, DATASET_ID, TABLE_ID, natural_language_embedding[0], top_n=3)

        # Concatenate the matched outfits (clothing matches first) as prompt
        # context, skipping rows whose codi_json is empty/NULL.
        rag_data = "".join(
            row['codi_json']
            for rows in (a_result, b_result)
            for row in rows
            if row['codi_json']
        )

        response = recommend_codi_to_gemini(clothing, rag_data, natural_language)
        return jsonify(response), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500