Skip to content

Commit

Permalink
fix: \
Browse files Browse the repository at this point in the history
  • Loading branch information
gitfrosh committed Apr 27, 2024
1 parent 86c6505 commit 1345913
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions db/convert-csv-to-json.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

def transform_objectid(text):
    """Replace MongoDB ObjectId references with JSON ``$oid`` objects.

    Every ``ObjectId(<value>)`` occurrence in *text* is rewritten to
    ``{"$oid": "<value>"}``; all other text is returned unchanged.

    Args:
        text: String that may contain ``ObjectId(...)`` references.

    Returns:
        A copy of *text* with each non-empty ObjectId reference rewritten.
    """
    # The capturing group grabs everything up to the first ')' and feeds the
    # \1 backreference. Requiring at least one character leaves an empty
    # ``ObjectId()`` untouched instead of emitting an empty "$oid".
    return re.sub(r'ObjectId\(([^)]+)\)', r'{"$oid": "\1"}', text)

def main():
os.makedirs('db/json', exist_ok=True) # Ensure the directory for JSON files exists
Expand All @@ -14,10 +15,10 @@ def main():
df = pd.read_csv(f'db/csv/{file}')
# Transform all string columns that may contain ObjectId references
for column in df.select_dtypes(include=['object']):
df[column] = df[column].apply(lambda x: transform_objectid(str(x)) if x else x)
df[column] = df[column].apply(lambda x: transform_objectid(str(x)) if pd.notna(x) else x)
# Save each dataframe as a JSON file with all objects in a single array
json_path = f'db/json/{file.replace(".csv", ".json")}'
df.to_json(json_path, orient='records', indent=4, ensure_ascii=False)
df.to_json(json_path, orient='records', indent=4)

if __name__ == "__main__":
main()

0 comments on commit 1345913

Please sign in to comment.