Skip to content

Commit

Permalink
Export to parquet instead of CSV
Browse files (browse the repository at this point in the history)
  • Loading branch information
erfannariman committed Jun 4, 2024
1 parent ed1dd4e commit 2e83f76
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 12 deletions.
13 changes: 3 additions & 10 deletions df_to_azure/adf.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
Factory,
LinkedServiceReference,
LinkedServiceResource,
+ ParquetFormat,
PipelineResource,
SecureString,
SqlServerStoredProcedureActivity,
Expand Down Expand Up @@ -177,16 +178,8 @@ def create_input_blob(self):
ds_azure_blob = AzureBlobDataset(
linked_service_name=ds_ls,
folder_path=f"dftoazure/{self.table_name}",
- file_name=f"{self.table_name}.csv",
- format={
- "type": "TextFormat",
- "columnDelimiter": "^",
- "rowDelimiter": "\n",
- "treatEmptyAsNull": "true",
- "skipLineCount": 0,
- "firstRowAsHeader": "true",
- "quoteChar": '"',
- },
+ file_name=f"{self.table_name}.parquet", # Changed to parquet
+ format=ParquetFormat(), # Changed format to ParquetFormat
)
ds_azure_blob = DatasetResource(properties=ds_azure_blob)
self.adf_client.datasets.create_or_update(self.rg_name, self.df_name, ds_name, ds_azure_blob)
Expand Down
4 changes: 2 additions & 2 deletions df_to_azure/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,10 +183,10 @@ def upload_to_blob(self):
blob_client = self.blob_service_client()
blob_client = blob_client.get_blob_client(
container="dftoazure",
- blob=f"{self.table_name}/{self.table_name}.csv",
+ blob=f"{self.table_name}/{self.table_name}.parquet",
)

- data = self.df.to_csv(index=False, sep="^", quotechar='"', lineterminator="\n")
+ data = self.df.to_parquet(index=False)
blob_client.upload_blob(data, overwrite=True)

def create_schema(self):
Expand Down

0 comments on commit 2e83f76

Please sign in to comment.