Merge pull request #67 from boostcampaitech4lv23cv2/build
[Release] Version 1.1.2
Showing 13 changed files with 666 additions and 84 deletions.
Data/utils/pseudo_dataset.py
@@ -0,0 +1,56 @@
import pandas as pd
import argparse
import os
from datetime import datetime


def make_csv(data: str, path: str, date: str):
    """Append newly collected images to the dataset CSV.

    Args:
        data (str): dataset name ('fish' or 'sashimi')
        path (str): folder path containing the CSV
        date (str): today's date (YYYYMMDD)
    """
    today_path = os.path.join(path, date)
    files = os.listdir(today_path)
    csv_path = os.path.join(path, data + '.csv')
    df = pd.read_csv(csv_path)

    for file in files:
        # strip the 4-char extension and split the label-encoded filename
        parts = file[:-4].split('_')
        print(parts)
        # keep only confident pseudo-labels: mode 2 needs confidence >= 70, mode 1 needs >= 80
        if (int(parts[3]) == 2 and int(parts[2]) >= 70) or (int(parts[3]) == 1 and int(parts[2]) >= 80):
            new_row = {'img_path': f"{date}/{parts[0]}.jpg", 'categories_id': parts[1]}
            # DataFrame.append was removed in pandas 2.0; concat a one-row frame instead
            df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

    df.to_csv(os.path.join(path, data + '.csv'), columns=['img_path', 'categories_id'], index=False)


def rename_file(path: str, date: str):
    """Strip the label suffix from each filename, keeping only the image id.

    Args:
        path (str): folder path containing today's images
        date (str): today's date (YYYYMMDD)
    """
    today_path = os.path.join(path, date)
    files = os.listdir(today_path)
    for file in files:
        f_split = file[:-4].split('_')
        os.rename(os.path.join(today_path, file), os.path.join(today_path, f_split[0] + ".jpg"))


if __name__ == "__main__":
    today = datetime.today().strftime('%Y%m%d')

    parser = argparse.ArgumentParser()
    parser.add_argument('--fpath', type=str, default='/opt/ml/data2/fish', help='fish data folder path')
    parser.add_argument('--spath', type=str, default='/opt/ml/data2/sashimi', help='sashimi data folder path')
    parser.add_argument('--date', type=str, default=today, help='today date')

    args = parser.parse_args()

    make_csv('fish', args.fpath, args.date)
    make_csv('sashimi', args.spath, args.date)

    rename_file(args.fpath, args.date)
    rename_file(args.spath, args.date)
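To make the indexing in make_csv easier to follow: the filenames are expected to look like `<img_id>_<category_id>_<confidence>_<mode>.jpg`. This convention is inferred from the code above, and the sample name below is invented:

```python
# Illustrative check of the assumed filename convention; "0001_3_85_1.jpg" is hypothetical.
sample = "0001_3_85_1.jpg"
img_id, category_id, confidence, mode = sample[:-4].split("_")

# make_csv keeps a row when mode == 2 and confidence >= 70, or mode == 1 and confidence >= 80
keep = (int(mode) == 2 and int(confidence) >= 70) or (int(mode) == 1 and int(confidence) >= 80)
print(img_id, category_id, keep)  # -> 0001 3 True
```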
Airflow README
@@ -1,10 +1,33 @@
# Airflow

> The Airflow server runs on GCP and is configured to connect to another account's GCP bucket and to the aistages server, so that images can be moved between them.

### GCP setup
- https://medium.com/apache-airflow/a-simple-guide-to-start-using-apache-airflow-2-on-google-cloud-1811c2127445
- https://ruuci.tistory.com/6

### Installing Airflow with Docker Compose
- https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html
- https://mep997.tistory.com/27

### Google Cloud Connection
- Connection Id = any name you like
- Connection Type = Google Cloud
- Keyfile Path = absolute path of the keyfile uploaded to GCP
  - e.g. /opt/airflow/dags/key.json
- Caution: fill in only one of Keyfile Path, Keyfile JSON, and Keyfile Secret Name; do not set them together
- The service account that issued the key file must be granted the Storage Object Admin role, or it cannot access the files. A scripted alternative to the UI setup is sketched below.
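A minimal sketch of creating this connection programmatically, assuming the key path from the example above; the conn_id and the "key_path" extras key name are illustrative (field names vary across Google provider versions) and are not part of this commit:

```python
# Hypothetical helper: register the Google Cloud connection without the UI.
import json

from airflow import settings
from airflow.models import Connection

gcp_conn = Connection(
    conn_id="my_gcp",  # "any name you like"
    conn_type="google_cloud_platform",
    extra=json.dumps({"key_path": "/opt/airflow/dags/key.json"}),
)

session = settings.Session()
session.add(gcp_conn)
session.commit()
```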
### SFTP server Connection
- Connection Id = any name you like
- Connection Type = SFTP
- Host = IP address of the server to connect to
- Username = username on that server
- Port = SFTP port number
- Extra = location of the SSH keyfile uploaded to GCP, given as JSON
  - e.g. {"key_file": "/opt/airflow/dags/key"}
- Reference: https://docs.aws.amazon.com/ko_kr/mwaa/latest/userguide/samples-ssh.html

The same scripted registration works for this connection; see the sketch below.
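In this sketch, the conn_id "aistages" matches SFTP_CONN_ID in the DAG files further down and the key_file path follows the example above; the host, username, and port are placeholders:

```python
# Hypothetical helper: register the SFTP connection without the UI.
# Host, login, and port are placeholders; replace with the real server values.
import json

from airflow import settings
from airflow.models import Connection

sftp_conn = Connection(
    conn_id="aistages",
    conn_type="sftp",
    host="0.0.0.0",  # IP address of the server to connect to
    login="root",    # username on that server
    port=2222,       # SFTP port number
    extra=json.dumps({"key_file": "/opt/airflow/dags/key"}),
)

session = settings.Session()
session.add(sftp_conn)
session.commit()
```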
### Writing the DAG
- gcs to sftp: https://airflow.apache.org/docs/apache-airflow-providers-google/stable/operators/transfer/gcs_to_sftp.html
- execute bash command on sftp server: https://airflow.apache.org/docs/apache-airflow-providers-ssh/stable/_api/airflow/providers/ssh/operators/ssh/index.html
gcs_to_sftp DAG
@@ -0,0 +1,70 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Example Airflow DAG for Google Cloud Storage to SFTP transfer operators.
"""
from __future__ import annotations

import os
from datetime import datetime
from pathlib import Path

from airflow import models
from airflow.providers.google.cloud.transfers.gcs_to_sftp import GCSToSFTPOperator
from airflow.providers.google.cloud.operators.gcs import GCSDeleteObjectsOperator
from airflow.operators.dummy import DummyOperator
from airflow.providers.ssh.operators.ssh import SSHOperator
from airflow.providers.ssh.hooks.ssh import SSHHook
from airflow.utils.trigger_rule import TriggerRule

ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT")
DAG_ID = "gcs_to_sftp"

SFTP_CONN_ID = "aistages"
BUCKET_NAME = "user-data-cv13"
DESTINATION_PATH_FISH = "/opt/ml/data2/fish"
DESTINATION_PATH_SASHIMI = "/opt/ml/data2/sashimi"
GCS_FISH_DIR = "fish/*"
GCS_SASHIMI_DIR = "sashimi/*"
date = "{{ ds_nodash }}"
sshHook = SSHHook(ssh_conn_id=SFTP_CONN_ID)

with models.DAG(
    DAG_ID,
    schedule="@once",
    start_date=datetime(2023, 2, 5),
    catchup=False,
    tags=["example", "gcs"],
) as dag:

    # clean up the image prefixes in the ficv_dataset bucket
    fish_image_delete = GCSDeleteObjectsOperator(
        task_id="fish-image-delete",
        bucket_name='ficv_dataset',
        prefix='fish/'
    )

    sashimi_image_delete = GCSDeleteObjectsOperator(
        task_id="sashimi-image-delete",
        bucket_name='ficv_dataset',
        prefix='sashimi/'
    )

    (
        fish_image_delete >> sashimi_image_delete
    )
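As an aside, Airflow 2.5+ lets you smoke-test a DAG file like this locally with dag.test(). A hedged sketch of what could be appended to the file; note it executes the tasks for real, so the delete operators above would actually remove objects from the bucket:

```python
# Hypothetical addition (requires Airflow >= 2.5 and a valid Google Cloud
# connection); dag.test() runs every task in-process, for real.
if __name__ == "__main__":
    dag.test()
```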
ficv_pipeline DAG
@@ -0,0 +1,132 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Example Airflow DAG for Google Cloud Storage to SFTP transfer operators.
"""
from __future__ import annotations

import os
from datetime import datetime
from pathlib import Path

from airflow import models
from airflow.providers.google.cloud.transfers.gcs_to_sftp import GCSToSFTPOperator
from airflow.providers.google.cloud.operators.gcs import GCSDeleteObjectsOperator
from airflow.operators.dummy import DummyOperator
from airflow.providers.ssh.operators.ssh import SSHOperator
from airflow.providers.ssh.hooks.ssh import SSHHook
from airflow.utils.trigger_rule import TriggerRule

ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT")
DAG_ID = "ficv_pipeline"

SFTP_CONN_ID = "aistages"
BUCKET_NAME = "user-data-cv13"
DESTINATION_PATH_FISH = "/opt/ml/data2/fish"
DESTINATION_PATH_SASHIMI = "/opt/ml/data2/sashimi"
GCS_FISH_DIR = "fish/*"
GCS_SASHIMI_DIR = "sashimi/*"
date = "{{ ds_nodash }}"
sshHook = SSHHook(ssh_conn_id=SFTP_CONN_ID)

with models.DAG(
    DAG_ID,
    schedule="@once",
    start_date=datetime(2023, 2, 5),
    catchup=False,
    tags=["example", "gcs"],
) as dag:

    dummy = DummyOperator(
        task_id="start",
    )

    # create today's destination folders on the aistages server
    fish_mkdir_today = SSHOperator(
        task_id="fish-mkdir-today",
        command="mkdir -p {{params.fish_des}}/{{ ds_nodash }}",
        ssh_hook=sshHook,
        params={'fish_des': DESTINATION_PATH_FISH,
                'date': date,
                }
    )

    sashimi_mkdir_today = SSHOperator(
        task_id="sashimi-mkdir-today",
        command="mkdir -p {{params.sashimi_des}}/{{ ds_nodash }}",
        ssh_hook=sshHook,
        params={'sashimi_des': DESTINATION_PATH_SASHIMI,
                'date': date,
                }
    )

    # [START howto_operator_gcs_to_sftp_move_specific_files]
    move_dir_fish = GCSToSFTPOperator(
        task_id="fish-move-gsc-to-sftp",
        sftp_conn_id=SFTP_CONN_ID,
        source_bucket=BUCKET_NAME,
        source_object=GCS_FISH_DIR,
        destination_path=os.path.join(DESTINATION_PATH_FISH, date),
        keep_directory_structure=False,
    )

    move_dir_sashimi = GCSToSFTPOperator(
        task_id="sashimi-move-gsc-to-sftp",
        sftp_conn_id=SFTP_CONN_ID,
        source_bucket=BUCKET_NAME,
        source_object=GCS_SASHIMI_DIR,
        destination_path=os.path.join(DESTINATION_PATH_SASHIMI, date),
        keep_directory_structure=False,
    )

    # remove the transferred objects from the source bucket
    fish_image_delete = GCSDeleteObjectsOperator(
        task_id="fish-image-delete",
        bucket_name=BUCKET_NAME,
        prefix='fish/'
    )

    sashimi_image_delete = GCSDeleteObjectsOperator(
        task_id="sashimi-image-delete",
        bucket_name=BUCKET_NAME,
        prefix='sashimi/'
    )

    # filter the pseudo-labeled images into the dataset CSVs (pseudo_dataset.py above)
    make_dataset = SSHOperator(
        task_id="make-dataset",
        command="python /opt/ml/final-project-level3-cv-13/Data/utils/pseudo_dataset.py --date=\"{{ ds_nodash }}\"",
        ssh_hook=sshHook,
        params={'sashimi_des': DESTINATION_PATH_SASHIMI,
                'date': date,
                }
    )

    kfold_dataset = SSHOperator(
        task_id="kfold-dataset",
        command="python /opt/ml/final-project-level3-cv-13/Data/utils/dataset_split.py",
        ssh_hook=sshHook,
    )

    (
        dummy >> fish_mkdir_today >> move_dir_fish >> fish_image_delete >> make_dataset,
        dummy >> sashimi_mkdir_today >> move_dir_sashimi >> sashimi_image_delete >> make_dataset,
        make_dataset >> kfold_dataset
    )
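The closing tuple fans out from dummy into the fish and sashimi branches and joins them again at make_dataset before the k-fold split. The same wiring in Airflow's list-based bitshift form, shown purely as a reading aid (not part of the commit):

```python
# Equivalent dependency graph using list fan-out / fan-in.
dummy >> [fish_mkdir_today, sashimi_mkdir_today]
fish_mkdir_today >> move_dir_fish >> fish_image_delete
sashimi_mkdir_today >> move_dir_sashimi >> sashimi_image_delete
[fish_image_delete, sashimi_image_delete] >> make_dataset >> kfold_dataset
```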