Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ScriptAction - clone default mlc repo if nothing present #119

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/test-mlc-core-actions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -237,9 +237,10 @@ jobs:
run: |
cd .github/scripts && python -c "import test_mlc_access as test; test.test_find_repo()"

- name: Pull MLCOMMONS mlperf-automations repository
- name: Try running a script with no automation repo - should clone default mlcommons repo
run: |
mlc pull repo mlcommons@mlperf-automations --checkout=dev
mlc rm repo anandhu-eng@mlperf-automations
mlcr detect,os

- name: Run tests from test-mlc-access.py
run: |
Expand Down
2 changes: 1 addition & 1 deletion mlc/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def is_curdir_inside_path(base_path):
logger.warning(f"""Warning: {repo_path} not found. Considering it as a corrupt entry and deleting automatically...""")
logger.warning(f"Deleting the {meta_yaml_path} entry from repos.json")
from .repo_action import rm_repo
res = rm_repo(repo_path, os.path.join(self.repos_path, 'repos.json'), True)
res = rm_repo(repo_path, os.path.join(self.repos_path, 'repos.json'), self.repos, True)

if res["return"] > 0:
return res
Expand Down
330 changes: 169 additions & 161 deletions mlc/repo_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def add(self, run_args):
#check if its an URL
if utils.is_valid_url(i_repo_path):
if "github.com" in i_repo_path:
res = self.github_url_to_user_repo_format(i_repo_path)
res = github_url_to_user_repo_format(i_repo_path)
if res['return'] > 0:
return res
repo_folder_name = res['value']
Expand All @@ -62,40 +62,14 @@ def add(self, run_args):
return {'return': 0}

def conflicting_repo(self, repo_meta):
for repo_object in self.repos:
if repo_object.meta.get('uid', '') == '':
return {"return": 1, "error": f"UID is not present in file 'meta.yaml' in the repo path {repo_object.path}"}
if repo_meta["uid"] == repo_object.meta.get('uid', ''):
if repo_meta['path'] == repo_object.path:
return {"return": 1, "error": f"Same repo is already registered"}
else:
return {"return": 1, "error": f"Conflicting with repo in the path {repo_object.path}", "conflicting_path": repo_object.path}
return {"return": 0}

def register_repo(self, repo_path, repo_meta):
return conflicting_repo(repo_meta, self.repos)

if repo_meta.get('deps'):
for dep in repo_meta['deps']:
self.pull_repo(dep['url'], branch=dep.get('branch'), checkout=dep.get('checkout'))

# Get the path to the repos.json file in $HOME/MLC
repos_file_path = os.path.join(self.repos_path, 'repos.json')

with open(repos_file_path, 'r') as f:
repos_list = json.load(f)

if repo_path not in repos_list:
repos_list.append(repo_path)
logger.info(f"Added new repo path: {repo_path}")

with open(repos_file_path, 'w') as f:
json.dump(repos_list, f, indent=2)
logger.info(f"Updated repos.json at {repos_file_path}")
return {'return': 0}

def unregister_repo(self, repo_path):
repos_file_path = os.path.join(self.repos_path, 'repos.json')
return unregister_repo(repo_path, repos_file_path)

def register_repo(self, repo_path, repo_meta):
return register_repo(repo_path, self.repos_path, repo_meta)


def find(self, run_args):
Expand Down Expand Up @@ -130,7 +104,7 @@ def find(self, run_args):
elif "@" in repo:
repo_name = repo
elif "github.com" in repo:
result = self.github_url_to_user_repo_format(repo)
result = github_url_to_user_repo_format(repo)
if result["return"] == 0:
repo_name = result["value"]
else:
Expand Down Expand Up @@ -163,135 +137,6 @@ def find(self, run_args):

return {'return': 0, 'list': lst}

def github_url_to_user_repo_format(self, url):
"""
Converts a GitHub repo URL to user@repo_name format.

:param url: str, GitHub repository URL (e.g., https://github.com/user/repo_name.git)
:return: str, formatted as user@repo_name
"""
# Regex to match GitHub URLs
pattern = r"(?:https?://)?(?:www\.)?github\.com/([^/]+)/([^/.]+)(?:\.git)?"

match = re.match(pattern, url)
if match:
user, repo_name = match.groups()
return {"return": 0, "value": f"{user}@{repo_name}"}
else:
return {"return": 0, "value": os.path.basename(url).replace(".git", "")}

def pull_repo(self, repo_url, branch=None, checkout = None, tag = None, pat = None, ssh = None):

# Determine the checkout path from environment or default
repo_base_path = self.repos_path # either the value will be from 'MLC_REPOS'
os.makedirs(repo_base_path, exist_ok=True) # Ensure the directory exists

# Handle user@repo format (convert to standard GitHub URL)
if re.match(r'^[\w-]+@[\w-]+$', repo_url):
user, repo = repo_url.split('@')
repo_url = f"https://github.com/{user}/{repo}.git"

# support pat and ssh
if pat or ssh:
tmp_param = {}
url_type = "pat" if pat else "ssh"
if pat:
tmp_param["token"] = pat
res = utils.modify_git_url(url_type, repo_url, tmp_param)
if res["return"] > 0:
return res
else:
print(res)
repo_url = res["url"]


# Extract the repo name from URL
repo_name = repo_url.split('/')[-1].replace('.git', '')
res = self.github_url_to_user_repo_format(repo_url)
if res["return"] > 0:
return res
else:
repo_download_name = res["value"]
repo_path = os.path.join(repo_base_path, repo_download_name)

try:
# If the directory doesn't exist, clone it
if not os.path.exists(repo_path):
logger.info(f"Cloning repository {repo_url} to {repo_path}...")

# Build clone command without branch if not provided
clone_command = ['git', 'clone', repo_url, repo_path]
if branch:
clone_command = ['git', 'clone', '--branch', branch, repo_url, repo_path]

subprocess.run(clone_command, check=True)

else:
logger.info(f"Repository {repo_name} already exists at {repo_path}. Checking for local changes...")

# Check for local changes
status_command = ['git', '-C', repo_path, 'status', '--porcelain', '--untracked-files=no']
local_changes = subprocess.run(status_command, capture_output=True, text=True)

if local_changes.stdout.strip():
logger.warning("There are local changes in the repository. Please commit or stash them before checking out.")
print(local_changes.stdout.strip())
return {"return": 0, "warning": f"Local changes detected in the already existing repository: {repo_path}, skipping the pull"}
else:
logger.info("No local changes detected. Fetching latest changes...")
subprocess.run(['git', '-C', repo_path, 'fetch'], check=True)

if tag:
checkout = "tags/"+tag

# Checkout to a specific branch or commit if --checkout is provided
if checkout or tag:
logger.info(f"Checking out to {checkout} in {repo_path}...")
subprocess.run(['git', '-C', repo_path, 'checkout', checkout], check=True)

if not tag:
subprocess.run(['git', '-C', repo_path, 'pull'], check=True)
logger.info("Repository successfully pulled.")

logger.info("Registering the repo in repos.json")

# check the meta file to obtain uids
meta_file_path = os.path.join(repo_path, 'meta.yaml')
if not os.path.exists(meta_file_path):
logger.warning(f"meta.yaml not found in {repo_path}. Repo pulled but not register in mlc repos. Skipping...")
return {"return": 0}

with open(meta_file_path, 'r') as meta_file:
meta_data = yaml.safe_load(meta_file)
meta_data["path"] = repo_path

# Check UID conflicts
is_conflict = self.conflicting_repo(meta_data)
if is_conflict['return'] > 0:
if "UID not present" in is_conflict['error']:
logger.warning(f"UID not found in meta.yaml at {repo_path}. Repo pulled but can not register in mlc repos. Skipping...")
return {"return": 0}
elif "already registered" in is_conflict["error"]:
#logger.warning(is_conflict["error"])
logger.info("No changes made to repos.json.")
return {"return": 0}
else:
logger.warning(f"The repo to be cloned has conflict with the repo already in the path: {is_conflict['conflicting_path']}")
self.unregister_repo(is_conflict['conflicting_path'])
self.register_repo(repo_path, meta_data)
logger.warning(f"{repo_path} is registered in repos.json and {is_conflict['conflicting_path']} is unregistered.")
return {"return": 0}
else:
r = self.register_repo(repo_path, meta_data)
if r['return'] > 0:
return r
return {"return": 0}

except subprocess.CalledProcessError as e:
return {'return': 1, 'error': f"Git command failed: {e}"}
except Exception as e:
return {'return': 1, 'error': f"Error pulling repository: {str(e)}"}

def pull(self, run_args):
repo_url = run_args.get('repo', run_args.get('url', 'repo'))
if not repo_url or repo_url == "repo":
Expand All @@ -317,6 +162,9 @@ def pull(self, run_args):
return res

return {'return': 0}

def pull_repo(self, repo_url, branch=None, checkout = None, tag = None, pat = None, ssh = None):
return pull_repo(repo_url, self.repos_path, self.repos, branch, checkout, tag, pat, ssh)


def list(self, run_args):
Expand Down Expand Up @@ -390,3 +238,163 @@ def unregister_repo(repo_path, repos_file_path):
logger.info(f"Path: {repo_path} not found in {repos_file_path}. Nothing to be unregistered!")
return {'return': 0}

def pull_repo(repo_url, repos_path, repos, branch=None, checkout = None, tag = None, pat = None, ssh = None):
    """
    Clone (or update) a git repository under ``repos_path`` and register it
    in ``repos.json``.

    :param repo_url: str, repository URL, or the ``user@repo`` short form
                     which is expanded to ``https://github.com/user/repo.git``
    :param repos_path: str, directory holding the cloned repos and ``repos.json``
    :param repos: iterable of already-registered repo objects; forwarded to
                  ``conflicting_repo`` for the UID conflict check
    :param branch: str, branch given to ``git clone --branch`` (first clone only)
    :param checkout: str, ref given to ``git checkout`` after clone/fetch
    :param tag: str, tag to check out; takes precedence over ``checkout`` and
                suppresses the trailing ``git pull``
    :param pat: str, personal access token; when set the URL is rewritten
                through ``utils.modify_git_url``
    :param ssh: truthy to rewrite the URL to its SSH form through
                ``utils.modify_git_url`` (ignored when ``pat`` is given)
    :return: dict, ``{"return": 0}`` on success (optionally with a
             ``"warning"`` key), or ``{"return": >0, "error": str}``
    """
    # Determine the checkout path from environment or default
    repo_base_path = repos_path  # either the value will be from 'MLC_REPOS'
    os.makedirs(repo_base_path, exist_ok=True)  # Ensure the directory exists

    # Handle user@repo format (convert to standard GitHub URL)
    if re.match(r'^[\w-]+@[\w-]+$', repo_url):
        user, repo = repo_url.split('@')
        repo_url = f"https://github.com/{user}/{repo}.git"

    # URL that is safe to show in logs: the PAT-rewritten URL embeds the
    # token, so in that case we keep logging the URL as the caller gave it.
    log_url = repo_url

    # support pat and ssh
    if pat or ssh:
        tmp_param = {}
        url_type = "pat" if pat else "ssh"
        if pat:
            tmp_param["token"] = pat
        res = utils.modify_git_url(url_type, repo_url, tmp_param)
        if res["return"] > 0:
            return res
        # Deliberately not printing `res`: for PAT it contains the secret.
        repo_url = res["url"]
        if not pat:
            log_url = repo_url

    # Extract the repo name from URL
    repo_name = repo_url.split('/')[-1].replace('.git', '')
    res = github_url_to_user_repo_format(repo_url)
    if res["return"] > 0:
        return res
    repo_path = os.path.join(repo_base_path, res["value"])

    try:
        # If the directory doesn't exist, clone it
        if not os.path.exists(repo_path):
            logger.info(f"Cloning repository {log_url} to {repo_path}...")

            # Build clone command without branch if not provided
            clone_command = ['git', 'clone']
            if branch:
                clone_command += ['--branch', branch]
            clone_command += [repo_url, repo_path]

            subprocess.run(clone_command, check=True)

        else:
            logger.info(f"Repository {repo_name} already exists at {repo_path}. Checking for local changes...")

            # Check for local changes
            status_command = ['git', '-C', repo_path, 'status', '--porcelain', '--untracked-files=no']
            local_changes = subprocess.run(status_command, capture_output=True, text=True)

            if local_changes.stdout.strip():
                logger.warning("There are local changes in the repository. Please commit or stash them before checking out.")
                print(local_changes.stdout.strip())
                return {"return": 0, "warning": f"Local changes detected in the already existing repository: {repo_path}, skipping the pull"}

            logger.info("No local changes detected. Fetching latest changes...")
            subprocess.run(['git', '-C', repo_path, 'fetch'], check=True)

        # A tag always wins over an explicit --checkout value
        if tag:
            checkout = "tags/"+tag

        # Checkout to a specific branch or commit if --checkout is provided
        if checkout:
            logger.info(f"Checking out to {checkout} in {repo_path}...")
            subprocess.run(['git', '-C', repo_path, 'checkout', checkout], check=True)

        # A tag checkout leaves a detached HEAD, where `git pull` cannot work
        if not tag:
            subprocess.run(['git', '-C', repo_path, 'pull'], check=True)
            logger.info("Repository successfully pulled.")

        logger.info("Registering the repo in repos.json")

        # check the meta file to obtain uids
        meta_file_path = os.path.join(repo_path, 'meta.yaml')
        if not os.path.exists(meta_file_path):
            logger.warning(f"meta.yaml not found in {repo_path}. Repo pulled but not register in mlc repos. Skipping...")
            return {"return": 0}

        with open(meta_file_path, 'r') as meta_file:
            # safe_load returns None for an empty document
            meta_data = yaml.safe_load(meta_file) or {}
        meta_data["path"] = repo_path

        # conflicting_repo indexes repo_meta["uid"] directly, so a pulled
        # repo without a UID has to be filtered out before the check.
        if not meta_data.get("uid"):
            logger.warning(f"UID not found in meta.yaml at {repo_path}. Repo pulled but can not register in mlc repos. Skipping...")
            return {"return": 0}

        # Check UID conflicts
        is_conflict = conflicting_repo(meta_data, repos)
        if is_conflict['return'] > 0:
            error = is_conflict.get('error', '')
            if "already registered" in error:
                logger.info("No changes made to repos.json.")
                return {"return": 0}

            conflicting_path = is_conflict.get('conflicting_path')
            if conflicting_path is None:
                # Not a path conflict (e.g. an already-registered repo has
                # no UID): nothing can be swapped, so skip registration.
                logger.warning(f"{error}. Repo pulled but can not register in mlc repos. Skipping...")
                return {"return": 0}

            logger.warning(f"The repo to be cloned has conflict with the repo already in the path: {conflicting_path}")
            r = unregister_repo(conflicting_path, os.path.join(repos_path, 'repos.json'))
            if r['return'] > 0:
                return r
            r = register_repo(repo_path, repos_path, meta_data)
            if r['return'] > 0:
                return r
            logger.warning(f"{repo_path} is registered in repos.json and {conflicting_path} is unregistered.")
            return {"return": 0}

        r = register_repo(repo_path, repos_path, meta_data)
        if r['return'] > 0:
            return r
        return {"return": 0}

    except subprocess.CalledProcessError as e:
        message = str(e)
        if pat:
            # The failed command line includes the token-bearing URL
            message = message.replace(str(pat), "***")
        return {'return': 1, 'error': f"Git command failed: {message}"}
    except Exception as e:
        return {'return': 1, 'error': f"Error pulling repository: {str(e)}"}

def github_url_to_user_repo_format(url):
    """
    Converts a GitHub repo URL to user@repo_name format.

    The scheme, a ``www.`` prefix, embedded credentials and a trailing
    ``.git`` are all optional, so the following forms are recognised:

    * ``https://github.com/user/repo_name.git``
    * ``https://<token>@github.com/user/repo_name``
    * ``git@github.com:user/repo_name.git`` (scp-style SSH)
    * ``ssh://git@github.com/user/repo_name.git``

    Repository names may contain dots (``user/my.repo``). Anything that is
    not a GitHub URL falls back to the last path component with a trailing
    ``.git`` removed.

    :param url: str, GitHub repository URL (e.g., https://github.com/user/repo_name.git)
    :return: dict, ``{"return": 0, "value": name}`` where ``name`` is
             ``user@repo_name`` for GitHub URLs and the bare repository name
             otherwise
    """
    # Regex to match GitHub URLs (HTTP(S), SSH, with or without credentials)
    pattern = (
        r"(?:(?:https?|ssh|git)://)?"   # optional scheme
        r"(?:[^@/]+@)?"                 # optional userinfo: "git@", "<token>@"
        r"(?:www\.)?github\.com[:/]"    # host, then "/" or scp-style ":"
        r"([^/]+)/"                     # user / organisation
        r"([^/?#]+?)"                   # repo name (lazy, so ".git" is not swallowed)
        r"(?:\.git)?"                   # optional ".git" suffix
        r"(?:[/?#].*)?$"                # optional trailing path, query or fragment
    )

    match = re.match(pattern, url)
    if match:
        user, repo_name = match.groups()
        return {"return": 0, "value": f"{user}@{repo_name}"}

    # Not a GitHub URL: use the last path component, dropping only a
    # trailing ".git" (a blanket replace would mangle "foo.github.io").
    name = os.path.basename(url)
    if name.endswith(".git"):
        name = name[:-len(".git")]
    return {"return": 0, "value": name}

def register_repo(repo_path, repos_path, repo_meta, repos=None):
    """
    Pull the dependencies declared by a repo and record the repo's path in
    ``repos.json``.

    :param repo_path: str, path of the repo to register
    :param repos_path: str, directory that contains ``repos.json`` (and into
                       which dependency repos are pulled)
    :param repo_meta: dict, parsed ``meta.yaml`` of the repo; an optional
                      ``deps`` list holds entries with ``url`` and optional
                      ``branch`` / ``checkout`` keys
    :param repos: optional iterable of already-registered repo objects,
                  forwarded to ``pull_repo`` for the UID conflict check of
                  each dependency; treated as empty when omitted
    :return: dict, ``{'return': 0}`` on success, or the failing result of a
             dependency pull
    """
    # pull_repo needs the known repos as a positional argument; without it
    # every repo declaring deps failed with a TypeError.
    known_repos = [] if repos is None else repos

    for dep in repo_meta.get('deps') or []:
        dep_url = dep.get('url')
        if not dep_url:
            logger.warning(f"Skipping dependency without 'url' in the meta of {repo_path}")
            continue
        res = pull_repo(dep_url, repos_path, known_repos,
                        branch=dep.get('branch'), checkout=dep.get('checkout'))
        if res['return'] > 0:
            return res

    # Get the path to the repos.json file in $HOME/MLC
    repos_file_path = os.path.join(repos_path, 'repos.json')

    with open(repos_file_path, 'r') as f:
        repos_list = json.load(f)

    # Already registered: leave the file untouched rather than truncating
    # and rewriting identical content.
    if repo_path in repos_list:
        return {'return': 0}

    repos_list.append(repo_path)
    logger.info(f"Added new repo path: {repo_path}")

    with open(repos_file_path, 'w') as f:
        json.dump(repos_list, f, indent=2)
    logger.info(f"Updated repos.json at {repos_file_path}")
    return {'return': 0}

def conflicting_repo(repo_meta, repos):
    """
    Check a repo's meta against the already-known repos for a UID clash.

    The known repos are scanned in order and the first decisive finding is
    returned:

    * a known repo whose meta has no (or an empty) ``uid`` -> error
    * same ``uid`` and same path as ``repo_meta`` -> "already registered" error
    * same ``uid`` but a different path -> conflict error, with the other
      repo's path under ``conflicting_path``

    :param repo_meta: dict, meta of the candidate repo; ``uid`` and ``path``
                      are read from it
    :param repos: iterable of objects exposing ``.meta`` (dict) and ``.path``
    :return: dict, ``{"return": 0}`` when nothing clashes, otherwise
             ``{"return": 1, "error": str}`` (plus ``conflicting_path`` for
             a path conflict)
    """
    for known in repos:
        known_uid = known.meta.get('uid', '')

        # A known repo without a UID cannot be compared at all
        if known_uid == '':
            return {"return": 1, "error": f"UID is not present in file 'meta.yaml' in the repo path {known.path}"}

        if repo_meta["uid"] != known_uid:
            continue

        # Same UID: either the very same repo, or a clash with another path
        if repo_meta['path'] == known.path:
            return {"return": 1, "error": "Same repo is already registered"}
        return {
            "return": 1,
            "error": f"Conflicting with repo in the path {known.path}",
            "conflicting_path": known.path,
        }

    return {"return": 0}
Loading
Loading