Reviewer notes (text before the first "diff --git" header is ignored by git apply / patch):
  * Line breaks and indentation were reconstructed from a whitespace-flattened copy of
    this patch. If context whitespace differs from the tree, apply it with
    "git apply --ignore-whitespace". The post-image blob ids on the "index" lines of
    mlc/repo_action.py and mlc/script_action.py are stale because the content changed.
  * mlc/repo_action.py: register_repo() takes an optional "repos" list and forwards it to
    pull_repo(); the previous call omitted that required positional argument, so
    registering a repo whose meta.yaml lists "deps" failed with TypeError.
  * mlc/repo_action.py: pull_repo() now matches the "UID is not present" wording that
    conflicting_repo() really emits; the old substring never matched and the code fell
    through to a KeyError on "conflicting_path".
  * mlc/repo_action.py: the debug print of the rewritten git URL is removed and the
    clone log shows the URL as it was before the PAT/SSH rewrite.
  * mlc/script_action.py: the second find_target_folder("script") lookup is checked
    before it is handed to os.path.join().

diff --git a/.github/workflows/test-mlc-core-actions.yaml b/.github/workflows/test-mlc-core-actions.yaml
index 8f7cd0660..85013b1cd 100644
--- a/.github/workflows/test-mlc-core-actions.yaml
+++ b/.github/workflows/test-mlc-core-actions.yaml
@@ -237,9 +237,10 @@ jobs:
         run: |
           cd .github/scripts && python -c "import test_mlc_access as test; test.test_find_repo()"
 
-      - name: Pull MLCOMMONS mlperf-automations repository
+      - name: Try running a script with no automation repo - should clone default mlcommons repo
         run: |
-          mlc pull repo mlcommons@mlperf-automations --checkout=dev
+          mlc rm repo anandhu-eng@mlperf-automations
+          mlcr detect,os
 
       - name: Run tests from test-mlc-access.py
         run: |
diff --git a/mlc/action.py b/mlc/action.py
index 087fb729f..e27d3a3ae 100644
--- a/mlc/action.py
+++ b/mlc/action.py
@@ -107,7 +107,7 @@ def is_curdir_inside_path(base_path):
                 logger.warning(f"""Warning: {repo_path} not found. Considering it as a corrupt entry and deleting automatically...""")
                 logger.warning(f"Deleting the {meta_yaml_path} entry from repos.json")
                 from .repo_action import rm_repo
-                res = rm_repo(repo_path, os.path.join(self.repos_path, 'repos.json'), True)
+                res = rm_repo(repo_path, os.path.join(self.repos_path, 'repos.json'), self.repos, True)
                 if res["return"] > 0:
                     return res
 
diff --git a/mlc/repo_action.py b/mlc/repo_action.py
index 677fc2495..ebc2e1238 100644
--- a/mlc/repo_action.py
+++ b/mlc/repo_action.py
@@ -36,7 +36,7 @@ def add(self, run_args):
         #check if its an URL
         if utils.is_valid_url(i_repo_path):
             if "github.com" in i_repo_path:
-                res = self.github_url_to_user_repo_format(i_repo_path)
+                res = github_url_to_user_repo_format(i_repo_path)
                 if res['return'] > 0:
                     return res
                 repo_folder_name = res['value']
@@ -62,40 +62,14 @@ def add(self, run_args):
         return {'return': 0}
 
     def conflicting_repo(self, repo_meta):
-        for repo_object in self.repos:
-            if repo_object.meta.get('uid', '') == '':
-                return {"return": 1, "error": f"UID is not present in file 'meta.yaml' in the repo path {repo_object.path}"}
-            if repo_meta["uid"] == repo_object.meta.get('uid', ''):
-                if repo_meta['path'] == repo_object.path:
-                    return {"return": 1, "error": f"Same repo is already registered"}
-                else:
-                    return {"return": 1, "error": f"Conflicting with repo in the path {repo_object.path}", "conflicting_path": repo_object.path}
-        return {"return": 0}
-
-    def register_repo(self, repo_path, repo_meta):
+        return conflicting_repo(repo_meta, self.repos)
 
-        if repo_meta.get('deps'):
-            for dep in repo_meta['deps']:
-                self.pull_repo(dep['url'], branch=dep.get('branch'), checkout=dep.get('checkout'))
-
-        # Get the path to the repos.json file in $HOME/MLC
-        repos_file_path = os.path.join(self.repos_path, 'repos.json')
-
-        with open(repos_file_path, 'r') as f:
-            repos_list = json.load(f)
-
-        if repo_path not in repos_list:
-            repos_list.append(repo_path)
-            logger.info(f"Added new repo path: {repo_path}")
-
-            with open(repos_file_path, 'w') as f:
-                json.dump(repos_list, f, indent=2)
-            logger.info(f"Updated repos.json at {repos_file_path}")
-        return {'return': 0}
-
     def unregister_repo(self, repo_path):
         repos_file_path = os.path.join(self.repos_path, 'repos.json')
         return unregister_repo(repo_path, repos_file_path)
+
+    def register_repo(self, repo_path, repo_meta):
+        return register_repo(repo_path, self.repos_path, repo_meta, self.repos)
 
     def find(self, run_args):
 
@@ -130,7 +104,7 @@ def find(self, run_args):
                 elif "@" in repo:
                     repo_name = repo
                 elif "github.com" in repo:
-                    result = self.github_url_to_user_repo_format(repo)
+                    result = github_url_to_user_repo_format(repo)
                     if result["return"] == 0:
                         repo_name = result["value"]
                     else:
@@ -163,135 +137,6 @@ def find(self, run_args):
 
         return {'return': 0, 'list': lst}
 
-    def github_url_to_user_repo_format(self, url):
-        """
-        Converts a GitHub repo URL to user@repo_name format.
-
-        :param url: str, GitHub repository URL (e.g., https://github.com/user/repo_name.git)
-        :return: str, formatted as user@repo_name
-        """
-        # Regex to match GitHub URLs
-        pattern = r"(?:https?://)?(?:www\.)?github\.com/([^/]+)/([^/.]+)(?:\.git)?"
-
-        match = re.match(pattern, url)
-        if match:
-            user, repo_name = match.groups()
-            return {"return": 0, "value": f"{user}@{repo_name}"}
-        else:
-            return {"return": 0, "value": os.path.basename(url).replace(".git", "")}
-
-    def pull_repo(self, repo_url, branch=None, checkout = None, tag = None, pat = None, ssh = None):
-
-        # Determine the checkout path from environment or default
-        repo_base_path = self.repos_path # either the value will be from 'MLC_REPOS'
-        os.makedirs(repo_base_path, exist_ok=True)  # Ensure the directory exists
-
-        # Handle user@repo format (convert to standard GitHub URL)
-        if re.match(r'^[\w-]+@[\w-]+$', repo_url):
-            user, repo = repo_url.split('@')
-            repo_url = f"https://github.com/{user}/{repo}.git"
-
-        # support pat and ssh
-        if pat or ssh:
-            tmp_param = {}
-            url_type = "pat" if pat else "ssh"
-            if pat:
-                tmp_param["token"] = pat
-            res = utils.modify_git_url(url_type, repo_url, tmp_param)
-            if res["return"] > 0:
-                return res
-            else:
-                print(res)
-                repo_url = res["url"]
-
-
-        # Extract the repo name from URL
-        repo_name = repo_url.split('/')[-1].replace('.git', '')
-        res = self.github_url_to_user_repo_format(repo_url)
-        if res["return"] > 0:
-            return res
-        else:
-            repo_download_name = res["value"]
-            repo_path = os.path.join(repo_base_path, repo_download_name)
-
-        try:
-            # If the directory doesn't exist, clone it
-            if not os.path.exists(repo_path):
-                logger.info(f"Cloning repository {repo_url} to {repo_path}...")
-
-                # Build clone command without branch if not provided
-                clone_command = ['git', 'clone', repo_url, repo_path]
-                if branch:
-                    clone_command = ['git', 'clone', '--branch', branch, repo_url, repo_path]
-
-                subprocess.run(clone_command, check=True)
-
-            else:
-                logger.info(f"Repository {repo_name} already exists at {repo_path}. Checking for local changes...")
-
-                # Check for local changes
-                status_command = ['git', '-C', repo_path, 'status', '--porcelain', '--untracked-files=no']
-                local_changes = subprocess.run(status_command, capture_output=True, text=True)
-
-                if local_changes.stdout.strip():
-                    logger.warning("There are local changes in the repository. Please commit or stash them before checking out.")
-                    print(local_changes.stdout.strip())
-                    return {"return": 0, "warning": f"Local changes detected in the already existing repository: {repo_path}, skipping the pull"}
-                else:
-                    logger.info("No local changes detected. Fetching latest changes...")
-                    subprocess.run(['git', '-C', repo_path, 'fetch'], check=True)
-
-            if tag:
-                checkout = "tags/"+tag
-
-            # Checkout to a specific branch or commit if --checkout is provided
-            if checkout or tag:
-                logger.info(f"Checking out to {checkout} in {repo_path}...")
-                subprocess.run(['git', '-C', repo_path, 'checkout', checkout], check=True)
-
-            if not tag:
-                subprocess.run(['git', '-C', repo_path, 'pull'], check=True)
-                logger.info("Repository successfully pulled.")
-
-            logger.info("Registering the repo in repos.json")
-
-            # check the meta file to obtain uids
-            meta_file_path = os.path.join(repo_path, 'meta.yaml')
-            if not os.path.exists(meta_file_path):
-                logger.warning(f"meta.yaml not found in {repo_path}. Repo pulled but not register in mlc repos. Skipping...")
-                return {"return": 0}
-
-            with open(meta_file_path, 'r') as meta_file:
-                meta_data = yaml.safe_load(meta_file)
-                meta_data["path"] = repo_path
-
-            # Check UID conflicts
-            is_conflict = self.conflicting_repo(meta_data)
-            if is_conflict['return'] > 0:
-                if "UID not present" in is_conflict['error']:
-                    logger.warning(f"UID not found in meta.yaml at {repo_path}. Repo pulled but can not register in mlc repos. Skipping...")
-                    return {"return": 0}
-                elif "already registered" in is_conflict["error"]:
-                    #logger.warning(is_conflict["error"])
-                    logger.info("No changes made to repos.json.")
-                    return {"return": 0}
-                else:
-                    logger.warning(f"The repo to be cloned has conflict with the repo already in the path: {is_conflict['conflicting_path']}")
-                    self.unregister_repo(is_conflict['conflicting_path'])
-                    self.register_repo(repo_path, meta_data)
-                    logger.warning(f"{repo_path} is registered in repos.json and {is_conflict['conflicting_path']} is unregistered.")
-                    return {"return": 0}
-            else:
-                r = self.register_repo(repo_path, meta_data)
-                if r['return'] > 0:
-                    return r
-            return {"return": 0}
-
-        except subprocess.CalledProcessError as e:
-            return {'return': 1, 'error': f"Git command failed: {e}"}
-        except Exception as e:
-            return {'return': 1, 'error': f"Error pulling repository: {str(e)}"}
-
     def pull(self, run_args):
         repo_url = run_args.get('repo', run_args.get('url', 'repo'))
         if not repo_url or repo_url == "repo":
@@ -317,6 +162,9 @@ def pull(self, run_args):
                 return res
 
         return {'return': 0}
+
+    def pull_repo(self, repo_url, branch=None, checkout = None, tag = None, pat = None, ssh = None):
+        return pull_repo(repo_url, self.repos_path, self.repos, branch, checkout, tag, pat, ssh)
 
     def list(self, run_args):
 
@@ -390,3 +238,195 @@ def unregister_repo(repo_path, repos_file_path):
         logger.info(f"Path: {repo_path} not found in {repos_file_path}. Nothing to be unregistered!")
     return {'return': 0}
 
+def pull_repo(repo_url, repos_path, repos, branch=None, checkout = None, tag = None, pat = None, ssh = None):
+    """
+    Clone or update a git repository under repos_path and register it in repos.json.
+
+    :param repo_url: str, repository URL or user@repo_name shorthand (expanded to a GitHub URL)
+    :param repos_path: str, base directory that holds the cloned repos and repos.json
+    :param repos: list, loaded repo objects (with .meta and .path) used for the UID conflict check
+    :param branch: str, branch passed to "git clone --branch" on a fresh clone
+    :param checkout: str, branch or commit to check out after the clone/fetch
+    :param tag: str, tag to check out; overrides checkout and skips the final "git pull"
+    :param pat: str, personal access token handed to utils.modify_git_url
+    :param ssh: truthy to have utils.modify_git_url rewrite the URL for SSH
+    :return: dict, "return" is 0 on success and > 0 on failure (with "error"); may carry "warning"
+    """
+    # Determine the checkout path from environment or default
+    repo_base_path = repos_path # either the value will be from 'MLC_REPOS'
+    os.makedirs(repo_base_path, exist_ok=True)  # Ensure the directory exists
+
+    # Handle user@repo format (convert to standard GitHub URL)
+    if re.match(r'^[\w-]+@[\w-]+$', repo_url):
+        user, repo = repo_url.split('@')
+        repo_url = f"https://github.com/{user}/{repo}.git"
+
+    # Keep the URL as it was before any PAT/SSH rewrite; only this one is logged
+    display_url = repo_url
+
+    # support pat and ssh
+    if pat or ssh:
+        tmp_param = {}
+        url_type = "pat" if pat else "ssh"
+        if pat:
+            tmp_param["token"] = pat
+        res = utils.modify_git_url(url_type, repo_url, tmp_param)
+        if res["return"] > 0:
+            return res
+        # Do not print res: with a PAT the rewritten URL presumably embeds the token
+        repo_url = res["url"]
+
+    # Extract the repo name from URL
+    repo_name = repo_url.split('/')[-1].replace('.git', '')
+    res = github_url_to_user_repo_format(repo_url)
+    if res["return"] > 0:
+        return res
+    else:
+        repo_download_name = res["value"]
+        repo_path = os.path.join(repo_base_path, repo_download_name)
+
+    try:
+        # If the directory doesn't exist, clone it
+        if not os.path.exists(repo_path):
+            logger.info(f"Cloning repository {display_url} to {repo_path}...")
+
+            # Build clone command without branch if not provided
+            clone_command = ['git', 'clone', repo_url, repo_path]
+            if branch:
+                clone_command = ['git', 'clone', '--branch', branch, repo_url, repo_path]
+
+            subprocess.run(clone_command, check=True)
+
+        else:
+            logger.info(f"Repository {repo_name} already exists at {repo_path}. Checking for local changes...")
+
+            # Check for local changes
+            status_command = ['git', '-C', repo_path, 'status', '--porcelain', '--untracked-files=no']
+            local_changes = subprocess.run(status_command, capture_output=True, text=True)
+
+            if local_changes.stdout.strip():
+                logger.warning("There are local changes in the repository. Please commit or stash them before checking out.")
+                print(local_changes.stdout.strip())
+                return {"return": 0, "warning": f"Local changes detected in the already existing repository: {repo_path}, skipping the pull"}
+            else:
+                logger.info("No local changes detected. Fetching latest changes...")
+                subprocess.run(['git', '-C', repo_path, 'fetch'], check=True)
+
+        if tag:
+            checkout = "tags/"+tag
+
+        # Checkout to a specific branch or commit if --checkout is provided
+        if checkout or tag:
+            logger.info(f"Checking out to {checkout} in {repo_path}...")
+            subprocess.run(['git', '-C', repo_path, 'checkout', checkout], check=True)
+
+        if not tag:
+            subprocess.run(['git', '-C', repo_path, 'pull'], check=True)
+            logger.info("Repository successfully pulled.")
+
+        logger.info("Registering the repo in repos.json")
+
+        # check the meta file to obtain uids
+        meta_file_path = os.path.join(repo_path, 'meta.yaml')
+        if not os.path.exists(meta_file_path):
+            logger.warning(f"meta.yaml not found in {repo_path}. Repo pulled but not register in mlc repos. Skipping...")
+            return {"return": 0}
+
+        with open(meta_file_path, 'r') as meta_file:
+            meta_data = yaml.safe_load(meta_file)
+            meta_data["path"] = repo_path
+
+        # Check UID conflicts
+        is_conflict = conflicting_repo(meta_data, repos)
+        if is_conflict['return'] > 0:
+            # The substring must match the wording produced by conflicting_repo()
+            if "UID is not present" in is_conflict['error']:
+                logger.warning(f"UID not found in meta.yaml at {repo_path}. Repo pulled but can not register in mlc repos. Skipping...")
+                return {"return": 0}
+            elif "already registered" in is_conflict["error"]:
+                #logger.warning(is_conflict["error"])
+                logger.info("No changes made to repos.json.")
+                return {"return": 0}
+            else:
+                logger.warning(f"The repo to be cloned has conflict with the repo already in the path: {is_conflict['conflicting_path']}")
+                unregister_repo(is_conflict['conflicting_path'], os.path.join(repos_path, 'repos.json'))
+                register_repo(repo_path, repos_path, meta_data, repos)
+                logger.warning(f"{repo_path} is registered in repos.json and {is_conflict['conflicting_path']} is unregistered.")
+                return {"return": 0}
+        else:
+            r = register_repo(repo_path, repos_path, meta_data, repos)
+            if r['return'] > 0:
+                return r
+        return {"return": 0}
+
+    except subprocess.CalledProcessError as e:
+        return {'return': 1, 'error': f"Git command failed: {e}"}
+    except Exception as e:
+        return {'return': 1, 'error': f"Error pulling repository: {str(e)}"}
+
+def github_url_to_user_repo_format(url):
+    """
+    Converts a GitHub repo URL to user@repo_name format.
+
+    :param url: str, GitHub repository URL (e.g., https://github.com/user/repo_name.git)
+    :return: dict, {"return": 0, "value": name}; name is user@repo_name, or the URL basename without ".git" when the URL is not a GitHub one
+    """
+    # Regex to match GitHub URLs
+    pattern = r"(?:https?://)?(?:www\.)?github\.com/([^/]+)/([^/.]+)(?:\.git)?"
+
+    match = re.match(pattern, url)
+    if match:
+        user, repo_name = match.groups()
+        return {"return": 0, "value": f"{user}@{repo_name}"}
+    else:
+        return {"return": 0, "value": os.path.basename(url).replace(".git", "")}
+
+def register_repo(repo_path, repos_path, repo_meta, repos=None):
+    """
+    Pull the deps listed in the repo meta, then add repo_path to repos.json.
+
+    :param repo_path: str, path of the repo to register
+    :param repos_path: str, directory that contains repos.json
+    :param repo_meta: dict, parsed meta.yaml of the repo; an optional "deps" list is pulled first
+    :param repos: list, loaded repo objects forwarded to pull_repo for the dependency pulls (default: empty)
+    :return: dict, {"return": 0}
+    """
+    if repo_meta.get('deps'):
+        for dep in repo_meta['deps']:
+            # pull_repo needs the loaded repos list as its third positional argument
+            res = pull_repo(dep['url'], repos_path, repos if repos is not None else [], branch=dep.get('branch'), checkout=dep.get('checkout'))
+            if res['return'] > 0:
+                logger.warning(f"Could not pull dependency {dep['url']}: {res.get('error', '')}")
+
+    # Get the path to the repos.json file in $HOME/MLC
+    repos_file_path = os.path.join(repos_path, 'repos.json')
+
+    with open(repos_file_path, 'r') as f:
+        repos_list = json.load(f)
+
+    if repo_path not in repos_list:
+        repos_list.append(repo_path)
+        logger.info(f"Added new repo path: {repo_path}")
+
+        with open(repos_file_path, 'w') as f:
+            json.dump(repos_list, f, indent=2)
+        logger.info(f"Updated repos.json at {repos_file_path}")
+    return {'return': 0}
+
+def conflicting_repo(repo_meta, repos):
+    """
+    Check repo_meta against the loaded repos for a UID clash.
+
+    :param repo_meta: dict, meta of the candidate repo; its "uid" and "path" keys are read
+    :param repos: iterable of repo objects exposing .meta (dict) and .path
+    :return: dict, {"return": 0} when nothing clashes, otherwise "return" 1 with "error" (plus "conflicting_path" when another path holds the same UID)
+    """
+    for repo_object in repos:
+        if repo_object.meta.get('uid', '') == '':
+            return {"return": 1, "error": f"UID is not present in file 'meta.yaml' in the repo path {repo_object.path}"}
+        if repo_meta["uid"] == repo_object.meta.get('uid', ''):
+            if repo_meta['path'] == repo_object.path:
+                return {"return": 1, "error": f"Same repo is already registered"}
+            else:
+                return {"return": 1, "error": f"Conflicting with repo in the path {repo_object.path}", "conflicting_path": repo_object.path}
+    return {"return": 0}
diff --git a/mlc/script_action.py b/mlc/script_action.py
index 3bc999382..dd4516d39 100644
--- a/mlc/script_action.py
+++ b/mlc/script_action.py
@@ -108,7 +108,14 @@ def call_script_module_function(self, function_name, run_args):
         # Import script submodule
         script_path = self.find_target_folder("script")
         if not script_path:
-            return {'return': 1, 'error': f"""Script automation not found. Have you done "mlc pull repo mlcommons@mlperf-automations --branch=dev"?"""}
+            logger.warning("""Script automation not found. Pulling default MLCFlow script repo - mlcommons@mlperf-automations dev branch.""")
+            from .repo_action import pull_repo
+            res = pull_repo(repo_url="mlcommons@mlperf-automations", branch="dev", repos_path=self.repos_path, repos=self.repos)
+            if res["return"]>0:
+                return res
+            script_path = self.find_target_folder("script")
+            if not script_path:
+                return {'return': 1, 'error': """Script automation not found even after pulling mlcommons@mlperf-automations. Try "mlc pull repo mlcommons@mlperf-automations --branch=dev" manually."""}
 
         module_path = os.path.join(script_path, "module.py")
         module = self.dynamic_import_module(module_path)