-
Notifications
You must be signed in to change notification settings - Fork 4.2k
/
env_report.py
140 lines (118 loc) · 4.6 KB
/
env_report.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import torch
import deepspeed
import subprocess
import argparse
from .ops.op_builder import ALL_OPS
from .git_version_info import installed_ops, torch_info
GREEN = '\033[92m'
RED = '\033[91m'
YELLOW = '\033[93m'
END = '\033[0m'
SUCCESS = f"{GREEN} [SUCCESS] {END}"
OKAY = f"{GREEN}[OKAY]{END}"
WARNING = f"{YELLOW}[WARNING]{END}"
FAIL = f'{RED}[FAIL]{END}'
INFO = '[INFO]'
color_len = len(GREEN) + len(END)
okay = f"{GREEN}[OKAY]{END}"
warning = f"{YELLOW}[WARNING]{END}"
def op_report(verbose=True):
max_dots = 23
max_dots2 = 11
h = ["op name", "installed", "compatible"]
print("-" * (max_dots + max_dots2 + len(h[0]) + len(h[1])))
print("DeepSpeed C++/CUDA extension op report")
print("-" * (max_dots + max_dots2 + len(h[0]) + len(h[1])))
print("NOTE: Ops not installed will be just-in-time (JIT) compiled at\n"
" runtime if needed. Op compatibility means that your system\n"
" meet the required dependencies to JIT install the op.")
print("-" * (max_dots + max_dots2 + len(h[0]) + len(h[1])))
print("JIT compiled ops requires ninja")
ninja_status = OKAY if ninja_installed() else FAIL
print('ninja', "." * (max_dots - 5), ninja_status)
print("-" * (max_dots + max_dots2 + len(h[0]) + len(h[1])))
print(h[0], "." * (max_dots - len(h[0])), h[1], "." * (max_dots2 - len(h[1])), h[2])
print("-" * (max_dots + max_dots2 + len(h[0]) + len(h[1])))
installed = f"{GREEN}[YES]{END}"
no = f"{YELLOW}[NO]{END}"
for op_name, builder in ALL_OPS.items():
dots = "." * (max_dots - len(op_name))
is_compatible = OKAY if builder.is_compatible(verbose) else no
is_installed = installed if installed_ops[op_name] else no
dots2 = '.' * ((len(h[1]) + (max_dots2 - len(h[1]))) -
(len(is_installed) - color_len))
print(op_name, dots, is_installed, dots2, is_compatible)
print("-" * (max_dots + max_dots2 + len(h[0]) + len(h[1])))
def ninja_installed():
try:
import ninja # noqa: F401
except ImportError:
return False
return True
def nvcc_version():
import torch.utils.cpp_extension
cuda_home = torch.utils.cpp_extension.CUDA_HOME
if cuda_home is None:
return f"{RED} [FAIL] cannot find CUDA_HOME via torch.utils.cpp_extension.CUDA_HOME={torch.utils.cpp_extension.CUDA_HOME} {END}"
try:
output = subprocess.check_output([cuda_home + "/bin/nvcc",
"-V"],
universal_newlines=True)
except FileNotFoundError:
return f"{RED} [FAIL] nvcc missing {END}"
output_split = output.split()
release_idx = output_split.index("release")
release = output_split[release_idx + 1].replace(',', '').split(".")
return ".".join(release)
def debug_report():
max_dots = 33
hip_version = None
if hasattr(torch.version, 'hip'):
hip_version = torch.version.hip
report = [
("torch install path",
torch.__path__),
("torch version",
torch.__version__),
("torch cuda version",
torch.version.cuda),
("torch hip version",
hip_version),
("nvcc version",
(None if hip_version else nvcc_version())),
("deepspeed install path",
deepspeed.__path__),
("deepspeed info",
f"{deepspeed.__version__}, {deepspeed.__git_hash__}, {deepspeed.__git_branch__}"
),
("deepspeed wheel compiled w.",
f"torch {torch_info['version']}, " +
(f"hip {torch_info['hip_version']}"
if hip_version else f"cuda {torch_info['cuda_version']}")),
]
print("DeepSpeed general environment info:")
for name, value in report:
print(name, "." * (max_dots - len(name)), value)
def parse_arguments():
parser = argparse.ArgumentParser()
parser.add_argument(
'--hide_operator_status',
action='store_true',
help=
'Suppress display of installation and compatibility statuses of DeepSpeed operators. '
)
parser.add_argument('--hide_errors_and_warnings',
action='store_true',
help='Suppress warning and error messages.')
args = parser.parse_args()
return args
def main(hide_operator_status=False, hide_errors_and_warnings=False):
if not hide_operator_status:
op_report(verbose=not hide_errors_and_warnings)
debug_report()
def cli_main():
args = parse_arguments()
main(hide_operator_status=args.hide_operator_status,
hide_errors_and_warnings=args.hide_errors_and_warnings)
if __name__ == "__main__":
main()