You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
然后过一段时间出现:
[Local Message] 调用THUDM/chatglm2-6b-int4失败.
Traceback (most recent call last):
File ".\request_llms\local_llm_class.py", line 158, in run
for response_full in self.llm_stream_generator(**kwargs):
File ".\request_llms\bridge_chatglm.py", line 59, in llm_stream_generator
for response, history in self._model.stream_chat(self._tokenizer,
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\utils_contextlib.py", line 35, in generator_context
response = gen.send(None)
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\modeling_chatglm.py", line 1063, in stream_chat
for outputs in self.stream_generate(**inputs, past_key_values=past_key_values,
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\utils_contextlib.py", line 35, in generator_context
response = gen.send(None)
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\modeling_chatglm.py", line 1149, in stream_generate
outputs = self(
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\modeling_chatglm.py", line 937, in forward
transformer_outputs = self.transformer(
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\modeling_chatglm.py", line 830, in forward
hidden_states, presents, all_hidden_states, all_self_attentions = self.encoder(
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\modeling_chatglm.py", line 640, in forward
layer_ret = layer(
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\modeling_chatglm.py", line 544, in forward
attention_output, kv_cache = self.self_attention(
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\modeling_chatglm.py", line 376, in forward
mixed_x_layer = self.query_key_value(hidden_states)
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\quantization.py", line 500, in forward
output = W8A16LinearCPU.apply(input, self.weight, self.weight_scale, self.weight_bit_width)
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\autograd\function.py", line 539, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\quantization.py", line 246, in forward
weight = extract_weight_to_float(quant_w, scale_w, weight_bit_width, quantization_cache=quantization_cache)
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\quantization.py", line 228, in extract_weight_to_float
func(
TypeError: 'NoneType' object is not callable
在cmd窗口中出现:
正在执行一些模块的预热...
正在加载tokenizer,如果是第一次运行,可能需要一点时间下载参数
加载tokenizer完毕
正在加载tokenizer,如果是第一次运行,可能需要一点时间下载参数
加载tokenizer完毕
Running on local URL: http://0.0.0.0:8737
Failed to load cpm_kernels:No module named 'cpm_kernels'
c:/mingw/bin/../lib/gcc/mingw32/6.3.0/../../../../mingw32/bin/ld.exe: cannot find -lpthread
collect2.exe: 错误:ld 返回 1
Compile parallel cpu kernel gcc -O3 -fPIC -pthread -fopenmp -std=c99 C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\quantization_kernels_parallel.c -shared -o C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\quantization_kernels_parallel.so failed.
Load cpu kernel C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\quantization_kernels.so failed: Traceback (most recent call last):
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\quantization.py", line 165, in init
kernels = ctypes.cdll.LoadLibrary(kernel_file)
File "C:\gpt\GPTacademic1\installer_files\env\lib\ctypes_init_.py", line 452, in LoadLibrary
return self.dlltype(name)
File "C:\gpt\GPTacademic1\installer_files\env\lib\ctypes_init.py", line 374, in init
self._handle = _dlopen(self._name, mode)
OSError: [WinError 193] %1 不是有效的 Win32 应用程序。
reacted with thumbs up emoji · reacted with thumbs down emoji · reacted with laugh emoji · reacted with hooray emoji · reacted with confused emoji · reacted with heart emoji · reacted with rocket emoji · reacted with eyes emoji
-
1260P主机,32G内存,想使用本地大模型Chatglm2,按照作者的注释通过
python -m pip install -r request_llms/requirements_chatglm.txt 安装了依赖,
但是在"更换模型&Prompt"下拉列表中选择Chatglm后,在输入区输入问题,就会出现:
{'http': 'http://127.0.0.1:7890', 'https': 'http://127.0.0.1:7890'} [PROXY] 网络代理状态:已配置。配置信息如下:
然后过一段时间出现:
[Local Message] 调用THUDM/chatglm2-6b-int4失败.
Traceback (most recent call last):
File ".\request_llms\local_llm_class.py", line 158, in run
for response_full in self.llm_stream_generator(**kwargs):
File ".\request_llms\bridge_chatglm.py", line 59, in llm_stream_generator
for response, history in self._model.stream_chat(self._tokenizer,
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\utils_contextlib.py", line 35, in generator_context
response = gen.send(None)
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\modeling_chatglm.py", line 1063, in stream_chat
for outputs in self.stream_generate(**inputs, past_key_values=past_key_values,
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\utils_contextlib.py", line 35, in generator_context
response = gen.send(None)
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\modeling_chatglm.py", line 1149, in stream_generate
outputs = self(
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\modeling_chatglm.py", line 937, in forward
transformer_outputs = self.transformer(
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\modeling_chatglm.py", line 830, in forward
hidden_states, presents, all_hidden_states, all_self_attentions = self.encoder(
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\modeling_chatglm.py", line 640, in forward
layer_ret = layer(
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\modeling_chatglm.py", line 544, in forward
attention_output, kv_cache = self.self_attention(
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\modeling_chatglm.py", line 376, in forward
mixed_x_layer = self.query_key_value(hidden_states)
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\quantization.py", line 500, in forward
output = W8A16LinearCPU.apply(input, self.weight, self.weight_scale, self.weight_bit_width)
File "C:\gpt\GPTacademic1\installer_files\env\lib\site-packages\torch\autograd\function.py", line 539, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\quantization.py", line 246, in forward
weight = extract_weight_to_float(quant_w, scale_w, weight_bit_width, quantization_cache=quantization_cache)
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\quantization.py", line 228, in extract_weight_to_float
func(
TypeError: 'NoneType' object is not callable
在cmd窗口中出现:
正在执行一些模块的预热...
正在加载tokenizer,如果是第一次运行,可能需要一点时间下载参数
加载tokenizer完毕
正在加载tokenizer,如果是第一次运行,可能需要一点时间下载参数
加载tokenizer完毕
Running on local URL: http://0.0.0.0:8737
Failed to load cpm_kernels:No module named 'cpm_kernels'
c:/mingw/bin/../lib/gcc/mingw32/6.3.0/../../../../mingw32/bin/ld.exe: cannot find -lpthread
collect2.exe: 错误:ld 返回 1
Compile parallel cpu kernel gcc -O3 -fPIC -pthread -fopenmp -std=c99 C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\quantization_kernels_parallel.c -shared -o C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\quantization_kernels_parallel.so failed.
Load cpu kernel C:\Users\Administrator.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\quantization_kernels.so failed: Traceback (most recent call last):
File "C:\Users\Administrator/.cache\huggingface\modules\transformers_modules\THUDM\chatglm2-6b-int4\66ecaf1db3a5085714e133357ea4824b69698743\quantization.py", line 165, in init
kernels = ctypes.cdll.LoadLibrary(kernel_file)
File "C:\gpt\GPTacademic1\installer_files\env\lib\ctypes_init_.py", line 452, in LoadLibrary
return self.dlltype(name)
File "C:\gpt\GPTacademic1\installer_files\env\lib\ctypes_init.py", line 374, in init
self._handle = _dlopen(self._name, mode)
OSError: [WinError 193] %1 不是有效的 Win32 应用程序。
想知道这是什么问题?怎么解决。
Beta — Was this translation helpful? Give feedback.
All reactions