Reading decoder layer 37
Reading decoder layer 38
Reading decoder layer 39
Reading final norm
Reading LM head
Saving ONNX model in E:\ai-models\Qwen3-14B\onnx-webgpu
Traceback (most recent call last):
File "C:\Users\xiaofeihan\AppData\Roaming\Python\Python313\site-packages\onnx_ir\serde.py", line 100, in wrapper
return func(*args, **kwargs)
File "C:\Users\xiaofeihan\AppData\Roaming\Python\Python313\site-packages\onnx_ir\serde.py", line 1787, in serialize_tensor_into
tensor_proto.raw_data = from_.tobytes()
~~~~~~~~~~~~~^^
File "C:\Users\xiaofeihan\AppData\Roaming\Python\Python313\site-packages\onnx_ir\_core.py", line 1078, in tobytes
return self._evaluate().tobytes()
~~~~~~~~~~~~~~~~~~~~~~~~^^
File "C:\Users\xiaofeihan\AppData\Roaming\Python\Python313\site-packages\onnx_ir\tensor_adapters.py", line 206, in tobytes
return bytes(data)
MemoryError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\xiaofeihan\AppData\Roaming\Python\Python313\site-packages\onnx_ir\serde.py", line 100, in wrapper
return func(*args, **kwargs)
File "C:\Users\xiaofeihan\AppData\Roaming\Python\Python313\site-packages\onnx_ir\serde.py", line 1582, in serialize_graph_into
serialize_tensor_into(graph_proto.initializer.add(), from_=value.const_value)
~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\xiaofeihan\AppData\Roaming\Python\Python313\site-packages\onnx_ir\serde.py", line 102, in wrapper
raise SerdeError(
f"Error calling {func.__name__} with: {arg_capturer(*args, **kwargs)}"
) from e
onnx_ir.serde.SerdeError: Error calling serialize_tensor_into with: LazyTensor<FLOAT16,[5120,151936]>(func=<function Model.make_initializer.<locals>.tensor_func at 0x0000024E7CF10540>, name='lm_head.MatMul.weight')
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\xiaofeihan\AppData\Roaming\Python\Python313\site-packages\onnx_ir\serde.py", line 100, in wrapper
return func(*args, **kwargs)
File "C:\Users\xiaofeihan\AppData\Roaming\Python\Python313\site-packages\onnx_ir\serde.py", line 1384, in serialize_model_into
serialize_graph_into(model_proto.graph, from_.graph)
~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\xiaofeihan\AppData\Roaming\Python\Python313\site-packages\onnx_ir\serde.py", line 102, in wrapper
raise SerdeError(
f"Error calling {func.__name__} with: {arg_capturer(*args, **kwargs)}"
) from e
onnx_ir.serde.SerdeError: Error calling serialize_graph_into with: name=main_graph, doc_string=None, len(inputs)=82, len(initializers)=445, len(nodes)=774, len(outputs)=81, metadata_props={}
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "C:\Users\xiaofeihan\AppData\Roaming\Python\Python313\site-packages\onnxruntime_genai\models\builder.py", line 502, in <module>
create_model(
~~~~~~~~~~~~^
args.model_name,
^^^^^^^^^^^^^^^^
...<5 lines>...
**extra_options,
^^^^^^^^^^^^^^^^
)
^
File "C:\Users\xiaofeihan\AppData\Roaming\Python\Python313\site-packages\torch\utils\_contextlib.py", line 124, in decorate_context
return func(*args, **kwargs)
File "C:\Users\xiaofeihan\AppData\Roaming\Python\Python313\site-packages\onnxruntime_genai\models\builder.py", line 346, in create_model
onnx_model.save_model(output_dir)
~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^
File "C:\Users\xiaofeihan\AppData\Roaming\Python\Python313\site-packages\onnxruntime_genai\models\builders\base.py", line 748, in save_model
model = self.to_int4()
File "C:\Users\xiaofeihan\AppData\Roaming\Python\Python313\site-packages\onnxruntime_genai\models\builders\base.py", line 729, in to_int4
model=ir.to_proto(self.model),
~~~~~~~~~~~^^^^^^^^^^^^
File "C:\Users\xiaofeihan\AppData\Roaming\Python\Python313\site-packages\onnx_ir\serde.py", line 277, in to_proto
return serialize_model(ir_object)
File "C:\Users\xiaofeihan\AppData\Roaming\Python\Python313\site-packages\onnx_ir\serde.py", line 1356, in serialize_model
return serialize_model_into(onnx.ModelProto(), from_=model)
File "C:\Users\xiaofeihan\AppData\Roaming\Python\Python313\site-packages\onnx_ir\serde.py", line 102, in wrapper
raise SerdeError(
f"Error calling {func.__name__} with: {arg_capturer(*args, **kwargs)}"
) from e
onnx_ir.serde.SerdeError: Error calling serialize_model_into with: ir_version=10, producer_name=onnxruntime-genai, producer_version=None, domain=None,
OS: Windows 11
onnxruntime-genai: 0.12.1
RAM: 64GB
python -m onnxruntime_genai.models.builder -p int4 -e webgpu -m Qwen/Qwen3-14B -o Qwen3-14B
Running this command fails with the MemoryError / SerdeError traceback shown above.