Bug#1098205: onnxruntime: autopkgtest failure with glibc 2.41 due to use of executable stack

To: Debian Bug Tracking System <submit@bugs.debian.org>
Subject: Bug#1098205: onnxruntime: autopkgtest failure with glibc 2.41 due to use of executable stack
From: Aurelien Jarno <aurel32@debian.org>
Date: Mon, 17 Feb 2025 19:07:03 +0100
Message-id: <[🔎] 173981562359.442572.18183013751274059209.reportbug@ohm.local>
Reply-to: Aurelien Jarno <aurel32@debian.org>, 1098205@bugs.debian.org

Source: onnxruntime
Version: 1.19.2+dfsg-9
Severity: important
Tags: ftbfs patch upstream
X-Debbugs-Cc: debian-glibc@lists.debian.org
User: debian-glibc@lists.debian.org
Usertags: glibc2.41 dlopen-executable-stack
Control: found -1 onnxruntime/1.20.1+dfsg-1~exp2

Dear maintainer,

Starting with glibc 2.41, the dlopen and dlmopen functions no longer
make the stack executable if a shared library requires it and instead
just fail. This change aims to improve security, as the previous
behaviour was used as a vector for RCE (CVE-2023-38408).

Unfortunately, the python3-onnxruntime package provides a Python module
which requires an executable stack. With this change, it can't be
imported anymore, causing the autopkgtest to fail:

| 145s autopkgtest [07:39:12]: test command1: [-----------------------
| 147s Exported graph: graph(%feature : Float(1, 128, strides=[128, 1], requires_grad=0, device=cpu),
| 147s       %0.weight : Float(16, 128, strides=[128, 1], requires_grad=1, device=cpu),
| 147s       %0.bias : Float(16, strides=[1], requires_grad=1, device=cpu),
| 147s       %2.weight : Float(1, 16, strides=[16, 1], requires_grad=1, device=cpu),
| 147s       %2.bias : Float(1, strides=[1], requires_grad=1, device=cpu)):
| 147s   %/0/Gemm_output_0 : Float(1, 16, strides=[16, 1], requires_grad=1, device=cpu) = onnx::Gemm[alpha=1., beta=1., transB=1, onnx_name="/0/Gemm"](%feature, %0.weight, %0.bias), scope: torch.nn.modules.container.Sequential::/torch.nn.modules.linear.Linear::0 # /usr/lib/python3/dist-packages/torch/nn/modules/linear.py:125:0
| 147s   %/1/Relu_output_0 : Float(1, 16, strides=[16, 1], requires_grad=1, device=cpu) = onnx::Relu[onnx_name="/1/Relu"](%/0/Gemm_output_0), scope: torch.nn.modules.container.Sequential::/torch.nn.modules.activation.ReLU::1 # /usr/lib/python3/dist-packages/torch/nn/functional.py:1704:0
| 147s   %7 : Float(1, 1, strides=[1, 1], requires_grad=1, device=cpu) = onnx::Gemm[alpha=1., beta=1., transB=1, onnx_name="/2/Gemm"](%/1/Relu_output_0, %2.weight, %2.bias), scope: torch.nn.modules.container.Sequential::/torch.nn.modules.linear.Linear::2 # /usr/lib/python3/dist-packages/torch/nn/modules/linear.py:125:0
| 147s   return (%7)
| 147s 
| 147s Exported a dummy neural network at test_model.onnx
| 147s Traceback (most recent call last):
| 147s   File "/tmp/autopkgtest-lxc.x7zg2iaj/downtmp/build.4Lq/src/debian/tests/inference.py", line 25, in <module>
| 147s     import onnxruntime as rt
| 147s   File "/usr/lib/python3/dist-packages/onnxruntime/__init__.py", line 57, in <module>
| 147s     raise import_capi_exception
| 147s   File "/usr/lib/python3/dist-packages/onnxruntime/__init__.py", line 23, in <module>
| 147s     from onnxruntime.capi._pybind_state import ExecutionMode  # noqa: F401
| 147s     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| 147s   File "/usr/lib/python3/dist-packages/onnxruntime/capi/_pybind_state.py", line 32, in <module>
| 147s     from .onnxruntime_pybind11_state import *  # noqa
| 147s     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| 147s ImportError: /usr/lib/python3/dist-packages/onnxruntime/capi/onnxruntime_pybind11_state.cpython-313-x86_64-linux-gnu.so: cannot enable executable stack as shared object requires: Invalid argument
| 148s autopkgtest [07:39:15]: test command1: -----------------------]
| 148s command1             FAIL non-zero exit status 1

For a full log, see:
https://ci.debian.net/packages/o/onnxruntime/unstable/amd64/57637366/

The problem is that the assembly sources in
onnxruntime/core/mlas/lib/x86_64/*.S do not mark the stack as
non-executable, as can be seen in the build log:

| /usr/bin/ld: warning: QgemmU8S8KernelAmx.S.o: missing .note.GNU-stack section implies executable stack
| /usr/bin/ld: NOTE: This behaviour is deprecated and will be removed in a future version of the linker

I think this should be fixed at the assembly code level, but upstream
chose to fix it at link time for the main library [1]. Therefore,
please find a patch that uses the same strategy for the Python modules:

--- onnxruntime-1.19.2+dfsg.orig/cmake/onnxruntime_python.cmake
+++ onnxruntime-1.19.2+dfsg/cmake/onnxruntime_python.cmake
@@ -119,9 +119,9 @@ if(APPLE)
   set(ONNXRUNTIME_SO_LINK_FLAG "-Xlinker -exported_symbols_list -Xlinker ${ONNXRUNTIME_ROOT}/python/exported_symbols.lst")
 elseif(UNIX)
   if (onnxruntime_ENABLE_EXTERNAL_CUSTOM_OP_SCHEMAS)
-    set(ONNXRUNTIME_SO_LINK_FLAG "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/python/version_script_expose_onnx_protobuf.lds -Xlinker --gc-sections")
+    set(ONNXRUNTIME_SO_LINK_FLAG "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/python/version_script_expose_onnx_protobuf.lds -Xlinker --gc-sections -z noexecstack")
   else()
-    set(ONNXRUNTIME_SO_LINK_FLAG "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/python/version_script.lds -Xlinker --gc-sections")
+    set(ONNXRUNTIME_SO_LINK_FLAG "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/python/version_script.lds -Xlinker --gc-sections -z noexecstack")
   endif()
 else()
   set(ONNXRUNTIME_SO_LINK_FLAG "-DEF:${ONNXRUNTIME_ROOT}/python/pybind.def")

Regards
Aurelien

[1] https://github.com/microsoft/onnxruntime/commit/49ce4891bced02db4554253f98e595f1857b58f8

Reply to:

Prev by Date: Bug#1096157: dhcpcd: need seccomp update for glibc 2.41
Next by Date: [Git][glibc-team/glibc][glibc-2.41] debian/control.in/libc: add breaks against dhcpcd (<< 1:10.1.0-7~), see #1096157.
Previous by thread: Bug#1096157: dhcpcd: need seccomp update for glibc 2.41
Next by thread: [Git][glibc-team/glibc][glibc-2.41] debian/control.in/libc: add breaks against dhcpcd (<< 1:10.1.0-7~), see #1096157.
Index(es):
- Date
- Thread