#23 0x00007fff31f36466 in () at /lib/x86_64-linux-gnu/libamdhip64.so
#24 0x00007fff31f0c0f1 in () at /lib/x86_64-linux-gnu/libamdhip64.so
#25 0x00007fffef8ace37 in __pthread_once_slow (_once_control_=0x7fff329a1ed8, init_routine=0x7fffefad3200 <__once_proxy>) at ./nptl/pthread_once.c:116
#26 0x00007fff31f0f5a9 in () at /lib/x86_64-linux-gnu/libamdhip64.so
#27 0x00007fff31f604c3 in () at /lib/x86_64-linux-gnu/libamdhip64.so
#28 0x00007fff31f60fdd in () at /lib/x86_64-linux-gnu/libamdhip64.so
#29 0x00007fff31f0f19e in () at /lib/x86_64-linux-gnu/libamdhip64.so
#30 0x00007fff31f52dfe in () at /lib/x86_64-linux-gnu/libamdhip64.so
#31 0x00007fff31cc676c in () at /lib/x86_64-linux-gnu/libamdhip64.so
#32 0x00007fff31cc75ad in hipInit () at /lib/x86_64-linux-gnu/libamdhip64.so
#33 0x0000555557e38824 in ccl::device_hip_safe_init () at ./intern/cycles/device/hip/device.cpp:96
#34 ccl::device_hip_info(ccl::vector<ccl::DeviceInfo, ccl::GuardedAllocator<ccl::DeviceInfo> >&) (devices=...) at ./intern/cycles/device/hip/device.cpp:104
#35 0x0000555557e20b7a in ccl::Device::available_devices(unsigned int) (mask=34) at ./intern/cycles/device/device.cpp:228
#36 0x0000555557bbbc3d in ccl::available_devices_func(PyObject*, PyObject*) (args=<optimized out>) at ./intern/cycles/blender/python.cpp:416
I wish we had debug symbols for these libraries. We can't make -dbgsym packages since dwz does not support .debug_addr, but maybe -dbg packages would be possible?
I lazily tried to produce debug libraries using clang-15 like Debian does. My build script is attached. It installs the debug libraries to $HOME/rocm.
I tested blender-3.4.0-beta+v34.bba6d2894290-linux.x86_64-release from the blender.org daily builds with
LD_LIBRARY_PATH=$HOME/rocm/libs ./blender
Unfortunately, my GPU was detected correctly! Blender detected
HIP correctly with my custom debug build. That's different from
when I tried using the OS packages. When using the sid packages
for HIP, my GPU was not detected. It seemed to be some sort of
failure in hipInit. I'd been hoping to use a debugger to
understand that better, but I haven't yet managed to reproduce the
problem with a debug build.
However, the fact that my naive build worked correctly and the
Debian packages are failing does provide an interesting data
point. I'm not sure what made the difference in one case vs the
other.
Sincerely,
Cory Bloor
#!/usr/bin/env bash
# Build ROCm 5.2.3
#
# Builds LLVM 15.0.4 plus the ROCm 5.2.3 components (rocm-cmake, device libs,
# ROCT thunk, ROCR runtime, rocminfo, comgr, HIP) from release tarballs and
# installs them under $PREFIX. LLVM, rocm-cmake and the device libs are built
# as Release; the runtime libraries are built as Debug.
#
# Optional environment overrides:
#   WORKSPACE  where to download and build the sources (default: $HOME)
#   PREFIX     install prefix (default: $HOME/rocm)
#   JOBS       parallel make jobs (default: 16)
set -exuo pipefail

sudo apt-get -qq update
sudo apt-get -qq upgrade
sudo apt-get -qq install build-essential cmake wget

WORKSPACE="${WORKSPACE:-$HOME}" # where to download and build the sources
PREFIX="${PREFIX:-$HOME/rocm}"  # where everything gets installed
JOBS="${JOBS:-16}"              # parallelism passed to make -j
# NOTE(review): ARCH is not referenced by any build step shown in this script.
ARCH=gfx906 # https://llvm.org/docs/AMDGPUUsage.html

# Download the .tar.gz at URL $1 and unpack it into the current directory.
# pipefail (set above) makes a failed download abort the script.
fetch() {
  wget -qO- "$1" | tar xz
}

# llvm
cd "$WORKSPACE"
sudo apt-get -qq install python3
fetch https://github.com/llvm/llvm-project/archive/refs/tags/llvmorg-15.0.4.tar.gz
cd llvm-project-llvmorg-15.0.4
cmake -Sllvm -Bbuild -DCMAKE_BUILD_TYPE=Release \
  -DLLVM_ENABLE_PROJECTS="clang;lld;clang-tools-extra;compiler-rt" \
  -DLLVM_TARGETS_TO_BUILD="AMDGPU;X86" \
  -DCMAKE_INSTALL_PREFIX="$PREFIX/llvm"
make -j"$JOBS" -C build
make -C build install

# rocm-cmake
cd "$WORKSPACE"
fetch https://github.com/RadeonOpenCompute/rocm-cmake/archive/refs/tags/rocm-5.2.3.tar.gz
cd rocm-cmake-rocm-5.2.3
cmake -S. -Bbuild -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_INSTALL_PREFIX="$PREFIX"
make -j"$JOBS" -C build
make -C build install

# rocm-device-libs
cd "$WORKSPACE"
fetch https://github.com/RadeonOpenCompute/ROCm-Device-Libs/archive/refs/tags/rocm-5.2.3.tar.gz
cd ROCm-Device-Libs-rocm-5.2.3
cmake -S. -Bbuild -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_C_COMPILER="$PREFIX/llvm/bin/clang" \
  -DCMAKE_INSTALL_PREFIX="$PREFIX"
make -j"$JOBS" -C build
make -C build install

# roct-thunk-interface
cd "$WORKSPACE"
sudo apt-get -qq install libnuma-dev pkg-config libdrm-dev zlib1g-dev libudev-dev
fetch https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/archive/refs/tags/rocm-5.2.3.tar.gz
cd ROCT-Thunk-Interface-rocm-5.2.3
cmake -S. -Bbuild -DCMAKE_BUILD_TYPE=Debug \
  -DCMAKE_PREFIX_PATH="$PREFIX" \
  -DCMAKE_INSTALL_PREFIX="$PREFIX"
make -j"$JOBS" -C build
make -C build install

# rocr-runtime
cd "$WORKSPACE"
sudo apt-get -qq install libelf-dev xxd
fetch https://github.com/RadeonOpenCompute/ROCR-Runtime/archive/refs/tags/rocm-5.2.3.tar.gz
cd ROCR-Runtime-rocm-5.2.3
cmake -Ssrc -Bbuild -DCMAKE_BUILD_TYPE=Debug \
  -DCMAKE_PREFIX_PATH="$PREFIX" \
  -DCMAKE_INSTALL_PREFIX="$PREFIX"
make -j"$JOBS" -C build
make -C build install

# rocminfo
cd "$WORKSPACE"
sudo apt-get -qq install kmod python3
fetch https://github.com/RadeonOpenCompute/rocminfo/archive/refs/tags/rocm-5.2.3.tar.gz
cd rocminfo-rocm-5.2.3
cmake -S. -Bbuild -DCMAKE_BUILD_TYPE=Debug \
  -DCMAKE_PREFIX_PATH="$PREFIX" \
  -DCMAKE_INSTALL_PREFIX="$PREFIX"
make -j"$JOBS" -C build
make -C build install

# comgr
cd "$WORKSPACE"
fetch https://github.com/RadeonOpenCompute/ROCm-CompilerSupport/archive/refs/tags/rocm-5.2.3.tar.gz
cd ROCm-CompilerSupport-rocm-5.2.3

# The inline patches below are applied with --ignore-whitespace so that they
# still match if the indentation of the hunks differs from the upstream
# sources (easily mangled when a script travels as plain text).

# 0005-dbg-compression-type.patch
patch -p1 --ignore-whitespace << 'EOF'
--- rocm-compilersupport.orig/lib/comgr/src/comgr-compiler.cpp
+++ rocm-compilersupport/lib/comgr/src/comgr-compiler.cpp
@@ -244,13 +244,12 @@
                                      OPT_compress_debug_sections_EQ)) {
     if (A->getOption().getID() == OPT_compress_debug_sections) {
       // TODO: be more clever about the compression type auto-detection
-      Opts.CompressDebugSections = llvm::DebugCompressionType::GNU;
+      Opts.CompressDebugSections = llvm::DebugCompressionType::Z;
     } else {
       Opts.CompressDebugSections =
           llvm::StringSwitch<llvm::DebugCompressionType>(A->getValue())
               .Case("none", llvm::DebugCompressionType::None)
               .Case("zlib", llvm::DebugCompressionType::Z)
-              .Case("zlib-gnu", llvm::DebugCompressionType::GNU)
               .Default(llvm::DebugCompressionType::None);
     }
   }
EOF

# 0006-TargetRegistry.patch
patch -p1 --ignore-whitespace << 'EOF'
--- rocm-compilersupport.orig/lib/comgr/src/comgr-compiler.cpp
+++ rocm-compilersupport/lib/comgr/src/comgr-compiler.cpp
@@ -452,7 +452,7 @@
     std::unique_ptr<MCCodeEmitter> MCE;
     std::unique_ptr<MCAsmBackend> MAB;
     if (Opts.ShowEncoding) {
-      MCE.reset(TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx));
+      MCE.reset(TheTarget->createMCCodeEmitter(*MCII, Ctx));
       MCTargetOptions Options;
       MAB.reset(TheTarget->createMCAsmBackend(*STI, *MRI, Options));
     }
@@ -471,7 +471,7 @@
       Out = BOS.get();
     }

-    MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
+    MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, Ctx);
     MCTargetOptions Options;
     MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, Options);
     Triple T(Opts.Triple);
EOF

cmake -Slib/comgr -Bbuild -DCMAKE_BUILD_TYPE=Debug \
  -DCMAKE_PREFIX_PATH="$PREFIX/llvm" \
  -DCMAKE_INSTALL_PREFIX="$PREFIX"
make -j"$JOBS" -C build
make -C build install

# hip
cd "$WORKSPACE"
sudo apt-get -qq install mesa-common-dev
fetch https://github.com/ROCm-Developer-Tools/hipamd/archive/refs/tags/rocm-5.2.3.tar.gz
fetch https://github.com/ROCm-Developer-Tools/ROCclr/archive/refs/tags/rocm-5.2.3.tar.gz
fetch https://github.com/RadeonOpenCompute/ROCm-OpenCL-Runtime/archive/refs/tags/rocm-5.2.3.tar.gz
fetch https://github.com/ROCm-Developer-Tools/HIP/archive/refs/tags/rocm-5.2.3.tar.gz
cd hipamd-rocm-5.2.3

# 0002-fix-build-from-tarball.patch
patch -p1 --ignore-whitespace << 'EOF'
--- a/CMakeLists.txt 2022-05-23 22:01:05.000000000 -0600
+++ b/CMakeLists.txt 2022-07-09 14:24:16.519487276 -0600
@@ -140,6 +140,8 @@
   endif()
 else()
   # FIXME: Some parts depend on this being set.
+  set(HIP_VERSION_BUILD_ID 0)
+  set(HIP_VERSION_BUILD_NAME "")
   set(HIP_PACKAGING_VERSION_PATCH "0")
 endif()

@@ -182,8 +184,10 @@
 set (HIP_LIB_VERSION_MINOR ${HIP_VERSION_MINOR})
 if (${ROCM_PATCH_VERSION} )
   set (HIP_LIB_VERSION_PATCH ${ROCM_PATCH_VERSION})
-else ()
+elseif (DEFINED HIP_VERSION_GITHASH)
   set (HIP_LIB_VERSION_PATCH ${HIP_VERSION_PATCH}-${HIP_VERSION_GITHASH})
+else ()
+  set (HIP_LIB_VERSION_PATCH ${HIP_VERSION_PATCH})
 endif ()
 set (HIP_LIB_VERSION_STRING "${HIP_LIB_VERSION_MAJOR}.${HIP_LIB_VERSION_MINOR}.${HIP_LIB_VERSION_PATCH}")
 if (DEFINED ENV{ROCM_RPATH})
EOF

# -p: a leftover build directory from a previous run must not abort the
# script under `set -e`.
mkdir -p build
cd build
cmake -S.. -B. -DCMAKE_BUILD_TYPE=Debug \
  -DHIP_COMMON_DIR="$WORKSPACE/HIP-rocm-5.2.3" \
  -DAMD_OPENCL_PATH="$WORKSPACE/ROCm-OpenCL-Runtime-rocm-5.2.3" \
  -DROCCLR_PATH="$WORKSPACE/ROCclr-rocm-5.2.3" \
  -DCMAKE_PREFIX_PATH="$PREFIX;$PREFIX/llvm" \
  -DCMAKE_INSTALL_PREFIX="$PREFIX"
make -j"$JOBS"
make install
sudo apt-get -qq install perl file # used for hipcc