From a1e5154a93ba7b8557a27b47223a7bbc552210d9 Mon Sep 17 00:00:00 2001
From: misaka00251 <liuxin@iscas.ac.cn>
Date: Fri, 21 Feb 2025 00:47:22 +0800
Subject: [PATCH] Enable tensile

---
 rocblas.spec | 95 +++++++++++++++++++++++++++-------------------------
 1 file changed, 49 insertions(+), 46 deletions(-)

diff --git a/rocblas.spec b/rocblas.spec
index 67a8d65..b3e6bbc 100644
--- a/rocblas.spec
+++ b/rocblas.spec
@@ -7,9 +7,6 @@
 # hipcc does not support some clang flags
 %global build_cxxflags %(echo %{optflags} | sed -e 's/-fstack-protector-strong/-Xarch_host -fstack-protector-strong/' -e 's/-fcf-protection/-Xarch_host -fcf-protection/')
 
-# $gpu will be evaluated in the loops below
-%global _vpath_builddir %{_vendor}-%{_target_os}-build-${gpu}
-
 %bcond_with debug
 %if %{with debug}
 %global build_type DEBUG
@@ -33,7 +30,7 @@
 %endif
 
 # Tensile blocks general enablement of rocBLAS.
-%bcond_with tensile
+%bcond_without tensile
 %if %{with tensile}
 %global build_tensile ON
 %else
@@ -42,10 +39,17 @@
 
 # Why Build ID is 8 bytes
 %global buildid 1
+# Alternatively, disable debuginfo package generation entirely:
+#global debug_package %{nil}
+
+# Compression type and level for source/binary package payloads.
+#  "w7T0.xzdio"	xz level 7 using %%{getncpus} threads
+%define _source_payload	w7T0.xzdio
+%define _binary_payload	w7T0.xzdio
 
 Name:           rocblas
 Version:        %{rocm_version}
-Release:        1
+Release:        2
 Summary:        BLAS implementation for ROCm
 Url:            https://github.com/ROCmSoftwarePlatform/%{upstreamname}
 License:        MIT AND BSD-3-Clause
@@ -62,7 +66,6 @@ BuildRequires:  rocm-compilersupport-macros
 BuildRequires:  rocm-hip-devel
 BuildRequires:  rocm-runtime-devel
 BuildRequires:  rocm-rpm-macros
-BuildRequires:  rocm-rpm-macros-modules
 
 %if %{with tensile}
 BuildRequires:  msgpack-devel
@@ -82,7 +85,7 @@ BuildRequires:  rocminfo
 BuildRequires:  rocm-smi-devel
 %endif
 
-Requires:       rocm-rpm-macros-modules
+Provides:       rocblas = %{version}-%{release}
 
 ExclusiveArch:  x86_64 riscv64
 
@@ -111,6 +114,10 @@ Requires:       %{name}%{?_isa} = %{version}-%{release}
 %autosetup -p1 -n %{upstreamname}-rocm-%{version}
 sed -i -e 's@set( BLAS_LIBRARY "blas" )@set( BLAS_LIBRARY "cblas" )@' clients/CMakeLists.txt
 sed -i -e 's@target_link_libraries( rocblas-test PRIVATE ${BLAS_LIBRARY} ${GTEST_BOTH_LIBRARIES} roc::rocblas )@target_link_libraries( rocblas-test PRIVATE cblas ${GTEST_BOTH_LIBRARIES} roc::rocblas )@' clients/gtest/CMakeLists.txt
+
+# Git is not available in the build environment, so make it optional
+sed -i -e 's@find_package(Git REQUIRED)@find_package(Git)@' library/CMakeLists.txt
+
 %build
 
 # With compat llvm the system clang is wrong
@@ -120,50 +127,43 @@ export TENSILE_ROCM_OFFLOAD_BUNDLER_PATH=${CLANG_PATH}/clang-offload-bundler
 # Work around problem with koji's ld
 export HIPCC_LINK_FLAGS_APPEND=-fuse-ld=lld
 
-for gpu in %{rocm_gpu_list}
-do
-    module load rocm/$gpu
-    # We can't compile all the targets at once
-    export ROCM_GPUS='gfx1030;gfx1100;gfx1101;gfx1102'
-
-    %cmake \
-	   -DCMAKE_CXX_COMPILER=hipcc \
-	   -DCMAKE_C_COMPILER=hipcc \
-	   -DCMAKE_LINKER=%rocmllvm_bindir/ld.lld \
-	   -DCMAKE_AR=%rocmllvm_bindir/llvm-ar \
-	   -DCMAKE_RANLIB=%rocmllvm_bindir/llvm-ranlib \
-	   -DCMAKE_BUILD_TYPE=%{build_type} \
-	   -DCMAKE_PREFIX_PATH=%{rocmllvm_cmakedir}/.. \
-	   -DCMAKE_SKIP_RPATH=ON \
-	   -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \
-	   -DROCM_SYMLINK_LIBS=OFF \
-	   -DHIP_PLATFORM=amd \
-	   -DAMDGPU_TARGETS=${ROCM_GPUS} \
-	   -DCMAKE_INSTALL_LIBDIR=$ROCM_LIB \
-	   -DCMAKE_INSTALL_BINDIR=$ROCM_BIN \
-	   -DBUILD_CLIENTS_BENCHMARKS=%{build_test} \
-	   -DBUILD_CLIENTS_TESTS=%{build_test} \
-	   -DBUILD_WITH_TENSILE=%{build_tensile} \
-	   -DBUILD_CLIENTS_TESTS_OPENMP=OFF \
-	   -DBUILD_FORTRAN_CLIENTS=OFF \
-	   -DBLAS_LIBRARY=cblas \
-	   -DBUILD_OFFLOAD_COMPRESS=%{build_compress} \
-	   -DBUILD_WITH_HIPBLASLT=OFF \
-	   -DTensile_COMPILER=hipcc \
+# We can't compile all the targets at once, so build only this subset
+export ROCM_GPUS='gfx1030;gfx1100;gfx1101;gfx1102'
+
+%cmake \
+    -DCMAKE_CXX_COMPILER=hipcc \
+    -DCMAKE_C_COMPILER=hipcc \
+    -DCMAKE_LINKER=%rocmllvm_bindir/ld.lld \
+    -DCMAKE_AR=%rocmllvm_bindir/llvm-ar \
+    -DCMAKE_RANLIB=%rocmllvm_bindir/llvm-ranlib \
+    -DCMAKE_BUILD_TYPE=%{build_type} \
+    -DCMAKE_PREFIX_PATH=%{rocmllvm_cmakedir}/.. \
+    -DCMAKE_SKIP_RPATH=ON \
+    -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \
+    -DROCM_SYMLINK_LIBS=OFF \
+    -DHIP_PLATFORM=amd \
+    -DAMDGPU_TARGETS=${ROCM_GPUS} \
+    -DCMAKE_INSTALL_LIBDIR=%_libdir \
+    -DBUILD_CLIENTS_BENCHMARKS=%{build_test} \
+    -DBUILD_CLIENTS_TESTS=%{build_test} \
+    -DBUILD_CLIENTS_TESTS_OPENMP=OFF \
+    -DBUILD_FORTRAN_CLIENTS=OFF \
+    -DBLAS_LIBRARY=cblas \
+    -DBUILD_OFFLOAD_COMPRESS=%{build_compress} \
+    -DBUILD_WITH_HIPBLASLT=OFF \
+    -DTensile_COMPILER=hipcc \
+    -DBUILD_WITH_TENSILE=%{build_tensile} \
+    -DTensile_DIR=${TP}/cmake \
 %if %{buildid}
-	   -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--build-id=sha1" \
+    -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--build-id=sha1" \
 %endif
-	   -DBUILD_WITH_PIP=OFF
+    -DBUILD_WITH_PIP=OFF
+
+%cmake_build
 
-    %cmake_build
-    module purge
-done
 
 %install
-for gpu in %{rocm_gpu_list}
-do
-    %cmake_install
-done
+%cmake_install
 
 echo s@%{buildroot}@@ > br.sed
 find %{buildroot}%{_libdir} -name '*.so.*.[0-9]' | sed -f br.sed >  %{name}.files
@@ -196,6 +196,9 @@ fi
 %endif
 
 %changelog
+* Fri Feb 21 2025 misaka00251 <liuxin@iscas.ac.cn> - 6.3.0-2
+- Enable tensile
+
 * Wed Dec 18 2024 misaka00251 <liuxin@iscas.ac.cn> - 6.3.0-1
 - Update to 6.3.0
 
-- 
GitLab