sci-libs/caffe2: add USE=rocm flag for AMDGPU support for 2.1.2 and 2.2.1

Closes: https://bugs.gentoo.org/905286
Signed-off-by: Sv. Lockal <lockalsash@gmail.com>
Closes: https://github.com/gentoo/gentoo/pull/35713
Signed-off-by: Alfredo Tupone <tupone@gentoo.org>
author: Sv. Lockal <lockalsash@gmail.com> 2024-03-11 16:37:02 +0000
committer: Alfredo Tupone <tupone@gentoo.org> 2024-03-11 20:27:48 +0100
commit: a5bd494c9be931e7bdcf88f75f37d9f4d8594864 (patch)
tree: 83ec33cb40798cab58d10826104a264fc126a381 /sci-libs/caffe2
parent: sci-libs/caffe2: fix compilation with USE=fbgemm (diff)
download: gentoo-a5bd494c9be931e7bdcf88f75f37d9f4d8594864.tar.gz
gentoo-a5bd494c9be931e7bdcf88f75f37d9f4d8594864.tar.bz2
gentoo-a5bd494c9be931e7bdcf88f75f37d9f4d8594864.zip
4 files changed, 166 insertions, 18 deletions
diff --git a/sci-libs/caffe2/caffe2-2.1.2-r6.ebuild b/sci-libs/caffe2/caffe2-2.1.2-r7.ebuild
index 969c36754c5c..f57406145c6a 100644
--- a/sci-libs/caffe2/caffe2-2.1.2-r6.ebuild
+++ b/sci-libs/caffe2/caffe2-2.1.2-r7.ebuild
@@ -4,7 +4,8 @@
 EAPI=8
 
 PYTHON_COMPAT=( python3_{9..12} )
-inherit python-single-r1 cmake cuda flag-o-matic prefix
+ROCM_VERSION=5.7
+inherit python-single-r1 cmake cuda flag-o-matic prefix rocm
 
 MYPN=pytorch
 MYP=${MYPN}-${PV}
@@ -17,7 +18,7 @@ SRC_URI="https://github.com/pytorch/${MYPN}/archive/refs/tags/v${PV}.tar.gz
 LICENSE="BSD"
 SLOT="0"
 KEYWORDS="~amd64"
-IUSE="cuda distributed fbgemm ffmpeg gloo mkl mpi nnpack +numpy onednn openblas opencl opencv openmp qnnpack tensorpipe xnnpack"
+IUSE="cuda distributed fbgemm ffmpeg gloo mkl mpi nnpack +numpy onednn openblas opencl opencv openmp qnnpack rocm tensorpipe xnnpack"
 RESTRICT="test"
 REQUIRED_USE="
 	${PYTHON_REQUIRED_USE}
@@ -26,7 +27,9 @@ REQUIRED_USE="
 	tensorpipe? ( distributed )
 	distributed? ( tensorpipe )
 	gloo? ( distributed )
-" # ?? ( cuda rocm )
+	?? ( cuda rocm )
+	rocm? ( || ( ${ROCM_REQUIRED_USE} ) )
+"
 
 # CUDA 12 not supported yet: https://github.com/pytorch/pytorch/issues/91122
 RDEPEND="
@@ -59,6 +62,20 @@ RDEPEND="
 	opencl? ( virtual/opencl )
 	opencv? ( media-libs/opencv:= )
 	qnnpack? ( sci-libs/QNNPACK )
+	rocm? (
+		>=dev-util/hip-5.7
+		>=dev-libs/rccl-5.7[${ROCM_USEDEP}]
+		>=sci-libs/rocThrust-5.7[${ROCM_USEDEP}]
+		>=sci-libs/rocPRIM-5.7[${ROCM_USEDEP}]
+		>=sci-libs/hipBLAS-5.7[${ROCM_USEDEP}]
+		>=sci-libs/hipFFT-5.7[${ROCM_USEDEP}]
+		>=sci-libs/hipSPARSE-5.7[${ROCM_USEDEP}]
+		>=sci-libs/hipRAND-5.7[${ROCM_USEDEP}]
+		>=sci-libs/hipCUB-5.7[${ROCM_USEDEP}]
+		>=sci-libs/hipSOLVER-5.7[${ROCM_USEDEP}]
+		>=sci-libs/miopen-5.7[${ROCM_USEDEP}]
+		>=dev-util/roctracer-5.7[${ROCM_USEDEP}]
+	)
 	tensorpipe? ( sci-libs/tensorpipe[cuda?] )
 	xnnpack? ( >=sci-libs/XNNPACK-2022.12.22 )
 	mkl? ( sci-libs/mkl )
@@ -92,6 +109,7 @@ PATCHES=(
 	"${FILESDIR}"/${PN}-2.1.1-cudaExtra.patch
 	"${FILESDIR}"/${PN}-2.1.2-fix-rpath.patch
 	"${FILESDIR}"/${PN}-2.1.2-fix-openmp-link.patch
+	"${FILESDIR}"/${PN}-2.1.2-rocm-fix-std-cpp17.patch
 )
 
 src_prepare() {
@@ -118,6 +136,18 @@ src_prepare() {
 		cmake/Dependencies.cmake \
 		torch/CMakeLists.txt \
 		CMakeLists.txt
+
+	if use rocm; then
+		sed -e "s:ROCM_PATH /opt/rocm:ROCM_PATH /usr:" \
+			-e "s:HIP_PATH \${ROCM_PATH}/hip:HIP_PATH /usr:" \
+			-e "s:\${HIP_PATH}/cmake:/usr/$(get_libdir)/cmake/hip:g" \
+			-e "s/HIP 1.0/HIP 1.0 REQUIRED/" \
+			-i cmake/public/LoadHIP.cmake || die
+
+		ebegin "HIPifying cuda sources"
+		${EPYTHON} tools/amd_build/build_amd.py || die
+		eend $?
+	fi
 }
 
 src_configure() {
@@ -140,9 +170,6 @@ src_configure() {
 
 		-DUSE_CCACHE=OFF
 		-DUSE_CUDA=$(usex cuda)
-		-DUSE_CUDNN=$(usex cuda)
-		-DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}"
-		-DBUILD_NVFUSER=$(usex cuda)
 		-DUSE_DISTRIBUTED=$(usex distributed)
 		-DUSE_MPI=$(usex mpi)
 		-DUSE_FAKELOWP=OFF
@@ -155,7 +182,6 @@ src_configure() {
 		-DUSE_LEVELDB=OFF
 		-DUSE_MAGMA=OFF # TODO: In GURU as sci-libs/magma
 		-DUSE_MKLDNN=$(usex onednn)
-		-DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library
 		-DUSE_NNPACK=$(usex nnpack)
 		-DUSE_QNNPACK=$(usex qnnpack)
 		-DUSE_XNNPACK=$(usex xnnpack)
@@ -166,7 +192,7 @@ src_configure() {
 		-DUSE_OPENCL=$(usex opencl)
 		-DUSE_OPENCV=$(usex opencv)
 		-DUSE_OPENMP=$(usex openmp)
-		-DUSE_ROCM=OFF # TODO
+		-DUSE_ROCM=$(usex rocm)
 		-DUSE_SYSTEM_CPUINFO=ON
 		-DUSE_SYSTEM_PYBIND11=ON
 		-DUSE_UCC=OFF
@@ -200,8 +226,20 @@ src_configure() {
 		addpredict "/dev/char"
 
 		mycmakeargs+=(
+			-DUSE_CUDNN=ON
+			-DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}"
+			-DBUILD_NVFUSER=ON
+			-DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library
 			-DCMAKE_CUDA_FLAGS="$(cuda_gccdir -f | tr -d \")"
 		)
+	elif use rocm; then
+		export PYTORCH_ROCM_ARCH="$(get_amdgpu_flags)"
+
+		mycmakeargs+=(
+			-DBUILD_NVFUSER=ON
+			-DUSE_NCCL=ON
+			-DUSE_SYSTEM_NCCL=ON
+		)
 	fi
 
 	if use onednn; then
@@ -214,6 +252,9 @@ src_configure() {
 	fi
 
 	cmake_src_configure
+
+	# do not rerun cmake and the build process in src_install
+	sed '/RERUN/,+1d' -i "${BUILD_DIR}"/build.ninja || die
 }
 
 src_install() {
@@ -225,7 +266,7 @@ src_install() {
 	rm -rf python
 	mkdir -p python/torch/include || die
 	mv "${ED}"/usr/lib/python*/site-packages/caffe2 python/ || die
-	if use cuda; then
+	if use cuda || use rocm; then
 		mv "${ED}${S}"/nvfuser python/nvfuser || die
 		mv "${ED}"/usr/$(get_libdir)/nvfuser.so python/nvfuser/_C.so || die
 	fi
@@ -234,7 +275,7 @@ src_install() {
 	python_domodule python/torch
 	ln -s ../../../../../include/torch \
 		"${D}$(python_get_sitedir)"/torch/include/torch || die # bug 923269
-	if use cuda; then
+	if use cuda || use rocm; then
 		python_domodule python/nvfuser
 	fi
 	rm -rf "${ED}${WORKDIR}"
diff --git a/sci-libs/caffe2/caffe2-2.2.1.ebuild b/sci-libs/caffe2/caffe2-2.2.1-r1.ebuild
index 6f96107154b7..80dc2b500a0f 100644
--- a/sci-libs/caffe2/caffe2-2.2.1.ebuild
+++ b/sci-libs/caffe2/caffe2-2.2.1-r1.ebuild
@@ -4,7 +4,8 @@
 EAPI=8
 
 PYTHON_COMPAT=( python3_{9..12} )
-inherit python-single-r1 cmake cuda flag-o-matic prefix
+ROCM_VERSION=5.7
+inherit python-single-r1 cmake cuda flag-o-matic prefix rocm
 
 MYPN=pytorch
 MYP=${MYPN}-${PV}
@@ -17,14 +18,16 @@ SRC_URI="https://github.com/pytorch/${MYPN}/archive/refs/tags/v${PV}.tar.gz
 LICENSE="BSD"
 SLOT="0"
 KEYWORDS="~amd64"
-IUSE="cuda distributed fbgemm ffmpeg gloo mkl mpi nnpack +numpy onednn openblas opencl opencv openmp qnnpack xnnpack"
+IUSE="cuda distributed fbgemm ffmpeg gloo mkl mpi nnpack +numpy onednn openblas opencl opencv openmp qnnpack rocm xnnpack"
 RESTRICT="test"
 REQUIRED_USE="
 	${PYTHON_REQUIRED_USE}
 	ffmpeg? ( opencv )
 	mpi? ( distributed )
 	gloo? ( distributed )
-" # ?? ( cuda rocm )
+	?? ( cuda rocm )
+	rocm? ( || ( ${ROCM_REQUIRED_USE} ) )
+"
 
 # CUDA 12 not supported yet: https://github.com/pytorch/pytorch/issues/91122
 RDEPEND="
@@ -57,6 +60,20 @@ RDEPEND="
 	opencl? ( virtual/opencl )
 	opencv? ( media-libs/opencv:= )
 	qnnpack? ( sci-libs/QNNPACK )
+	rocm? (
+		>=dev-util/hip-5.7
+		>=dev-libs/rccl-5.7[${ROCM_USEDEP}]
+		>=sci-libs/rocThrust-5.7[${ROCM_USEDEP}]
+		>=sci-libs/rocPRIM-5.7[${ROCM_USEDEP}]
+		>=sci-libs/hipBLAS-5.7[${ROCM_USEDEP}]
+		>=sci-libs/hipFFT-5.7[${ROCM_USEDEP}]
+		>=sci-libs/hipSPARSE-5.7[${ROCM_USEDEP}]
+		>=sci-libs/hipRAND-5.7[${ROCM_USEDEP}]
+		>=sci-libs/hipCUB-5.7[${ROCM_USEDEP}]
+		>=sci-libs/hipSOLVER-5.7[${ROCM_USEDEP}]
+		>=sci-libs/miopen-5.7[${ROCM_USEDEP}]
+		>=dev-util/roctracer-5.7[${ROCM_USEDEP}]
+	)
 	distributed? ( sci-libs/tensorpipe[cuda?] )
 	xnnpack? ( >=sci-libs/XNNPACK-2022.12.22 )
 	mkl? ( sci-libs/mkl )
@@ -89,6 +106,7 @@ PATCHES=(
 	"${FILESDIR}"/${PN}-2.0.0-cudnn_include_fix.patch
 	"${FILESDIR}"/${PN}-2.1.2-fix-rpath.patch
 	"${FILESDIR}"/${PN}-2.1.2-fix-openmp-link.patch
+	"${FILESDIR}"/${PN}-2.1.2-rocm-fix-std-cpp17.patch
 )
 
 src_prepare() {
@@ -115,6 +133,17 @@ src_prepare() {
 		cmake/Dependencies.cmake \
 		torch/CMakeLists.txt \
 		CMakeLists.txt
+
+	if use rocm; then
+		sed -e "s:/opt/rocm:/usr:" \
+			-e "s:lib/cmake:$(get_libdir)/cmake:g" \
+			-e "s/HIP 1.0/HIP 1.0 REQUIRED/" \
+			-i cmake/public/LoadHIP.cmake || die
+
+		ebegin "HIPifying cuda sources"
+		${EPYTHON} tools/amd_build/build_amd.py || die
+		eend $?
+	fi
 }
 
 src_configure() {
@@ -137,9 +166,6 @@ src_configure() {
 
 		-DUSE_CCACHE=OFF
 		-DUSE_CUDA=$(usex cuda)
-		-DUSE_CUDNN=$(usex cuda)
-		-DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}"
-		-DBUILD_NVFUSER=$(usex cuda)
 		-DUSE_DISTRIBUTED=$(usex distributed)
 		-DUSE_MPI=$(usex mpi)
 		-DUSE_FAKELOWP=OFF
@@ -152,7 +178,6 @@ src_configure() {
 		-DUSE_LEVELDB=OFF
 		-DUSE_MAGMA=OFF # TODO: In GURU as sci-libs/magma
 		-DUSE_MKLDNN=$(usex onednn)
-		-DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library
 		-DUSE_NNPACK=$(usex nnpack)
 		-DUSE_QNNPACK=$(usex qnnpack)
 		-DUSE_XNNPACK=$(usex xnnpack)
@@ -163,7 +188,7 @@ src_configure() {
 		-DUSE_OPENCL=$(usex opencl)
 		-DUSE_OPENCV=$(usex opencv)
 		-DUSE_OPENMP=$(usex openmp)
-		-DUSE_ROCM=OFF # TODO
+		-DUSE_ROCM=$(usex rocm)
 		-DUSE_SYSTEM_CPUINFO=ON
 		-DUSE_SYSTEM_PYBIND11=ON
 		-DUSE_UCC=OFF
@@ -197,8 +222,18 @@ src_configure() {
 		addpredict "/dev/char"
 
 		mycmakeargs+=(
+			-DUSE_CUDNN=ON
+			-DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}"
+			-DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library
 			-DCMAKE_CUDA_FLAGS="$(cuda_gccdir -f | tr -d \")"
 		)
+	elif use rocm; then
+		export PYTORCH_ROCM_ARCH="$(get_amdgpu_flags)"
+
+		mycmakeargs+=(
+			-DUSE_NCCL=ON
+			-DUSE_SYSTEM_NCCL=ON
+		)
 	fi
 
 	if use onednn; then
@@ -211,6 +246,9 @@ src_configure() {
 	fi
 
 	cmake_src_configure
+
+	# do not rerun cmake and the build process in src_install
+	sed '/RERUN/,+1d' -i "${BUILD_DIR}"/build.ninja || die
 }
 
 src_install() {
diff --git a/sci-libs/caffe2/files/caffe2-2.1.2-rocm-fix-std-cpp17.patch b/sci-libs/caffe2/files/caffe2-2.1.2-rocm-fix-std-cpp17.patch
new file mode 100644
index 000000000000..cb0fa0c48e80
--- /dev/null
+++ b/sci-libs/caffe2/files/caffe2-2.1.2-rocm-fix-std-cpp17.patch
@@ -0,0 +1,68 @@
+Fix for error: invalid argument '-std=c++17' not allowed with 'C'
+https://github.com/pytorch/pytorch/issues/103222
+--- a/c10/hip/CMakeLists.txt
++++ b/c10/hip/CMakeLists.txt
+@@ -30,6 +30,7 @@ hip_add_library(c10_hip ${C10_HIP_SRCS} ${C10_HIP_HEADERS})
+ 
+ # Propagate HIP_CXX_FLAGS that were set from Dependencies.cmake
+ target_compile_options(c10_hip PRIVATE ${HIP_CXX_FLAGS})
++set_target_properties(c10_hip PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+ 
+ # caffe2_hip adds a bunch of dependencies like rocsparse, but c10/hip is supposed to be
+ # minimal.  I'm not sure if we need hip_hcc or not; for now leave it out
+--- a/caffe2/CMakeLists.txt
++++ b/caffe2/CMakeLists.txt
+@@ -1598,6 +1598,7 @@ if(USE_ROCM)
+ 
+   # Since PyTorch files contain HIP headers, these flags are required for the necessary definitions to be added.
+   target_compile_options(torch_hip PUBLIC ${HIP_CXX_FLAGS})  # experiment
++  set_target_properties(torch_hip PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+   target_link_libraries(torch_hip PUBLIC c10_hip)
+ 
+   if(NOT INTERN_BUILD_MOBILE)
+@@ -1774,6 +1775,7 @@ if(BUILD_TEST)
+       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
+       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE})
+       target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS})
++      set_target_properties(${test_name} PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+       add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
+       if(INSTALL_TEST)
+         install(TARGETS ${test_name} DESTINATION test)
+@@ -1955,6 +1957,7 @@ if(BUILD_PYTHON)
+     endif()
+     if(NOT MSVC)
+       target_compile_options(caffe2_pybind11_state_hip PRIVATE ${HIP_CXX_FLAGS} -fvisibility=hidden)
++      set_target_properties(caffe2_pybind11_state_hip PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+     endif()
+     set_target_properties(caffe2_pybind11_state_hip PROPERTIES PREFIX "")
+     set_target_properties(caffe2_pybind11_state_hip PROPERTIES SUFFIX ${PY_EXT_SUFFIX})
+--- a/cmake/Dependencies.cmake
++++ b/cmake/Dependencies.cmake
+@@ -1287,7 +1287,6 @@ if(USE_ROCM)
+     list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier)
+     list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN)
+     list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)
+-    list(APPEND HIP_CXX_FLAGS -std=c++17)
+     add_definitions(-DROCM_VERSION=${ROCM_VERSION_DEV_INT})
+     add_definitions(-DTORCH_HIP_VERSION=${TORCH_HIP_VERSION})
+     message("TORCH_HIP_VERSION=${TORCH_HIP_VERSION} is added as a compiler defines")
+--- a/cmake/public/utils.cmake
++++ b/cmake/public/utils.cmake
+@@ -335,6 +335,7 @@ function(caffe2_hip_binary_target target_name_or_src)
+   caffe2_binary_target(${target_name_or_src})
+ 
+   target_compile_options(${__target} PRIVATE ${HIP_CXX_FLAGS})
++  set_target_properties(${__target} PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+   target_include_directories(${__target} PRIVATE ${Caffe2_HIP_INCLUDE})
+ endfunction()
+ 
+--- a/modules/detectron/CMakeLists.txt
++++ b/modules/detectron/CMakeLists.txt
+@@ -31,6 +31,7 @@ if(BUILD_CAFFE2_OPS)
+         ${Detectron_CPU_SRCS}
+         ${Detectron_HIP_SRCS})
+     target_compile_options(caffe2_detectron_ops_hip PRIVATE ${HIP_CXX_FLAGS})
++    set_target_properties(caffe2_detectron_ops_hip PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+     if(USE_MKLDNN)
+       target_link_libraries(caffe2_detectron_ops_hip PRIVATE caffe2::mkldnn)
+     endif()
diff --git a/sci-libs/caffe2/metadata.xml b/sci-libs/caffe2/metadata.xml
index 3fe84b0977fc..ed1f9fa58993 100644
--- a/sci-libs/caffe2/metadata.xml
+++ b/sci-libs/caffe2/metadata.xml
@@ -18,6 +18,7 @@
 		<flag name="opencv">Add support for image processing operators</flag>
 		<flag name="openmp">Use OpenMP for parallel code</flag>
 		<flag name="qnnpack">Use QNNPACK</flag>
+		<flag name="rocm">Enable ROCm gpu computing support</flag>
 		<flag name="tensorpipe">Use tensorpipe</flag>
 		<flag name="xnnpack">Use XNNPACK</flag>
 	</use>
author	Sv. Lockal <lockalsash@gmail.com>	2024-03-11 16:37:02 +0000
committer	Alfredo Tupone <tupone@gentoo.org>	2024-03-11 20:27:48 +0100
commit	a5bd494c9be931e7bdcf88f75f37d9f4d8594864 (patch)
tree	83ec33cb40798cab58d10826104a264fc126a381 /sci-libs/caffe2
parent	sci-libs/caffe2: fix compilation with USE=fbgemm (diff)
download	gentoo-a5bd494c9be931e7bdcf88f75f37d9f4d8594864.tar.gz gentoo-a5bd494c9be931e7bdcf88f75f37d9f4d8594864.tar.bz2 gentoo-a5bd494c9be931e7bdcf88f75f37d9f4d8594864.zip