summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sv. Lockal <lockalsash@gmail.com>, 2024-03-11 16:37:02 +0000
committer: Alfredo Tupone <tupone@gentoo.org>, 2024-03-11 20:27:48 +0100
commit: a5bd494c9be931e7bdcf88f75f37d9f4d8594864 (patch)
tree: 83ec33cb40798cab58d10826104a264fc126a381 /sci-libs/caffe2
parent: sci-libs/caffe2: fix compilation with USE=fbgemm (diff)
download: gentoo-a5bd494c9be931e7bdcf88f75f37d9f4d8594864.tar.gz
gentoo-a5bd494c9be931e7bdcf88f75f37d9f4d8594864.tar.bz2
gentoo-a5bd494c9be931e7bdcf88f75f37d9f4d8594864.zip
sci-libs/caffe2: add USE=rocm flag for AMDGPU support for 2.1.2 and 2.2.1
Closes: https://bugs.gentoo.org/905286
Signed-off-by: Sv. Lockal <lockalsash@gmail.com>
Closes: https://github.com/gentoo/gentoo/pull/35713
Signed-off-by: Alfredo Tupone <tupone@gentoo.org>
Diffstat (limited to 'sci-libs/caffe2')
-rw-r--r-- sci-libs/caffe2/caffe2-2.1.2-r7.ebuild (renamed from sci-libs/caffe2/caffe2-2.1.2-r6.ebuild) 61
-rw-r--r-- sci-libs/caffe2/caffe2-2.2.1-r1.ebuild (renamed from sci-libs/caffe2/caffe2-2.2.1.ebuild) 54
-rw-r--r-- sci-libs/caffe2/files/caffe2-2.1.2-rocm-fix-std-cpp17.patch 68
-rw-r--r-- sci-libs/caffe2/metadata.xml 1
4 files changed, 166 insertions, 18 deletions
diff --git a/sci-libs/caffe2/caffe2-2.1.2-r6.ebuild b/sci-libs/caffe2/caffe2-2.1.2-r7.ebuild
index 969c36754c5c..f57406145c6a 100644
--- a/sci-libs/caffe2/caffe2-2.1.2-r6.ebuild
+++ b/sci-libs/caffe2/caffe2-2.1.2-r7.ebuild
@@ -4,7 +4,8 @@
EAPI=8
PYTHON_COMPAT=( python3_{9..12} )
-inherit python-single-r1 cmake cuda flag-o-matic prefix
+ROCM_VERSION=5.7
+inherit python-single-r1 cmake cuda flag-o-matic prefix rocm
MYPN=pytorch
MYP=${MYPN}-${PV}
@@ -17,7 +18,7 @@ SRC_URI="https://github.com/pytorch/${MYPN}/archive/refs/tags/v${PV}.tar.gz
LICENSE="BSD"
SLOT="0"
KEYWORDS="~amd64"
-IUSE="cuda distributed fbgemm ffmpeg gloo mkl mpi nnpack +numpy onednn openblas opencl opencv openmp qnnpack tensorpipe xnnpack"
+IUSE="cuda distributed fbgemm ffmpeg gloo mkl mpi nnpack +numpy onednn openblas opencl opencv openmp qnnpack rocm tensorpipe xnnpack"
RESTRICT="test"
REQUIRED_USE="
${PYTHON_REQUIRED_USE}
@@ -26,7 +27,9 @@ REQUIRED_USE="
tensorpipe? ( distributed )
distributed? ( tensorpipe )
gloo? ( distributed )
-" # ?? ( cuda rocm )
+ ?? ( cuda rocm )
+ rocm? ( || ( ${ROCM_REQUIRED_USE} ) )
+"
# CUDA 12 not supported yet: https://github.com/pytorch/pytorch/issues/91122
RDEPEND="
@@ -59,6 +62,20 @@ RDEPEND="
opencl? ( virtual/opencl )
opencv? ( media-libs/opencv:= )
qnnpack? ( sci-libs/QNNPACK )
+ rocm? (
+ >=dev-util/hip-5.7
+ >=dev-libs/rccl-5.7[${ROCM_USEDEP}]
+ >=sci-libs/rocThrust-5.7[${ROCM_USEDEP}]
+ >=sci-libs/rocPRIM-5.7[${ROCM_USEDEP}]
+ >=sci-libs/hipBLAS-5.7[${ROCM_USEDEP}]
+ >=sci-libs/hipFFT-5.7[${ROCM_USEDEP}]
+ >=sci-libs/hipSPARSE-5.7[${ROCM_USEDEP}]
+ >=sci-libs/hipRAND-5.7[${ROCM_USEDEP}]
+ >=sci-libs/hipCUB-5.7[${ROCM_USEDEP}]
+ >=sci-libs/hipSOLVER-5.7[${ROCM_USEDEP}]
+ >=sci-libs/miopen-5.7[${ROCM_USEDEP}]
+ >=dev-util/roctracer-5.7[${ROCM_USEDEP}]
+ )
tensorpipe? ( sci-libs/tensorpipe[cuda?] )
xnnpack? ( >=sci-libs/XNNPACK-2022.12.22 )
mkl? ( sci-libs/mkl )
@@ -92,6 +109,7 @@ PATCHES=(
"${FILESDIR}"/${PN}-2.1.1-cudaExtra.patch
"${FILESDIR}"/${PN}-2.1.2-fix-rpath.patch
"${FILESDIR}"/${PN}-2.1.2-fix-openmp-link.patch
+ "${FILESDIR}"/${PN}-2.1.2-rocm-fix-std-cpp17.patch
)
src_prepare() {
@@ -118,6 +136,18 @@ src_prepare() {
cmake/Dependencies.cmake \
torch/CMakeLists.txt \
CMakeLists.txt
+
+ if use rocm; then
+ sed -e "s:ROCM_PATH /opt/rocm:ROCM_PATH /usr:" \
+ -e "s:HIP_PATH \${ROCM_PATH}/hip:HIP_PATH /usr:" \
+ -e "s:\${HIP_PATH}/cmake:/usr/$(get_libdir)/cmake/hip:g" \
+ -e "s/HIP 1.0/HIP 1.0 REQUIRED/" \
+ -i cmake/public/LoadHIP.cmake || die
+
+ ebegin "HIPifying cuda sources"
+ ${EPYTHON} tools/amd_build/build_amd.py || die
+ eend $?
+ fi
}
src_configure() {
@@ -140,9 +170,6 @@ src_configure() {
-DUSE_CCACHE=OFF
-DUSE_CUDA=$(usex cuda)
- -DUSE_CUDNN=$(usex cuda)
- -DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}"
- -DBUILD_NVFUSER=$(usex cuda)
-DUSE_DISTRIBUTED=$(usex distributed)
-DUSE_MPI=$(usex mpi)
-DUSE_FAKELOWP=OFF
@@ -155,7 +182,6 @@ src_configure() {
-DUSE_LEVELDB=OFF
-DUSE_MAGMA=OFF # TODO: In GURU as sci-libs/magma
-DUSE_MKLDNN=$(usex onednn)
- -DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library
-DUSE_NNPACK=$(usex nnpack)
-DUSE_QNNPACK=$(usex qnnpack)
-DUSE_XNNPACK=$(usex xnnpack)
@@ -166,7 +192,7 @@ src_configure() {
-DUSE_OPENCL=$(usex opencl)
-DUSE_OPENCV=$(usex opencv)
-DUSE_OPENMP=$(usex openmp)
- -DUSE_ROCM=OFF # TODO
+ -DUSE_ROCM=$(usex rocm)
-DUSE_SYSTEM_CPUINFO=ON
-DUSE_SYSTEM_PYBIND11=ON
-DUSE_UCC=OFF
@@ -200,8 +226,20 @@ src_configure() {
addpredict "/dev/char"
mycmakeargs+=(
+ -DUSE_CUDNN=ON
+ -DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}"
+ -DBUILD_NVFUSER=ON
+ -DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library
-DCMAKE_CUDA_FLAGS="$(cuda_gccdir -f | tr -d \")"
)
+ elif use rocm; then
+ export PYTORCH_ROCM_ARCH="$(get_amdgpu_flags)"
+
+ mycmakeargs+=(
+ -DBUILD_NVFUSER=ON
+ -DUSE_NCCL=ON
+ -DUSE_SYSTEM_NCCL=ON
+ )
fi
if use onednn; then
@@ -214,6 +252,9 @@ src_configure() {
fi
cmake_src_configure
+
+ # do not rerun cmake and the build process in src_install
+ sed '/RERUN/,+1d' -i "${BUILD_DIR}"/build.ninja || die
}
src_install() {
@@ -225,7 +266,7 @@ src_install() {
rm -rf python
mkdir -p python/torch/include || die
mv "${ED}"/usr/lib/python*/site-packages/caffe2 python/ || die
- if use cuda; then
+ if use cuda || use rocm; then
mv "${ED}${S}"/nvfuser python/nvfuser || die
mv "${ED}"/usr/$(get_libdir)/nvfuser.so python/nvfuser/_C.so || die
fi
@@ -234,7 +275,7 @@ src_install() {
python_domodule python/torch
ln -s ../../../../../include/torch \
"${D}$(python_get_sitedir)"/torch/include/torch || die # bug 923269
- if use cuda; then
+ if use cuda || use rocm; then
python_domodule python/nvfuser
fi
rm -rf "${ED}${WORKDIR}"
diff --git a/sci-libs/caffe2/caffe2-2.2.1.ebuild b/sci-libs/caffe2/caffe2-2.2.1-r1.ebuild
index 6f96107154b7..80dc2b500a0f 100644
--- a/sci-libs/caffe2/caffe2-2.2.1.ebuild
+++ b/sci-libs/caffe2/caffe2-2.2.1-r1.ebuild
@@ -4,7 +4,8 @@
EAPI=8
PYTHON_COMPAT=( python3_{9..12} )
-inherit python-single-r1 cmake cuda flag-o-matic prefix
+ROCM_VERSION=5.7
+inherit python-single-r1 cmake cuda flag-o-matic prefix rocm
MYPN=pytorch
MYP=${MYPN}-${PV}
@@ -17,14 +18,16 @@ SRC_URI="https://github.com/pytorch/${MYPN}/archive/refs/tags/v${PV}.tar.gz
LICENSE="BSD"
SLOT="0"
KEYWORDS="~amd64"
-IUSE="cuda distributed fbgemm ffmpeg gloo mkl mpi nnpack +numpy onednn openblas opencl opencv openmp qnnpack xnnpack"
+IUSE="cuda distributed fbgemm ffmpeg gloo mkl mpi nnpack +numpy onednn openblas opencl opencv openmp qnnpack rocm xnnpack"
RESTRICT="test"
REQUIRED_USE="
${PYTHON_REQUIRED_USE}
ffmpeg? ( opencv )
mpi? ( distributed )
gloo? ( distributed )
-" # ?? ( cuda rocm )
+ ?? ( cuda rocm )
+ rocm? ( || ( ${ROCM_REQUIRED_USE} ) )
+"
# CUDA 12 not supported yet: https://github.com/pytorch/pytorch/issues/91122
RDEPEND="
@@ -57,6 +60,20 @@ RDEPEND="
opencl? ( virtual/opencl )
opencv? ( media-libs/opencv:= )
qnnpack? ( sci-libs/QNNPACK )
+ rocm? (
+ >=dev-util/hip-5.7
+ >=dev-libs/rccl-5.7[${ROCM_USEDEP}]
+ >=sci-libs/rocThrust-5.7[${ROCM_USEDEP}]
+ >=sci-libs/rocPRIM-5.7[${ROCM_USEDEP}]
+ >=sci-libs/hipBLAS-5.7[${ROCM_USEDEP}]
+ >=sci-libs/hipFFT-5.7[${ROCM_USEDEP}]
+ >=sci-libs/hipSPARSE-5.7[${ROCM_USEDEP}]
+ >=sci-libs/hipRAND-5.7[${ROCM_USEDEP}]
+ >=sci-libs/hipCUB-5.7[${ROCM_USEDEP}]
+ >=sci-libs/hipSOLVER-5.7[${ROCM_USEDEP}]
+ >=sci-libs/miopen-5.7[${ROCM_USEDEP}]
+ >=dev-util/roctracer-5.7[${ROCM_USEDEP}]
+ )
distributed? ( sci-libs/tensorpipe[cuda?] )
xnnpack? ( >=sci-libs/XNNPACK-2022.12.22 )
mkl? ( sci-libs/mkl )
@@ -89,6 +106,7 @@ PATCHES=(
"${FILESDIR}"/${PN}-2.0.0-cudnn_include_fix.patch
"${FILESDIR}"/${PN}-2.1.2-fix-rpath.patch
"${FILESDIR}"/${PN}-2.1.2-fix-openmp-link.patch
+ "${FILESDIR}"/${PN}-2.1.2-rocm-fix-std-cpp17.patch
)
src_prepare() {
@@ -115,6 +133,17 @@ src_prepare() {
cmake/Dependencies.cmake \
torch/CMakeLists.txt \
CMakeLists.txt
+
+ if use rocm; then
+ sed -e "s:/opt/rocm:/usr:" \
+ -e "s:lib/cmake:$(get_libdir)/cmake:g" \
+ -e "s/HIP 1.0/HIP 1.0 REQUIRED/" \
+ -i cmake/public/LoadHIP.cmake || die
+
+ ebegin "HIPifying cuda sources"
+ ${EPYTHON} tools/amd_build/build_amd.py || die
+ eend $?
+ fi
}
src_configure() {
@@ -137,9 +166,6 @@ src_configure() {
-DUSE_CCACHE=OFF
-DUSE_CUDA=$(usex cuda)
- -DUSE_CUDNN=$(usex cuda)
- -DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}"
- -DBUILD_NVFUSER=$(usex cuda)
-DUSE_DISTRIBUTED=$(usex distributed)
-DUSE_MPI=$(usex mpi)
-DUSE_FAKELOWP=OFF
@@ -152,7 +178,6 @@ src_configure() {
-DUSE_LEVELDB=OFF
-DUSE_MAGMA=OFF # TODO: In GURU as sci-libs/magma
-DUSE_MKLDNN=$(usex onednn)
- -DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library
-DUSE_NNPACK=$(usex nnpack)
-DUSE_QNNPACK=$(usex qnnpack)
-DUSE_XNNPACK=$(usex xnnpack)
@@ -163,7 +188,7 @@ src_configure() {
-DUSE_OPENCL=$(usex opencl)
-DUSE_OPENCV=$(usex opencv)
-DUSE_OPENMP=$(usex openmp)
- -DUSE_ROCM=OFF # TODO
+ -DUSE_ROCM=$(usex rocm)
-DUSE_SYSTEM_CPUINFO=ON
-DUSE_SYSTEM_PYBIND11=ON
-DUSE_UCC=OFF
@@ -197,8 +222,18 @@ src_configure() {
addpredict "/dev/char"
mycmakeargs+=(
+ -DUSE_CUDNN=ON
+ -DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}"
+ -DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library
-DCMAKE_CUDA_FLAGS="$(cuda_gccdir -f | tr -d \")"
)
+ elif use rocm; then
+ export PYTORCH_ROCM_ARCH="$(get_amdgpu_flags)"
+
+ mycmakeargs+=(
+ -DUSE_NCCL=ON
+ -DUSE_SYSTEM_NCCL=ON
+ )
fi
if use onednn; then
@@ -211,6 +246,9 @@ src_configure() {
fi
cmake_src_configure
+
+ # do not rerun cmake and the build process in src_install
+ sed '/RERUN/,+1d' -i "${BUILD_DIR}"/build.ninja || die
}
src_install() {
diff --git a/sci-libs/caffe2/files/caffe2-2.1.2-rocm-fix-std-cpp17.patch b/sci-libs/caffe2/files/caffe2-2.1.2-rocm-fix-std-cpp17.patch
new file mode 100644
index 000000000000..cb0fa0c48e80
--- /dev/null
+++ b/sci-libs/caffe2/files/caffe2-2.1.2-rocm-fix-std-cpp17.patch
@@ -0,0 +1,68 @@
+Fix for error: invalid argument '-std=c++17' not allowed with 'C'
+https://github.com/pytorch/pytorch/issues/103222
+--- a/c10/hip/CMakeLists.txt
++++ b/c10/hip/CMakeLists.txt
+@@ -30,6 +30,7 @@ hip_add_library(c10_hip ${C10_HIP_SRCS} ${C10_HIP_HEADERS})
+
+ # Propagate HIP_CXX_FLAGS that were set from Dependencies.cmake
+ target_compile_options(c10_hip PRIVATE ${HIP_CXX_FLAGS})
++set_target_properties(c10_hip PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+
+ # caffe2_hip adds a bunch of dependencies like rocsparse, but c10/hip is supposed to be
+ # minimal. I'm not sure if we need hip_hcc or not; for now leave it out
+--- a/caffe2/CMakeLists.txt
++++ b/caffe2/CMakeLists.txt
+@@ -1598,6 +1598,7 @@ if(USE_ROCM)
+
+ # Since PyTorch files contain HIP headers, these flags are required for the necessary definitions to be added.
+ target_compile_options(torch_hip PUBLIC ${HIP_CXX_FLAGS}) # experiment
++ set_target_properties(torch_hip PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+ target_link_libraries(torch_hip PUBLIC c10_hip)
+
+ if(NOT INTERN_BUILD_MOBILE)
+@@ -1774,6 +1775,7 @@ if(BUILD_TEST)
+ target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
+ target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE})
+ target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS})
++ set_target_properties(${test_name} PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+ add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
+ if(INSTALL_TEST)
+ install(TARGETS ${test_name} DESTINATION test)
+@@ -1955,6 +1957,7 @@ if(BUILD_PYTHON)
+ endif()
+ if(NOT MSVC)
+ target_compile_options(caffe2_pybind11_state_hip PRIVATE ${HIP_CXX_FLAGS} -fvisibility=hidden)
++ set_target_properties(caffe2_pybind11_state_hip PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+ endif()
+ set_target_properties(caffe2_pybind11_state_hip PROPERTIES PREFIX "")
+ set_target_properties(caffe2_pybind11_state_hip PROPERTIES SUFFIX ${PY_EXT_SUFFIX})
+--- a/cmake/Dependencies.cmake
++++ b/cmake/Dependencies.cmake
+@@ -1287,7 +1287,6 @@ if(USE_ROCM)
+ list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier)
+ list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN)
+ list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)
+- list(APPEND HIP_CXX_FLAGS -std=c++17)
+ add_definitions(-DROCM_VERSION=${ROCM_VERSION_DEV_INT})
+ add_definitions(-DTORCH_HIP_VERSION=${TORCH_HIP_VERSION})
+ message("TORCH_HIP_VERSION=${TORCH_HIP_VERSION} is added as a compiler defines")
+--- a/cmake/public/utils.cmake
++++ b/cmake/public/utils.cmake
+@@ -335,6 +335,7 @@ function(caffe2_hip_binary_target target_name_or_src)
+ caffe2_binary_target(${target_name_or_src})
+
+ target_compile_options(${__target} PRIVATE ${HIP_CXX_FLAGS})
++ set_target_properties(${__target} PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+ target_include_directories(${__target} PRIVATE ${Caffe2_HIP_INCLUDE})
+ endfunction()
+
+--- a/modules/detectron/CMakeLists.txt
++++ b/modules/detectron/CMakeLists.txt
+@@ -31,6 +31,7 @@ if(BUILD_CAFFE2_OPS)
+ ${Detectron_CPU_SRCS}
+ ${Detectron_HIP_SRCS})
+ target_compile_options(caffe2_detectron_ops_hip PRIVATE ${HIP_CXX_FLAGS})
++ set_target_properties(caffe2_detectron_ops_hip PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+ if(USE_MKLDNN)
+ target_link_libraries(caffe2_detectron_ops_hip PRIVATE caffe2::mkldnn)
+ endif()
diff --git a/sci-libs/caffe2/metadata.xml b/sci-libs/caffe2/metadata.xml
index 3fe84b0977fc..ed1f9fa58993 100644
--- a/sci-libs/caffe2/metadata.xml
+++ b/sci-libs/caffe2/metadata.xml
@@ -18,6 +18,7 @@
<flag name="opencv">Add support for image processing operators</flag>
<flag name="openmp">Use OpenMP for parallel code</flag>
<flag name="qnnpack">Use QNNPACK</flag>
+ <flag name="rocm">Enable ROCm gpu computing support</flag>
<flag name="tensorpipe">Use tensorpipe</flag>
<flag name="xnnpack">Use XNNPACK</flag>
</use>