diff options
author | Sv. Lockal <lockalsash@gmail.com> | 2024-03-11 16:37:02 +0000 |
---|---|---|
committer | Alfredo Tupone <tupone@gentoo.org> | 2024-03-11 20:27:48 +0100 |
commit | a5bd494c9be931e7bdcf88f75f37d9f4d8594864 (patch) | |
tree | 83ec33cb40798cab58d10826104a264fc126a381 /sci-libs/caffe2 | |
parent | sci-libs/caffe2: fix compilation with USE=fbgemm (diff) | |
download | gentoo-a5bd494c9be931e7bdcf88f75f37d9f4d8594864.tar.gz gentoo-a5bd494c9be931e7bdcf88f75f37d9f4d8594864.tar.bz2 gentoo-a5bd494c9be931e7bdcf88f75f37d9f4d8594864.zip |
sci-libs/caffe2: add USE=rocm flag for AMDGPU support for 2.1.2 and 2.2.1
Closes: https://bugs.gentoo.org/905286
Signed-off-by: Sv. Lockal <lockalsash@gmail.com>
Closes: https://github.com/gentoo/gentoo/pull/35713
Signed-off-by: Alfredo Tupone <tupone@gentoo.org>
Diffstat (limited to 'sci-libs/caffe2')
-rw-r--r-- | sci-libs/caffe2/caffe2-2.1.2-r7.ebuild (renamed from sci-libs/caffe2/caffe2-2.1.2-r6.ebuild) | 61 | ||||
-rw-r--r-- | sci-libs/caffe2/caffe2-2.2.1-r1.ebuild (renamed from sci-libs/caffe2/caffe2-2.2.1.ebuild) | 54 | ||||
-rw-r--r-- | sci-libs/caffe2/files/caffe2-2.1.2-rocm-fix-std-cpp17.patch | 68 | ||||
-rw-r--r-- | sci-libs/caffe2/metadata.xml | 1 |
4 files changed, 166 insertions, 18 deletions
diff --git a/sci-libs/caffe2/caffe2-2.1.2-r6.ebuild b/sci-libs/caffe2/caffe2-2.1.2-r7.ebuild index 969c36754c5c..f57406145c6a 100644 --- a/sci-libs/caffe2/caffe2-2.1.2-r6.ebuild +++ b/sci-libs/caffe2/caffe2-2.1.2-r7.ebuild @@ -4,7 +4,8 @@ EAPI=8 PYTHON_COMPAT=( python3_{9..12} ) -inherit python-single-r1 cmake cuda flag-o-matic prefix +ROCM_VERSION=5.7 +inherit python-single-r1 cmake cuda flag-o-matic prefix rocm MYPN=pytorch MYP=${MYPN}-${PV} @@ -17,7 +18,7 @@ SRC_URI="https://github.com/pytorch/${MYPN}/archive/refs/tags/v${PV}.tar.gz LICENSE="BSD" SLOT="0" KEYWORDS="~amd64" -IUSE="cuda distributed fbgemm ffmpeg gloo mkl mpi nnpack +numpy onednn openblas opencl opencv openmp qnnpack tensorpipe xnnpack" +IUSE="cuda distributed fbgemm ffmpeg gloo mkl mpi nnpack +numpy onednn openblas opencl opencv openmp qnnpack rocm tensorpipe xnnpack" RESTRICT="test" REQUIRED_USE=" ${PYTHON_REQUIRED_USE} @@ -26,7 +27,9 @@ REQUIRED_USE=" tensorpipe? ( distributed ) distributed? ( tensorpipe ) gloo? ( distributed ) -" # ?? ( cuda rocm ) + ?? ( cuda rocm ) + rocm? ( || ( ${ROCM_REQUIRED_USE} ) ) +" # CUDA 12 not supported yet: https://github.com/pytorch/pytorch/issues/91122 RDEPEND=" @@ -59,6 +62,20 @@ RDEPEND=" opencl? ( virtual/opencl ) opencv? ( media-libs/opencv:= ) qnnpack? ( sci-libs/QNNPACK ) + rocm? ( + >=dev-util/hip-5.7 + >=dev-libs/rccl-5.7[${ROCM_USEDEP}] + >=sci-libs/rocThrust-5.7[${ROCM_USEDEP}] + >=sci-libs/rocPRIM-5.7[${ROCM_USEDEP}] + >=sci-libs/hipBLAS-5.7[${ROCM_USEDEP}] + >=sci-libs/hipFFT-5.7[${ROCM_USEDEP}] + >=sci-libs/hipSPARSE-5.7[${ROCM_USEDEP}] + >=sci-libs/hipRAND-5.7[${ROCM_USEDEP}] + >=sci-libs/hipCUB-5.7[${ROCM_USEDEP}] + >=sci-libs/hipSOLVER-5.7[${ROCM_USEDEP}] + >=sci-libs/miopen-5.7[${ROCM_USEDEP}] + >=dev-util/roctracer-5.7[${ROCM_USEDEP}] + ) tensorpipe? ( sci-libs/tensorpipe[cuda?] ) xnnpack? ( >=sci-libs/XNNPACK-2022.12.22 ) mkl? ( sci-libs/mkl ) @@ -92,6 +109,7 @@ PATCHES=( "${FILESDIR}"/${PN}-2.1.1-cudaExtra.patch "${FILESDIR}"/${PN}-2.1.2-fix-rpath.patch "${FILESDIR}"/${PN}-2.1.2-fix-openmp-link.patch + "${FILESDIR}"/${PN}-2.1.2-rocm-fix-std-cpp17.patch ) src_prepare() { @@ -118,6 +136,18 @@ src_prepare() { cmake/Dependencies.cmake \ torch/CMakeLists.txt \ CMakeLists.txt + + if use rocm; then + sed -e "s:ROCM_PATH /opt/rocm:ROCM_PATH /usr:" \ + -e "s:HIP_PATH \${ROCM_PATH}/hip:HIP_PATH /usr:" \ + -e "s:\${HIP_PATH}/cmake:/usr/$(get_libdir)/cmake/hip:g" \ + -e "s/HIP 1.0/HIP 1.0 REQUIRED/" \ + -i cmake/public/LoadHIP.cmake || die + + ebegin "HIPifying cuda sources" + ${EPYTHON} tools/amd_build/build_amd.py || die + eend $? + fi } src_configure() { @@ -140,9 +170,6 @@ src_configure() { -DUSE_CCACHE=OFF -DUSE_CUDA=$(usex cuda) - -DUSE_CUDNN=$(usex cuda) - -DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}" - -DBUILD_NVFUSER=$(usex cuda) -DUSE_DISTRIBUTED=$(usex distributed) -DUSE_MPI=$(usex mpi) -DUSE_FAKELOWP=OFF @@ -155,7 +182,6 @@ src_configure() { -DUSE_LEVELDB=OFF -DUSE_MAGMA=OFF # TODO: In GURU as sci-libs/magma -DUSE_MKLDNN=$(usex onednn) - -DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library -DUSE_NNPACK=$(usex nnpack) -DUSE_QNNPACK=$(usex qnnpack) -DUSE_XNNPACK=$(usex xnnpack) @@ -166,7 +192,7 @@ src_configure() { -DUSE_OPENCL=$(usex opencl) -DUSE_OPENCV=$(usex opencv) -DUSE_OPENMP=$(usex openmp) - -DUSE_ROCM=OFF # TODO + -DUSE_ROCM=$(usex rocm) -DUSE_SYSTEM_CPUINFO=ON -DUSE_SYSTEM_PYBIND11=ON -DUSE_UCC=OFF @@ -200,8 +226,20 @@ src_configure() { addpredict "/dev/char" mycmakeargs+=( + -DUSE_CUDNN=ON + -DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}" + -DBUILD_NVFUSER=ON + -DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library -DCMAKE_CUDA_FLAGS="$(cuda_gccdir -f | tr -d \")" ) + elif use rocm; then + export PYTORCH_ROCM_ARCH="$(get_amdgpu_flags)" + + mycmakeargs+=( + -DBUILD_NVFUSER=ON + -DUSE_NCCL=ON + -DUSE_SYSTEM_NCCL=ON + ) fi if use onednn; then @@ -214,6 +252,9 @@ src_configure() { fi cmake_src_configure + + # do not rerun cmake and the build process in src_install + sed '/RERUN/,+1d' -i "${BUILD_DIR}"/build.ninja || die } src_install() { @@ -225,7 +266,7 @@ src_install() { rm -rf python mkdir -p python/torch/include || die mv "${ED}"/usr/lib/python*/site-packages/caffe2 python/ || die - if use cuda; then + if use cuda || use rocm; then mv "${ED}${S}"/nvfuser python/nvfuser || die mv "${ED}"/usr/$(get_libdir)/nvfuser.so python/nvfuser/_C.so || die fi @@ -234,7 +275,7 @@ src_install() { python_domodule python/torch ln -s ../../../../../include/torch \ "${D}$(python_get_sitedir)"/torch/include/torch || die # bug 923269 - if use cuda; then + if use cuda || use rocm; then python_domodule python/nvfuser fi rm -rf "${ED}${WORKDIR}" diff --git a/sci-libs/caffe2/caffe2-2.2.1.ebuild b/sci-libs/caffe2/caffe2-2.2.1-r1.ebuild index 6f96107154b7..80dc2b500a0f 100644 --- a/sci-libs/caffe2/caffe2-2.2.1.ebuild +++ b/sci-libs/caffe2/caffe2-2.2.1-r1.ebuild @@ -4,7 +4,8 @@ EAPI=8 PYTHON_COMPAT=( python3_{9..12} ) -inherit python-single-r1 cmake cuda flag-o-matic prefix +ROCM_VERSION=5.7 +inherit python-single-r1 cmake cuda flag-o-matic prefix rocm MYPN=pytorch MYP=${MYPN}-${PV} @@ -17,14 +18,16 @@ SRC_URI="https://github.com/pytorch/${MYPN}/archive/refs/tags/v${PV}.tar.gz LICENSE="BSD" SLOT="0" KEYWORDS="~amd64" -IUSE="cuda distributed fbgemm ffmpeg gloo mkl mpi nnpack +numpy onednn openblas opencl opencv openmp qnnpack xnnpack" +IUSE="cuda distributed fbgemm ffmpeg gloo mkl mpi nnpack +numpy onednn openblas opencl opencv openmp qnnpack rocm xnnpack" RESTRICT="test" REQUIRED_USE=" ${PYTHON_REQUIRED_USE} ffmpeg? ( opencv ) mpi? ( distributed ) gloo? ( distributed ) -" # ?? ( cuda rocm ) + ?? ( cuda rocm ) + rocm? ( || ( ${ROCM_REQUIRED_USE} ) ) +" # CUDA 12 not supported yet: https://github.com/pytorch/pytorch/issues/91122 RDEPEND=" @@ -57,6 +60,20 @@ RDEPEND=" opencl? ( virtual/opencl ) opencv? ( media-libs/opencv:= ) qnnpack? ( sci-libs/QNNPACK ) + rocm? ( + >=dev-util/hip-5.7 + >=dev-libs/rccl-5.7[${ROCM_USEDEP}] + >=sci-libs/rocThrust-5.7[${ROCM_USEDEP}] + >=sci-libs/rocPRIM-5.7[${ROCM_USEDEP}] + >=sci-libs/hipBLAS-5.7[${ROCM_USEDEP}] + >=sci-libs/hipFFT-5.7[${ROCM_USEDEP}] + >=sci-libs/hipSPARSE-5.7[${ROCM_USEDEP}] + >=sci-libs/hipRAND-5.7[${ROCM_USEDEP}] + >=sci-libs/hipCUB-5.7[${ROCM_USEDEP}] + >=sci-libs/hipSOLVER-5.7[${ROCM_USEDEP}] + >=sci-libs/miopen-5.7[${ROCM_USEDEP}] + >=dev-util/roctracer-5.7[${ROCM_USEDEP}] + ) distributed? ( sci-libs/tensorpipe[cuda?] ) xnnpack? ( >=sci-libs/XNNPACK-2022.12.22 ) mkl? ( sci-libs/mkl ) @@ -89,6 +106,7 @@ PATCHES=( "${FILESDIR}"/${PN}-2.0.0-cudnn_include_fix.patch "${FILESDIR}"/${PN}-2.1.2-fix-rpath.patch "${FILESDIR}"/${PN}-2.1.2-fix-openmp-link.patch + "${FILESDIR}"/${PN}-2.1.2-rocm-fix-std-cpp17.patch ) src_prepare() { @@ -115,6 +133,17 @@ src_prepare() { cmake/Dependencies.cmake \ torch/CMakeLists.txt \ CMakeLists.txt + + if use rocm; then + sed -e "s:/opt/rocm:/usr:" \ + -e "s:lib/cmake:$(get_libdir)/cmake:g" \ + -e "s/HIP 1.0/HIP 1.0 REQUIRED/" \ + -i cmake/public/LoadHIP.cmake || die + + ebegin "HIPifying cuda sources" + ${EPYTHON} tools/amd_build/build_amd.py || die + eend $? + fi } src_configure() { @@ -137,9 +166,6 @@ src_configure() { -DUSE_CCACHE=OFF -DUSE_CUDA=$(usex cuda) - -DUSE_CUDNN=$(usex cuda) - -DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}" - -DBUILD_NVFUSER=$(usex cuda) -DUSE_DISTRIBUTED=$(usex distributed) -DUSE_MPI=$(usex mpi) -DUSE_FAKELOWP=OFF @@ -152,7 +178,6 @@ src_configure() { -DUSE_LEVELDB=OFF -DUSE_MAGMA=OFF # TODO: In GURU as sci-libs/magma -DUSE_MKLDNN=$(usex onednn) - -DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library -DUSE_NNPACK=$(usex nnpack) -DUSE_QNNPACK=$(usex qnnpack) -DUSE_XNNPACK=$(usex xnnpack) @@ -163,7 +188,7 @@ src_configure() { -DUSE_OPENCL=$(usex opencl) -DUSE_OPENCV=$(usex opencv) -DUSE_OPENMP=$(usex openmp) - -DUSE_ROCM=OFF # TODO + -DUSE_ROCM=$(usex rocm) -DUSE_SYSTEM_CPUINFO=ON -DUSE_SYSTEM_PYBIND11=ON -DUSE_UCC=OFF @@ -197,8 +222,18 @@ src_configure() { addpredict "/dev/char" mycmakeargs+=( + -DUSE_CUDNN=ON + -DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}" + -DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library -DCMAKE_CUDA_FLAGS="$(cuda_gccdir -f | tr -d \")" ) + elif use rocm; then + export PYTORCH_ROCM_ARCH="$(get_amdgpu_flags)" + + mycmakeargs+=( + -DUSE_NCCL=ON + -DUSE_SYSTEM_NCCL=ON + ) fi if use onednn; then @@ -211,6 +246,9 @@ src_configure() { fi cmake_src_configure + + # do not rerun cmake and the build process in src_install + sed '/RERUN/,+1d' -i "${BUILD_DIR}"/build.ninja || die } src_install() { diff --git a/sci-libs/caffe2/files/caffe2-2.1.2-rocm-fix-std-cpp17.patch b/sci-libs/caffe2/files/caffe2-2.1.2-rocm-fix-std-cpp17.patch new file mode 100644 index 000000000000..cb0fa0c48e80 --- /dev/null +++ b/sci-libs/caffe2/files/caffe2-2.1.2-rocm-fix-std-cpp17.patch @@ -0,0 +1,68 @@ +Fix for error: invalid argument '-std=c++17' not allowed with 'C' +https://github.com/pytorch/pytorch/issues/103222 +--- a/c10/hip/CMakeLists.txt ++++ b/c10/hip/CMakeLists.txt +@@ -30,6 +30,7 @@ hip_add_library(c10_hip ${C10_HIP_SRCS} ${C10_HIP_HEADERS}) + + # Propagate HIP_CXX_FLAGS that were set from Dependencies.cmake + target_compile_options(c10_hip PRIVATE ${HIP_CXX_FLAGS}) ++set_target_properties(c10_hip PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF) + + # caffe2_hip adds a bunch of dependencies like rocsparse, but c10/hip is supposed to be + # minimal. I'm not sure if we need hip_hcc or not; for now leave it out +--- a/caffe2/CMakeLists.txt ++++ b/caffe2/CMakeLists.txt +@@ -1598,6 +1598,7 @@ if(USE_ROCM) + + # Since PyTorch files contain HIP headers, these flags are required for the necessary definitions to be added. + target_compile_options(torch_hip PUBLIC ${HIP_CXX_FLAGS}) # experiment ++ set_target_properties(torch_hip PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF) + target_link_libraries(torch_hip PUBLIC c10_hip) + + if(NOT INTERN_BUILD_MOBILE) +@@ -1774,6 +1775,7 @@ if(BUILD_TEST) + target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) + target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE}) + target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS}) ++ set_target_properties(${test_name} PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF) + add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>) + if(INSTALL_TEST) + install(TARGETS ${test_name} DESTINATION test) +@@ -1955,6 +1957,7 @@ if(BUILD_PYTHON) + endif() + if(NOT MSVC) + target_compile_options(caffe2_pybind11_state_hip PRIVATE ${HIP_CXX_FLAGS} -fvisibility=hidden) ++ set_target_properties(caffe2_pybind11_state_hip PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF) + endif() + set_target_properties(caffe2_pybind11_state_hip PROPERTIES PREFIX "") + set_target_properties(caffe2_pybind11_state_hip PROPERTIES SUFFIX ${PY_EXT_SUFFIX}) +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1287,7 +1287,6 @@ if(USE_ROCM) + list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier) + list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN) + list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP) +- list(APPEND HIP_CXX_FLAGS -std=c++17) + add_definitions(-DROCM_VERSION=${ROCM_VERSION_DEV_INT}) + add_definitions(-DTORCH_HIP_VERSION=${TORCH_HIP_VERSION}) + message("TORCH_HIP_VERSION=${TORCH_HIP_VERSION} is added as a compiler defines") +--- a/cmake/public/utils.cmake ++++ b/cmake/public/utils.cmake +@@ -335,6 +335,7 @@ function(caffe2_hip_binary_target target_name_or_src) + caffe2_binary_target(${target_name_or_src}) + + target_compile_options(${__target} PRIVATE ${HIP_CXX_FLAGS}) ++ set_target_properties(${__target} PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF) + target_include_directories(${__target} PRIVATE ${Caffe2_HIP_INCLUDE}) + endfunction() + +--- a/modules/detectron/CMakeLists.txt ++++ b/modules/detectron/CMakeLists.txt +@@ -31,6 +31,7 @@ if(BUILD_CAFFE2_OPS) + ${Detectron_CPU_SRCS} + ${Detectron_HIP_SRCS}) + target_compile_options(caffe2_detectron_ops_hip PRIVATE ${HIP_CXX_FLAGS}) ++ set_target_properties(caffe2_detectron_ops_hip PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF) + if(USE_MKLDNN) + target_link_libraries(caffe2_detectron_ops_hip PRIVATE caffe2::mkldnn) + endif() diff --git a/sci-libs/caffe2/metadata.xml b/sci-libs/caffe2/metadata.xml index 3fe84b0977fc..ed1f9fa58993 100644 --- a/sci-libs/caffe2/metadata.xml +++ b/sci-libs/caffe2/metadata.xml @@ -18,6 +18,7 @@ <flag name="opencv">Add support for image processing operators</flag> <flag name="openmp">Use OpenMP for parallel code</flag> <flag name="qnnpack">Use QNNPACK</flag> + <flag name="rocm">Enable ROCm gpu computing support</flag> <flag name="tensorpipe">Use tensorpipe</flag> <flag name="xnnpack">Use XNNPACK</flag> </use> |