Linuxtiny-cuda-nn直接安装_tinycudann资源-CSDN文库

共2000个文件

html：1278个

h：545个

js：76个

linux

需积分: 5 83 浏览量 2023-11-19 20:29:54 上传评论 3 收藏 160.27MB ZIP 举报

资源推荐

资源详情

资源评论

收起资源包目录

Linuxtiny-cuda-nn直接安装（2000个子文件）

problem_space.cpp 38KB

cudnn_helpers.cpp 17KB

performance_report.cpp 14KB

enumerated_types.cpp 8KB

visualize_layout.cpp 6KB

gpu_timer.cpp 4KB

manifest.cpp 4KB

main.cpp 2KB

stbi_wrapper.cpp 1KB

doxygen.css 27KB

matrix.h 369KB

stb_image.h 281KB

mma_tensor_op_tile_iterator.h 137KB

default_mma_core_sm80.h 103KB

mma_tensor_op_tile_iterator_sm70.h 100KB

mma_complex_tensor_op_tile_iterator_sm80.h 79KB

mma_tensor_op_tile_iterator_sm80.h 76KB

stb_image_write.h 71KB

predicated_tile_access_iterator.h 70KB

default_multistage_mma_complex_core_sm80.h 64KB

common_device.h 63KB

predicated_tile_iterator.h 62KB

functional.h 62KB

mma_simt_tile_iterator.h 60KB

default_mma_core_simt.h 58KB

default_conv2d_fprop.h 57KB

mma_sm80.h 55KB

default_conv2d_dgrad.h 54KB

convolution.h 48KB

regular_tile_access_iterator_tensor_op_sm80.h 48KB

constants.h 48KB

tensor_fill.h 47KB

numeric_conversion.h 46KB

regular_tile_iterator_tensor_op_sm70.h 44KB

grid.h 44KB

vec.h 43KB

mma_sparse_sm80.h 43KB

gemm_operation.h 43KB

default_mma_core_sm75.h 43KB

tensor_fill.h 42KB

library.h 39KB

predicated_tile_iterator.h 37KB

regular_tile_iterator_tensor_op.h 36KB

gemm_with_softmax.h 35KB

matrix.h 35KB

default_gemm.h 34KB

default_mma.h 34KB

shampoo.h 34KB

b2b_mma_multistage.h 34KB

b2b_mma_multistage_smem_accumulator.h 33KB

tensor_op_multiplicand_sm75.h 33KB

pitch_linear_thread_map.h 33KB

default_mma_core_sparse_sm80.h 32KB

b2b_implicit_gemm_multistage.h 32KB

predicated_tile_access_iterator_triangular_matrix.h 32KB

b2b_implicit_gemm_multistage_smem_accumulator.h 32KB

mma_sm60.h 30KB

mma_sm75.h 30KB

conv2d_params.h 30KB

tensor_op_multiplicand_sm70.h 30KB

implicit_gemm_fprop_fusion_multistage.h 30KB

tensor_op_multiplicand_sm80.h 30KB

epilogue_with_reduction.h 29KB

default_conv2d_wgrad.h 29KB

epilogue_with_broadcast.h 29KB

default_b2b_conv2d_fprop_smem_accumulator_sm75.h 29KB

convolution.h 29KB

predicated_tile_access_iterator_2dthreadtile.h 28KB

b2b_interleaved_conv2d_run.h 28KB

predicated_tile_iterator_triangular_matrix.h 28KB

default_b2b_conv2d_fprop_smem_accumulator_sm80.h 28KB

mma_complex_tensor_op.h 28KB

problem_space.h 28KB

regular_tile_access_iterator_tensor_op.h 28KB

mma_blas3_multistage.h 28KB

default_b2b_mma_smem_accumulator.h 27KB

predicated_tile_iterator_2dthreadtile.h 27KB

mma_tensor_op_tile_iterator_wmma.h 27KB

default_b2b_mma.h 27KB

default_b2b_conv2d_fprop_sm80.h 27KB

default_b2b_conv2d_fprop_sm75.h 27KB

trmm.h 27KB

implicit_gemm_wgrad_fusion_multistage.h 26KB

b2b_conv2d_run.h 26KB

conv2d_dgrad_output_gradient_tile_access_iterator_optimized.h 26KB

platform.h 26KB

mma_sparse_multistage.h 26KB

default_multistage_trmm_complex.h 25KB

gemm_with_fused_epilogue.h 25KB

gemm.h 25KB

b2b_interleaved_gemm_run.h 25KB

fast_math.h 25KB

rank_2k_universal.h 24KB

gemm_with_k_reduction.h 24KB

gpu_memory.h 24KB

half.h 24KB

symm_universal.h 24KB

gemm_universal.h 24KB

gemm_planar_complex.h 24KB

linear_combination_clamp.h 24KB

共 2000 条

/*************************************************************************************************** * Copyright (c) 2017 - 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: BSD-3-Clause * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * **************************************************************************************************/ /* \file \brief Matrix classes with value semantics. */ #pragma once #if !defined(__CUDACC_RTC__) #include <iosfwd> #include <cmath> #endif #include "cutlass/cutlass.h" #include "cutlass/array.h" #include "cutlass/coord.h" #include "cutlass/fast_math.h" #include "cutlass/layout/matrix.h" namespace cutlass { ///////////////////////////////////////////////////////////////////////////////////////////////// /// Primary template with partial specializations to follow template <typename Element, int Rows, int Columns> struct Matrix; ///////////////////////////////////////////////////////////////////////////////////////////////// /// 1-by-2 matrix template class definition template <typename Element_> struct Matrix<Element_, 1, 2> { // // Type definitions // /// Element data type using Element = Element_; /// Number of rows in matrix static int const kRows = 1; /// Number of columns in matrix static int const kColumns = 2; /// Layout of matrix in underlying array using Layout = layout::RowMajor; /// Number of elements in matrix static int const kCount = 2; // // Data members // /// Elements of the matrix in row-major layout Array<Element, kCount> data; // // Methods // /// Constructs a zero matrix CUTLASS_HOST_DEVICE Matrix() { data.clear(); } /// Copy constructor for a 1-by-2 matrix CUTLASS_HOST_DEVICE Matrix(Matrix const &rhs) { data = rhs.data; } /// Constucts a 1-by-2 matrix from scalar elements CUTLASS_HOST_DEVICE Matrix( Element _0_0, Element _0_1 ) { data[0] = _0_0; data[1] = _0_1; } /// Constructs a matrix from a uniform element CUTLASS_HOST_DEVICE static Matrix uniform(Element s) { Matrix m; m.data[0] = s; m.data[1] = s; return m; } /// Constructs a matrix from a uniform element 1 CUTLASS_HOST_DEVICE static Matrix ones() { return uniform(Element(1)); } /// Constructs a matrix from a uniform element 0 CUTLASS_HOST_DEVICE static Matrix zero() { return Matrix(); } /// Returns a transposed matrix CUTLASS_HOST_DEVICE Matrix<Element, 2, 1> transpose() const { Matrix<Element, 2, 1> mt; mt.data[0] = data[0]; mt.data[1] = data[1]; return mt; } /// Accesses an element by coordinate CUTLASS_HOST_DEVICE Element at(int i, int j) const { return data[i * 1 + j]; } /// Accesses an element by coordinate CUTLASS_HOST_DEVICE Element & at(int i, int j) { return data[i * 1 + j]; } /// Accesses an element by coordinate CUTLASS_HOST_DEVICE Element at(Coord<2> const &coord) const { return at(coord[0], coord[1]); } /// Accesses an element by coordinate CUTLASS_HOST_DEVICE Element & at(Coord<2> const &coord) { return at(coord[0], coord[1]); } /// Accesses an element by offset CUTLASS_HOST_DEVICE Element &at(int offset) { return data[offset]; } /// Accesses an element by offset CUTLASS_HOST_DEVICE Element at(int offset) const { return data[offset]; } /// Accesses an element by coordinate CUTLASS_HOST_DEVICE Element operator[](Coord<2> const &coord) const { return at(coord[0], coord[1]); } /// Accesses an element by coordinate CUTLASS_HOST_DEVICE Element & operator[](Coord<2> const &coord) { return at(coord[0], coord[1]); } /// Accesses an element by offset CUTLASS_HOST_DEVICE Element & operator[](int offset) { return data[offset]; } /// Accesses an element by offset CUTLASS_HOST_DEVICE Element operator[](int offset) const { return data[offset]; } /// Gets a submatrix with optional offset CUTLASS_HOST_DEVICE Matrix<Element, 1, 2> slice_1x2(int i = 0, int j = 0) const { Matrix<Element, 1, 2> m; m.data[0] = data[i * 2 + j + 0]; m.data[1] = data[i * 2 + j + 1]; return m; } /// Overwrites a submatrix with optional offset CUTLASS_HOST_DEVICE Matrix & set_slice_1x2(Matrix<Element, 1, 2> const &m, int i = 0, int j = 0) { data[i * 2 + j + 0] = m.data[0]; data[i * 2 + j + 1] = m.data[1]; return *this; } CUTLASS_HOST_DEVICE Matrix<Element, 1, 2> row(int i) const { return slice_1x2(i, 0); } CUTLASS_HOST_DEVICE Matrix &set_row(Matrix<Element, 1, 2> const &v, int i = 0) { return set_slice_1x2(v, i, 0); } /// Forms a 1-by-2 matrix by horizontally concatenating an Element with an Element CUTLASS_HOST_DEVICE static Matrix hcat(Element lhs, Element rhs) { return Matrix( lhs, rhs); } /// Concatenates this matrix with a an Element to form a 1-by-3 matrix CUTLASS_HOST_DEVICE Matrix<Element, 1, 3> hcat(Element rhs) const { return Matrix<Element, 1, 3>::hcat(*this, rhs); } /// Concatenates this matrix with a a 1-by-2 matrix to form a 1-by-4 matrix CUTLASS_HOST_DEVICE Matrix<Element, 1, 4> hcat(Matrix<Element, 1, 2> const & rhs) const { return Matrix<Element, 1, 4>::hcat(*this, rhs); } /// Concatenates this matrix with a a 1-by-2 matrix to form a 2-by-2 matrix CUTLASS_HOST_DEVICE Matrix<Element, 2, 2> vcat(Matrix<Element, 1, 2> const & rhs) const { return Matrix<Element, 2, 2>::vcat(*this, rhs); } /// Concatenates this matrix with a a 2-by-2 matrix to form a 3-by-2 matrix CUTLASS_HOST_DEVICE Matrix<Element, 3, 2> vcat(Matrix<Element, 2, 2> const & rhs) const { return Matrix<Element, 3, 2>::vcat(*this, rhs); } /// Concatenates this matrix with a a 3-by-2 matrix to form a 4-by-2 matrix CUTLASS_HOST_DEVICE Matrix<Element, 4, 2> vcat(Matrix<Element, 3, 2> const & rhs) const { return Matrix<Element, 4, 2>::vcat(*this, rhs); } /// Elementwise add operator (1-by-2) CUTLASS_HOST_DEVICE Matrix add(Matrix const &rhs) const { Matrix result; result.data[0] = data[0] + rhs.data[0]; resu

评论收藏

内容反馈