JavaCPP Presets for CUDA
========================
Introduction
------------
This directory contains the JavaCPP Presets module for:
* CUDA 7.5 https://developer.nvidia.com/cuda-zone

Please refer to the parent README.md file for more detailed information about the JavaCPP Presets.

Documentation
-------------
Java API documentation is available here:
* http://bytedeco.org/javacpp-presets/cuda/apidocs/
* We can also [use Thrust with JavaCPP](https://github.com/bytedeco/javacpp/wiki/Interface-Thrust-and-CUDA).

Sample Usage
------------
Here is a simple example of cuDNN ported to Java from the `mnistCUDNN.cpp` sample file included in `cudnn-sample-v2.tgz` available at:
* https://developer.nvidia.com/cudnn

We can use [Maven 3](http://maven.apache.org/) to automatically download and install all the class files as well as the native binaries. To run this sample code, after creating the `pom.xml` and `src/main/java/MNISTCUDNN.java` source files below, simply execute on the command line:
```bash
$ mvn compile exec:java
```
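
Before running the full sample, it can help to confirm that the presets and their native CUDA binaries actually load on the machine. The snippet below is a minimal sketch, not part of the original sample: the class name `CheckCUDA` is arbitrary, and it assumes the `int[]` overload that JavaCPP generates for `cudaGetDeviceCount()`.

```java
import static org.bytedeco.javacpp.cuda.*;

public class CheckCUDA {
    public static void main(String[] args) {
        // Asking the CUDA runtime for the device count forces the native
        // libraries bundled by the presets to load.
        int[] count = new int[1];
        int status = cudaGetDeviceCount(count);
        if (status != cudaSuccess) {
            System.err.println("cudaGetDeviceCount failed with status " + status);
            System.exit(1);
        }
        System.out.println("CUDA devices visible: " + count[0]);
    }
}
```

Running it (for example by temporarily pointing `exec.mainClass` at `CheckCUDA`) should print the number of CUDA devices before any cuDNN code is involved.
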
### The `pom.xml` build file
```xml
<project>
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.bytedeco.javacpp-presets.cuda</groupId>
    <artifactId>mnistcudnn</artifactId>
    <version>1.1</version>
    <properties>
        <exec.mainClass>MNISTCUDNN</exec.mainClass>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.bytedeco.javacpp-presets</groupId>
            <artifactId>cuda</artifactId>
            <version>7.5-1.1</version>
        </dependency>
    </dependencies>
</project>
```
### The `src/main/java/MNISTCUDNN.java` source file
```java
/**
 * Copyright 2014 NVIDIA Corporation. All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */
/*
 * This example demonstrates how to use the cuDNN library to implement a forward
 * pass. The sample loads weights and biases from a trained network,
 * takes a few images of digits and recognizes them. The network was trained on
 * the MNIST dataset using Caffe. The network consists of two
 * convolution layers, two pooling layers, one ReLU and two
 * fully connected layers. The final layer is processed by Softmax.
 * cublasSgemv is used to implement the fully connected layers.
 */
import java.io.*;
import org.bytedeco.javacpp.*;
import static org.bytedeco.javacpp.cublas.*;
import static org.bytedeco.javacpp.cuda.*;
import static org.bytedeco.javacpp.cudnn.*;
public class MNISTCUDNN {
    static final int IMAGE_H = 28;
    static final int IMAGE_W = 28;
    static final String first_image = "one_28x28.pgm";
    static final String second_image = "three_28x28.pgm";
    static final String third_image = "five_28x28.pgm";
    static final String conv1_bin = "conv1.bin";
    static final String conv1_bias_bin = "conv1.bias.bin";
    static final String conv2_bin = "conv2.bin";
    static final String conv2_bias_bin = "conv2.bias.bin";
    static final String ip1_bin = "ip1.bin";
    static final String ip1_bias_bin = "ip1.bias.bin";
    static final String ip2_bin = "ip2.bin";
    static final String ip2_bias_bin = "ip2.bias.bin";
    /********************************************************
     * Prints the error message, and exits
     * ******************************************************/
    static final int EXIT_FAILURE = 1;
    static final int EXIT_SUCCESS = 0;
    static final int EXIT_WAIVED = 0;
    static void FatalError(String s) {
        System.err.println(s);
        Thread.dumpStack();
        System.err.println("Aborting...");
        cudaDeviceReset();
        System.exit(EXIT_FAILURE);
    }
    static void checkCUDNN(int status) {
        if (status != CUDNN_STATUS_SUCCESS) {
            FatalError("CUDNN failure: " + status);
        }
    }
    static void checkCudaErrors(int status) {
        if (status != 0) {
            FatalError("Cuda failure: " + status);
        }
    }
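    // Returns the location of a data file; this port expects the weights and images
    // under "data/" and ignores the pname argument kept from the original C++ sample.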
    static String get_path(String fname, String pname) {
        return "data/" + fname;
    }
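    // Holds the weights and biases of one layer, both on the host (data_h, bias_h)
    // and on the device (data_d, bias_d).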
    static class Layer_t {
        int inputs = 0;
        int outputs = 0;
        // linear dimension (i.e. size is kernel_dim * kernel_dim)
        int kernel_dim = 0;
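        // One-element arrays let readBinaryFile() hand the allocated host and device
        // pointers back to the caller.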
        FloatPointer[] data_h = new FloatPointer[1], data_d = new FloatPointer[1];
        FloatPointer[] bias_h = new FloatPointer[1], bias_d = new FloatPointer[1];
        Layer_t(int _inputs, int _outputs, int _kernel_dim, String fname_weights,
                String fname_bias, String pname) {
            inputs = _inputs; outputs = _outputs; kernel_dim = _kernel_dim;
            String weights_path, bias_path;
            if (pname != null) {
                weights_path = get_path(fname_weights, pname);
                bias_path = get_path(fname_bias, pname);
            } else {
                weights_path = fname_weights; bias_path = fname_bias;
            }
            readBinaryFile(weights_path, inputs * outputs * kernel_dim * kernel_dim,
                    data_h, data_d);
            readBinaryFile(bias_path, outputs, bias_h, bias_d);
        }
        public void release() {
            // free both device buffers allocated in readBinaryFile()
            checkCudaErrors( cudaFree(data_d[0]) );
            checkCudaErrors( cudaFree(bias_d[0]) );
        }
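        // Reads "size" floats from a binary file into host memory and copies them
        // to a freshly allocated device buffer.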
        private void readBinaryFile(String fname, int size, FloatPointer[] data_h, FloatPointer[] data_d) {
            try {
                FileInputStream stream = new FileInputStream(fname);
                int size_b = size*Float.BYTES;
                byte[] data = new byte[size_b];
                if (stream.read(data) < size_b) {
                    FatalError("Error reading file " + fname);
                }
                stream.close();
                data_h[0] = new FloatPointer(new BytePointer(data));
                data_d[0] = new FloatPointer();
                checkCudaErrors( cudaMalloc(data_d[0], size_b) );
                checkCudaErrors( cudaMemcpy(data_d[0], data_h[0],
                                            size_b,
                                            cudaMemcpyHostToDevice) );
            } catch (IOException e) {
                FatalError("Error opening file " + fname);
            }
        }
    }
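    // Copies a device vector back to the host and prints its elements.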
    static void printDeviceVector(int size, FloatPointer vec_d) {
        FloatPointer vec = new FloatPointer(size);
        cudaDeviceSynchronize();
        cudaMemcpy(vec, vec_d, size*Float.BYTES, cudaMemcpyDeviceToHost);
        for (int i = 0; i < size; i++) {
            System.out.print(vec.get(i) + " ");
        }
        System.out.println();
    }
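    // Wraps the cuDNN and cuBLAS handles plus the tensor, filter, convolution and
    // pooling descriptors used to run the forward pass.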
    static class network_t {
        int dataType = CUDNN_DATA_FLOAT;
        int tensorFormat = CUDNN_TENSOR_NCHW;
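        // The handles and descriptors below are created in createHandles() and
        // released in destroyHandles().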
        cudnnContext cudnnHandle = new cudnnContext();
        cudnnTensorStruct srcTensorDesc = new cudnnTensorStruct(),
                          dstTensorDesc = new cudnnTensorStruct(),
                          biasTensorDesc = new cudnnTensorStruct();
        cudnnFilterStruct filterDesc = new cudnnFilterStruct();
        cudnnConvolutionStruct convDesc = new cudnnConvolutionStruct();
        cudnnPoolingStruct poolingDesc = new cudnnPoolingStruct();
        cublasContext cublasHandle = new cublasContext();
        void createHandles() {
            checkCUDNN( cudnnCreate(cudnnHandle) );
            checkCUDNN( cudnnCreateTensorDescriptor(srcTensorDesc) );
            checkCUDNN( cudnnCreateTensorDescriptor(dstTensorDesc) );
            checkCUDNN( cudnnCreateTensorDescriptor(biasTensorDesc) );
            checkCUDNN( cudnnCreateFilterDescriptor(filterDesc) );
            checkCUDNN( cudnnCreateConvolutionDescriptor(convDesc) );
            checkCUDNN( cudnnCreatePoolingDescriptor(poolingDesc) );
            checkCudaErrors( cublasCreate_v2(cublasHandle) );
        }
        void destroyHandles() {
checkCUDNN( cudnn