#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#include <fstream>
#include <iostream>
#include <sstream>
using namespace std;
// Number of doubles that fit in one 512 MiB benchmark buffer.
#define SIZE ((1024*1024*512) / (sizeof (double)))
// Row and column count of the square matrices built by matadd().
#define mats 10000
// NUM: number of worker threads (argv[1]); FUN: index of the benchmark
// routine to run (argv[3]). Both are assigned in set().
int NUM, FUN;
// Block size taken from argv[2]. The worker routines use it both as the
// loop stride (in array elements) and as the byte count handed to
// memchr/memcpy.
u_int64_t BLOCK;
// Huge Matrix add to flush memory
void matadd() {
long double *a[mats], *b[mats], *c[mats];
for (volatile u_int32_t i = 0; i < mats; i++) {
a[i] = (long double *) malloc(sizeof (long double) *mats);
b[i] = (long double *) malloc(sizeof (long double) *mats);
c[i] = (long double *) malloc(sizeof (long double) *mats);
}
srand((unsigned int) time(0));
for (volatile u_int32_t i = 0; i < mats; i++) {
for (volatile u_int32_t j = 0; j < mats; j++) {
a[i][j] = rand() / (RAND_MAX / 11);
b[i][j] = rand() / (RAND_MAX / 11);
}
}
for (volatile u_int32_t i = 0; i < mats; i++) {
for (volatile u_int32_t j = 0; j < mats; j++) {
c[i][j] += a[i][j] + b[j][j];
}
}
cout << "matadd done\n";
}
// Read the x86 time-stamp counter and return it as one 64-bit value.
static inline u_int64_t
rdtsc() {
    u_int32_t lo, hi;
    // Instruction is volatile because we don't want it to move
    // over an adjacent gettimeofday. That would ruin the timing
    // calibrations.
    // RDTSC leaves the low half in EAX and the high half in EDX. They are
    // captured as two separate outputs because the combined "A" constraint
    // only denotes the EDX:EAX pair on 32-bit x86; on x86-64 it selects a
    // single register and half of the counter would be lost.
    __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
    return ((u_int64_t) hi << 32) | lo;
}
// Compute 64 bit value / timeval (treated as a real number of seconds).
//
// Returns v divided by the duration t, truncated to an integer, i.e. a
// "per second" rate. A zero or negative duration yields 0: dividing by it
// would give infinity or a negative value, and converting either of those
// to u_int64_t is undefined.
static inline u_int64_t
operator /(u_int64_t v, timeval t) {
    const double seconds = double (t.tv_sec) + double (t.tv_usec) / 1000000.;
    if (!(seconds > 0.)) {
        return 0;
    }
    return u_int64_t(v / seconds);
}
// Compute left - right for timeval structures.
//
// The result is normalised so that 0 <= tv_usec < 1000000; when right is
// later than left, tv_sec is negative and tv_usec still counts forward from
// it (-1.5 s is returned as {-2, 500000}). The arithmetic is done in
// integers, so nothing is narrowed from double and a negative difference is
// never cast to an unsigned type.
static inline timeval
operator -(const timeval &left, const timeval &right) {
    const long long total_us =
            (long long) (left.tv_sec - right.tv_sec) * 1000000LL
            + (long long) (left.tv_usec - right.tv_usec);
    long long sec = total_us / 1000000LL;
    long long usec = total_us % 1000000LL;
    if (usec < 0) {
        // Borrow one second so the microsecond field is non-negative.
        sec -= 1;
        usec += 1000000LL;
    }
    timeval r;
    r.tv_sec = sec;
    r.tv_usec = usec;
    return r;
}
// Thread arguments - Block size, Initial array for read etc.
typedef struct thd_memargs {
    int tid;        // thread index, assigned by main
    int dsize;      // not referenced anywhere in this file
    int *bl;        // random element offsets; only indices that are multiples
                    // of BLOCK are filled, and the random workers read those
    int n;          // SIZE / BLOCK
    int max;        // SIZE - BLOCK, exclusive upper bound for entries of bl
    double *a, *b;  // source and destination buffers of SIZE doubles each
    struct timeval start1, end1;  // spare timestamp pair, not touched by the workers
    struct timeval start2, end2;  // set by the worker around its timed loop

    // Allocates the buffers, fills a with 5.5 (last element 10.0) and draws
    // the random offsets. Terminates the process with an error message if
    // BLOCK is outside [1, SIZE) or if memory cannot be obtained.
    void init() {
        // BLOCK == 0 would divide by zero below, and BLOCK >= SIZE would
        // leave no valid range for the random offsets.
        if (BLOCK == 0 || BLOCK >= SIZE) {
            fprintf(stderr, "init: block size %llu must be in [1, %llu)\n",
                    (unsigned long long) BLOCK, (unsigned long long) SIZE);
            exit(EXIT_FAILURE);
        }
        a = (double *) malloc(SIZE * sizeof (double));
        b = (double *) malloc(SIZE * sizeof (double));
        bl = (int *) malloc(sizeof (int) * SIZE);
        if (a == NULL || b == NULL || bl == NULL) {
            fprintf(stderr, "init: out of memory allocating benchmark buffers\n");
            exit(EXIT_FAILURE);
        }
        n = SIZE / BLOCK;
        max = SIZE - BLOCK;
        srand((unsigned int) time(0));
        for (u_int64_t i = 0; i < SIZE; i++) {
            a[i] = 5.5;
        }
        a[SIZE - 1] = 10.0;
        for (u_int64_t i = 0; i < SIZE; i = i + BLOCK) {
            bl[i] = int(rand() % max); // Random nos. for random read/write
        }
    }
} tmem;
//Sequential Read
//
// Thread entry point; arg points to a tmem. Walks td->a from the start,
// advancing the element index by BLOCK each step and scanning BLOCK bytes
// with memchr at every position, and records the wall-clock time around the
// loop in td->start2 / td->end2. Always returns NULL.
//
// NOTE: the index advances by BLOCK elements while each scan covers BLOCK
// bytes, so one pass reads about SIZE bytes in total.
void *seqread(void *arg) {
    tmem *td = (tmem *) arg;
    const double *src = td->a;
    // Writing the result to a volatile object keeps the compiler from
    // discarding the otherwise unused memchr calls.
    const void *volatile sink = NULL;
    gettimeofday(&td->start2, 0);
    if (BLOCK != 0) { // a zero stride would never terminate
        for (u_int64_t i = 0; i < SIZE; i = i + BLOCK) {
            // Never scan past the end of the buffer on the final block.
            const u_int64_t left = (SIZE - i) * sizeof (double);
            const size_t len = (size_t) (BLOCK < left ? BLOCK : left);
            // Looks for the byte value 10; used purely as a memory read.
            sink = memchr(src + i, 10, len);
        }
    }
    gettimeofday(&td->end2, 0);
    return NULL;
}
//Sequential Write
//
// Thread entry point; arg points to a tmem. Copies from td->a to td->b at
// matching positions, advancing the element index by BLOCK each step and
// copying BLOCK bytes per step, and records the wall-clock time around the
// loop in td->start2 / td->end2. Always returns NULL.
//
// NOTE: the index advances by BLOCK elements while each copy moves BLOCK
// bytes, so one pass copies about SIZE bytes in total.
void *seqwrite(void *arg) {
    tmem *td = (tmem *) arg;
    const double *src = td->a;
    double *dst = td->b;
    gettimeofday(&td->start2, 0);
    if (BLOCK != 0) { // a zero stride would never terminate
        for (u_int64_t i = 0; i < SIZE; i = i + BLOCK) {
            // Never copy past the end of either buffer on the final block.
            const u_int64_t left = (SIZE - i) * sizeof (double);
            const size_t len = (size_t) (BLOCK < left ? BLOCK : left);
            memcpy(dst + i, src + i, len); // Memory Copy
        }
    }
    gettimeofday(&td->end2, 0);
    return NULL;
}
//Random Read
//
// Thread entry point; arg points to a tmem. For every index i that is a
// multiple of BLOCK, scans BLOCK bytes of td->a starting at element
// td->bl[i] with memchr, and records the wall-clock time around the loop in
// td->start2 / td->end2. Always returns NULL.
//
// Relies on td->bl[i] having been filled for those indices with offsets
// below SIZE - BLOCK (as tmem::init does), which keeps every scan in bounds.
void *randread(void *arg) {
    tmem *td = (tmem *) arg;
    const double *src = td->a;
    const int *offsets = td->bl;
    // Writing the result to a volatile object keeps the compiler from
    // discarding the otherwise unused memchr calls.
    const void *volatile sink = NULL;
    gettimeofday(&td->start2, 0);
    if (BLOCK != 0) { // a zero stride would never terminate
        for (u_int64_t i = 0; i < SIZE; i = i + BLOCK) {
            // Looks for the byte value 10; used purely as a memory read.
            sink = memchr(&src[offsets[i]], 10, BLOCK);
        }
    }
    gettimeofday(&td->end2, 0);
    return NULL;
}
//Random Write
//
// Thread entry point; arg points to a tmem. For every index i that is a
// multiple of BLOCK, copies BLOCK bytes from td->a to td->b at element
// offset td->bl[i], and records the wall-clock time around the loop in
// td->start2 / td->end2. Always returns NULL.
//
// Relies on td->bl[i] having been filled for those indices with offsets
// below SIZE - BLOCK (as tmem::init does), which keeps every copy in bounds.
void *randwrite(void *arg) {
    tmem *td = (tmem *) arg;
    const double *src = td->a;
    double *dst = td->b;
    const int *offsets = td->bl;
    gettimeofday(&td->start2, 0);
    if (BLOCK != 0) { // a zero stride would never terminate
        for (u_int64_t i = 0; i < SIZE; i = i + BLOCK) {
            const int at = offsets[i];
            memcpy(&dst[at], &src[at], BLOCK); //Memory Copy
        }
    }
    gettimeofday(&td->end2, 0);
    return NULL;
}
//Set the user defined parameters(no. of threads, Read/Write)
void set(char *argv[]) {
NUM = atoi(argv[1]);
BLOCK = u_int64_t(atoi(argv[2]));
FUN = atoi(argv[3]);
}
int main(int argc, char* argv[]) {
int i, rc;
if (argc < 4) {
cout << "Please enter all the required parameters\n";
exit(0);
}
set(argv);
pthread_t threads[NUM];
tmem td[NUM];
double speed;
typedef void *(*func)(void *);
func farr[4] = {NULL};
farr[0] = &seqread;
farr[1] = &seqwrite;
farr[2] = &randread;
farr[3] = &randwrite;
for (int i = 0; i < NUM; i++) {
td[i].tid = i;
td[i].init();
}
timeval dif;
// Thread Creation
for (i = 0; i < NUM; ++i) {
printf("In main: creating thread %d\n", i);
rc = pthread_create(&threads[i], NULL, farr[FUN], (void *) &td[i]);
}
//Thread join
for (i = 0; i < NUM; ++i) {
rc = pthread_join(threads[i], NULL);
dif = (td[i].end2 - td[i].start2);
speed = (sizeof (double) *SIZE) / dif;
cout << speed << endl;
}
gettimeofday(&td->start1, 0);
for (volatile u_int64_t i = 0; i < SIZE; i = i + 1) {
}
gettimeofday(&td->end1, 0);
// Time for empty loop
u_int64_t emps, empe, empt, tot;
emps = (u_int64_t) td->start1.tv_sec * 1000000 + td->start1.tv_usec;
empe = (u_int64_t) td->end1.tv_sec * 1000000 + td->end1.tv_usec;
empt = empe - emps;
tot = NUM * empt;
//Calculate the start time of first thread and end time of last thread
double dst[NUM], det[NUM], fst = 0, fet = 0;
for (i = 0; i < NUM; i++) {
dst[i] = (double) td[i].start2.tv_sec * 1000000 + td[i].start2.tv_usec;
det[i] = (double) td[i].end2.tv_sec * 1000000 + td[i].end2.tv_usec;
}
fst = det[NUM - 1];
for (i = 0; i < NUM; i++) {
if (det[i] > fet) {
fet = det[i];
}
if (fst > dst[i]) {
fst = dst[i];
}
}
speed = (NUM * sizeof (double) * SIZE) / (fet - fst);
cout << "Throughput(in MBps): " << speed << endl; // Result
// Write results to file
stringstream name;
string fname;
name << "MEM_" << FUN << "_" << BLOCK << "_" << NUM << ".txt";
fname = name.str();
ofstream myfile;
myfile.open(fname.c_str(), ios::app);
if (myfile.is_open()) {
myfile << speed << endl;
myfile.close();
}
// Huge Matrix addition to flush memory
matadd();
return 0;
}