numa.rar_NUMA资源-CSDN文库

共2个文件

c：1个

h：1个

版权申诉

numa

159 浏览量 2022-09-14 22:20:54 上传评论收藏 11KB RAR 举报

资源推荐

资源详情

资源评论

收起资源包目录

numa.rar （2个子文件）

numa.c 41KB

numa.h 253B

/*
 * numa.c
 *
 * numa: Simulate NUMA-sensitive workload and measure their NUMA performance
 */

#include "../perf.h"
#include "../builtin.h"
#include "../util/util.h"
#include "../util/parse-options.h"

#include "bench.h"

#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <assert.h>
#include <malloc.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <sys/prctl.h>
#include <sys/types.h>

#include <numa.h>
#include <numaif.h>

/*
 * Regular printout to the terminal, suppressed if -q is specified:
 */
#define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0)

/*
 * Debug printf:
 */
#define dprintf(x...) do { if (g && g->p.show_details >= 1) printf(x); } while (0)

/* Per-task state: one entry per worker thread. */
struct thread_data {
	int			curr_cpu;
	cpu_set_t		bind_cpumask;
	int			bind_node;
	u8			*process_data;
	int			process_nr;
	int			thread_nr;
	int			task_nr;
	unsigned int		loops_done;
	u64			val;
	u64			runtime_ns;
	pthread_mutex_t		*process_lock;
};

/* Parameters set by options: */

struct params {
	/* Startup synchronization: */
	bool			serialize_startup;

	/* Task hierarchy: */
	int			nr_proc;
	int			nr_threads;

	/* Working set sizes: */
	const char		*mb_global_str;
	const char		*mb_proc_str;
	const char		*mb_proc_locked_str;
	const char		*mb_thread_str;

	double			mb_global;
	double			mb_proc;
	double			mb_proc_locked;
	double			mb_thread;

	/* Access patterns to the working set: */
	bool			data_reads;
	bool			data_writes;
	bool			data_backwards;
	bool			data_zero_memset;
	bool			data_rand_walk;
	u32			nr_loops;
	u32			nr_secs;
	u32			sleep_usecs;

	/* Working set initialization: */
	bool			init_zero;
	bool			init_random;
	bool			init_cpu0;

	/* Misc options: */
	int			show_details;
	int			run_all;
	int			thp;

	long			bytes_global;
	long			bytes_process;
	long			bytes_process_locked;
	long			bytes_thread;

	int			nr_tasks;
	bool			show_quiet;

	bool			show_convergence;
	bool			measure_convergence;

	int			perturb_secs;
	int			nr_cpus;
	int			nr_nodes;

	/* Affinity options -C and -M: */
	char			*cpu_list_str;
	char			*node_list_str;
};

/* Global, read-writable area, accessible to all processes and threads: */

struct global_info {
	u8			*data;

	pthread_mutex_t		startup_mutex;
	int			nr_tasks_started;

	pthread_mutex_t		startup_done_mutex;

	pthread_mutex_t		start_work_mutex;
	int			nr_tasks_working;

	pthread_mutex_t		stop_work_mutex;
	u64			bytes_done;

	struct thread_data	*threads;

	/* Convergence latency measurement: */
	bool			all_converged;
	bool			stop_work;

	int			print_once;

	struct params		p;
};

static struct global_info	*g = NULL;

static int parse_cpus_opt(const struct option *opt, const char *arg, int unset);
static int parse_nodes_opt(const struct option *opt, const char *arg, int unset);

/* Option parsing target: every entry of options[] below stores into p0. */
struct params p0;

static const struct option options[] = {
	OPT_INTEGER('p', "nr_proc"	, &p0.nr_proc,		"number of processes"),
	OPT_INTEGER('t', "nr_threads"	, &p0.nr_threads,	"number of threads per process"),

	OPT_STRING('G', "mb_global"	, &p0.mb_global_str,	"MB", "global memory (MBs)"),
	OPT_STRING('P', "mb_proc"	, &p0.mb_proc_str,	"MB", "process memory (MBs)"),
	OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"),
	OPT_STRING('T', "mb_thread"	, &p0.mb_thread_str,	"MB", "thread memory (MBs)"),

	OPT_UINTEGER('l', "nr_loops"	, &p0.nr_loops,		"max number of loops to run"),
	OPT_UINTEGER('s', "nr_secs"	, &p0.nr_secs,		"max number of seconds to run"),
	OPT_UINTEGER('u', "usleep"	, &p0.sleep_usecs,	"usecs to sleep per loop iteration"),

	/* Help text fixed: -R selects reads (it used to repeat the -W text). */
	OPT_BOOLEAN('R', "data_reads"	, &p0.data_reads,	"access the data via reads (can be mixed with -W)"),
	OPT_BOOLEAN('W', "data_writes"	, &p0.data_writes,	"access the data via writes (can be mixed with -R)"),
	OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards,	"access the data backwards as well"),
	OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"),
	OPT_BOOLEAN('r', "data_rand_walk", &p0.data_rand_walk,	"access the data with random (32bit LFSR) walk"),

	OPT_BOOLEAN('z', "init_zero"	, &p0.init_zero,	"bzero the initial allocations"),
	OPT_BOOLEAN('I', "init_random"	, &p0.init_random,	"randomize the contents of the initial allocations"),
	OPT_BOOLEAN('0', "init_cpu0"	, &p0.init_cpu0,	"do the initial allocations on CPU#0"),
	OPT_INTEGER('x', "perturb_secs", &p0.perturb_secs,	"perturb thread 0/0 every X secs, to test convergence stability"),

	OPT_INCR   ('d', "show_details"	, &p0.show_details,	"Show details"),
	OPT_INCR   ('a', "all"		, &p0.run_all,		"Run all tests in the suite"),
	OPT_INTEGER('H', "thp"		, &p0.thp,		"MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
	OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"),
	OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"),
	/* Help text fixed: it used to repeat the -z description. */
	OPT_BOOLEAN('q', "quiet"	, &p0.show_quiet,	"quiet mode"),
	OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),

	/* Special option string parsing callbacks: */
	OPT_CALLBACK('C', "cpus", NULL, "cpu[,cpu2,...cpuN]",
			"bind the first N tasks to these specific cpus (the rest is unbound)",
			parse_cpus_opt),
	OPT_CALLBACK('M', "memnodes", NULL, "node[,node2,...nodeN]",
			"bind the first N tasks to these specific memory nodes (the rest is unbound)",
			parse_nodes_opt),
	OPT_END()
};

static const char * const bench_numa_usage[] = {
	"perf bench numa <options>",
	NULL
};

static const char * const numa_usage[] = {
	"perf bench numa mem [<options>]",
	NULL
};

/*
 * Restrict the caller's CPU affinity to @target_cpu, or to CPUs
 * 0..nr_cpus-1 when @target_cpu is -1.
 *
 * Returns the affinity mask that was in effect before the call, so the
 * caller can restore it later with bind_to_cpumask().
 */
static cpu_set_t bind_to_cpu(int target_cpu)
{
	cpu_set_t orig_mask, mask;
	int ret;

	ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
	BUG_ON(ret);

	CPU_ZERO(&mask);

	if (target_cpu == -1) {
		int cpu;

		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
			CPU_SET(cpu, &mask);
	} else {
		BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
		CPU_SET(target_cpu, &mask);
	}

	ret = sched_setaffinity(0, sizeof(mask), &mask);
	BUG_ON(ret);

	return orig_mask;
}

/*
 * Restrict the caller's CPU affinity to the CPUs of @target_node, or to
 * CPUs 0..nr_cpus-1 when @target_node is -1.
 *
 * Node N is taken to own the contiguous CPU range
 * [N*cpus_per_node, (N+1)*cpus_per_node); the BUG_ON()s below enforce
 * that nr_cpus divides evenly by nr_nodes.
 *
 * Returns the affinity mask that was in effect before the call.
 */
static cpu_set_t bind_to_node(int target_node)
{
	int cpus_per_node = g->p.nr_cpus/g->p.nr_nodes;
	cpu_set_t orig_mask, mask;
	int cpu;
	int ret;

	BUG_ON(cpus_per_node*g->p.nr_nodes != g->p.nr_cpus);
	BUG_ON(!cpus_per_node);

	ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
	BUG_ON(ret);

	CPU_ZERO(&mask);

	if (target_node == -1) {
		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
			CPU_SET(cpu, &mask);
	} else {
		int cpu_start = (target_node + 0) * cpus_per_node;
		int cpu_stop  = (target_node + 1) * cpus_per_node;

		BUG_ON(cpu_stop > g->p.nr_cpus);

		for (cpu = cpu_start; cpu < cpu_stop; cpu++)
			CPU_SET(cpu, &mask);
	}

	ret = sched_setaffinity(0, sizeof(mask), &mask);
	BUG_ON(ret);

	return orig_mask;
}

/* Apply @mask as the caller's CPU affinity (e.g. a mask saved by bind_to_cpu()). */
static void bind_to_cpumask(cpu_set_t mask)
{
	int ret;

	ret = sched_setaffinity(0, sizeof(mask), &mask);
	BUG_ON(ret);
}

/* Reset the caller's memory policy to MPOL_DEFAULT. */
static void mempol_restore(void)
{
	int ret;

	ret = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1);

	BUG_ON(ret);
}

/*
 * Bind the caller's memory allocations to @node via MPOL_BIND.
 * A @node of -1 means "no binding" and leaves the policy untouched.
 */
static void bind_to_memnode(int node)
{
	/* Number of node bits a single unsigned long mask can carry. */
	const int nodemask_bits = (int)(sizeof(unsigned long) * 8);
	unsigned long nodemask;
	int ret;

	if (node == -1)
		return;

	/*
	 * The capacity of the mask is its size in bits, not in bytes;
	 * this matches the maxnode value passed to set_mempolicy() below.
	 */
	BUG_ON(g->p.nr_nodes > nodemask_bits);
	/* Keep the shift below well-defined. */
	BUG_ON(node < 0 || node >= nodemask_bits);

	nodemask = 1UL << node;

	ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
	dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);

	BUG_ON(ret);
}

#define HPSIZE (2*1024*1024)

/* Format a task name into a local buffer and install it with PR_SET_NAME. */
#define set_taskname(fmt...)				\
do {							\
	char name[20];					\
							\
	snprintf(name, sizeof(name), fmt);		\
	prctl(PR_SET_NAME, name);			\
} while (0)

static u8 *alloc_data(ssize_t bytes0, int map_flags,
		      int init_zero, int init_cpu0, int thp, int init_random)
{
	cpu_set_t orig_mask;
	ssize_t bytes;
	u8 *buf;
	int ret;

	if (!bytes0)
		return NULL;

	/* Allocate and initialize all memory on CPU#0: */
	if (init_cp

评论收藏

内容反馈

版权申诉