cfq-iosched.rar_fairness资源-CSDN文库

共1个文件

c：1个

版权申诉

54 浏览量 2022-09-23 00:42:37 上传评论收藏 28KB RAR 举报

CFQ（Complete Fairness Queueing，完全公平队列调度）是一种磁盘调度算法，主要应用于Linux内核中，旨在提供一种公平的I/O资源分配机制。这个算法的主要目标是确保系统中的所有进程都能获得大致相等的磁盘访问时间，避免单一进程独占I/O资源，从而提高系统的整体性能和响应速度。 CFQ的工作原理是为每个进程（或进程组）维护独立的请求队列，并把该进程发出的I/O请求放入对应的队列中。每个队列代表一个进程或一组具有相同优先级的进程。当有新的I/O请求到来时，CFQ会根据进程的优先级和历史I/O行为来决定哪个队列应优先服务。这种设计使得每个进程都有机会访问磁盘，减少了等待时间，尤其是对于那些低优先级或者长时间未执行I/O操作的进程。在`cfq-iosched.c`源代码文件中，我们可以看到CFQ调度器的具体实现细节。源代码通常包括以下关键部分： 1. **初始化**：当调度器被加载并绑定到某个块设备的请求队列时，CFQ会被初始化。这里涉及设置默认参数，如`cfq_quantum`、`cfq_slice_sync`、`cfq_slice_idle`等可调参数。 2. **请求处理**：每当有新的I/O请求到达时，调度器需要决定如何处理。CFQ会根据请求类型（读或写）、进程优先级、当前队列状态等因素进行决策。 3. **队列管理**：CFQ维护着多个队列，每个队列对应一个进程或进程组。队列的管理涉及到如何分配新请求到适当的队列，以及何时从队列中取出请求进行服务。 4. **调度策略**：CFQ可能采用时间片轮转、优先级调度、I/O强度均衡等多种策略来确保公平性。例如，它可能会为每个进程分配一定的时间窗口，然后在这段时间内处理其I/O请求。 5. **优先级管理**：根据进程的实时性需求，CFQ可以调整进程的优先级。例如，对用户交互进程赋予更高的优先级，以确保良好的用户体验。 6. **统计和反馈**：CFQ会收集和分析每个进程的I/O行为，以便动态调整调度策略。例如，如果一个进程连续发出大量I/O请求，CFQ可能会降低它的优先级以防止其占用过多资源。 7. **性能优化**：除了公平性，CFQ还需要考虑效率。因此，源代码中可能存在针对特定场景的优化，如预读取、延迟合并等，以减少磁盘寻道时间和提高整体吞吐量。 理解并深入研究`cfq-iosched.c`源代码，有助于我们更好地了解Linux内核是如何管理和调度I/O操作的，以及如何通过调整参数和策略来优化系统的性能和公平性。对于开发人员来说，这有助于他们在设计和实现自己的调度算法时，借鉴和学习CFQ的优秀设计思路。

资源推荐

资源详情

资源评论

收起资源包目录

cfq-iosched.rar （1个子文件）

cfq-iosched.c 120KB

/*
 *  CFQ, or complete fairness queueing, disk scheduler.
 *
 *  Based on ideas from a previously unfinished io
 *  scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
 *
 *  Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/jiffies.h>
#include <linux/rbtree.h>
#include <linux/ioprio.h>
#include <linux/blktrace_api.h>
#include "blk.h"
#include "blk-cgroup.h"

/*
 * tunables
 *
 * Values written as a fraction of HZ are durations in jiffies (HZ jiffies
 * make up one second, cf. the "300 ms" note on cfq_target_latency).
 */
/* max queue in one round of service */
static const int cfq_quantum = 8;
/*
 * NOTE(review): two expiry values, one per array slot; which slot is sync
 * and which is async is not visible in this excerpt - confirm at the users.
 */
static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
/* maximum backwards seek, in KiB */
static const int cfq_back_max = 16 * 1024;
/* penalty of a backwards seek */
static const int cfq_back_penalty = 2;
static const int cfq_slice_sync = HZ / 10;
static int cfq_slice_async = HZ / 25;
static const int cfq_slice_async_rq = 2;
static int cfq_slice_idle = HZ / 125;
static int cfq_group_idle = HZ / 125;
static const int cfq_target_latency = HZ * 3/10; /* 300 ms */
static const int cfq_hist_divisor = 4;

/*
 * offset from end of service tree
 */
#define CFQ_IDLE_DELAY (HZ / 5)

/*
 * below this threshold, we consider thinktime immediate
 */
#define CFQ_MIN_TT (2)

#define CFQ_SLICE_SCALE (5)
#define CFQ_HW_QUEUE_MIN (5)
/* shift used for the fixed-point vfraction in struct cfq_group */
#define CFQ_SERVICE_SHIFT 12

#define CFQQ_SEEK_THR (sector_t)(8 * 100)
#define CFQQ_CLOSE_THR (sector_t)(8 * 1024)
#define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32)
/*
 * True when hweight32() of the queue's seek_history exceeds 32/8 (= 4).
 * NOTE(review): the cfqq argument is not parenthesized in the expansion.
 */
#define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8)

/*
 * Accessors for the scheduler-private data attached to a request:
 * elv.icq (converted by icq_to_cic()), elv.priv[0] and elv.priv[1].
 * NOTE(review): RQ_CFQQ/RQ_CFQG expand to a bare cast expression without
 * outer parentheses, so "RQ_CFQQ(rq)->field" would bind "->" before the
 * cast; assign the result to a typed pointer first.
 */
#define RQ_CIC(rq) icq_to_cic((rq)->elv.icq)
#define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elv.priv[0])
#define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elv.priv[1])

static struct kmem_cache *cfq_pool;

#define CFQ_PRIO_LISTS IOPRIO_BE_NR
/* ioprio class tests on a cfq_queue */
#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)

/* a sample count is considered valid once it exceeds 80 */
#define sample_valid(samples) ((samples) > 80)
/* map an rb_node embedded as cfq_group.rb_node back to its cfq_group */
#define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node)

/*
 * Timing statistics: last request end time plus total / samples / mean.
 * NOTE(review): presumably "ttime" is think time (cf. CFQ_MIN_TT); the code
 * that updates these fields is not part of this excerpt.
 */
struct cfq_ttime {
	unsigned long last_end_request;

	unsigned long ttime_total;
	unsigned long ttime_samples;
	unsigned long ttime_mean;
};

/*
 * Most of our rbtree usage is for sorting with min extraction, so
 * if we cache the leftmost node we don't have to walk down the tree
 * to find it. Idea borrowed from Ingo Molnar's CFS scheduler. We should
 * move this into the elevator for the rq sorting as well.
 */
struct cfq_rb_root {
	struct rb_root rb;
	/* cached leftmost node of rb (see above) */
	struct rb_node *left;
	/* NOTE(review): presumably the number of entries on the tree - confirm */
	unsigned count;
	u64 min_vdisktime;
	struct cfq_ttime ttime;
};
/*
 * Compound-literal initializer for a cfq_rb_root: rb is set to RB_ROOT and
 * ttime.last_end_request to the current jiffies; members not named here are
 * zero-initialized.
 */
#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \
			.ttime = {.last_end_request = jiffies,},}

/*
 * Per process-grouping structure
 */
struct cfq_queue {
	/* reference count */
	int ref;
	/* various state flags, see below */
	unsigned int flags;
	/* parent cfq_data */
	struct cfq_data *cfqd;
	/* service_tree member */
	struct rb_node rb_node;
	/* service_tree key */
	unsigned long rb_key;
	/* prio tree member */
	struct rb_node p_node;
	/* prio tree root we belong to, if any */
	struct rb_root *p_root;
	/* sorted list of pending requests */
	struct rb_root sort_list;
	/* if fifo isn't expired, next request to serve */
	struct request *next_rq;
	/* requests queued in sort_list */
	int queued[2];
	/* currently allocated requests */
	int allocated[2];
	/* fifo list of requests in sort_list */
	struct list_head fifo;

	/* time when queue got scheduled in to dispatch first request. */
	unsigned long dispatch_start;
	unsigned int allocated_slice;
	unsigned int slice_dispatch;
	/* time when first request from queue completed and slice started. */
	unsigned long slice_start;
	unsigned long slice_end;
	long slice_resid;

	/* pending priority requests */
	int prio_pending;
	/* number of requests that are on the dispatch list or inside driver */
	int dispatched;

	/* io prio of this group */
	unsigned short ioprio, org_ioprio;
	unsigned short ioprio_class;

	pid_t pid;

	u32 seek_history;
	sector_t last_request_pos;

	struct cfq_rb_root *service_tree;
	struct cfq_queue *new_cfqq;
	struct cfq_group *cfqg;
	/* Number of sectors dispatched from queue in single dispatch round */
	unsigned long nr_sectors;
};

/*
 * First index in the service_trees.
 *
 * NOTE(review): this comment used to say that IDLE "has negative index".
 * As declared below IDLE_WORKLOAD is 2, so that statement does not match
 * the code. CFQ_PRIO_NR follows it and therefore evaluates to 3, the
 * number of classes.
 */
enum wl_class_t {
	BE_WORKLOAD = 0,
	RT_WORKLOAD = 1,
	IDLE_WORKLOAD = 2,
	CFQ_PRIO_NR,
};

/*
 * Second index in the service_trees.
 */
enum wl_type_t {
	ASYNC_WORKLOAD = 0,
	SYNC_NOIDLE_WORKLOAD = 1,
	SYNC_WORKLOAD = 2
};

/*
 * Per-group statistics. All members are compiled in only when
 * CONFIG_CFQ_GROUP_IOSCHED is defined; the second half additionally
 * requires CONFIG_DEBUG_BLK_CGROUP.
 */
struct cfqg_stats {
#ifdef CONFIG_CFQ_GROUP_IOSCHED
	/* total bytes transferred */
	struct blkg_rwstat service_bytes;
	/* total IOs serviced, post merge */
	struct blkg_rwstat serviced;
	/* number of ios merged */
	struct blkg_rwstat merged;
	/* total time spent on device in ns, may not be accurate w/ queueing */
	struct blkg_rwstat service_time;
	/* total time spent waiting in scheduler queue in ns */
	struct blkg_rwstat wait_time;
	/* number of IOs queued up */
	struct blkg_rwstat queued;
	/* total sectors transferred */
	struct blkg_stat sectors;
	/* total disk time and nr sectors dispatched by this group */
	struct blkg_stat time;
#ifdef CONFIG_DEBUG_BLK_CGROUP
	/* time not charged to this cgroup */
	struct blkg_stat unaccounted_time;
	/* sum of number of ios queued across all samples */
	struct blkg_stat avg_queue_size_sum;
	/* count of samples taken for average */
	struct blkg_stat avg_queue_size_samples;
	/* how many times this group has been removed from service tree */
	struct blkg_stat dequeue;
	/* total time spent waiting for it to be assigned a timeslice. */
	struct blkg_stat group_wait_time;
	/* time spent idling for this blkcg_gq */
	struct blkg_stat idle_time;
	/* total time with empty current active q with other requests queued */
	struct blkg_stat empty_time;
	/* fields after this shouldn't be cleared on stat reset */
	uint64_t start_group_wait_time;
	uint64_t start_idle_time;
	uint64_t start_empty_time;
	uint16_t flags;
#endif /* CONFIG_DEBUG_BLK_CGROUP */
#endif /* CONFIG_CFQ_GROUP_IOSCHED */
};

/* This is per cgroup per device grouping structure */
struct cfq_group {
	/* must be the first member */
	struct blkg_policy_data pd;

	/* group service_tree member */
	struct rb_node rb_node;

	/* group service_tree key */
	u64 vdisktime;

	/*
	 * The number of active cfqgs and sum of their weights under this
	 * cfqg.  This covers this cfqg's leaf_weight and all children's
	 * weights, but does not cover weights of further descendants.
	 *
	 * If a cfqg is on the service tree, it's active.  An active cfqg
	 * also activates its parent and contributes to the children_weight
	 * of the parent.
	 */
	int nr_active;
	unsigned int children_weight;

	/*
	 * vfraction is the fraction of vdisktime that the tasks in this
	 * cfqg are entitled to.  This is determined by compounding the
	 * ratios walking up from this cfqg to the root.
	 *
	 * It is in fixed point w/ CFQ_SERVICE_SHIFT and the sum of all
	 * vfractions on a service tree is approximately 1.  The sum may
	 * deviate a bit due to rounding errors and fluctuations caused by
	 * cfqgs entering and leaving the service tree.
	 */
	unsigned int vfraction;

	/*
	 * There are two weights - (internal) weight is the weight of this
	 * cfqg against the sibling cfqgs.  leaf_weight is the weight of
	 * this cfqg against the child cfqgs.  For the root cfqg, both
	 * weights are kept in sync for backward compatibility.
	 */
	unsigned int weight;
	unsigned int new_weight;
	unsigned int dev_weight;

	unsigned int leaf_weight;
	unsigned int new_leaf_weight;
	unsigned int dev_leaf_weight;

	/* number of cfqq currently on this group */
	int nr_cfqq;

	/*
	 * Per group busy queues average. Useful for workload slice calc. We
	 * create the array for each prio class but at run time it is used
	 * only for RT and BE class and slot for IDLE class remains unused.
	 * This is primarily done to a

评论收藏

内容反馈

版权申诉