窗体顶端
nohz下的timer机制和进程调度
在2.6.21内核之前,时钟中断是周期的,即以HZ为频率,系统总是被动地接受时钟中断,然后运行中断处理程序。如果实在没有任务可以运行,那么就执行idle,这也许也算一种创意,可是时钟中断还是会周期性地打破idle,然后查询有没有需要做的事情,如果没有就继续idle。
以往的进程在特定的固定时间片内运行,时钟的定时中断提供了时间片的监督工作,一切显得十分和谐,可是系统内核本身没有主权,一切都在硬件的安排下进行。
随后的2.6.22以后,nohz才出现,nohz其实就是动态设置下一次的中断时间,而不是使用系统无条件的默认的HZ中断。
这样cfs调度器再也不用受制于底层的时钟以及时间片分配特性,Linux可以动态设置时间片长短,按照自己的方式来进行调度。
nohz其实就是托了抽象出来的clocksource和clock_event_device的福,这两个结构体就是时钟以及时钟行为的抽象。
先熟悉两个数据结构
struct timer_list :软件时钟,记录了软件时钟的到期时间以及到期后要执行的操作。
struct tvec_base :用于组织、管理软件时钟的结构。在 SMP 系统中,每个 CPU 有一个。
/*
 * struct timer_list - a software timer.
 *
 * Records when the timer expires and what to do once it has expired.
 */
struct timer_list {
	struct list_head entry;			/* list this timer is linked on */
	unsigned long expires;			/* expiry time, in ticks */
	void (*function)(unsigned long);	/* callback run on expiry */
	unsigned long data;			/* argument passed to the callback */
	struct tvec_t_base_s *base;		/* the tvec base this timer belongs to */
#ifdef CONFIG_TIMER_STATS
	void *start_site;
	char start_comm[16];
	int start_pid;
#endif
};
/*
 * struct tvec_t_base_s - container that organizes and manages software
 * timers. Timers are sorted into five vectors (tv1..tv5) according to how
 * far their expiry time lies beyond timer_jiffies.
 */
struct tvec_t_base_s {
	spinlock_t lock;
	struct timer_list *running_timer;	/* timer currently being processed */
	unsigned long timer_jiffies;		/* expiry time currently being processed */
	tvec_root_t tv1;	/* expiry in [timer_jiffies, timer_jiffies + 2^8], bounds included */
	tvec_t tv2;		/* expiry in [timer_jiffies + 2^8, timer_jiffies + 2^14], bounds included */
	tvec_t tv3;		/* 2^14 .. 2^20 */
	tvec_t tv4;		/* 2^20 .. 2^26 */
	tvec_t tv5;		/* 2^26 .. 2^32 */
} ____cacheline_aligned;

typedef struct tvec_t_base_s tvec_base_t;
//下面开始跟踪内核中timer的代码,内核版本2.6.24
/*
 * This function runs timers and the timer-tq in bottom half context.
 *
 * It is the bottom half of the timer interrupt. @h is not used by the body.
 */
static void run_timer_softirq(struct softirq_action *h)
{
	/* Fetch this CPU's tvec_base_t. */
	tvec_base_t *base = __get_cpu_var(tvec_bases);

	/* This call is the opportunity to switch to nohz or hres mode. */
	hrtimer_run_queues();

	/* Run the timer callbacks once jiffies has reached base->timer_jiffies. */
	if (time_after_eq(jiffies, base->timer_jiffies))
		__run_timers(base);
}
/*
* Called from timer softirq every jiffy, expire hrtimers:
*
* For HRT its the fall back code to run the softirq in the timer
* softirq context in case the hrtimer initialization failed or has
* not been done yet.
*/
void hrtimer_run_queues(void)
{
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
int i;
* Called from timer softirq every jiffy, expire hrtimers:
*
* For HRT its the fall back code to run the softirq in the timer
* softirq context in case the hrtimer initialization failed or has
* not been done yet.
*/
void hrtimer_run_queues(void)
{
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
int i;
if (hrtimer_hres_active())
return;
return;
/*
* This _is_ ugly: We have to check in the softirq context,
* whether we can switch to highres and / or nohz mode. The
* clocksource switch happens in the timer interrupt with
* xtime_lock held. Notification from there only sets the
* check bit in the tick_oneshot code, otherwise we might
* deadlock vs. xtime_lock.
*/
if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) //这个if判断就是具体切换到hres或者nohz的代码
if (hrtimer_switch_to_hres())
return;
* This _is_ ugly: We have to check in the softirq context,
* whether we can switch to highres and / or nohz mode. The
* clocksource switch happens in the timer interrupt with
* xtime_lock held. Notification from there only sets the
* check bit in the tick_oneshot code, otherwise we might
* deadlock vs. xtime_lock.
*/
if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) //这个if判断就是具体切换到hres或者nohz的代码
if (hrtimer_switch_to_hres())
return;
hrtimer_get_softirq_time(cpu_base);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
run_hrtimer_queue(cpu_base, i);
}
}
/**
* Check, if a change happened, which makes oneshot possible.
*
* Called cyclic from the hrtimer softirq (driven by the timer
* softirq) allow_nohz signals, that we can switch into low-res nohz
* mode, because high resolution timers are disabled (either compile
* or runtime).
*/
int tick_check_oneshot_change(int allow_nohz)
{
struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
* Check, if a change happened, which makes oneshot possible.
*
* Called cyclic from the hrtimer softirq (driven by the timer
* softirq) allow_nohz signals, that we can switch into low-res nohz
* mode, because high resolution timers are disabled (either compile
* or runtime).
*/
int tick_check_oneshot_change(int allow_nohz)
{
struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
if (!test_and_clear_bit(0, &ts->check_clocks))
return 0;
return 0;
if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
return 0;
return 0;
if (!timekeeping_is_continuous() || !tick_is_oneshot_available())
return 0;
return 0;
if (!allow_nohz)
return 1;
return 1;
tick_nohz_switch_to_nohz(); //如果满足调节,切换到nohz
return 0;
}
return 0;
}
/**
* tick_nohz_switch_to_nohz - switch to nohz mode
*/
static void tick_nohz_switch_to_nohz(void)
* tick_nohz_switch_to_nohz - switch to nohz mode
*/
static void tick_nohz_switch_to_nohz(void)
{
struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
ktime_t next;
struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
ktime_t next;
if (!tick_nohz_enabled)
return;
return;
local_irq_disable();
if (tick_switch_to_oneshot(tick_nohz_handler)) { //timer改成oneshot模式(一次性定时器),同时指定回调函数tick_nohz_handler
local_irq_enable();
return;
}
if (tick_switch_to_oneshot(tick_nohz_handler)) { //timer改成oneshot模式(一次性定时器),同时指定回调函数tick_nohz_handler
local_irq_enable();
return;
}
ts->nohz_mode = NOHZ_MODE_LOWRES;
/*
* Recycle the hrtimer in ts, so we can share the
* hrtimer_forward with the highres code.
*/
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
/* Get the next period */
next = tick_init_jiffy_update();
* hrtimer_forward with the highres code.
*/
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
/* Get the next period */
next = tick_init_jiffy_update();
for (;;) {
ts->pires = next;
if (!tick_program_event(next, 0))
break;
next = ktime_add(next, tick_period);
}
local_irq_enable();
ts->pires = next;
if (!tick_program_event(next, 0))
break;
next = ktime_add(next, tick_period);
}
local_irq_enable();
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论