/proc/interrupts 的数值是如何获得的?

之前为了确认 /proc/interrupts 文件第一列的缩进方式,看了一下相关源码,在这里做一些记录。

系统一共有多少个中断?

系统可用的中断数量主要由架构决定,x86 的具体数量可以参考以下定义。

/* kernel/irq/irqdesc.c */

 96 int nr_irqs = NR_IRQS;
 97 EXPORT_SYMBOL_GPL(nr_irqs);
/* arch/x86/include/asm/irq_vectors.h */

152 #define NR_IRQS_LEGACY            16
153    
154 #define IO_APIC_VECTOR_LIMIT        ( 32 * MAX_IO_APICS )
155    
156 #ifdef CONFIG_X86_IO_APIC
157 # define CPU_VECTOR_LIMIT       (64 * NR_CPUS)
158 # define NR_IRQS                    \
159     (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ?  \
160         (NR_VECTORS + CPU_VECTOR_LIMIT)  :  \
161         (NR_VECTORS + IO_APIC_VECTOR_LIMIT))
162 #else /* !CONFIG_X86_IO_APIC: */
163 # define NR_IRQS            NR_IRQS_LEGACY
164 #endif

打印/显示 /proc/interrupts 文件

/proc/interrupts 是一个 sequence file。关于 sequence file 的接口定义,可以参考 [2]。

对于 /proc/interrupts,简单地说,在读取这个文件时,系统会遍历 0 ~ nr_irqs (包含 nr_irqs)这些序号,对每个序号都调用 show_interrupts():前面的序号对应各个中断号,用来输出该中断的信息;最后一个序号不对应具体的中断,而是用来输出架构相关的统计信息(见下文 show_interrupts() 中 i == ACTUAL_NR_IRQS 的分支)。

/* fs/proc/interrupts.c */

  8 /*
  9  * /proc/interrupts
 10  */
 11 static void *int_seq_start(struct seq_file *f, loff_t *pos)
 12 {   
 13     return (*pos <= nr_irqs) ? pos : NULL;
 14 }
 15    
 16 static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos)
 17 {  
 18     (*pos)++;
 19     if (*pos > nr_irqs)
 20         return NULL;
 21     return pos;
 22 }  
 23    
 24 static void int_seq_stop(struct seq_file *f, void *v)
 25 {  
 26     /* Nothing to do */
 27 }  
 28    
 29 static const struct seq_operations int_seq_ops = {
 30     .start = int_seq_start,
 31     .next  = int_seq_next,
 32     .stop  = int_seq_stop,
 33     .show  = show_interrupts
 34 }; 
 35    
 36 static int interrupts_open(struct inode *inode, struct file *filp)
 37 {  
 38     return seq_open(filp, &int_seq_ops);
 39 }  
 40    
 41 static const struct file_operations proc_interrupts_operations = {
 42     .open       = interrupts_open,
 43     .read       = seq_read,
 44     .llseek     = seq_lseek,
 45     .release    = seq_release,
 46 }; 
 47    
 48 static int __init proc_interrupts_init(void)
 49 {  
 50     proc_create("interrupts", 0, NULL, &proc_interrupts_operations);
 51     return 0;
 52 }
 53 module_init(proc_interrupts_init); 

show_interrupts()

很惭愧, show_interrupts() 就做了一点微小的工作,大概三件事:

  • 1. 计算 /proc/interrupts 第一列的宽度。 这是我主要关注的问题,因为第一列的宽度/缩进,将影响到一些监测工具如何正确读取 /proc/interrupts 的信息。在 i == 0 (即从头开始输出)的时候,show_interrupts() 会根据 nr_irqs 计算输出的第一列的宽度 prec,并打印 CPU 表头。如果 nr_irqs 小于 1000,则 prec 为 3;如果 nr_irqs 大于等于 1000 且小于 10000,则 prec 为 4;以此类推,prec 最大为 10。
  • 2. 获取每个中断号的信息并显示; 对于 0 <= i < nr_irqs 的中断,会调用 irq_to_desc() 获取中断描述符,并通过 kstat_irqs_cpu() 打印每个在线 CPU 对应的中断计数。如果某个中断既没有注册 action,各 CPU 上的计数也全为 0,则不会输出这一行。
  • 3. 所有中断号信息打印后(即 i == ACTUAL_NR_IRQS 时),调用 arch_show_interrupts(),打印架构相关的中断信息。比如 NMI, TLB 等统计信息。
    /* kernel/irq/proc.c */
    
    414 int show_interrupts(struct seq_file *p, void *v)
    415 {   
    416     static int prec;
    417     
    418     unsigned long flags, any_count = 0;
    419     int i = *(loff_t *) v, j;
    420     struct irqaction *action;
    421     struct irq_desc *desc;
    422     
    423     if (i > ACTUAL_NR_IRQS)
    424         return 0;
    425     
    426     if (i == ACTUAL_NR_IRQS)
    427         return arch_show_interrupts(p, prec);
    428     
    429     /* print header and calculate the width of the first column */
    430     if (i == 0) {
    431         for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
    432             j *= 10;
    433     
    434         seq_printf(p, "%*s", prec + 8, "");
    435         for_each_online_cpu(j)
    436             seq_printf(p, "CPU%-8d", j);
    437         seq_putc(p, '\n');
    438     }
    439     
    440     desc = irq_to_desc(i);
    441     if (!desc)
    442         return 0;
    443     
    444     raw_spin_lock_irqsave(&desc->lock, flags);
    445     for_each_online_cpu(j)
    446         any_count |= kstat_irqs_cpu(i, j);
    447     action = desc->action;
    448     if (!action && !any_count)
    449         goto out;
    450     
    451     seq_printf(p, "%*d: ", prec, i);
    452     for_each_online_cpu(j)
    453         seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
    454     
    455     if (desc->irq_data.chip) {
    456         if (desc->irq_data.chip->irq_print_chip)
    457             desc->irq_data.chip->irq_print_chip(&desc->irq_data, p);
    458         else if (desc->irq_data.chip->name)
    459             seq_printf(p, " %8s", desc->irq_data.chip->name);
    460         else
    461             seq_printf(p, " %8s", "-");
    462     } else {
    463         seq_printf(p, " %8s", "None");
    464     }
    465 #ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL
    466     seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge");
    467 #endif
    468     if (desc->name)
    469         seq_printf(p, "-%-8s", desc->name);
    470    
    471     if (action) {
    472         seq_printf(p, "  %s", action->name);
    473         while ((action = action->next) != NULL)
    474             seq_printf(p, ", %s", action->name);
    475     }
    476    
    477     seq_putc(p, '\n');
    478 out:
    479     raw_spin_unlock_irqrestore(&desc->lock, flags);
    480     return 0;
    481 }
    
    /* arch/x86/kernel/irq.c */
    
     50 #define irq_stats(x)        (&per_cpu(irq_stat, x))
     51 #define rh_irq_stats(x)     (&per_cpu(rh_irq_stat, x))
     52 /*                                        
     53  * /proc/interrupts printing for arch specific interrupts
     54  */                                       
     55 int arch_show_interrupts(struct seq_file *p, int prec)
     56 {                                         
     57     int j;
     58     
     59     seq_printf(p, "%*s: ", prec, "NMI");
     60     for_each_online_cpu(j)             
     61         seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
     62     seq_printf(p, "  Non-maskable interrupts\n");
     63 #ifdef CONFIG_X86_LOCAL_APIC           
     64     seq_printf(p, "%*s: ", prec, "LOC");
     65     for_each_online_cpu(j)             
     66         seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
     67     seq_printf(p, "  Local timer interrupts\n");
     68      
     69     seq_printf(p, "%*s: ", prec, "SPU");
     70     for_each_online_cpu(j) 
     71         seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
     72     seq_printf(p, "  Spurious interrupts\n");
     73     seq_printf(p, "%*s: ", prec, "PMI");  
     74     for_each_online_cpu(j) 
     75         seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
     76     seq_printf(p, "  Performance monitoring interrupts\n");
     77     seq_printf(p, "%*s: ", prec, "IWI");  
     78     for_each_online_cpu(j)             
     79         seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
     80     seq_printf(p, "  IRQ work interrupts\n");
     81     seq_printf(p, "%*s: ", prec, "RTR");  
     82     for_each_online_cpu(j)
     83         seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
     84     seq_printf(p, "  APIC ICR read retries\n");
     85 #endif   
     86     if (x86_platform_ipi_callback) {
     87         seq_printf(p, "%*s: ", prec, "PLT");
     88         for_each_online_cpu(j) 
     89             seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
     90         seq_printf(p, "  Platform interrupts\n");
     91     } 
     92 #ifdef CONFIG_SMP                       
     93     seq_printf(p, "%*s: ", prec, "RES");
     94     for_each_online_cpu(j) 
     95         seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
     96     seq_printf(p, "  Rescheduling interrupts\n");
     97     seq_printf(p, "%*s: ", prec, "CAL");
     98     for_each_online_cpu(j) 
     99         seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
    100                     irq_stats(j)->irq_tlb_count);
    101     seq_printf(p, "  Function call interrupts\n");
    102     seq_printf(p, "%*s: ", prec, "TLB");
    103     for_each_online_cpu(j) 
    104         seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
    105     seq_printf(p, "  TLB shootdowns\n");
    106 #endif                                  
    107 #ifdef CONFIG_X86_THERMAL_VECTOR        
    108     seq_printf(p, "%*s: ", prec, "TRM");
    109     for_each_online_cpu(j) 
    110         seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
    111     seq_printf(p, "  Thermal event interrupts\n");
    112 #endif 
    113 #ifdef CONFIG_X86_MCE_THRESHOLD         
    114     seq_printf(p, "%*s: ", prec, "THR");
    115     for_each_online_cpu(j) 
    116         seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
    117     seq_printf(p, "  Threshold APIC interrupts\n");
    118 #endif                                  
    119 #ifdef CONFIG_X86_MCE                   
    120     seq_printf(p, "%*s: ", prec, "MCE");
    121     for_each_online_cpu(j)                
    122         seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
    123     seq_printf(p, "  Machine check exceptions\n");
    124     seq_printf(p, "%*s: ", prec, "MCP");
    125     for_each_online_cpu(j) 
    126         seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
    127     seq_printf(p, "  Machine check polls\n");
    128 #endif                                    
    129     if (test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) {
    130         seq_printf(p, "%*s: ", prec, "HYP");
    131         for_each_online_cpu(j) 
    132             seq_printf(p, "%10u ", 
    133                    rh_irq_stats(j)->irq_hv_callback_count);
    134         seq_printf(p, "  Hypervisor callback interrupts\n");
    135     }                                     
    136     seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
    137 #if defined(CONFIG_X86_IO_APIC)
    138     seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
    139 #endif                                    
    140     return 0; 
    141 } 
    

    中断描述符 - interrupt descriptor

    说到底, /proc/interrupts 的统计数据是从每个中断号对应的 interrupt descriptor 得到的。

    interrupt descriptor 的定义如下。本人水平有限,就暂时不解释每一个值的含义了。

    /* include/linux/irqdesc.h */
    
    /**
     * struct irq_desc - interrupt descriptor
     * @irq_data:		per irq and chip data passed down to chip functions
     * @kstat_irqs:		irq stats per cpu
     * @handle_irq:		highlevel irq-events handler
     * @preflow_handler:	handler called before the flow handler (currently used by sparc)
     * @action:		the irq action chain
     * @status:		status information
     * @core_internal_state__do_not_mess_with_it: core internal status information
     * @depth:		disable-depth, for nested irq_disable() calls
     * @wake_depth:		enable depth, for multiple irq_set_irq_wake() callers
     * @irq_count:		stats field to detect stalled irqs
     * @last_unhandled:	aging timer for unhandled count
     * @irqs_unhandled:	stats field for spurious unhandled interrupts
     * @lock:		locking for SMP
     * @affinity_hint:	hint to user space for preferred irq affinity
     * @affinity_notify:	context for notification of affinity changes
     * @pending_mask:	pending rebalanced interrupts
     * @threads_oneshot:	bitfield to handle shared oneshot threads
     * @threads_active:	number of irqaction threads currently running
     * @wait_for_threads:	wait queue for sync_irq to wait for threaded handlers
     * @dir:		/proc/irq/ procfs entry
     * @name:		flow handler name for /proc/interrupts output
     */
    struct irq_desc {
    	struct irq_data		irq_data;
    	unsigned int __percpu	*kstat_irqs;
    	irq_flow_handler_t	handle_irq;
    #ifdef CONFIG_IRQ_PREFLOW_FASTEOI
    	irq_preflow_handler_t	preflow_handler;
    #endif
    	struct irqaction	*action;	/* IRQ action list */
    	unsigned int		status_use_accessors;
    	unsigned int		core_internal_state__do_not_mess_with_it;
    	unsigned int		depth;		/* nested irq disables */
    	unsigned int		wake_depth;	/* nested wake enables */
    	unsigned int		irq_count;	/* For detecting broken IRQs */
    	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
    	unsigned int		irqs_unhandled;
    	raw_spinlock_t		lock;
    	struct cpumask		*percpu_enabled;
    #ifdef CONFIG_SMP
    	const struct cpumask	*affinity_hint;
    	struct irq_affinity_notify *affinity_notify;
    #ifdef CONFIG_GENERIC_PENDING_IRQ
    	cpumask_var_t		pending_mask;
    #endif
    #endif
    	unsigned long		threads_oneshot;
    	atomic_t		threads_active;
    	wait_queue_head_t       wait_for_threads;
    #ifdef CONFIG_PROC_FS
    	struct proc_dir_entry	*dir;
    #endif
    	int			parent_irq;
    	struct module		*owner;
    	const char		*name;
    } ____cacheline_internodealigned_in_smp;
    

    参考资料

    1. [Proc interrupts] http://www.crashcourse.ca/wiki/index.php/Proc_interrupts
    本文主要搬运这篇文档。

    2. [5.4. Manage /proc file with seq_file] http://www.tldp.org/LDP/lkmpg/2.6/html/x861.html
    描述 sequence file 接口。

    3. [Lesson 13: Proc files and sequence files] http://www.crashcourse.ca/introduction-linux-kernel-programming/lesson-13-proc-files-and-sequence-files-part-3
    关于 sequence file 更详细的描述。