linux内核seq_file接口

seq相关的头文件是linux/seq_file.h，相关函数的实现在fs/seq_file.c。seq函数早在2001年就已引入，但此前在内核中用得不多；到了2.6内核，许多/proc只读文件开始大量使用seq函数进行处理。

由于procfs的默认操作函数只使用一页的缓存，在处理较大的proc文件时就有点麻烦，并且在输出一系列结构体中的数据时也比较不灵活，需要自己在read_proc函数中实现迭代，容易出现Bug。所以内核黑客们对一些/proc代码做了研究，抽象出共性，最终形成了seq_file（Sequence file：序列文件）接口。这个接口提供了一套简单的函数来解决以上proc接口编程时存在的问题，使得编程更加容易，降低了Bug出现的机会。

在需要创建一个由一系列数据顺序组合而成的虚拟文件或一个较大的虚拟文件时，推荐使用seq_file接口。但我个人认为，并不是只有procfs才可以使用seq_file接口：seq_file实现的其实是一个操作函数集，这个函数集并不与proc绑定，同样可以用在其他地方。

实现

seq_file结构体定义于linux/seq_file.h

struct seq_file {
    char *buf;  // data buffer of the sequence file: output is first printed here, then copied to the user buffer
    size_t size;  // buffer size; starts at one page and is doubled on demand (4k, 8k, 16k...)
    size_t from;  // offset within buf of the data not yet copied to user space
    size_t count; // bytes in buf not yet copied to user space; seq_printf() and friends increase it as they write into buf
    size_t pad_until; 
    loff_t index;  // index of the record being read or about to be read; matches the pos passed to start/next in seq_operations (one record per index)
    loff_t read_pos;  // current read offset in the file, in bytes
    u64 version;  // file version
    struct mutex lock;  // serializes concurrent operations on this file
    const struct seq_operations *op;  // the seq_operations table
    int poll_event; 
    const struct file *file; // the proc (or other) file this seq_file belongs to
    void *private;  // file-private data
};

seq操作函数：

struct seq_operations {
    void * (*start) (struct seq_file *m, loff_t *pos); // begin reading records; usually takes the lock that protects the data against concurrent access
    void (*stop) (struct seq_file *m, void *v); // end of a reading pass, counterpart of start; usually releases the lock
    void * (*next) (struct seq_file *m, void *v, loff_t *pos); // advance to the next record to process
    int (*show) (struct seq_file *m, void *v); // print one record into the temporary buffer
};

start在*pos为0时可以返回SEQ_START_TOKEN；show收到这个值时打印表格头。

start和next返回一条数据记录，stop停止打印，show显示一条记录。

注意：要在next中对pos递增处理，但递增的单位与迭代器有关，可能不是1。

一些有用的全局函数:

seq_open：通常会在打开文件的时候调用，以第二个参数为seq_operations表创建seq_file结构体。
seq_read, seq_lseek和seq_release：它们通常直接对应文件操作表中的read, llseek和release。
seq_escape：将一个字符串中的需要转义的字符（字节长）以8进制的方式打印到seq_file。
seq_putc, seq_puts, seq_printf：它们分别和C语言中的putc，puts和printf相对应。
seq_path：用于输出文件名。
single_open, single_release: 打开和释放只有一条记录的文件。
seq_open_private, __seq_open_private, seq_release_private：和seq_open类似，不过打开seq_file的时候创建一小块文件私有数据。

特别提取了双向链表和hash链表处理函数：

/*
 * seq_list_start - find the entry at offset @pos in the list headed by @head.
 *
 * Entries are counted from 0 starting at head->next. Returns the matching
 * entry, or NULL when the walk gets back to @head before reaching @pos.
 */
struct list_head *seq_list_start(struct list_head *head, loff_t pos)
{
    struct list_head *node;

    for (node = head->next; node != head; node = node->next) {
        if (pos == 0)
            return node;
        pos--;
    }

    return NULL;
}
EXPORT_SYMBOL(seq_list_start);

/*
 * seq_list_start_head - like seq_list_start(), but position 0 is @head itself.
 *
 * Any other position is shifted down by one and resolved by seq_list_start().
 */
struct list_head *seq_list_start_head(struct list_head *head, loff_t pos)
{
    return pos ? seq_list_start(head, pos - 1) : head;
}
EXPORT_SYMBOL(seq_list_start_head);

/*
 * seq_list_next - step from entry @v to its successor in the list headed
 * by @head.
 *
 * *@ppos is incremented unconditionally. Returns the successor, or NULL
 * when the successor is @head (the walk is complete).
 */
struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos)
{
    struct list_head *current_entry = v;
    struct list_head *successor = current_entry->next;

    *ppos += 1;
    if (successor == head)
        return NULL;

    return successor;
}
EXPORT_SYMBOL(seq_list_next);

seq_file实现中关键函数为seq_read()，将记录逐条读取到用户空间：

/**
 *  seq_read -  ->read() method for sequential files.
 *  @file: the file to read from
 *  @buf: the buffer to read to
 *  @size: the maximum number of bytes to read
 *  @ppos: the current position in the file
 *
 *  Ready-made ->f_op->read()
 *
 *  Works in three stages under m->lock: flush whatever is still pending in
 *  m->buf, make sure at least one record fits in the buffer (doubling the
 *  buffer until it does), then keep adding records while the request is not
 *  yet satisfied.
 *
 *  Returns the number of bytes copied to @buf, or, when nothing was copied,
 *  the value left in err (0, -ENOMEM, -EFAULT or an error from the iterator).
 */
ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
{
    struct seq_file *m = file->private_data;
    size_t copied = 0;
    loff_t pos;
    size_t n;
    void *p;
    int err = 0;

    mutex_lock(&m->lock);
    /*
     * seq_file->op->..m_start/m_stop/m_next may do special actions
     * or optimisations based on the file->f_version, so we want to
     * pass the file->f_version to those methods.
     *
     * seq_file->version is just copy of f_version, and seq_file
     * methods can treat it simply as file version.
     * It is copied in first and copied out after all operations.
     * It is convenient to have it as  part of structure to avoid the
     * need of passing another argument to all the seq_file methods.
     */
    m->version = file->f_version;

    /* Don't assume *ppos is where we left it */
    if (unlikely(*ppos != m->read_pos)) {   // requested offset differs from where the buffer stands: re-walk with traverse()
        while ((err = traverse(m, *ppos)) == -EAGAIN)
            ;
        if (err) {
            /* With prejudice... */
            m->read_pos = 0;
            m->version = 0;
            m->index = 0;
            m->count = 0;
            goto Done;
        } else {
            m->read_pos = *ppos;
        }
    }

    /* grab buffer if we didn't have one */
    if (!m->buf) {
        m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
        if (!m->buf)
            goto Enomem;
    }
    /* if not empty - flush it first */
    if (m->count) {  // two outcomes: the buffer is drained (m->count == 0), or the request is used up (size == 0) and this read ends, leaving the rest for the next read
        n = min(m->count, size);
        err = copy_to_user(buf, m->buf + m->from, n);
        if (err)
            goto Efault;
        m->count -= n;
        m->from += n;
        size -= n;
        buf += n;
        copied += n;
        if (!m->count) {
            m->from = 0;
            m->index++;
        }
        if (!size)
            goto Done;
    }
    /* we need at least one record in buffer */  
    pos = m->index;
    p = m->op->start(m, &pos);
    while (1) {   // once one record fits, go to Fill; if it does not fit, retry with a doubled buffer; break when start/next yields no record or an error
        err = PTR_ERR(p);
        if (!p || IS_ERR(p))
            break;
        err = m->op->show(m, p);
        if (err < 0)
            break;
        if (unlikely(err))
            m->count = 0;
        if (unlikely(!m->count)) {
            p = m->op->next(m, p, &pos);
            m->index = pos;
            continue;
        }
        if (m->count < m->size)
            goto Fill;
        m->op->stop(m, p);
        kvfree(m->buf);
        m->count = 0;
        m->buf = seq_buf_alloc(m->size <<= 1);
        if (!m->buf)
            goto Enomem;
        m->version = 0;
        pos = m->index;
        p = m->op->start(m, &pos);
    }
    m->op->stop(m, p);
    m->count = 0;
    goto Done;
Fill:  // loop ends when next() yields NULL or an error, when show() fails or overflows the buffer, or when enough bytes for the request are buffered
    /* they want more? let's try to get some more */
    while (m->count < size) {
        size_t offs = m->count;
        loff_t next = pos;
        p = m->op->next(m, p, &next);
        if (!p || IS_ERR(p)) {
            err = PTR_ERR(p);
            break;
        }
        err = m->op->show(m, p);
        if (seq_has_overflowed(m) || err) {
            m->count = offs;
            if (likely(err <= 0))
                break;
        }
        pos = next;
    }
    m->op->stop(m, p);
    n = min(m->count, size);
    err = copy_to_user(buf, m->buf, n);
    if (err)
        goto Efault;
    copied += n;
    m->count -= n;
    if (m->count)
        m->from = n;
    else
        pos++;
    m->index = pos;
Done:   // nothing copied: return err; otherwise advance both *ppos and m->read_pos by the number of bytes copied
    if (!copied)
        copied = err;
    else {
        *ppos += copied;
        m->read_pos += copied;
    }
    file->f_version = m->version;
    mutex_unlock(&m->lock);
    return copied;
Enomem:
    err = -ENOMEM;
    goto Done;
Efault:
    err = -EFAULT;
    goto Done;
}
EXPORT_SYMBOL(seq_read);

示例

如下例用于列出进程相关信息，在ubuntu 16/18上测试通过：

//#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/sched.h>
// #include <linux/sched/signal.h>

/* The /proc entry returned by proc_create() in exam_seq_init(). */
static struct proc_dir_entry *entry;
/* Record index last stored by l_next(); l_show() prints it next to each task. */
static loff_t offset = 1;

/*
 * l_start - seq_file ->start(): locate the task for record index *pos.
 * @m:   seq_file being read; the table header is printed into it at index 0
 * @pos: record index to resume from (0 is init_task)
 *
 * Index 0 prints the header and returns init_task. Any other index walks
 * that many steps along the task list starting from init_task.
 *
 * Returns the task at that index, or NULL when the walk gets back to
 * init_task first, i.e. the index is at or beyond the end of the list.
 * Stopping at the first wrap matters when the list has shrunk since the
 * index was recorded: continuing would hand out already-listed tasks again.
 *
 * NOTE(review): the task list is walked without rcu_read_lock() or
 * tasklist_lock; confirm whether locking is required here (it would have
 * to be released in l_stop()).
 */
static void *l_start(struct seq_file *m, loff_t *pos)
{
    loff_t index = *pos;
    loff_t i;
    struct task_struct *task = &init_task;

    if (index == 0) {
        /* First record: emit the header ahead of init_task's own row. */
        seq_printf(m, "Current all the processes in system:\n"
                "%-24s%-5s\n", "name", "pid");
        printk(KERN_EMERG "++++++++++=========>%5d\n", 0);
        return &init_task;
    }

    for (i = 0; i < index; i++) {
        task = next_task(task);
        /* Back at the list head: there is no record with this index. */
        if (task == &init_task)
            return NULL;
    }

    printk(KERN_EMERG "++++++++++>%5d\n", task->pid);
    return task;
}

/*
 * l_next - seq_file ->next(): advance from task @p to the following task.
 * @m:   seq_file being read (unused)
 * @p:   the task returned by the previous start()/next() call
 * @pos: record index; always incremented, including at end of sequence
 *
 * Returns the next task, or NULL once the list wraps back to init_task.
 * When a task is returned its index is also stored in the file-scope
 * 'offset' so that l_show() can print it.
 */
static void *l_next(struct seq_file *m, void *p, loff_t *pos)
{
    struct task_struct *task = next_task((struct task_struct *)p);

    /*
     * Advance the index even when the end is reached: the seq_file core
     * expects ->next() to change *pos on every call.
     */
    ++*pos;

    if (task == &init_task)
        return NULL;

    printk(KERN_EMERG "=====>%5d\n", task->pid);
    offset = *pos;

    return task;
}

static void l_stop(struct seq_file *m, void *p)
{
    printk(KERN_EMERG "------>
");
}

static int l_show(struct seq_file *m, void *p)
{
    struct task_struct * task = (struct task_struct *)p;

    seq_printf(m, "%-24s%-5d	%lld
", task->comm, task->pid, offset);
//    seq_printf(m, "======>%-24s%-5d
", task->comm, task->pid);
    return 0;
}

static struct seq_operations exam_seq_op = {
    .start = l_start,
    .next  = l_next,
    .stop  = l_stop,
    .show  = l_show
};

/*
 * exam_seq_open - ->open() handler: attach the exam_seq_op iterator to @file.
 *
 * Returns whatever seq_open() returns.
 */
static int exam_seq_open(struct inode *inode, struct file *file)
{
    int ret;

    ret = seq_open(file, &exam_seq_op);
    return ret;
}

static struct file_operations exam_seq_fops = {
    .open = exam_seq_open,
    .read = seq_read,
    .llseek = seq_lseek,
    .release = seq_release,
};

/*
 * exam_seq_init - module entry point: create the read-only proc file
 * "exam_esq_file" backed by exam_seq_fops.
 *
 * Returns 0 on success, or -ENOMEM when proc_create() fails, so the module
 * is not left loaded without its proc entry.
 */
static int __init exam_seq_init(void)
{
    entry = proc_create("exam_esq_file", 0444, NULL, &exam_seq_fops);
    if (!entry) {
        printk(KERN_EMERG "proc_create error.\n");
        return -ENOMEM;
    }

    printk(KERN_EMERG "exam_seq_init.\n");
    return 0;
}

/*
 * exam_seq_exit - module exit point: remove the "exam_esq_file" proc entry.
 */
static void __exit exam_seq_exit(void)
{
    remove_proc_entry("exam_esq_file", NULL);
    printk(KERN_EMERG "exam_seq_exit.\n");
}

/* Register the load/unload handlers and declare the module licence. */
module_init(exam_seq_init);
module_exit(exam_seq_exit);
MODULE_LICENSE("GPL");

makefile

# Out-of-tree build of the example module: seq.c -> seq.ko.
obj-m := seq.o
# Build tree of the running kernel. To cross-build, override on the command
# line: make KDIR=/path/to/kernel/build
KDIR := /lib/modules/$(shell uname -r)/build
PWD := $(shell pwd)

.PHONY: default clean

# M= tells kbuild where the external module lives; it replaces the older
# SUBDIRS= spelling. Recipe lines must start with a TAB.
default:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

clean:
	rm -rf *.o *.ko *.mod* *.order *.sym*

参考：

1. 内核proc文件系统与seq接口（4）---seq_file接口编程浅析 tekkaman

2. 内核proc文件系统与seq接口（5）---通用proc接口与seq_file接口实验 tekkaman

3. seq_file工作机制实例