用 mmap 将某些 struct 存放到文件中 (map virtual memory to a file)

背景

在 RHEL7 中, dovecot 在运行的过程中其中一个 imap 进程挂掉,出来这么一个 coredump.

Program terminated with signal 7, Bus error.

(gdb) f 0
#0  0x00007fbb15868c17 in mail_cache_transaction_open_if_needed (ctx=ctx@entry=0x7fbb173c1430)
    at mail-cache-transaction.c:218
218				if (ext->reset_id == cache->hdr->file_seq || i == 2)

(gdb) p cache
$1 = (struct mail_cache *) 0x7fbb173af2d0

(gdb) p cache->hdr
$3 = (const struct mail_cache_header *) 0x7fbb15ce5000

(gdb) p cache->hdr->file_seq
Cannot access memory at address 0x7fbb15ce5008

# cat maps | grep 7fbb15ce5000
7fbb15ce5000-7fbb15ced000 r--s 00000000 00:2c xxxxxxxxx    /xxxxxxx/dovecot.index.cache

挂掉的原因是 "signal 7, bus error" (bad memory access)。从 gdb 中可以看到,进程挂掉时正在访问 ext->reset_id 和 cache->hdr->file_seq,而从 maps 可以看出 cache->hdr 指向的是 file-backed (由文件映射而来) 的地址空间。

那么问题来了,怎样可以把一个结构体,映射到一个文件,而不是匿名页?另外一个问题是,在 gdb 中看到 “Cannot access memory at address” 是意味着这段地址存在问题吗?

测试环境

Debian 8 - jessie

测试程序

https://stackoverflow.com/questions/27697228/mmap-and-struct-in-c 中,找到了一段将数据映射到文件的可行代码。下面根据实际需要进行了改动。

这些 man page 会有所帮助: man 2 lseek, man 2 mmap, man 2 open, man 2 write.

写 - 将一个 struct person 存到文件中

写一个 mmtest.c 程序。这个程序会创建一个 /tmp/tom.bin 文件,并将一个 struct person 的内容存放到这个文件中。

/tmp$ cat mmtest.c 

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#define FILEPATH "/tmp/tom.bin"

/*
 * Sample record that is stored in the mapped file.
 * It consists of three plain int members, laid out in declaration order.
 */
struct person {
    int id;
    int height;
    int weight;
};


/*
 * Create FILEPATH, stretch it to hold exactly one struct person, map it
 * shared and read-write, and store a sample record through the mapping.
 *
 * The process then sleeps for 300 seconds so the mapping can be inspected
 * (for example via /proc/<pid>/maps) while it is still alive.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if a system call fails.
 */
int main(int argc, char** argv)
{
    int fd;
    off_t offset;
    ssize_t written;
    struct person *tom;

    (void)argc;
    (void)argv;

    // Create (or truncate) the backing file, readable/writable by the owner only.
    fd = open(FILEPATH, O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600);
    if (fd == -1)
    {
        perror("Error opening file for writing");
        exit(EXIT_FAILURE);
    }

    // Move to the last byte the struct will occupy. lseek() returns off_t,
    // so keep the result in an off_t instead of narrowing it to int.
    offset = lseek(fd, (off_t)sizeof(struct person) - 1, SEEK_SET);
    if (offset == (off_t)-1)
    {
        // Report first: close() may overwrite errno.
        perror("Error calling lseek() to 'stretch' the file");
        close(fd);
        exit(EXIT_FAILURE);
    }

    // Write a single '\0' at that offset.
    // Now the file size == sizeof(struct person)
    written = write(fd, "", 1);
    if (written != 1)
    {
        perror("Error writing last byte of the file");
        close(fd);
        exit(EXIT_FAILURE);
    }

    // Map the file. tom now points at memory backed by the file, and
    // MAP_SHARED makes stores through it reach the file itself.
    tom = mmap(NULL, sizeof *tom, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (tom == MAP_FAILED)
    {
        perror("Error mmapping the file");
        close(fd);
        exit(EXIT_FAILURE);
    }

    // Finally, we can write something to the struct.
    tom->id = 5;
    tom->height = 180;
    tom->weight = 65;

    // Buy some time for investigation.
    sleep(300);

    // Release the mapping and the descriptor explicitly.
    if (munmap(tom, sizeof *tom) == -1)
    {
        perror("Error un-mmapping the file");
        close(fd);
        exit(EXIT_FAILURE);
    }
    close(fd);
    return 0;
}

可以看到,这个 tom.bin 文件里存放了 id=5, height=180, weight=65 的信息。

/tmp$ hexdump tom.bin 
0000000 0005 0000 00b4 0000 0041 0000          
000000c

读 - 从文件中读取 struct person

另一个程序 mmtest_read.c, 它通过 mmap() 读取这个文件存放的内容。

/tmp$ cat mmtest_read.c 
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#define FILEPATH "/tmp/tom.bin"

/*
 * Record layout read back from the mapped file.
 * It must stay identical to the struct person declared in mmtest.c.
 */
struct person {
    int id;
    int height;
    int weight;
};


/*
 * Map FILEPATH read-only and print the struct person stored in it.
 *
 * The process then sleeps for 300 seconds so the mapping can be inspected
 * (for example via /proc/<pid>/maps) while it is still alive.
 *
 * The file size is deliberately not checked. If the file is shorter than
 * the mapped struct (e.g. it was truncated to zero bytes), reading through
 * the mapping raises SIGBUS, which is the failure this demo reproduces.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if open() or mmap() fails.
 */
int main(int argc, char** argv)
{
    int fd;
    struct person *tom;

    (void)argc;
    (void)argv;

    // Open the file. (We saved something to this file in mmtest.c)
    // The mapping below is PROT_READ only, so read access is sufficient.
    fd = open(FILEPATH, O_RDONLY);
    if (fd == -1)
    {
        perror("Error opening file for reading");
        exit(EXIT_FAILURE);
    }

    // Now tom is pointing to the memory address that is mapped to the file.
    tom = mmap(NULL, sizeof *tom, PROT_READ, MAP_SHARED, fd, 0);
    if (tom == MAP_FAILED)
    {
        // Report first: close() may overwrite errno.
        perror("Error mmapping the file");
        close(fd);
        exit(EXIT_FAILURE);
    }

    // Read data from tom.
    printf("id = %d\n", tom->id);
    printf("height = %d\n", tom->height);
    printf("weight = %d\n", tom->weight);

    // Buy some time for investigation.
    sleep(300);

    // Release the mapping and the descriptor explicitly.
    munmap(tom, sizeof *tom);
    close(fd);
    return 0;
}

能顺利读取 id=5, height=180, weight=65 的信息。

/tmp$ ./mmtest_read 
id = 5
height = 180
weight = 65

查看内存映射 (maps)

从 /proc/<pid>/maps 里,可以看到一个进程的内存映射情况。从 mmtest_read 的 maps 可以看到 7f97ca1e2000-7f97ca1e3000 映射到了 /tmp/tom.bin 文件。

/tmp$ ps aux | grep mmtest
feichas+  3819  0.0  0.0   4084   684 pts/0    S+   10:19   0:00 ./mmtest_read

/tmp$ cat /proc/3819/maps 
00400000-00401000 r-xp 00000000 fd:00 13770524                           /tmp/mmtest_read
00600000-00601000 rw-p 00000000 fd:00 13770524                           /tmp/mmtest_read
7f97c9c1a000-7f97c9dbb000 r-xp 00000000 fd:00 5931530                    /lib/x86_64-linux-gnu/libc-2.19.so
7f97c9dbb000-7f97c9fbb000 ---p 001a1000 fd:00 5931530                    /lib/x86_64-linux-gnu/libc-2.19.so
7f97c9fbb000-7f97c9fbf000 r--p 001a1000 fd:00 5931530                    /lib/x86_64-linux-gnu/libc-2.19.so
7f97c9fbf000-7f97c9fc1000 rw-p 001a5000 fd:00 5931530                    /lib/x86_64-linux-gnu/libc-2.19.so
7f97c9fc1000-7f97c9fc5000 rw-p 00000000 00:00 0 
7f97c9fc5000-7f97c9fe5000 r-xp 00000000 fd:00 5931526                    /lib/x86_64-linux-gnu/ld-2.19.so
7f97ca1c7000-7f97ca1ca000 rw-p 00000000 00:00 0 
7f97ca1e1000-7f97ca1e2000 rw-p 00000000 00:00 0 
7f97ca1e2000-7f97ca1e3000 r--s 00000000 fd:00 13770511                   /tmp/tom.bin
7f97ca1e3000-7f97ca1e5000 rw-p 00000000 00:00 0 
7f97ca1e5000-7f97ca1e6000 r--p 00020000 fd:00 5931526                    /lib/x86_64-linux-gnu/ld-2.19.so
7f97ca1e6000-7f97ca1e7000 rw-p 00021000 fd:00 5931526                    /lib/x86_64-linux-gnu/ld-2.19.so
7f97ca1e7000-7f97ca1e8000 rw-p 00000000 00:00 0 
7ffc41521000-7ffc41542000 rw-p 00000000 00:00 0                          [stack]
7ffc415c6000-7ffc415c8000 r-xp 00000000 00:00 0                          [vdso]
7ffc415c8000-7ffc415ca000 r--p 00000000 00:00 0                          [vvar]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]

gdb - Cannot access memory at address

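如果收取 mmtest_read 的 coredump,可以看到,尽管这个程序能正常访问这段内存,但是在 coredump 中仍然会遇到 "Cannot access memory at address" 的问题。原因是 Linux 默认不会把 file-backed 的映射写入 coredump:默认的 coredump_filter 是 0x33,只包含匿名内存、ELF 头和私有大页。如有需要,可以通过 /proc/<pid>/coredump_filter 调整 (参见 man 5 core)。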

/tmp$ gdb mmtest_read core

(gdb) bt
#0  0x00007f97c9cd3f10 in __nanosleep_nocancel () at ../sysdeps/unix/syscall-template.S:81
#1  0x00007f97c9cd3dc4 in __sleep (seconds=0) at ../sysdeps/unix/sysv/linux/sleep.c:137
#2  0x0000000000400795 in main (argc=1, argv=0x7ffc4153f668) at mmtest_read.c:55
(gdb) f 2
#2  0x0000000000400795 in main (argc=1, argv=0x7ffc4153f668) at mmtest_read.c:55
(gdb) p tom
$1 = (struct person *) 0x7f97ca1e2000
(gdb) p *tom
Cannot access memory at address 0x7f97ca1e2000

这么说,在 coredump 里看到"Cannot access memory at address" 并不能说明这段地址出现问题。

signal 7 - bus error

在 dovecot 的 coredump 中看到了 signal 7, 如何能触发 signal 7 呢?

如果 mmap 的文件出现问题,就可能触发 SIGBUS。比如清空这个 tom.bin 文件:文件长度变为 0 之后,映射的那一页已经超出文件末尾,访问它就会收到 SIGBUS。

/tmp$ > tom.bin
/tmp$ ./mmtest_read 
Bus error (core dumped)

/tmp$ gdb mmtest_read core
Program terminated with signal SIGBUS, Bus error.
#0  0x0000000000400748 in main (argc=1, argv=0x7ffed46bbdf8) at mmtest_read.c:43
43	    printf("id = %d\n", tom->id);

至于那个背景问题

至于 dovecot 的 signal 7 - bus error 问题,经过调查后,有可能是 mmap() 所使用的 dovecot.index.cache 文件存放在 NFS 中,导致了意外。 可以通过在 dovecot 中禁用 mmap 解决:
https://wiki2.dovecot.org/NFS