骑麦兜看落日

[Binary]IO_FILE

字数统计: 2.1k阅读时长: 11 min
2018/11/14 Share

文件结构

FILE 在 Linux 系统的标准 IO 库中是用于描述文件的结构,称为文件流。

所有的文件流通过链表连接,全局变量_IO_list_all指向链表头部

1
2
3
// glibc/libio/stdfiles.c

/* Head of the list of streams; its initial value is the address of the
   stderr stream object, _IO_2_1_stderr_. */
struct _IO_FILE_plus *_IO_list_all = &_IO_2_1_stderr_;

在程序启动时会创建三个文件流:stdin、stdout、stderr

1
2
3
4
5
// glibc/libio/libio.h

/* Declarations of the three standard stream objects (stdin, stdout,
   stderr); they are defined elsewhere, not in this excerpt. */
extern struct _IO_FILE_plus _IO_2_1_stdin_;
extern struct _IO_FILE_plus _IO_2_1_stdout_;
extern struct _IO_FILE_plus _IO_2_1_stderr_;

并且这三个文件流位于libc的数据段,而使用fopen创建的文件流位于堆中

文件流的结构体定义如下

1
2
3
4
5
6
7
// glibc/libio/libioP.h

/* A complete stream object: the _IO_FILE state immediately followed by
   a pointer to a constant struct _IO_jump_t. */
struct _IO_FILE_plus
{
_IO_FILE file;
/* Points to the table of function-pointer fields for this stream. */
const struct _IO_jump_t *vtable;
};

_IO_FILE

_IO_FILE结构体包含了文件的所有属性

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
// glibc/libio/libio.h

/* Per-stream state: flag word, read/write/buffer pointers, backup-area
   pointers, list link, file descriptor and lock pointer. */
struct _IO_FILE {
int _flags; /* High-order word is _IO_MAGIC; rest is flags. */
#define _IO_file_flags _flags

/* The following pointers correspond to the C++ streambuf protocol. */
/* Note: Tk uses the _IO_read_ptr and _IO_read_end fields directly. */
char* _IO_read_ptr; /* Current read pointer */
char* _IO_read_end; /* End of get area. */
char* _IO_read_base; /* Start of putback+get area. */
char* _IO_write_base; /* Start of put area. */
char* _IO_write_ptr; /* Current put pointer. */
char* _IO_write_end; /* End of put area. */
char* _IO_buf_base; /* Start of reserve area. */
char* _IO_buf_end; /* End of reserve area. */
/* The following fields are used to support backing up and undo. */
char *_IO_save_base; /* Pointer to start of non-current get area. */
char *_IO_backup_base; /* Pointer to first valid character of backup area */
char *_IO_save_end; /* Pointer to end of non-current get area. */

struct _IO_marker *_markers;

/* Next stream in the list headed by _IO_list_all. */
struct _IO_FILE *_chain;

/* File descriptor; set to -1 by _IO_new_file_init_internal. */
int _fileno;
#if 0
int _blksize;
#else
/* Second flag word (the _IO_FLAGS2_* bits). */
int _flags2;
#endif
_IO_off_t _old_offset; /* This used to be _offset but it's too small. */

#define __HAVE_COLUMN /* temporary */
/* 1+column number of pbase(); 0 is unknown. */
unsigned short _cur_column;
signed char _vtable_offset;
char _shortbuf[1];

/* char* _save_gptr; char* _save_egptr; */

_IO_lock_t *_lock;
/* NOTE(review): the excerpt stops here.  The #ifdef below has no
   matching #endif in this excerpt, and the offsets table that follows
   it lists further members (_offset, _codecvt, ..., _mode, _unused2)
   that are not shown in this definition. */
#ifdef _IO_USE_OLD_IO_FILE
};

其作用如下

偏移 属性 作用
0x00 _flags 高16位(高两字节)为魔数0xfbad0000,低16位为标志位
0x08 _IO_read_ptr 输入流指向的缓冲区
0x10 _IO_read_end 输入流缓冲区结束
0x18 _IO_read_base
0x20 _IO_write_base
0x28 _IO_write_ptr 输出流指向的缓冲区
0x30 _IO_write_end 输出流缓冲区结束
0x38 _IO_buf_base 缓冲区(reserve area)起始
0x40 _IO_buf_end 缓冲区(reserve area)结束
0x48 _IO_save_base
0x50 _IO_backup_base
0x58 _IO_save_end
0x60 _markers
0x68 _chain 指向下一个文件流
0x70 _fileno 文件描述符
0x74 _flags2 标志符
0x78 _old_offset
0x80 _cur_column
0x82 _vtable_offset
0x83 _shortbuf
0x88 _lock 指向锁结构体的指针(例如指向_IO_stdfile_1_lock)
0x90 _offset 文件描述符的偏移
0x98 _codecvt
0xa0 _wide_data 指向宽字节流数据的指针(例如指向_IO_wide_data_1)
0xa8 _freeres_list
0xb0 _freeres_buf
0xb8 __pad5
0xc0 _mode 标记是否为宽字节
0xc4 _unused2

vtable

vtable位于0xd8处

_IO_jump_t结构体包含了一些函数指针

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
// glibc/libio/libioP.h

/* Table of stream operations.  Every member is declared through the
   JUMP_FIELD macro (defined outside this excerpt), which takes the
   member's type and its name. */
struct _IO_jump_t
{
/* Two size_t placeholder members precede the operation members. */
JUMP_FIELD(size_t, __dummy);
JUMP_FIELD(size_t, __dummy2);
JUMP_FIELD(_IO_finish_t, __finish);
JUMP_FIELD(_IO_overflow_t, __overflow);
JUMP_FIELD(_IO_underflow_t, __underflow);
/* __uflow is declared with the same type as __underflow. */
JUMP_FIELD(_IO_underflow_t, __uflow);
JUMP_FIELD(_IO_pbackfail_t, __pbackfail);
/* showmany */
JUMP_FIELD(_IO_xsputn_t, __xsputn);
JUMP_FIELD(_IO_xsgetn_t, __xsgetn);
JUMP_FIELD(_IO_seekoff_t, __seekoff);
JUMP_FIELD(_IO_seekpos_t, __seekpos);
JUMP_FIELD(_IO_setbuf_t, __setbuf);
JUMP_FIELD(_IO_sync_t, __sync);
JUMP_FIELD(_IO_doallocate_t, __doallocate);
JUMP_FIELD(_IO_read_t, __read);
JUMP_FIELD(_IO_write_t, __write);
JUMP_FIELD(_IO_seek_t, __seek);
JUMP_FIELD(_IO_close_t, __close);
JUMP_FIELD(_IO_stat_t, __stat);
JUMP_FIELD(_IO_showmanyc_t, __showmanyc);
JUMP_FIELD(_IO_imbue_t, __imbue);
/* Compiled out: the two names below are never part of the struct. */
#if 0
get_column;
set_column;
#endif
};

其作用如下

偏移 hook 函数 作用
0x00 dummy
0x08 dummy2
0x10 finish 清理_IO_FILE对象
0x18 overflow 刷新缓冲区
0x20 underflow 返回get缓冲区的下一个字节
0x28 uflow 返回输入流的下一个字节
0x30 pbackfail 处理备份操作
0x38 xsputn 向缓冲区写N个字符
0x40 xsgetn 从缓冲区读N个字符
0x48 seekoff 将流位置移动到新位置
0x50 seekpos 将流位置移动到新的绝对位置
0x58 setbuf 为文件开辟缓冲区
0x60 sync 将文件内部数据结构与外部状态同步
0x68 doallocate 告诉文件分配缓冲区
0x70 sysread 读数据
0x78 syswrite 写数据
0x80 sysseek
0x88 sysclose 结束文件
0x90 sysstat
0x98 showmany
0xa0 imbue

标志符

_flags

描述
_IO_MAGIC 0xFBAD0000 魔数
_OLD_STDIO_MAGIC 0xFABC0000 兼容旧版魔数
_IO_MAGIC_MASK 0xFFFF0000
_IO_USER_BUF 1 /* User owns buffer; don’t delete it on close. */
_IO_UNBUFFERED 2
_IO_NO_READS 4 不可读
_IO_NO_WRITES 8 不可写
_IO_EOF_SEEN 0x10
_IO_ERR_SEEN 0x20
_IO_DELETE_DONT_CLOSE 0x40 /* Don’t call close(_fileno) on cleanup. */
_IO_LINKED 0x80 链表连接标识符
_IO_IN_BACKUP 0x100
_IO_LINE_BUF 0x200
_IO_TIED_PUT_GET 0x400 put与get逻辑绑定
_IO_CURRENTLY_PUTTING 0x800
_IO_IS_APPENDING 0x1000 附加模式
_IO_IS_FILEBUF 0x2000 文件流标志符
_IO_BAD_SEEN 0x4000
_IO_USER_LOCK 0x8000

_flags2

描述
_IO_FLAGS2_MMAP 1 映射区域
_IO_FLAGS2_NOTCANCEL 2
_IO_FLAGS2_FORTIFY 4
_IO_FLAGS2_USER_WBUF 8
_IO_FLAGS2_SCANF_STD 16
_IO_FLAGS2_NOCLOSE 32
_IO_FLAGS2_CLOEXEC 64
_IO_FLAGS2_NEED_LOCK 128

I/O函数

fopen

fopen函数定义在glibc/libio/iofopen.c

1
2
3
4
5
6
7
// glibc/libio/iofopen.c

/* Open FILENAME with MODE by forwarding to __fopen_internal with its
   third argument fixed to 1; returns whatever __fopen_internal returns. */
_IO_FILE *
_IO_new_fopen (const char *filename, const char *mode)
{
return __fopen_internal (filename, mode, 1);
}

其内部调用__fopen_internal

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// glibc/libio/iofopen.c

/* Allocate a stream object with malloc, initialize it, and try to open
   FILENAME with MODE through _IO_file_fopen.  On success the result of
   __fopen_maybe_mmap is returned.  NULL is returned when the allocation
   fails, or when the open fails (after unlinking and freeing the object).
   IS32 is passed through to _IO_file_fopen unchanged. */
_IO_FILE *
__fopen_internal (const char *filename, const char *mode, int is32)
{
struct locked_FILE
{
struct _IO_FILE_plus fp;
#ifdef _IO_MTSAFE_IO
_IO_lock_t lock;
#endif
struct _IO_wide_data wd;
} *new_f = (struct locked_FILE *) malloc (sizeof (struct locked_FILE));
/* A single allocation holds the stream, its lock (only when
   _IO_MTSAFE_IO is defined) and its wide-data area. */
/* Streams created through fopen therefore live on the heap. */

if (new_f == NULL)
return NULL;
/* Allocation failed: give up. */

#ifdef _IO_MTSAFE_IO
new_f->fp.file._lock = &new_f->lock;
#endif
/* Point the stream's _lock member at the embedded lock. */

_IO_no_init (&new_f->fp.file, 0, 0, &new_f->wd, &_IO_wfile_jumps); // initialize the file structure
_IO_JUMPS (&new_f->fp) = &_IO_file_jumps; // attach the table of stream operations
_IO_new_file_init_internal (&new_f->fp); // set the flag bits and _fileno, and link the stream into the list
#if !_IO_UNIFIED_JUMPTABLES
new_f->fp.vtable = NULL;
#endif
if (_IO_file_fopen ((_IO_FILE *) new_f, filename, mode, is32) != NULL)
return __fopen_maybe_mmap (&new_f->fp.file);
/* The file was opened successfully. */

_IO_un_link (&new_f->fp);
free (new_f);
/* Opening failed: unlink the stream, release it, and return NULL. */

return NULL;
}

_IO_new_file_init_internal函数主要对文件结构体设置了标志位和文件描述符,并且将文件流绑定到FILE链表中

1
2
3
4
5
6
7
8
9
10
11
12
13
// glibc/libio/fileops.c

/* Put a freshly created stream into its initial, not-yet-opened state:
   mark the offset as unknown, OR in CLOSED_FILEBUF_FLAGS, link the
   stream into the global list, and set the descriptor to -1.
   Returns nothing; the excerpt omitted the `void` return type, which
   left the definition with an implicit int (invalid since C99). */
void
_IO_new_file_init_internal (struct _IO_FILE_plus *fp)
{
  /* POSIX.1 allows another file handle to be used to change the position
     of our file descriptor. Hence we actually don't know the actual
     position before we do the first fseek (and until a following fflush). */
  fp->file._offset = _IO_pos_BAD; // offset is unknown at this point
  fp->file._IO_file_flags |= CLOSED_FILEBUF_FLAGS; // flag set for a closed stream (defined outside this excerpt)

  _IO_link_in (fp); // insert at the head of the stream list via _chain
  fp->file._fileno = -1; // no file descriptor is associated yet
}

_IO_link_in函数将_IO_list_all赋值为当前文件流,并设置当前文件流_chain为原_IO_list_all

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// glibc/libio/genops.c

/* Insert FP at the head of the list headed by _IO_list_all.  The
   _IO_LINKED flag is tested first, so a stream that is already linked
   is left untouched.  When _IO_MTSAFE_IO is defined, the update is
   bracketed by list_all_lock and by the stream's own lock. */
void
_IO_link_in (struct _IO_FILE_plus *fp)
{
if ((fp->file._flags & _IO_LINKED) == 0)
{
/* Mark the stream as linked before touching the list. */
fp->file._flags |= _IO_LINKED;
#ifdef _IO_MTSAFE_IO
_IO_cleanup_region_start_noarg (flush_cleanup);
_IO_lock_lock (list_all_lock);
run_fp = (_IO_FILE *) fp;
_IO_flockfile ((_IO_FILE *) fp);
#endif
/* The old head becomes this stream's successor; this stream becomes
   the new head. */
fp->file._chain = (_IO_FILE *) _IO_list_all;
_IO_list_all = fp;
#ifdef _IO_MTSAFE_IO
_IO_funlockfile ((_IO_FILE *) fp);
run_fp = NULL;
_IO_lock_unlock (list_all_lock);
_IO_cleanup_region_end (0);
#endif
}
}
libc_hidden_def (_IO_link_in)

再看最后的_IO_file_fopen,指向_IO_new_file_fopen

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
// glibc/libio/fileops.c

/* Open FILENAME on the stream FP according to the fopen-style MODE
   string.

   The first character of MODE ('r', 'w' or 'a') selects the open(2)
   access mode and the initial read/write restriction flags; up to six
   further characters are scanned for modifiers.  The file is then
   opened through _IO_file_open.  If the rest of MODE contains ",ccs=",
   the named character-set conversion is loaded and the stream is
   switched to wide orientation.

   Returns the stream on success.  Returns a null pointer when FP is
   already open, when the first mode character is not recognized (errno
   EINVAL), when _IO_file_open fails, when the charset name cannot be
   allocated (errno preserved from malloc), or when the conversion cannot
   be loaded (errno EINVAL).  IS32NOT64 is passed through to
   _IO_file_open. */
_IO_FILE *
_IO_new_file_fopen (_IO_FILE *fp, const char *filename, const char *mode,
int is32not64)
{
int oflags = 0, omode;
int read_write;
int oprot = 0666;
int i;
_IO_FILE *result;
const char *cs;
const char *last_recognized;

if (_IO_file_is_open (fp)) // refuse a stream that is already open (a freshly initialized stream has _fileno == -1)
return 0;
switch (*mode)
{
case 'r':
omode = O_RDONLY;
read_write = _IO_NO_WRITES;
break;
case 'w':
omode = O_WRONLY;
oflags = O_CREAT|O_TRUNC;
read_write = _IO_NO_READS;
break;
case 'a':
omode = O_WRONLY;
oflags = O_CREAT|O_APPEND;
read_write = _IO_NO_READS|_IO_IS_APPENDING;
break;
default:
__set_errno (EINVAL);
return NULL;
}
/* The base open mode has been selected from the first mode character. */

last_recognized = mode;
/* Examine at most six more mode characters. */
for (i = 1; i < 7; ++i)
{
switch (*++mode)
{
case '\0':
break;
case '+':
omode = O_RDWR;
/* Keep only the append bit: both the no-read and the no-write
   restrictions are dropped. */
read_write &= _IO_IS_APPENDING;
last_recognized = mode;
continue;
case 'x':
oflags |= O_EXCL;
last_recognized = mode;
continue;
case 'b':
last_recognized = mode;
continue;
/* The 'm', 'c' and 'e' cases below do not advance last_recognized. */
case 'm':
fp->_flags2 |= _IO_FLAGS2_MMAP;
continue;
case 'c':
fp->_flags2 |= _IO_FLAGS2_NOTCANCEL;
continue;
case 'e':
oflags |= O_CLOEXEC;
fp->_flags2 |= _IO_FLAGS2_CLOEXEC;
continue;
default:
/* Ignore. */
continue;
}
break;
}
/* The additional mode modifiers have been parsed. */

result = _IO_file_open (fp, filename, omode|oflags, oprot, read_write,
is32not64);

if (result != NULL)
{
/* Test whether the mode string specifies the conversion. */
cs = strstr (last_recognized + 1, ",ccs=");
/* Look for ",ccs=" in the remainder of the mode string. */

if (cs != NULL)
{
/* Yep. Load the appropriate conversions and set the orientation
to wide. */
struct gconv_fcts fcts;
struct _IO_codecvt *cc;
/* cs + 5 skips the five characters of ",ccs="; the charset name runs
   up to the next ',' or the end of the string. */
char *endp = __strchrnul (cs + 5, ',');
char *ccs = malloc (endp - (cs + 5) + 3);

if (ccs == NULL)
{
int malloc_err = errno; /* Whatever malloc failed with. */
(void) _IO_file_close_it (fp);
__set_errno (malloc_err);
return NULL;
}

/* Copy the charset name and terminate it. */
*((char *) __mempcpy (ccs, cs + 5, endp - (cs + 5))) = '\0';
strip (ccs, ccs);

if (__wcsmbs_named_conv (&fcts, ccs[2] == '\0'
? upstr (ccs, cs + 5) : ccs) != 0)
{
/* Something went wrong, we cannot load the conversion modules.
This means we cannot proceed since the user explicitly asked
for these. */
(void) _IO_file_close_it (fp);
free (ccs);
__set_errno (EINVAL);
return NULL;
}

free (ccs);

assert (fcts.towc_nsteps == 1);
assert (fcts.tomb_nsteps == 1);

fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end;
fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base;

/* Clear the state. We start all over again. */
memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t));
memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t));

cc = fp->_codecvt = &fp->_wide_data->_codecvt;

/* The functions are always the same. */
*cc = __libio_codecvt;

/* Input direction: the "to wide character" conversion steps. */
cc->__cd_in.__cd.__nsteps = fcts.towc_nsteps;
cc->__cd_in.__cd.__steps = fcts.towc;

cc->__cd_in.__cd.__data[0].__invocation_counter = 0;
cc->__cd_in.__cd.__data[0].__internal_use = 1;
cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST;
cc->__cd_in.__cd.__data[0].__statep = &result->_wide_data->_IO_state;

/* Output direction: the "to multibyte" conversion steps. */
cc->__cd_out.__cd.__nsteps = fcts.tomb_nsteps;
cc->__cd_out.__cd.__steps = fcts.tomb;

cc->__cd_out.__cd.__data[0].__invocation_counter = 0;
cc->__cd_out.__cd.__data[0].__internal_use = 1;
cc->__cd_out.__cd.__data[0].__flags
= __GCONV_IS_LAST | __GCONV_TRANSLIT;
cc->__cd_out.__cd.__data[0].__statep =
&result->_wide_data->_IO_state;

/* From now on use the wide character callback functions. */
_IO_JUMPS_FILE_plus (fp) = fp->_wide_data->_wide_vtable;

/* Set the mode now. */
result->_mode = 1;
}
}

return result;
}
libc_hidden_ver (_IO_new_file_fopen, _IO_file_fopen)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
// glibc/libio/fileops.c

/* Open FILENAME with the open(2) flags POSIX_MODE and permission bits
   PROT, and attach the resulting descriptor to FP.  O_LARGEFILE is added
   to the flags unless IS32NOT64 is nonzero.  READ_WRITE supplies the new
   values for the _IO_NO_READS, _IO_NO_WRITES and _IO_IS_APPENDING bits.
   Returns FP on success; returns NULL if the open fails, or if the
   append-mode seek to end of file fails with an errno other than ESPIPE
   (the descriptor is closed in that case). */
_IO_FILE *
_IO_file_open (_IO_FILE *fp, const char *filename, int posix_mode, int prot,
int read_write, int is32not64)
{
int fdesc;
if (__glibc_unlikely (fp->_flags2 & _IO_FLAGS2_NOTCANCEL))
fdesc = __open_nocancel (filename,
posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot);
else
fdesc = __open (filename, posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot);
/* __open_nocancel is used when _IO_FLAGS2_NOTCANCEL is set on the
   stream, the regular __open otherwise. */

if (fdesc < 0)
return NULL;
fp->_fileno = fdesc;
_IO_mask_flags (fp, read_write,_IO_NO_READS+_IO_NO_WRITES+_IO_IS_APPENDING);
/* For append mode, send the file offset to the end of the file. Don't
update the offset cache though, since the file handle is not active. */
if ((read_write & (_IO_IS_APPENDING | _IO_NO_READS))
== (_IO_IS_APPENDING | _IO_NO_READS))
{
_IO_off64_t new_pos = _IO_SYSSEEK (fp, 0, _IO_seek_end);
if (new_pos == _IO_pos_BAD && errno != ESPIPE)
{
__close_nocancel (fdesc);
return NULL;
}
}
/* The seek above runs only when both the append bit and the no-read bit
   are set, i.e. for write-only append mode. */

_IO_link_in ((struct _IO_FILE_plus *) fp);
/* On the fopen path the stream was already linked by
   _IO_new_file_init_internal, so _IO_link_in finds _IO_LINKED set and
   does nothing here. */

return fp;
}
libc_hidden_def (_IO_file_open)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// glibc/sysdeps/unix/sysv/linux/open64.c

/* Open FILE with the flags OFLAG by issuing the openat system call
   relative to AT_FDCWD, with EXTRA_OPEN_FLAGS OR-ed into the flags.
   The optional third argument (the creation mode) is read only when
   __OPEN_NEEDS_MODE (OFLAG) is true; otherwise a mode of 0 is passed.
   Returns the value produced by SYSCALL_CANCEL.  The excerpt omitted
   the `int` return type, which left the definition with an implicit int
   (invalid since C99). */
int
__libc_open64 (const char *file, int oflag, ...)
{
  int mode = 0;

  /* Fetching the variadic argument when the caller did not pass one
     would be undefined, hence the guard. */
  if (__OPEN_NEEDS_MODE (oflag))
    {
      va_list arg;
      va_start (arg, oflag);
      mode = va_arg (arg, int);
      va_end (arg);
    }

  return SYSCALL_CANCEL (openat, AT_FDCWD, file, oflag | EXTRA_OPEN_FLAGS,
                         mode);
}

fwrite


参考资料

CATALOG
  1. 1. 文件结构
    1. 1.1. _IO_FILE
    2. 1.2. vtable
    3. 1.3. 标志符
      1. 1.3.1. _flags
      2. 1.3.2. _flags2
  2. 2. I/O函数
    1. 2.1. fopen
    2. 2.2. fwrite
  3. 3. 参考资料