APUE——文件与文件IO

1 标准IO与文件IO（系统调用IO）的比较

文件IO的响应速度更快，但是标准IO的吞吐量更大！
标准IO比文件IO多了一步写入stdio buffer的操作，可以参考下图

注意标准IO和文件IO不要混用，因为两者的文件指针指向的buf不一样，前者是std buf，后者是内核buf（file*里有），可以用如下两个函数进行转换：

int fileno(FILE *stream);                 // 将FILE指针转成文件描述符
FILE *fdopen(int fd, const char *mode);   // 将文件描述符转成FILE指针

2 文件操作

2.1 mode_t位图分析（16位）

关于mode_t类型，st_mode是一个16位的位图，用于表示文件类型，文件访问权限，以及特殊权限位

键入ls -l 查看文件状态，其中第一列为st_mode，其为16位的位图

实际16位如下所示：高4位表示文件类型，接下来3位为特殊权限位（SUID、SGID、sticky），低9位表示文件访问权限

2.2 stat获取文件属性

int stat(const char *pathname, struct stat *statbuf);   // 结果通过statbuf返回
int fstat(int fd, struct stat *statbuf);
int lstat(const char *pathname, struct stat *statbuf);
lstat与stat的区别在于：遇到符号链接文件时，lstat不会跟随链接，只返回链接文件本身的属性；而stat会跟随链接，返回其指向的源文件的属性

struct stat结构体如下所示：

stat命令：

这里区分lstat与stat，如果入参为link类型，则stat描述的是符号链接链接的源文件，而lstat则描述符号链接文件

#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

/*
 * Demo: compare stat() and lstat() on the same path.
 *
 * Usage: prog <pathname>
 *
 * Prints the st_blocks value reported by stat() ("no1") and by lstat()
 * ("no2") for argv[1]. Exits with status 1 if the argument is missing or
 * either call fails.
 */
int main(int argc, char **argv)
{
    /* Plain automatic structs: no heap allocation (or failure path) needed. */
    struct stat followed;   /* filled in by stat()  */
    struct stat link_self;  /* filled in by lstat() */

    if (argc < 2) {
        fprintf(stderr, "Usage: %s <pathname>\n", argc > 0 ? argv[0] : "prog");
        exit(1);
    }

    /* Check each call right away so perror() reports the errno of that call. */
    if (stat(argv[1], &followed) < 0) {
        perror("stat");
        exit(1);
    }
    if (lstat(argv[1], &link_self) < 0) {
        perror("lstat");
        exit(1);
    }

    /* st_blocks has type blkcnt_t, which need not be int; cast to a known
     * width so the argument matches the conversion specifier. */
    printf("no1 of %lld\n", (long long)followed.st_blocks);
    printf("no2 of %lld\n", (long long)link_self.st_blocks);

    exit(0);
}

2.3 umask

umask主要为了防止权限过松的文件
umask默认为0022，用open创建文件或用mkdir创建目录时，实际权限为 mode & ~umask（例如 0666 & ~0022 = 0644）
mode即创建时指定的文件权限位图，例如后文open例子中的第三个参数0600

mode_t umask(mode_t mask);

umask()将调用进程的文件权限创建掩码(umask)设置为mask & 0777（即只使用掩码中的文件权限位），并返回之前的掩码值。

2.4 chmod

chmod可以更改权限，例如chmod 666 ./hellodup6, 更改成666权限

int chmod(const char *pathname, mode_t mode);
int fchmod(int fd, mode_t mode);

2.5 目录创建和销毁

增加文件目录和删除文件目录
mkdir

NAME
    mkdir, mkdirat - create a directory
SYNOPSIS
    #include <sys/stat.h>
    #include <sys/types.h>
    int mkdir(const char *pathname, mode_t mode);

rmdir

NAME
    rmdir - delete a directory
SYNOPSIS
    #include <unistd.h>
    int rmdir(const char *pathname);  // 删除空目录
DESCRIPTION
    rmdir() deletes a directory, which must be empty.

2.6 更改与获取当前工作路径

chdir() = cd

NAME
    chdir, fchdir - change working directory
SYNOPSIS
    #include <unistd.h>
    int chdir(const char *path);
    int fchdir(int fd);
DESCRIPTION
    chdir() changes the current working directory of the calling process to the directory specified in path.
    fchdir() is identical to chdir(); the only difference is that the directory is given as an open file descriptor.

获取当前路径，getcwd()= pwd

NAME
    getcwd, getwd, get_current_dir_name - get current working directory
SYNOPSIS
    #include <unistd.h>
    char *getcwd(char *buf, size_t size);
输入参数buf为存放结果的缓冲区，size为其长度；如果当前目录的路径长度超过size，则报错（返回NULL）。如果buf传NULL，则（glibc下）会自动malloc，用完需要free。注意，成功时返回值指向当前目录字符串，buf中即为当前目录

使用getcwd与readdir，opendir，获取当前目录下所有文件

#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <errno.h>
#include <unistd.h>

/* Large enough for ordinary working-directory paths; the previous value of
 * 50 made getcwd() fail for any path of 50 bytes or more. */
#define BUFSIZE 4096

/*
 * Demo: print the current working directory, then the name of every entry
 * in it, using getcwd() + opendir()/readdir()/closedir().
 *
 * Exits with status 1 if getcwd(), opendir() or readdir() fails.
 */
int main(int argc, char **argv)
{
    DIR *dir;
    struct dirent *entry;
    char buf[BUFSIZE];
    /* Must be a pointer, not an array: the result of getcwd() is assigned to
     * it, and an array name cannot be assigned to. */
    char *pwd = NULL;

    (void)argc;
    (void)argv;

    /* NOTE(review): passing NULL as buf makes glibc malloc the result (an
     * extension); a caller doing that must free() the returned pointer. */
    pwd = getcwd(buf, BUFSIZE);
    if (pwd == NULL) {
        /* Without this check buf would be printed uninitialised and
         * opendir() would be called with NULL. */
        perror("getcwd");
        exit(1);
    }
    puts(pwd);

    dir = opendir(pwd);
    if (dir == NULL) {
        perror("dir");
        exit(1);
    }

    for (;;) {
        /* readdir() returns NULL both at end-of-directory and on error; only
         * errno tells them apart, so it must be cleared before every call. */
        errno = 0;
        entry = readdir(dir);
        if (entry == NULL) {
            if (errno != 0) {
                perror("readdir");
                closedir(dir);
                exit(1);
            }
            break;  /* end of directory */
        }
        puts(entry->d_name);
    }

    closedir(dir);
    exit(0);
}

可以用glob函数，获取某个目录下（通配符）的所有文件。
glob函数使用通配符pattern来匹配路径，其结果与shell把通配符展开后传给main的argc、argv类似，例如执行 ./glob ./*.c
glob申请了gl_pathv的内存，需要搭配globfree函数来销毁

typedef struct {
    size_t   gl_pathc;    /* Count of paths matched so far  */
    char   **gl_pathv;    /* List of matched pathnames.  */
    size_t   gl_offs;     /* Slots to reserve in gl_pathv.  */
} glob_t;

这里注意，glob_t.gl_pathc≈argc ，glob_t.gl_pathv≈argv

#include <stdio.h>
#include <stdlib.h>
#include <glob.h>

/* Patterns are matched by glob() itself, not by the shell.
 * "./*" matches non-hidden entries and "./.*" matches hidden ones; a bare
 * "./" cannot be used as the pattern. (The patterns previously contained a
 * stray space after the slash, so they matched nothing.) */
#define PAT "./*.c"
#define DIR "./*"

/*
 * Demo: list paths with glob().
 *
 * Prints, separated by dashed lines:
 *   1. every path matching PAT,
 *   2. the program's own command-line arguments (for comparison with what
 *      the shell expands, e.g. `./glob ./*.c`),
 *   3. every path matching DIR.
 *
 * Exits with status 1 and prints the glob() return code if either glob()
 * call fails (this includes "no match").
 */
int main(int argc, char **argv)
{
    glob_t globres, globdir;
    int revalue, rdvalue;

    revalue = glob(PAT, 0, NULL, &globres);
    if (revalue) {
        printf("Errror code = %d\n", revalue);
        exit(1);
    }

    rdvalue = glob(DIR, 0, NULL, &globdir);
    if (rdvalue) {
        printf("Errror code = %d\n", rdvalue);
        globfree(&globres);
        exit(1);
    }

    /* gl_pathc is a size_t, so index with size_t to avoid a signed/unsigned
     * comparison. */
    for (size_t i = 0; i < globres.gl_pathc; i++) {
        printf("%s\n", globres.gl_pathv[i]);
    }

    puts("--------------------------------------");
    for (int i = 1; i < argc; i++) {
        printf("%s\n", argv[i]);
    }

    puts("--------------------------------------");
    for (size_t i = 0; i < globdir.gl_pathc; i++) {
        printf("%s\n", globdir.gl_pathv[i]);
    }

    /* globfree() takes the glob_t that glob() filled in, not glob()'s
     * integer return code. */
    globfree(&globres);
    globfree(&globdir);
    exit(0);
}

上述两种获得当前路径文件的方法，glob与readdir等函数，其实际可能是利用了dentry结构中的双向链表遍历，同一层dentry的子目录来获取

2.8 目录流操作函数

目录操作与文件操作类似，均有打开、读取、定位和关闭等函数：

DIR *opendir(const char *name);       // 打开目录流
DIR *fdopendir(int fd);
struct dirent *readdir(DIR *dirp);
int closedir(DIR *dirp);
void rewinddir(DIR *dirp);            // 复位目录流，回到头部
void seekdir(DIR *dirp, long loc);    // 设置目录流位置
long telldir(DIR *dirp);              // 告诉目录流当前位置

这里分析下readdir函数：

struct dirent *readdir(DIR *dirp);
返回值：正常时为struct dirent *类型的指针；读到目录末尾时返回NULL且不改变errno；出错时返回NULL并设置errno（因此调用前应先把errno清零，返回NULL后再根据errno是否为0判断是否出错）
struct dirent类型如下所示：
struct dirent {
    ino_t          d_ino;       /* Inode number */
    off_t          d_off;       /* Not an offset; see below 这里d_off与telldir的返回值是一样的，表示目录流的位置 */
    unsigned short d_reclen;    /* Length of this record */
    unsigned char  d_type;      /* Type of file; not supported by all filesystem types */
    char           d_name[256]; /* Null-terminated filename 文件名 */
};
d_type有如下8种类型，其中7种为基础文件类型
DT_BLK      This is a block device.
DT_CHR      This is a character device.
DT_DIR      This is a directory.
DT_FIFO     This is a named pipe (FIFO).
DT_LNK      This is a symbolic link.
DT_REG      This is a regular file.
DT_SOCK     This is a UNIX domain socket.
DT_UNKNOWN  The file type could not be determined.

使用目录流操作函数来获取某目录下的所有文件，

#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <errno.h>

/*
 * Demo: print the name of every entry in the directory given as argv[1],
 * using the directory-stream functions opendir()/readdir()/closedir().
 *
 * Usage: prog <directory>
 *
 * Exits with status 1 if the argument is missing or opendir()/readdir()
 * fails.
 */
int main(int argc, char **argv)
{
    DIR *dir;
    struct dirent *entry;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s <directory>\n", argc > 0 ? argv[0] : "prog");
        exit(1);
    }

    dir = opendir(argv[1]);
    if (dir == NULL) {
        perror("dir");
        exit(1);
    }

    for (;;) {
        /* readdir() returns NULL both at end-of-directory and on error; only
         * errno tells them apart, so it must be cleared before every call. */
        errno = 0;
        entry = readdir(dir);  /* each call advances the directory stream */
        if (entry == NULL) {
            if (errno != 0) {
                perror("readdir");
                closedir(dir);
                exit(1);
            }
            break;  /* end of directory */
        }
        puts(entry->d_name);
    }

    closedir(dir);
    exit(0);
}

2.9 口令文件与阴影文件操作

2.9.1 口令文件

通过vim /etc/passwd可以查看口令文件

这里字段解析如下所示

struct passwd *getpwnam(const char *name);   // 通过用户名访问，获得passwd
struct passwd *getpwuid(uid_t uid);          // 通过UID获得passwd
The passwd structure is defined in <pwd.h> as follows:
struct passwd {
    char   *pw_name;       /* username */
    char   *pw_passwd;     /* user password */
    uid_t   pw_uid;        /* user ID */
    gid_t   pw_gid;        /* group ID */
    char   *pw_gecos;      /* user information */
    char   *pw_dir;        /* home directory */
    char   *pw_shell;      /* shell program */
};

2.9.2 阴影文件

加密口令是单向加密的，不能反向解析出密码

函数分析1：

struct spwd *getspnam(const char *name);   // 通过用户名访问，获取spwd结构体
struct spwd *getspent(void);
struct spwd {
    char *sp_namp;     /* Login name */
    char *sp_pwdp;     /* Encrypted password */
    long  sp_lstchg;   /* Date of last change (measured in days since 1970-01-01 00:00:00 +0000 (UTC)) */
    long  sp_min;      /* Min # of days between changes */
    long  sp_max;      /* Max # of days between changes */
    long  sp_warn;     /* # of days before password expires to warn user to change it */
    long  sp_inact;    /* # of days after password expires until account is disabled */
    long  sp_expire;   /* Date when account expires (measured in days since 1970-01-01 00:00:00 +0000 (UTC)) */
    unsigned long sp_flag;  /* Reserved */
};

函数分析2：

char *getpass(const char *prompt);                // 不回显地读取密码
char *crypt(const char *key, const char *salt);   // 通过key（密码）和salt加密，得到密文$id$salt$encrypted；salt参数中只会提取$id$salt$部分，其中id为加密方法！

加密方法有如下几种，crypt将Method的ID和salt合起来，组成$id$salt$前缀。

ID  | Method
─────────────────────────────────────────────────────────
1   | MD5
2a  | Blowfish (not in mainline glibc; added in some Linux distributions)
5   | SHA-256 (since glibc 2.7)
6   | SHA-512 (since glibc 2.7)

例子分析：通过getspnam获取spwd，再用crypt对输入的密码加密，与spwd第二个成员（sp_pwdp）中保存的密文比对，判断输入是否正确

#define _XOPEN_SOURCE
/* With only _XOPEN_SOURCE defined, glibc hides getpass() (and, on newer
 * releases, crypt()); _DEFAULT_SOURCE makes those declarations visible. */
#define _DEFAULT_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <shadow.h>
#include <string.h>

#define PWSIZE 50

/*
 * Demo: check a typed password against the shadow entry of user argv[1].
 *
 * Usage: prog <username>
 *
 * Reads a password without echo, hashes it with crypt() using the stored
 * hash as the salt argument, prints the resulting hash, and prints
 * "correct" when it equals the stored one.
 *
 * Exits with status 1 if the argument is missing or getpass(), getspnam()
 * or crypt() fails.
 */
int main(int argc, char **argv)
{
    char *pw;
    char *crt;
    struct spwd *newpd;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s <username>\n", argc > 0 ? argv[0] : "prog");
        exit(1);
    }

    pw = getpass("input a password: ");
    if (pw == NULL) {
        perror("getpass");
        exit(1);
    }
    /* The plaintext password is deliberately not printed: doing so would
     * defeat the purpose of reading it without echo. */

    newpd = getspnam(argv[1]);
    if (newpd == NULL) {
        perror("getspnam");
        exit(1);
    }

    /* The stored hash is passed as the salt argument, so crypt() hashes the
     * input with the same settings. */
    crt = crypt(pw, newpd->sp_pwdp);
    if (crt == NULL) {
        /* Must be checked before crt is used: puts(NULL) is undefined. */
        perror("crypt");
        exit(1);
    }
    puts(crt);

    if (strcmp(newpd->sp_pwdp, crt) == 0) {
        puts("correct");
    }

    exit(0);
}

3 open函数

int open(const char *pathname, int flags);
int open(const char *pathname, int flags, mode_t mode);

flags可以如下设置

mode可以如下设置

open("hellodup5", O_RDWR|O_CREAT|O_TRUNC, 0600);
0600 = user read 400 + write 200，其他为0

其中前三个表示文件拥有者的权限，中间三个表示文件所属组拥有的权限，最后三个表示其他用户拥有的权限。

4 lseek函数（与空洞文件）

#include <unistd.h>
off_t lseek(int filedes, off_t offset, int whence);

返回值：若成功则返回新的文件偏移量，若出错则返回-1。

按系统默认情况，当打开一个文件时，除非指定O_APPEND选项，否则该偏移量被设置为0。
lseek仅将当前的文件偏移量记录在内核中，它并不引起任何I/O操作。然后，该偏移量用于下一个读或写操作。
文件偏移量可以大于文件的当前长度，在这种情况下，对该文件的下一次写将加长该文件，并在文件中构成一个空洞，文件中空洞均为0。

/* Ask for a 64-bit off_t so the 5 GiB offset below is representable even on
 * 32-bit targets. Must come before any system header. */
#define _FILE_OFFSET_BITS 64
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * Demo: create a 5 GiB file with a hole, named by argv[1].
 *
 * Usage: prog <pathname>
 *
 * Creates/truncates the file, seeks to offset 5 GiB - 1 and writes a single
 * zero byte there. Exits with status 1 if the argument is missing or
 * open(), lseek() or write() fails.
 */
int main(int argc, char *argv[])
{
    int fd;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s <pathname>\n", argc > 0 ? argv[0] : "prog");
        exit(1);
    }

    fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC, 0600);
    if (fd < 0) {
        perror("open()");
        exit(1);
    }

    /* The LL suffixes keep the multiplication in long long; in plain int
     * arithmetic it would overflow. */
    if (lseek(fd, 5LL * 1024LL * 1024LL * 1024LL - 1LL, SEEK_SET) == (off_t)-1) {
        perror("lseek()");
        close(fd);
        exit(1);
    }

    /* lseek() only moves the offset; this one-byte write (the string's
     * terminating NUL) is what actually extends the file to 5 GiB. */
    if (write(fd, "", 1) != 1) {
        perror("write()");
        close(fd);
        exit(1);
    }

    close(fd);
    exit(0);
}

通过 cp操作后， 使用stat或者ls -l查看文件状态，发现其Blocks为0，因为cp发现其为空洞文件，则没有执行write操作

lseek = fseek + ftell , 先执行fseek，设置文件指针，后通过ftell获取文件指针

5 程序重定向（dup=duplication）

dup与dup2，fcntl，类似于echo “???”> PATH

int dup(int oldfd);                 // 将oldfd复制到当前可用的最小文件描述符上
int fcntl(int fd, F_DUPFD, arg);    // fcntl(fd,F_DUPFD,0) = dup(fd)；注意，返回的是大于等于arg的最小可用文件描述符
    Duplicate the file descriptor fd using the lowest-numbered available file descriptor greater than or equal to arg. This is different from dup2(2), which uses exactly the file descriptor specified.
int dup2(int oldfd, int newfd);     // 如果newfd已经打开，则先close(newfd)，再把oldfd复制到newfd上（不再取最小可用值，而是指定的newfd）；如果oldfd = newfd，则dup2什么都不会做，直接返回newfd

dup（先close(1)再dup(fd)是两步操作，不是原子的，可以用dup2代替）
例子1

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * Demo (example 1): redirect standard output to the file "hellodup5" using
 * close(1) followed by dup().
 *
 * After the redirection the printf() output ends up in the file. Exits with
 * status 1 if open() or dup() fails or the duplicate does not land on
 * descriptor 1.
 *
 * An alternative way to obtain fd is fopen("hellodup1", "w+") followed by
 * fileno().
 */
int main(void)
{
    int fd;

    fd = open("hellodup5", O_RDWR | O_CREAT | O_TRUNC, 0600);
    if (fd < 0) {
        perror("open");
        exit(1);
    }

    /* If descriptor 1 was already closed at start-up, open() returned 1 and
     * stdout already refers to the file; closing 1 here would close the file
     * just opened. */
    if (fd != 1) {
        /* These two steps are not atomic; dup2(fd, 1) does both at once. */
        close(1);
        if (dup(fd) != 1) {
            /* dup() picks the lowest free descriptor, which is not 1 if
             * descriptor 0 happens to be closed. */
            perror("dup");
            exit(1);
        }
        close(fd);
    }

    printf("helloworld\n");
    exit(0);
}

例子2

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * Demo (example 2): redirect standard output to the file "hellodup5" using
 * dup2().
 *
 * After the redirection the printf() output ends up in the file. Exits with
 * status 1 if open() or dup2() fails.
 *
 * An alternative way to obtain fd is fopen("hellodup1", "w+") followed by
 * fileno().
 */
int main(void)
{
    int fd;

    fd = open("hellodup5", O_RDWR | O_CREAT | O_TRUNC, 0600);
    if (fd < 0) {
        perror("open");
        exit(1);
    }

    /* If fd is already 1, dup2() does nothing and just returns 1. */
    if (dup2(fd, 1) < 0) {
        perror("dup2");
        close(fd);
        exit(1);
    }

    /* Only close the original when it is a different descriptor; otherwise
     * this would close the redirected stdout itself. */
    if (fd != 1) {
        close(fd);
    }

    printf("helloworld\n");
    exit(0);
}

dup2，指向性替换fd

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>

/*
 * Demo: redirect standard error to the file "helloworld" with dup2().
 *
 * Opens the file through stdio, makes descriptor 2 a duplicate of the
 * file's descriptor, then writes "hello+" ten times to stderr. Progress
 * messages are printed on stdout. Exits with status 1 if fopen() or dup2()
 * fails.
 */
int main(void)
{
    FILE *fp;
    int fd;

    fp = fopen("helloworld", "w+");
    if (fp == NULL) {
        perror("fopen");
        exit(1);
    }

    fd = fileno(fp);
    printf("fd = %d\n", fd);

    /* dup2(fd, 2) is the atomic equivalent of close(2) followed by dup(fd)
     * (or fcntl(fd, F_DUPFD, ...)). */
    if (dup2(fd, 2) < 0) {
        perror("dup2");
        fclose(fp);
        exit(1);
    }
    printf("???????\n");

    for (int i = 0; i < 10; i++) {
        fprintf(stderr, "hello+");
    }
    fflush(stderr);

    /* Was "end/n": a forward slash instead of the newline escape. */
    printf("end\n");

    fclose(fp);
    exit(0);
}