Pipes
Processes
COP-3402
Table of Contents
Processes and standard I/O
Each process given standard input, output, and error
Why standard I/O?
Why do this? Abstract away I/O decisions from the program developer. E.g., the ls command need not decide where to store output.
Who sets standard I/O?
Who decides where to direct standard I/O? The parent process sets up the I/O before exec. Another reason for separating process creation and program execution (fork and exec).
Interprocess Communication
- Processes are isolated: separate memory spaces
- Fork/exec
- No longer same address space
- No longer same running program
- Process isolation protects from accessing each other's memory
- How do we pass information between them?
Pipes
Redirect the stdout of one process to the stdin of another process
find /usr/bin | grep zip
What does this pipeline do?
Creates two processes, one for find and one for grep, redirects the standard out of find to the standard in of grep.
Implementing pipes: pipe()
pipe()
creates two file descriptors
- write end takes in data
- read end reads out the data (FIFO)
FIFO means first in first out, i.e., a queue.
(Diagram)
find /usr/bin | grep zip
- bash creates a pipe
- bash forks two new processes
- bash redirects p1's stdout to the write end of the pipe
- what syscall to use for redirection?
- bash redirects p2's stdin from the read end of the pipe
- bash invokes exec for find in p1
- bash invokes exec for grep in p2
(Diagram)
find /usr/bin | grep zip | wc -l
Add an additional pipeline processor that counts the resulting lines of output
Creating pipes
Symbol | Reference | Reading |
---|---|---|
pipe() | man 2 pipe |
LPI 44.2 |
man 2 pipe
pipe.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>

/*
 * pipe.c: the parent writes argv[1] into a pipe; the child reads the pipe
 * one byte at a time and copies it to standard output, followed by a newline.
 *
 * Usage: pipe <string>
 * Exits with EXIT_FAILURE if no string is given or a system call fails.
 */
int main(int argc, char **argv)
{
  int pipefd[2];  // pipefd[0] is the read end, pipefd[1] is the write end
  char buf;
  pid_t cpid;

  // argv[1] is NULL when no argument is given; strlen(NULL) is undefined.
  if (argc < 2) {
    fprintf(stderr, "Usage: %s <string>\n", argc > 0 ? argv[0] : "pipe");
    exit(EXIT_FAILURE);
  }

  if (pipe(pipefd) == -1) {
    perror("pipe");
    exit(EXIT_FAILURE);
  }

  cpid = fork();
  if (-1 == cpid) {
    perror("fork");
    exit(EXIT_FAILURE);
  }

  if (0 == cpid) {  // child
    close(pipefd[1]);  // close write end (parent does writing)

    // read() returns 0 (EOF) once every write end of the pipe is closed.
    while (read(pipefd[0], &buf, 1) > 0) {
      if (write(STDOUT_FILENO, &buf, 1) != 1) {
        perror("write");
        _exit(EXIT_FAILURE);
      }
    }
    if (write(STDOUT_FILENO, "\n", 1) != 1) {
      perror("write");
      _exit(EXIT_FAILURE);
    }

    close(pipefd[0]);
    _exit(EXIT_SUCCESS);
  } else {  // parent
    close(pipefd[0]);  // close read end (child does reading)

    if (write(pipefd[1], argv[1], strlen(argv[1])) == -1) {
      perror("write");
      exit(EXIT_FAILURE);
    }

    close(pipefd[1]);  // closing the write end lets the child see EOF
    wait(NULL);
    exit(EXIT_SUCCESS);
  }
}
Reminder that parent and child are effectively running in parallel (or at least separately and independently).
Piping between different programs
Example: "ls | wc"
Setup
- Three processes
- Master process
- ls
- wc
- Master process
- Creates the pipe
- Fork/exec the two programs
- Replace stdio with the pipe
Algorithm for master process
- Create pipe
- Fork process for ls
- Replace ls's stdout with the pipe
- Run ls with the new stdout
- Fork process for wc
- Replace wc's stdin with the pipe
- Run wc with the new stdin
- Wait for processes to finish
Questions:
- Why does forking itself not run ls or wc? What does forking do?
- Why is ls's stdout replaced, not its stdin?
- What happens to ls's stdin?
Diagram
- Master process: filter.c
- Create pipe
- Fork new process
- Redirect process's output to the pipe
- Run ls
- Fork another process
- Redirect process's input to the pipe
- Run wc
- Let everything run as usual
- Note the benefit of having stdio
- Programs don't need to be written specifically for piping. Availability of stdio makes this possible.
filter.c
- A program that runs "ls | wc"
- Create a pipe
- Fork-exec two processes
- Make ls write to the pipe
- Make wc read from the pipe
Implemented in the style of stepwise refinement.
Pseudo-code
pipefd = new pipe                    // create a new pipe
leftpid = fork                       // fork process for ls
if in child                          // code for the child
  close(pipefd[read_end])            // close read end, since left child writes
  dup2(pipefd[write_end], stdout)    // replace stdout with the pipe
  exec("ls")                         // ls is running, control not returned here
// back in the parent
rightpid = fork
if in child
  close(pipefd[write_end])           // close the write end, since right child reads
  dup2(pipefd[read_end], stdin)      // replace stdin with the pipe's output
  exec("wc")                         // wc is running, control not returned here
// back in the parent
close(pipefd[read_end])
close(pipefd[write_end])
wait()                               // wait for one child
wait()                               // wait for the other
Documentation
Symbol | Reference | Reading |
---|---|---|
pipe() | man 2 pipe |
LPI 44.2 |
dup2() | man 2 dup2 |
LPI 44.4 |
pipe_exec.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>  // pipe(), dup2()

// man 2 pipe
// man 2 dup2

/*
 * pipe_exec.c: runs the pipeline "find /usr/bin | grep zip".
 *
 * The parent creates one pipe and forks two children. The left child makes
 * the pipe's write end its standard output and execs find; the right child
 * makes the pipe's read end its standard input and execs grep. The parent
 * closes both pipe ends and waits for both children.
 *
 * Exits with EXIT_FAILURE if any system call in the parent fails.
 */
int main(int argc, char **argv)
{
  int pipefd[2];
  pid_t leftpid, rightpid;

  if (pipe(pipefd) == -1) {
    perror("pipe");
    exit(EXIT_FAILURE);
  }
  // pipefd[1] is the write end
  // pipefd[0] is the read end

  // find /usr/bin | grep zip

  // create a process for the first program, find
  leftpid = fork();
  if (-1 == leftpid) {
    perror("fork");
    exit(EXIT_FAILURE);
  }

  if (0 == leftpid) {  // child
    // close read end, since that will be used by the second program
    if (close(pipefd[0]) == -1) {
      perror("close");
      _exit(EXIT_FAILURE);
    }

    // Diagnostics go to stderr: stdout is about to become the pipe, and any
    // text still sitting in the stdout buffer is discarded by execve().
    fprintf(stderr, "rightpipe dup2(%d, %d)\n", pipefd[1], STDOUT_FILENO);

    // redirect the program's standard out to the input of the pipe
    // (write end, which is pipefd[1])
    if (dup2(pipefd[1], STDOUT_FILENO) == -1) {
      perror("dup2");
      _exit(EXIT_FAILURE);
    }

    // the original descriptor is no longer needed once it is duplicated
    if (close(pipefd[1]) == -1) {
      perror("close");
      _exit(EXIT_FAILURE);
    }

    char *prog = "/usr/bin/find";
    char *newargv[] = { "/usr/bin/find", "/usr/bin", NULL };
    char *newenv[] = { NULL };
    execve(prog, newargv, newenv);
    // execve() only returns on failure
    perror("execve");
    _exit(EXIT_FAILURE);
  }

  // in parent, continue to fork second program
  rightpid = fork();
  if (-1 == rightpid) {
    perror("fork");
    exit(EXIT_FAILURE);
  }

  if (0 == rightpid) {  // child
    // close write end, since that is being used by the first program
    if (close(pipefd[1]) == -1) {
      perror("close");
      _exit(EXIT_FAILURE);
    }

    // Diagnostics go to stderr so they do not mix with grep's output.
    fprintf(stderr, "leftpipe dup2(%d, %d)\n", pipefd[0], STDIN_FILENO);

    // redirect the program's standard in to the output of the pipe
    // (read end, which is pipefd[0])
    if (dup2(pipefd[0], STDIN_FILENO) == -1) {
      perror("dup2");
      _exit(EXIT_FAILURE);
    }

    if (close(pipefd[0]) == -1) {
      perror("close");
      _exit(EXIT_FAILURE);
    }

    char *prog = "/usr/bin/grep";
    char *newargv[] = { "/usr/bin/grep", "zip", NULL };
    char *newenv[] = { NULL };
    execve(prog, newargv, newenv);
    // execve() only returns on failure
    perror("execve");
    _exit(EXIT_FAILURE);
  }

  // back in parent

  // close pipefds; the reader only sees EOF once every copy of the write
  // end is closed, including the parent's
  if (close(pipefd[0]) == -1) {
    perror("close");
    exit(EXIT_FAILURE);
  }
  if (close(pipefd[1]) == -1) {
    perror("close");
    exit(EXIT_FAILURE);
  }

  // wait for both children
  if (wait(NULL) == -1) {
    perror("wait");
    exit(EXIT_FAILURE);
  }
  if (wait(NULL) == -1) {
    perror("wait");
    exit(EXIT_FAILURE);
  }

  return EXIT_SUCCESS;
}
wait.c
#include <stdio.h>
#include <unistd.h>    // fork()
#include <stdlib.h>    // exit()
#include <inttypes.h>  // intmax_t
#include <sys/wait.h>  // wait()

// man 2 fork
// man 2 execve
// man 2 wait

/*
 * wait.c: forks a child that execs "/usr/bin/ls ./", then waits in the
 * parent and reports how the child changed state.
 *
 * The parent exits with EXIT_SUCCESS once the child has exited or been
 * killed by a signal, and with EXIT_FAILURE if fork() or wait() fails.
 */
int main(int argc, char **argv)
{
  pid_t pid;

  switch (pid = fork()) {
  case -1:
    perror("fork");
    exit(EXIT_FAILURE);
    break;

  case 0: {  // child
    puts("inside child process\n");
    // flush now: buffered stdout contents are discarded by execve()
    fflush(stdout);

    char *prog = "/usr/bin/ls";
    // argv[0] is the program name by convention; a NULL first entry would
    // terminate the argument list immediately and "./" would never be passed
    char *newargv[] = { prog, "./", NULL };
    char *newenv[] = { NULL };
    execve(prog, newargv, newenv);
    // execve() only returns on failure
    perror("execve");
    _exit(EXIT_FAILURE);
    break;
  }

  default: {  // parent
    int wstatus;

    printf("child pid: %jd\n", (intmax_t) pid);

    // Keep waiting until the child has actually terminated.
    // NOTE(review): plain wait() reports only terminated children, so the
    // stopped/continued branches would need waitpid() with WUNTRACED and
    // WCONTINUED to be reachable.
    do {
      pid_t wpid = wait(&wstatus);
      if (-1 == wpid) {
        perror("wait");
        exit(EXIT_FAILURE);
      }

      if (WIFEXITED(wstatus)) {
        printf("exited, status=%d\n", WEXITSTATUS(wstatus));
      } else if (WIFSIGNALED(wstatus)) {
        printf("killed by signal %d\n", WTERMSIG(wstatus));
      } else if (WIFSTOPPED(wstatus)) {
        printf("stopped by signal %d\n", WSTOPSIG(wstatus));
      } else if (WIFCONTINUED(wstatus)) {
        printf("continued\n");
      }
    } while (!WIFEXITED(wstatus) && !WIFSIGNALED(wstatus));

    exit(EXIT_SUCCESS);
    break;
  }
  }
}