Linux僵尸进程处理

Posted by fishcried on June 20, 2014

1. 什么是僵死进程

僵死进程满足以下两点:

  1. 早逝。比他的父进程要早消亡。
  2. 父亲不给孩子收尸。没有调用wait类系统调用对子进程进行处理。

看下面的代码,产生僵死进程测试(generate_zombies.c):

#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

/*
 * generate_zombies - create zombie processes for testing.
 *
 * Forks the requested number of children. Each child prints its pid and
 * exits at once; the parent never calls wait(), so every child remains a
 * zombie for as long as the parent keeps running.
 *
 * Usage: generate_zombies [-n Num]
 *   -n Num  number of children to fork (default 1, must be a decimal
 *           integer greater than 0).
 *
 * Exits with EXIT_FAILURE on a bad option or count; otherwise it sleeps
 * forever and has to be killed from outside.
 */
int main(int argc, char *argv[])
{
	int zombies_num = 1;
	int opt;
	int i;

	while ((opt = getopt(argc, argv, "n:")) != -1) {
		switch (opt) {
		case 'n': {
			char *end;
			long value;

			/* strtol instead of atoi: detects garbage and overflow. */
			errno = 0;
			value = strtol(optarg, &end, 10);
			if (errno != 0 || end == optarg || *end != '\0'
					|| (long)(int)value != value) {
				value = 0;	/* rejected by the range check below */
			}
			zombies_num = (int)value;
			break;
		}
		default:
			fprintf(stderr, "Usage: %s -n Num\n", argv[0]);
			exit(EXIT_FAILURE);
		}
	}

	if (zombies_num <= 0) {
		fprintf(stderr, "Num must > 0\n");
		exit(EXIT_FAILURE);
	}

	for (i = 0; i < zombies_num; i++) {
		pid_t pid;

		/*
		 * Flush before forking so that a child never inherits (and later
		 * re-emits) output still sitting in the parent's stdio buffer.
		 */
		fflush(stdout);

		pid = fork();
		if (pid < 0) {
			/* Diagnostics belong on stderr and need a trailing newline. */
			fprintf(stderr, "fork failed - %s\n", strerror(errno));
		} else if (pid == 0) {
			printf("Create zombie, pid - [%u]\n", (unsigned)getpid());
			exit(0);
		}
	}

	/* Deliberately never wait(): staying alive keeps the children zombies. */
	while (1) {
		sleep(10);
	}
	return 0;
}

2. 如何查看哪些进程是僵死进程,并清除

top命令能够直观的查看系统的总体情况

#./generate_zombies -n 20 
#top
top - 17:52:16 up 2 days,  8:20,  6 users,  load average: 0.12, 0.12, 0.14
Tasks: 177 total,   3 running, 154 sleeping,   0 stopped,  20 zombie
%Cpu(s): 23.1 us,  2.6 sy,  0.0 ni, 74.4 id,  0.0 wa,  0.0 hi,  0.0 si,  0.0 st
KiB Mem:   3955320 total,  2363616 used,  1591704 free,    76996 buffers
KiB Swap:  8178684 total,   113984 used,  8064700 free,  1004828 cached
...

top命令输出第二行末尾的20 zombie,直观地显示了当前系统中存在多少僵死进程。一个进程僵死后其状态会变为'Z',如果需要详细查看哪些进程为僵死进程,可以通过以下命令:

ps -ef | grep d[e]funct 

ps -eo "ppid,pid,stat,comm" | awk '$3 ~ "Z+" {print }'
# 更简单的
ps -eo "stat,comm,ppid,pid" | grep '^Z'

那么如何清理掉这些僵死进程呢? 直接kill僵死进程是不行的,因为进程已经僵死,不会对信号进行响应。所以只能杀死不负责任的父亲,然后由政府部门“init”来进行收尸。

#! /bin/bash
#clean_zombies.sh
# Terminate the parent of every zombie process. Once a parent is gone its
# zombie children are re-parented and can be reaped.

for zombies_pid in $(ps -eo "stat,ppid" | awk '$1 ~ /^Z/ {print $2}' | sort -u)
do
	# A PPID of 0 would turn this into "kill 0", which signals the script's
	# own process group; PID 1 is init itself and must not be targeted.
	if [ "$zombies_pid" -le 1 ]; then
		continue
	fi
	kill "$zombies_pid"
done

使用脚本进行处理实在是太方便了,不过还是来段c代码来恶心一下自己吧(clean_zombies.c):

#include <sys/types.h>
#include <dirent.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>

/* Initial capacity, in elements, of the array of zombie parent pids. */
#define INITSIZ	1
/* Number of elements added to that array each time it becomes full. */
#define INCREMENTSIZ	1

/*
 * Reads a /proc/<pid>/status style file, reports whether its State: field
 * marks a zombie, and stores the parent pid it parses through ppid.
 */
static int is_zombie(const char *status_file, pid_t *ppid);

/* Scans /proc for zombies and sends SIGKILL to their parent processes. */
extern int clean_zombies(void);

/*
 * is_zombie - inspect a /proc/<pid>/status file.
 * @status_file: path of the status file to read.
 * @ppid:        out parameter; receives the value of the "PPid:" field, or 0
 *               when that field is missing or is not a valid non-negative
 *               decimal number.
 *
 * Returns 1 when the "State:" field starts with 'Z' (zombie), 0 when it
 * names any other state, and -1 when an argument is NULL, the file cannot be
 * opened, or no "State:" field was found.
 */
int is_zombie(const char *status_file, pid_t *ppid)
{
	FILE *fp;
	char buf[BUFSIZ];
	char name[20] = {0}, value[20] = {0};
	int zombie = -1;
	int have_ppid = 0;

	if (!status_file || !ppid) {
		return -1;
	}
	*ppid = 0;

	fp = fopen(status_file, "r");
	if (!fp) {
		return -1;
	}

	/* Stop reading as soon as both fields of interest have been seen. */
	while (!(zombie != -1 && have_ppid) && fgets(buf, sizeof buf, fp)) {
		if (sscanf(buf, "%19s%19s", name, value) != 2) {
			continue;
		}
		if (strcmp(name, "State:") == 0) {
			zombie = (value[0] == 'Z') ? 1 : 0;
		} else if (strcmp(name, "PPid:") == 0) {
			/* The kernel spells this field "PPid:", not "Ppid:". */
			char *end;
			long parsed = strtol(value, &end, 10);

			if (end != value && *end == '\0' && parsed >= 0
					&& (long)(pid_t)parsed == parsed) {
				*ppid = (pid_t)parsed;
			}
			have_ppid = 1;
		}
	}

	fclose(fp);

	return zombie;
}

/*
 * clean_zombies - kill the parents of all zombie processes listed in /proc.
 *
 * Walks the numeric directories of /proc, passes each /proc/<pid>/status to
 * is_zombie(), collects the distinct parent pids of the zombies found and
 * sends SIGKILL to each of them. A summary line is printed to stdout.
 *
 * Returns 0 when the scan completed, -1 when memory allocation failed or
 * /proc could not be opened. All resources are released on every path.
 */
int clean_zombies(void)
{
	pid_t *zombies_parents = NULL;
	struct dirent *entry;
	DIR *dir = NULL;
	int size = 0;
	int num = 0;
	int zombies_num = 0;
	int ret = -1;
	int i;

	zombies_parents = malloc(sizeof *zombies_parents * INITSIZ);
	if (!zombies_parents) {
		goto out;
	}
	size = INITSIZ;

	dir = opendir("/proc");
	if (!dir) {
		/* Go through the common exit so the array is not leaked. */
		goto out;
	}

	/*
	 * readdir() replaces the deprecated readdir_r(): the stream is only
	 * used by this function, and a stack struct dirent may be too small
	 * for long names.
	 */
	while ((entry = readdir(dir)) != NULL) {
		char path[64];
		pid_t ppid = 0;
		int len;

		/*
		 * Process entries are directories with a numeric name; ".", ".."
		 * and the non-process entries all convert to 0.
		 */
		if (entry->d_type != DT_DIR || atoi(entry->d_name) == 0) {
			continue;
		}

		len = snprintf(path, sizeof path, "/proc/%s/status", entry->d_name);
		if (len < 0 || (size_t)len >= sizeof path) {
			continue;	/* truncated path would name the wrong file */
		}

		if (is_zombie(path, &ppid) != 1) {
			continue;
		}
		zombies_num++;

		if (ppid <= 0) {
			fprintf(stderr, "Get ppid of %s error\n", entry->d_name);
			continue;
		}

		/* Record each parent only once. */
		for (i = 0; i < num && zombies_parents[i] != ppid; i++) {
			;
		}
		if (i < num) {
			continue;
		}

		if (num == size) {
			pid_t *grown = realloc(zombies_parents,
					sizeof *grown * (size + INCREMENTSIZ));
			if (!grown) {
				goto out;	/* original array is still valid and freed below */
			}
			zombies_parents = grown;
			size += INCREMENTSIZ;
		}
		zombies_parents[num++] = ppid;
	}

	for (i = 0; i < num; i++) {
		if (kill(zombies_parents[i], SIGKILL) != 0) {
			perror("kill");
		}
	}
	printf("There %d zombies, kill %d parents\n", zombies_num, num);
	ret = 0;

out:
	free(zombies_parents);
	if (dir) {
		closedir(dir);
	}

	return ret;
}

/*
 * Entry point of clean_zombies: run one cleanup pass and report its outcome
 * through the exit status instead of always exiting with 0.
 */
int main(void)
{
	return clean_zombies() == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}

3. 如何避免产生僵死进程

其实避免产生僵死进程的思路是很简单:在进程死后需要有人收尸。

  1. 父进程调用wait族系统调用回收子进程资源
  2. 处理SIGCHLD信号:显式忽略该信号(将其处理方式设置为SIG_IGN),或者在信号处理函数中调用waitpid。
    • 注意多个进程同时退出,由于信号不排队,所以信号处理函数要通过循环非阻塞调用waitpid来收尸。
  3. fork两次:父亲fork出儿子,儿子再fork出孙子后立即退出,父亲随即调用wait回收儿子(父亲->儿子->孙子,儿子死)。孙子成为孤儿后直接由init接管并负责收尸,这样就避免了僵死进程。

变更记录

Why Who When
整理旧文档 fishcried 2014-06-20
调整标题序号,修正格式问题 fishcried 2014-08-05
修改标题 fishcried 2015-12-02