地方エンジニアの学習日記

興味ある技術の雑なメモだったりを書いてくブログ。たまに日記とガジェット紹介。

【httpd】graceful shutdownの実装メモ

preforkの方で見ていく。子プロセスがどうやって死んでいくか気になったのでメモ

ざっくり流れ

ユーザがSIGWINCH(graceful-stop。graceful restartの場合はSIGUSR1)をhttpdへ送信
親プロセスがlisten socketをclose
親プロセスが子プロセスへSIGUSR1を送信(アイドル中の子プロセスはダミー接続で起こす)
処理中の子プロセスがあれば、終了するまで待つ
親プロセス死
httpdでのgraceful shutdown

機能の説明自体は以下

httpd.apache.org

apachectl -k graceful(再起動)や apachectl -k graceful-stop(停止)とかやれば使える。

↓ら辺が気になったのでどんな感じで実装されているのかをメモした。

親プロセスは USR1 あるいは graceful シグナルを受け取ると、子プロセスに現在のリクエストの処理の後に終了する (あるいは何もしていなければすぐに終了する) ように助言します。
メイン

子プロセス作ったりacceptしたりもここ。ユーザからのgraceful shutdownもここで受け取る

httpd/prefork.c at 303010734b5ac4d3579133f8e82ededadcc646a9 · apache/httpd · GitHub

graceful shutdownに関連しそうなのはこの辺

    /* Graceful-shutdown branch (excerpt): entered when shutdown_pending is
     * set. Closes the parent's listeners, signals the children, then polls
     * once per second until no child is left or the timeout expires.
     */
    if (retained->mpm->shutdown_pending) {
        /* Time to perform a graceful shut down:
         * Reap the inactive children, and ask the active ones
         * to close their listeners, then wait until they are
         * all done to exit.
         */
        int active_children;
        apr_time_t cutoff = 0;

        /* Stop listening */
        ap_close_listeners();

        /* kill off the idle ones */
        for (i = 0; i < retained->mpm->num_buckets; i++) {
            ap_mpm_pod_killpg(all_buckets[i].pod, retained->max_daemons_limit);
        }

        /* Send SIGUSR1 to the active children */
        active_children = 0;
        for (index = 0; index < ap_daemons_limit; ++index) {
            if (ap_scoreboard_image->servers[index][0].status != SERVER_DEAD) {
                /* Ask each child to close its listeners. */
                ap_mpm_safe_kill(MPM_CHILD_PID(index), AP_SIG_GRACEFUL);
                active_children++;
            }
        }

        /* Reap the children that have already exited.
         * NOTE(review): this does not appear to block -- the reaping helper
         * polls with APR_NOWAIT; the actual waiting is done by the
         * sleep/poll loop further down.
         */
        ap_relieve_child_processes(prefork_note_child_killed);

        /* cleanup pid file */
        ap_remove_pid(pconf, ap_pid_fname);
        ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00170)
           "caught " AP_SIG_GRACEFUL_STOP_STRING ", shutting down gracefully");

        /* A zero timeout leaves cutoff at 0 and disables the deadline test
         * in the loop condition below.
         */
        if (ap_graceful_shutdown_timeout) {
            cutoff = apr_time_now() +
                     apr_time_from_sec(ap_graceful_shutdown_timeout);
        }

        /* Don't really exit until each child has finished */
        retained->mpm->shutdown_pending = 0;
        do {
            /* Pause for a second */
            sleep(1);

            /* Relieve any children which have now exited */
            ap_relieve_child_processes(prefork_note_child_killed);

            active_children = 0;
            for (index = 0; index < ap_daemons_limit; ++index) {
                /* Signal 0 sends nothing; it only probes whether the
                 * child can still be signalled.
                 */
                if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) {
                    active_children = 1;
                    /* Having just one child is enough to stay around */
                    break;
                }
            }
        } while (!retained->mpm->shutdown_pending && active_children &&
                 (!ap_graceful_shutdown_timeout || apr_time_now() < cutoff));

        /* We might be here because we received SIGTERM, either
         * way, try and make sure that all of our processes are
         * really dead.
         */
        ap_unixd_killpg(getpgrp(), SIGTERM);

        return DONE;
    }

ap_close_listeners()でcloseして新規受付を停止する

/**
 * Close all listening sockets: first the list headed by ap_listeners, then
 * the list of every listen bucket from index 1 upward.
 */
AP_DECLARE_NONSTD(void) ap_close_listeners(void)
{
    int bucket = 1;

    ap_close_listeners_ex(ap_listeners);

    /* Index 0 is skipped -- presumably it is the ap_listeners list already
     * closed above; TODO confirm against the bucket setup code.
     */
    while (bucket < ap_num_listen_buckets) {
        ap_close_listeners_ex(ap_listen_buckets[bucket]);
        bucket++;
    }
}

/**
 * Walk a linked list of listener records, closing each record's socket and
 * marking the record inactive.
 *
 * @param listeners head of the list; an empty (NULL) list is a no-op
 */
AP_DECLARE_NONSTD(void) ap_close_listeners_ex(ap_listen_rec *listeners)
{
    ap_listen_rec *cur = listeners;

    while (cur) {
        /* The close status is intentionally not inspected here. */
        apr_socket_close(cur->sd);
        cur->active = 0;
        cur = cur->next;
    }
}

apr_socket_closeはaprの方で実装されている。

github.com

/**
 * Close a socket immediately.
 *
 * @param thesocket socket to close
 * @return whatever socket_cleanup() reports for the close
 */
APR_DECLARE(apr_status_t) apr_socket_close(apr_socket_t *thesocket)
{
    apr_status_t status;

    /* Drop the cleanup registered on the socket's pool first -- presumably
     * so the pool does not run socket_cleanup a second time later.
     */
    apr_pool_cleanup_kill(thesocket->pool, thesocket, socket_cleanup);

    status = socket_cleanup(thesocket);
    return status;
}

/*
 * Close the descriptor held by an apr_socket_t.
 *
 * sock: the apr_socket_t, passed as void * to fit the cleanup signature.
 *
 * Returns APR_SUCCESS when close() succeeds, otherwise errno from close().
 * The stored descriptor is set to -1 before the call and put back if the
 * close fails.
 */
static apr_status_t socket_cleanup(void *sock)
{
    apr_socket_t *s = sock;
    const int fd = s->socketdes;

    /* Mark the socket as closed up front. */
    s->socketdes = -1;

    if (close(fd) != 0) {
        /* close() failed: restore the descriptor and report the error. */
        s->socketdes = fd;
        return errno;
    }

    return APR_SUCCESS;
}

ここまでの処理で新規のリクエストの受付は停止される。

        /* Send SIGUSR1 to the active children: every scoreboard slot whose
         * first entry is not SERVER_DEAD gets AP_SIG_GRACEFUL, and
         * active_children counts how many were signalled.
         */
        active_children = 0;
        for (index = 0; index < ap_daemons_limit; ++index) {
            if (ap_scoreboard_image->servers[index][0].status != SERVER_DEAD) {
                /* Ask each child to close its listeners. */
                ap_mpm_safe_kill(MPM_CHILD_PID(index), AP_SIG_GRACEFUL);
                active_children++;
            }
        }

ap_mpm_safe_killで気になったのがsafe_killという命名。何がsafeなんだろと思って追ってみるとkillしようとするプロセスがちゃんと同一のプロセスグループに存在するかを確認した上でkillするという処理になっている点だった。apr_proc_wait()は内部でwaitpidを実行していてその戻り値が正しく無い場合はkillしないという仕組み。なんらかの理由でapacheが管理してないプロセスのpidがリストにあるような事故を防ぐ仕組みでしょうか。

/**
 * Send a signal to a process only after confirming, via a non-blocking
 * wait, that it is a still-running child of the caller.
 *
 * @param pid process to signal; must be a positive pid
 * @param sig signal number to deliver (0 only probes for existence)
 * @return APR_SUCCESS if kill() succeeded; APR_EINVAL if pid is not a
 *         positive pid, the child has already exited, or the pid is not a
 *         waitable child; otherwise errno from kill()
 */
AP_DECLARE(apr_status_t) ap_mpm_safe_kill(pid_t pid, int sig)
{
    apr_proc_t proc;
    apr_status_t rv;
    apr_exit_why_e why;
    int status;

    /* Ensure pid sanity: kill() interprets 0 and negative pids as process
     * groups (or "every process"), which must never be reached from here.
     */
    if (pid < 1) {
        return APR_EINVAL;
    }

    proc.pid = pid;
    rv = apr_proc_wait(&proc, &status, &why, APR_NOWAIT);
    if (rv == APR_CHILD_DONE) {
        /* The child already died - log the termination status if
         * necessary: */
        ap_process_child_status(&proc, why, status);
        return APR_EINVAL;
    }
    else if (rv != APR_CHILD_NOTDONE) {
        /* The child is already dead and reaped, or was a bogus pid -
         * log this either way. */
        ap_log_error(APLOG_MARK, APLOG_NOTICE, rv, ap_server_conf, APLOGNO(00048)
                     "cannot send signal %d to pid %ld (non-child or "
                     "already dead)", sig, (long)pid);
        return APR_EINVAL;
    }

    /* The pid is a live child of ours: deliver the signal. */
    return kill(pid, sig) ? errno : APR_SUCCESS;
}

呼び出し元ではsigにAP_SIG_GRACEFUL(SIGUSR1)を設定しているので、SIGUSR1が子プロセスに送信される。

SIGUSR1の処理

設定部分は以下

/*
 * Start the child for a scoreboard slot (abridged excerpt).
 *
 * NOTE(review): `bucket` and `pid` are used below but not declared in this
 * excerpt; presumably the omitted code forks and the first two statements
 * run only in the child -- confirm against the full source.
 */
static int make_child(server_rec *s, int slot)
{
        // apr_signal is defined in APR: APR_DECLARE(apr_sigfunc_t *) apr_signal(int signo, apr_sigfunc_t * func)
        // Install stop_listening as the handler for AP_SIG_GRACEFUL, then run the child.
        apr_signal(AP_SIG_GRACEFUL, stop_listening);
        child_main(slot, bucket);

    /* Record the started child for this slot. */
    prefork_note_child_started(slot, pid);

    return 0;
}

シグナルハンドラの設定によりstop_listeningがSIGUSR1が来た際の挙動となる。

ap_close_listeners_ex
↓
apr_socket_close
↓
apr_pool_cleanup_run
↓
apr_pool_cleanup_kill
↓
/*
 * Signal handler that make_child installs for AP_SIG_GRACEFUL.
 * Marks the MPM as stopping, closes this child's bucket listeners, and sets
 * die_now.
 *
 * sig: the delivered signal number (unused).
 */
static void stop_listening(int sig)
{
    retained->mpm->mpm_state = AP_MPMQ_STOPPING;
    ap_close_listeners_ex(my_bucket->listeners);

    /* For a graceful stop, we want the child to exit when done */
    die_now = 1;
}

/**
 * Remove a cleanup from a pool and run it right away.
 *
 * @param p          pool the cleanup was registered on
 * @param data       argument handed to the cleanup function
 * @param cleanup_fn cleanup function to unregister and invoke
 * @return the status returned by cleanup_fn(data)
 */
APR_DECLARE(apr_status_t) apr_pool_cleanup_run(apr_pool_t *p, void *data,
                              apr_status_t (*cleanup_fn)(void *))
{
    apr_status_t result;

    /* Unregister before invoking -- presumably so the pool does not run
     * the same cleanup again later.
     */
    apr_pool_cleanup_kill(p, data, cleanup_fn);

    result = cleanup_fn(data);
    return result;
}
どうやって待つのか

一番気になるのはここ。Nginxの場合はソケットを管理するステートマシン的なデータ構造があってそれぞれの終了を確認して終了していた。preforkモードでのhttpdの場合はどうなのだろうか。ap_mpm_pod_killpgあたりを追っていく。

        /* Stop listening in the parent. */
        ap_close_listeners();


        /* Run ap_mpm_pod_killpg() once per bucket. */
        for (i = 0; i < retained->mpm->num_buckets; i++) {
            ap_mpm_pod_killpg(all_buckets[i].pod, retained->max_daemons_limit);
        }

        /* Signal every child whose slot is not SERVER_DEAD, counting them. */
        active_children = 0;
        for (index = 0; index < ap_daemons_limit; ++index) {
            if (ap_scoreboard_image->servers[index][0].status != SERVER_DEAD) {
                /* Ask each child to close its listeners. */
                ap_mpm_safe_kill(MPM_CHILD_PID(index), AP_SIG_GRACEFUL);
                active_children++;
            }
        }
ap_mpm_pod_killpg

今もいまいち理解できてないポイントです。ソケットを閉じるのと子プロセスへシグナル送って終了を待つぐらいで良さそうですが間にdummy_connectionなる処理を行なっている箇所がありました。

/*
 * Issue one dummy connection for each of the first `num` scoreboard slots
 * that is in state SERVER_READY and has a non-zero pid.
 *
 * pod: handle passed through to dummy_connection().
 * num: number of scoreboard slots to examine.
 *
 * Stops early as soon as a dummy connection fails.
 */
void ap_mpm_pod_killpg(ap_pod_t *pod, int num)
{
    int slot = 0;
    apr_status_t rc = APR_SUCCESS;

    while (slot < num && rc == APR_SUCCESS) {
        /* Only slots that are ready and actually hold a process count. */
        if (ap_scoreboard_image->servers[slot][0].status == SERVER_READY &&
            ap_scoreboard_image->servers[slot][0].pid != 0) {
            rc = dummy_connection(pod);
        }
        slot++;
    }
}
dummy_connection

以下がよくわからない。acceptブロックしてるプロセスに対してダミーでhttpリクエストを送って処理を進めさせるイメージ?いつか調べたい。。。

/*
 * Open a short-lived client connection to one of the server's own
 * listeners, send a minimal payload, and close it again.
 *
 * pod: supplies the parent pool (pod->p) for the temporary pool used here.
 *
 * Returns the status of the last setup step attempted (pool creation,
 * socket creation, timeout set, connect). The result of apr_socket_send()
 * is not checked, so a failed send is not reported to the caller.
 */
static apr_status_t dummy_connection(ap_pod_t *pod)
{
    const char *data;
    apr_status_t rv;
    apr_socket_t *sock;
    apr_pool_t *p;
    apr_size_t len;
    ap_listen_rec *lp;

    /* create a temporary pool for the socket.  pconf stays around too long */
    rv = apr_pool_create(&p, pod->p);
    if (rv != APR_SUCCESS) {
        return rv;
    }
    apr_pool_tag(p, "dummy_connection");

    /* If possible, find a listener which is configured for
     * plain-HTTP, not SSL; using an SSL port would either be
     * expensive to do correctly (performing a complete SSL handshake)
     * or cause log spam by doing incorrectly (simply sending EOF). */
    lp = ap_listeners;
    while (lp && lp->protocol && ap_cstr_casecmp(lp->protocol, "http") != 0) {
        lp = lp->next;
    }
    /* No plain-HTTP listener found: fall back to the first listener.
     * NOTE(review): if ap_listeners itself is NULL, lp stays NULL and is
     * dereferenced below -- presumably at least one listener always exists.
     */
    if (!lp) {
        lp = ap_listeners;
    }

    rv = apr_socket_create(&sock, lp->bind_addr->family, SOCK_STREAM, 0, p);
    if (rv != APR_SUCCESS) {
        ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(00054)
                     "get socket to connect to listener");
        apr_pool_destroy(p);
        return rv;
    }

    /* on some platforms (e.g., FreeBSD), the kernel won't accept many
     * queued connections before it starts blocking local connects...
     * we need to keep from blocking too long and instead return an error,
     * because the MPM won't want to hold up a graceful restart for a
     * long time
     */
    rv = apr_socket_timeout_set(sock, apr_time_from_sec(3));
    if (rv != APR_SUCCESS) {
        ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(00055)
                     "set timeout on socket to connect to listener");
        apr_socket_close(sock);
        apr_pool_destroy(p);
        return rv;
    }

    rv = apr_socket_connect(sock, lp->bind_addr);
    if (rv != APR_SUCCESS) {
        int log_level = APLOG_WARNING;

        if (APR_STATUS_IS_TIMEUP(rv)) {
            /* probably some server processes bailed out already and there
             * is nobody around to call accept and clear out the kernel
             * connection queue; usually this is not worth logging
             */
            log_level = APLOG_DEBUG;
        }

        ap_log_error(APLOG_MARK, log_level, rv, ap_server_conf, APLOGNO(00056)
                     "connect to listener on %pI", lp->bind_addr);
        apr_pool_destroy(p);
        return rv;
    }

    /* Choose the payload according to the listener's protocol. */
    if (lp->protocol && ap_cstr_casecmp(lp->protocol, "https") == 0) {
        /* Send a TLS 1.0 close_notify alert.  This is perhaps the
         * "least wrong" way to open and cleanly terminate an SSL
         * connection.  It should "work" without noisy error logs if
         * the server actually expects SSLv3/TLSv1.  With
         * SSLv23_server_method() OpenSSL's SSL_accept() fails
         * ungracefully on receipt of this message, since it requires
         * an 11-byte ClientHello message and this is too short. */
        static const unsigned char tls10_close_notify[7] = {
            '\x15',         /* TLSPlainText.type = Alert (21) */
            '\x03', '\x01', /* TLSPlainText.version = {3, 1} */
            '\x00', '\x02', /* TLSPlainText.length = 2 */
            '\x01',         /* Alert.level = warning (1) */
            '\x00'          /* Alert.description = close_notify (0) */
        };
        data = (const char *)tls10_close_notify;
        len = sizeof(tls10_close_notify);
    }
    else /* ... XXX other request types here? */ {
        /* Create an HTTP request string.  We include a User-Agent so
         * that administrators can track down the cause of the
         * odd-looking requests in their logs.  A complete request is
         * used since kernel-level filtering may require that much
         * data before returning from accept(). */
        data = apr_pstrcat(p, "OPTIONS * HTTP/1.0\r\nUser-Agent: ",
                           ap_get_server_description(),
                           " (internal dummy connection)\r\n\r\n", NULL);
        len = strlen(data);
    }

    /* Best effort: the send status is ignored; rv still holds the
     * successful connect status at this point.
     */
    apr_socket_send(sock, data, &len);
    apr_socket_close(sock);
    apr_pool_destroy(p);

    return rv;
}
待つのはここ -> ap_relieve_child_processes
/**
 * Poll every known child process and notify the MPM about each one that is
 * reported as reclaimed.
 *
 * Two sets are checked with reclaim_one_pid(..., DO_NOTHING): the
 * scoreboard slots below AP_MPMQ_MAX_DAEMON_USED, and the `extras` list.
 *
 * @param mpm_callback invoked as (slot, 0, 0) for a reclaimed scoreboard
 *                     child, and as (-1, pid, generation) for a reclaimed
 *                     extra process
 */
AP_DECLARE(void) ap_relieve_child_processes(ap_reclaim_callback_fn_t *mpm_callback)
{
    int i;
    extra_process_t *cur_extra;
    int max_daemons;

    ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons);

    /* now see who is done */
    for (i = 0; i < max_daemons; ++i) {
        process_score *ps = ap_get_scoreboard_process(i);
        pid_t pid = ps->pid;

        if (pid == 0) {
            continue; /* not every scoreboard entry is in use */
        }

        if (reclaim_one_pid(pid, DO_NOTHING)) {
            mpm_callback(i, 0, 0);
        }
    }

    cur_extra = extras;
    while (cur_extra) {
        ap_generation_t old_gen;
        /* Capture everything needed from the node up front: unregistering
         * may release it, so it must not be read afterwards.
         */
        extra_process_t *next = cur_extra->next;
        pid_t pid = cur_extra->pid;

        if (reclaim_one_pid(pid, DO_NOTHING)) {
            if (ap_unregister_extra_mpm_process(pid, &old_gen) == 1) {
                /* cur_extra may be dangling from here on. */
                mpm_callback(-1, pid, old_gen);
            }
            else {
                /* A pid taken from the list should always be found in it. */
                AP_DEBUG_ASSERT(1 == 0);
            }
        }
        cur_extra = next;
    }
}

以下は子プロセスを回収できた場合に1を返す。まだ動作中の場合、アクションがDO_NOTHINGなら何もせずに0を返す(DO_NOTHINGはプロセスの状態ではなく、reclaim_one_pidに渡すアクションの指定)。

static int reclaim_one_pid(pid_t pid, action_t action)
{
    apr_proc_t proc;
    apr_status_t waitret;
    apr_exit_why_e why;
    int status;

    /* Ensure pid sanity. */
    if (pid < 1) {
        return 1;
    }

    proc.pid = pid;
    waitret = apr_proc_wait(&proc, &status, &why, APR_NOWAIT);
    if (waitret != APR_CHILD_NOTDONE) {
        if (waitret == APR_CHILD_DONE)
            ap_process_child_status(&proc, why, status);
        return 1;
    }