地方エンジニアの学習日記

preforkの方で見ていく。子プロセスがどうやって死んでいくか気になったのでメモ

ざっくり流れ

ユーザがSIGUSR1をhttpdへ送信
親プロセスがlisten socketをclose
子プロセスへSIGUSR1を送信し、処理中のリクエストが終わり次第終了させる
全ての子プロセスが終了する(またはタイムアウトする)まで待ち、最後にSIGTERMをプロセスグループへ送信
親プロセス死

httpdでのgraceful shutdown

機能の説明自体は以下

httpd.apache.org

apachectl -k gracefulとかやれば使える。

↓ら辺が気になったのでどんな感じで実装されているのかをメモした。

親プロセスは USR1 あるいは graceful シグナルを受け取ると、子プロセスに現在のリクエストの処理の後に終了する (あるいは何もしていなければすぐに終了する) ように助言します。

メイン

子プロセス作ったりacceptしたりもここ。ユーザからのgraceful shutdownもここで受け取る

httpd/prefork.c at 303010734b5ac4d3579133f8e82ededadcc646a9 · apache/httpd · GitHub

graceful shutdownに関連しそうなのはこの辺

    /* Excerpt: handling of a pending graceful shutdown. */
    if (retained->mpm->shutdown_pending) {
        /* Time to perform a graceful shut down:
         * Reap the inactive children, and ask the active ones
         * to close their listeners, then wait until they are
         * all done to exit.
         */
        int active_children;
        apr_time_t cutoff = 0;

        /* Stop listening */
        ap_close_listeners();

        /* kill off the idle ones */
        for (i = 0; i < retained->mpm->num_buckets; i++) {
            ap_mpm_pod_killpg(all_buckets[i].pod, retained->max_daemons_limit);
        }

        /* Send SIGUSR1 to the active children */
        active_children = 0;
        for (index = 0; index < ap_daemons_limit; ++index) {
            if (ap_scoreboard_image->servers[index][0].status != SERVER_DEAD) {
                /* Ask each child to close its listeners. */
                ap_mpm_safe_kill(MPM_CHILD_PID(index), AP_SIG_GRACEFUL);
                active_children++;
            }
        }

        /* Reap the children that have already exited.
         * NOTE(review): this call looks non-blocking (reclaim_one_pid
         * waits with APR_NOWAIT); the actual waiting appears to be done
         * by the polling loop further down - confirm.
         */
        ap_relieve_child_processes(prefork_note_child_killed);

        /* cleanup pid file */
        ap_remove_pid(pconf, ap_pid_fname);
        ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00170)
           "caught " AP_SIG_GRACEFUL_STOP_STRING ", shutting down gracefully");

        /* A zero timeout leaves cutoff at 0; the loop condition below
         * then ignores the deadline entirely.
         */
        if (ap_graceful_shutdown_timeout) {
            cutoff = apr_time_now() +
                     apr_time_from_sec(ap_graceful_shutdown_timeout);
        }

        /* Don't really exit until each child has finished */
        /* The flag is cleared so the loop can notice it being set again
         * while waiting (presumably by a signal handler - confirm).
         */
        retained->mpm->shutdown_pending = 0;
        do {
            /* Pause for a second */
            sleep(1);

            /* Relieve any children which have now exited */
            ap_relieve_child_processes(prefork_note_child_killed);

            /* Probe every slot with signal 0: APR_SUCCESS means the
             * child could still be signalled.
             */
            active_children = 0;
            for (index = 0; index < ap_daemons_limit; ++index) {
                if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) {
                    active_children = 1;
                    /* Having just one child is enough to stay around */
                    break;
                }
            }
        } while (!retained->mpm->shutdown_pending && active_children &&
                 (!ap_graceful_shutdown_timeout || apr_time_now() < cutoff));

        /* We might be here because we received SIGTERM, either
         * way, try and make sure that all of our processes are
         * really dead.
         */
        ap_unixd_killpg(getpgrp(), SIGTERM);

        return DONE;
    }

ap_close_listeners()でcloseして新規受付を停止する

/* Close the listeners on the main ap_listeners list, then those of
 * every additional listen bucket.
 */
AP_DECLARE_NONSTD(void) ap_close_listeners(void)
{
    /* NOTE(review): bucket 0 is deliberately not visited by the loop;
     * presumably it is already covered by ap_listeners - confirm.
     */
    int bucket = 1;

    ap_close_listeners_ex(ap_listeners);

    while (bucket < ap_num_listen_buckets) {
        ap_close_listeners_ex(ap_listen_buckets[bucket]);
        bucket++;
    }
}

/* Walk the given listener list; for each record close its socket and
 * mark the record inactive. The return value of apr_socket_close() is
 * not inspected.
 */
AP_DECLARE_NONSTD(void) ap_close_listeners_ex(ap_listen_rec *listeners)
{
    ap_listen_rec *rec = listeners;

    while (rec) {
        apr_socket_close(rec->sd);
        rec->active = 0;
        rec = rec->next;
    }
}

apr_socket_closeはaprの方で実装されている。

github.com

/* Close a socket: remove socket_cleanup from the socket's pool, then
 * invoke it directly and return its status.
 */
APR_DECLARE(apr_status_t) apr_socket_close(apr_socket_t *thesocket)
{
    apr_status_t rv;

    apr_pool_cleanup_kill(thesocket->pool, thesocket, socket_cleanup);
    rv = socket_cleanup(thesocket);

    return rv;
}

/* Close the descriptor held by an apr_socket_t.
 *
 * sock: the apr_socket_t, passed as void *.
 *
 * Returns APR_SUCCESS when close() succeeds; otherwise restores the
 * stored descriptor and returns errno.
 */
static apr_status_t socket_cleanup(void *sock)
{
    apr_socket_t *s = sock;
    int fd = s->socketdes;

    /* Invalidate the stored descriptor before attempting the close. */
    s->socketdes = -1;

    if (close(fd) != 0) {
        /* close() failed: put the descriptor back. */
        s->socketdes = fd;

        return errno;
    }

    return APR_SUCCESS;
}

ここまでの処理で新規のリクエストの受付は停止される。

        /* Send SIGUSR1 to the active children: every scoreboard slot
         * whose first entry is not SERVER_DEAD is signalled and counted.
         */
        active_children = 0;
        for (index = 0; index < ap_daemons_limit; ++index) {
            if (ap_scoreboard_image->servers[index][0].status != SERVER_DEAD) {
                /* Ask each child to close its listeners. */
                ap_mpm_safe_kill(MPM_CHILD_PID(index), AP_SIG_GRACEFUL);
                active_children++;
            }
        }

ap_mpm_safe_killで気になったのがsafe_killという命名。何がsafeなんだろと思って追ってみるとkillしようとするプロセスがちゃんと同一のプロセスグループに存在するかを確認した上でkillするという処理になっている点だった。apr_proc_wait()は内部でwaitpidを実行していてその戻り値が正しく無い場合はkillしないという仕組み。なんらかの理由でapacheが管理してないプロセスのpidがリストにあるような事故を防ぐ仕組みでしょうか。

/* Send signal `sig` to `pid`, but only after a non-blocking wait on
 * that pid reports it as a child that has not finished yet.
 *
 * Returns APR_EINVAL when the wait reports anything other than
 * APR_CHILD_NOTDONE; otherwise errno if kill() fails, or APR_SUCCESS.
 *
 * NOTE(review): abbreviated excerpt - the declarations of proc, rv,
 * status and why are not shown in these lines.
 */
AP_DECLARE(apr_status_t) ap_mpm_safe_kill(pid_t pid, int sig)
{
    proc.pid = pid;
    /* APR_NOWAIT: poll the child's state without blocking. */
    rv = apr_proc_wait(&proc, &status, &why, APR_NOWAIT);
    if (rv == APR_CHILD_DONE) {
        /* The child already died - log the termination status if
         * necessary: */
        ap_process_child_status(&proc, why, status);
        return APR_EINVAL;
    }
    else if (rv != APR_CHILD_NOTDONE) {
        /* The child is already dead and reaped, or was a bogus pid -
         * log this either way. */
        ap_log_error(APLOG_MARK, APLOG_NOTICE, rv, ap_server_conf, APLOGNO(00048)
                     "cannot send signal %d to pid %ld (non-child or "
                     "already dead)", sig, (long)pid);
        return APR_EINVAL;
    }
    /* Only reached when the wait reported APR_CHILD_NOTDONE. */
    return kill(pid, sig) ? errno : APR_SUCCESS;
}

呼び出し元ではsigにAP_SIG_GRACEFUL(SIGUSR1)を設定しているので、SIGUSR1が子プロセスに送信される。

SIGUSR1の処理

設定部分は以下

/* Excerpt showing where the graceful-signal handler is installed.
 *
 * NOTE(review): heavily abbreviated - pid and bucket are not declared
 * in the lines shown, and whatever surrounds the two indented calls
 * (presumably the fork and the child-side branch) is omitted; confirm
 * against the full function.
 */
static int make_child(server_rec *s, int slot)
{
        // apr_signal is defined on the APR side: APR_DECLARE(apr_sigfunc_t *) apr_signal(int signo, apr_sigfunc_t * func)
        /* Install stop_listening as the handler for AP_SIG_GRACEFUL
         * before entering child_main. */
        apr_signal(AP_SIG_GRACEFUL, stop_listening);
        child_main(slot, bucket);

    prefork_note_child_started(slot, pid);

    return 0;
}

シグナルハンドラの設定によりstop_listeningがSIGUSR1が来た際の挙動となる。

ap_close_listeners_ex
↓
apr_socket_close
↓
apr_pool_cleanup_run
↓
apr_pool_cleanup_kill
↓

/* Handler installed for AP_SIG_GRACEFUL: marks the MPM as stopping,
 * closes this process's bucket listeners and sets die_now.
 *
 * sig: the signal number (not used in the body).
 */
static void stop_listening(int sig)
{
    retained->mpm->mpm_state = AP_MPMQ_STOPPING;
    /* Closes only the listeners of my_bucket. */
    ap_close_listeners_ex(my_bucket->listeners);

    /* For a graceful stop, we want the child to exit when done */
    die_now = 1;
}

/* Remove the (data, cleanup_fn) cleanup from pool p, then call
 * cleanup_fn(data) immediately and return its status.
 */
APR_DECLARE(apr_status_t) apr_pool_cleanup_run(apr_pool_t *p, void *data,
                              apr_status_t (*cleanup_fn)(void *))
{
    apr_status_t rv;

    apr_pool_cleanup_kill(p, data, cleanup_fn);
    rv = cleanup_fn(data);

    return rv;
}

どうやって待つのか

一番気になるのはここ。Nginxの場合はソケットを管理するステートマシン的なデータ構造があってそれぞれの終了を確認して終了していた。preforkモードでのhttpdの場合はどうなのだろうか。ap_mpm_pod_killpgあたりを追っていく。

        /* Stop listening */
        ap_close_listeners();


        /* For every bucket, go through its pod to reach the idle
         * children (ap_mpm_pod_killpg only acts on SERVER_READY slots).
         */
        for (i = 0; i < retained->mpm->num_buckets; i++) {
            ap_mpm_pod_killpg(all_buckets[i].pod, retained->max_daemons_limit);
        }

        /* Signal every slot that is not SERVER_DEAD and count it. */
        active_children = 0;
        for (index = 0; index < ap_daemons_limit; ++index) {
            if (ap_scoreboard_image->servers[index][0].status != SERVER_DEAD) {
                /* Ask each child to close its listeners. */
                ap_mpm_safe_kill(MPM_CHILD_PID(index), AP_SIG_GRACEFUL);
                active_children++;
            }
        }

ap_mpm_pod_killpg

今もいまいち理解できてないポイントです。ソケットを閉じるのと子プロセスへシグナル送って終了を待つぐらいで良さそうですが間にdummy_connectionなる処理を行なっている箇所がありました。

/* Issue one dummy connection for each of the first `num` scoreboard
 * slots that is SERVER_READY with a non-zero pid. Stops early as soon
 * as a dummy connection fails.
 *
 * pod: passed through to dummy_connection().
 * num: number of scoreboard slots to examine.
 */
void ap_mpm_pod_killpg(ap_pod_t *pod, int num)
{
    apr_status_t rv = APR_SUCCESS;
    int slot = 0;

    while (slot < num && rv == APR_SUCCESS) {
        if (ap_scoreboard_image->servers[slot][0].status == SERVER_READY &&
            ap_scoreboard_image->servers[slot][0].pid != 0) {
            rv = dummy_connection(pod);
        }
        slot++;
    }
}

dummy_connection

以下がよくわからない。acceptブロックしてるプロセスに対してダミーでhttpリクエストを送って処理を進めさせるイメージ？いつか調べたい。。。

/*
 * Open a short-lived client connection to one of the server's own
 * listeners, send a small dummy payload and close the connection.
 *
 * pod: supplies the parent pool (pod->p) for the temporary pool.
 *
 * Returns the failing status from pool creation, socket creation,
 * timeout setup or connect; otherwise the (successful) connect status.
 * The result of the final send is not checked.
 */
static apr_status_t dummy_connection(ap_pod_t *pod)
{
    const char *data;
    apr_status_t rv;
    apr_socket_t *sock;
    apr_pool_t *p;
    apr_size_t len;
    ap_listen_rec *lp;

    /* create a temporary pool for the socket.  pconf stays around too long */
    rv = apr_pool_create(&p, pod->p);
    if (rv != APR_SUCCESS) {
        return rv;
    }
    apr_pool_tag(p, "dummy_connection");

    /* If possible, find a listener which is configured for
     * plain-HTTP, not SSL; using an SSL port would either be
     * expensive to do correctly (performing a complete SSL handshake)
     * or cause log spam by doing incorrectly (simply sending EOF). */
    lp = ap_listeners;
    while (lp && lp->protocol && ap_cstr_casecmp(lp->protocol, "http") != 0) {
        lp = lp->next;
    }
    /* No suitable listener found: fall back to the first one. */
    if (!lp) {
        lp = ap_listeners;
    }

    rv = apr_socket_create(&sock, lp->bind_addr->family, SOCK_STREAM, 0, p);
    if (rv != APR_SUCCESS) {
        ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(00054)
                     "get socket to connect to listener");
        apr_pool_destroy(p);
        return rv;
    }

    /* on some platforms (e.g., FreeBSD), the kernel won't accept many
     * queued connections before it starts blocking local connects...
     * we need to keep from blocking too long and instead return an error,
     * because the MPM won't want to hold up a graceful restart for a
     * long time
     */
    rv = apr_socket_timeout_set(sock, apr_time_from_sec(3));
    if (rv != APR_SUCCESS) {
        ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(00055)
                     "set timeout on socket to connect to listener");
        apr_socket_close(sock);
        apr_pool_destroy(p);
        return rv;
    }

    rv = apr_socket_connect(sock, lp->bind_addr);
    if (rv != APR_SUCCESS) {
        int log_level = APLOG_WARNING;

        if (APR_STATUS_IS_TIMEUP(rv)) {
            /* probably some server processes bailed out already and there
             * is nobody around to call accept and clear out the kernel
             * connection queue; usually this is not worth logging
             */
            log_level = APLOG_DEBUG;
        }

        ap_log_error(APLOG_MARK, log_level, rv, ap_server_conf, APLOGNO(00056)
                     "connect to listener on %pI", lp->bind_addr);
        /* NOTE(review): no explicit apr_socket_close() on this path;
         * presumably destroying the pool releases the socket - confirm.
         */
        apr_pool_destroy(p);
        return rv;
    }

    if (lp->protocol && ap_cstr_casecmp(lp->protocol, "https") == 0) {
        /* Send a TLS 1.0 close_notify alert.  This is perhaps the
         * "least wrong" way to open and cleanly terminate an SSL
         * connection.  It should "work" without noisy error logs if
         * the server actually expects SSLv3/TLSv1.  With
         * SSLv23_server_method() OpenSSL's SSL_accept() fails
         * ungracefully on receipt of this message, since it requires
         * an 11-byte ClientHello message and this is too short. */
        static const unsigned char tls10_close_notify[7] = {
            '\x15',         /* TLSPlainText.type = Alert (21) */
            '\x03', '\x01', /* TLSPlainText.version = {3, 1} */
            '\x00', '\x02', /* TLSPlainText.length = 2 */
            '\x01',         /* Alert.level = warning (1) */
            '\x00'          /* Alert.description = close_notify (0) */
        };
        data = (const char *)tls10_close_notify;
        len = sizeof(tls10_close_notify);
    }
    else /* ... XXX other request types here? */ {
        /* Create an HTTP request string.  We include a User-Agent so
         * that administrators can track down the cause of the
         * odd-looking requests in their logs.  A complete request is
         * used since kernel-level filtering may require that much
         * data before returning from accept(). */
        data = apr_pstrcat(p, "OPTIONS * HTTP/1.0\r\nUser-Agent: ",
                           ap_get_server_description(),
                           " (internal dummy connection)\r\n\r\n", NULL);
        len = strlen(data);
    }

    /* Best effort: the send status is not inspected, so rv still holds
     * the result of the connect above.
     */
    apr_socket_send(sock, data, &len);
    apr_socket_close(sock);
    apr_pool_destroy(p);

    return rv;
}

待つのはここ -> ap_relieve_child_processes

/* Check, without signalling anything, which child processes have
 * exited and report each of them through mpm_callback.
 *
 * mpm_callback: called as (slot, 0, 0) for a scoreboard child and as
 *               (-1, pid, old_gen) for a process from the extras list.
 *
 * reclaim_one_pid() is called with DO_NOTHING, so this function only
 * observes the children; it does not ask them to stop.
 */
AP_DECLARE(void) ap_relieve_child_processes(ap_reclaim_callback_fn_t *mpm_callback)
{
    int i;
    extra_process_t *cur_extra;
    int max_daemons;

    ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons);

    /* now see who is done */
    for (i = 0; i < max_daemons; ++i) {
        process_score *ps = ap_get_scoreboard_process(i);
        pid_t pid = ps->pid;

        if (pid == 0) {
            continue; /* not every scoreboard entry is in use */
        }

        if (reclaim_one_pid(pid, DO_NOTHING)) {
            mpm_callback(i, 0, 0);
        }
    }

    cur_extra = extras;
    while (cur_extra) {
        ap_generation_t old_gen;
        /* Capture everything needed from the node up front: once the
         * process has been unregistered the node may no longer be valid,
         * so neither ->next nor ->pid is read after that point.
         */
        extra_process_t *next = cur_extra->next;
        pid_t extra_pid = cur_extra->pid;

        if (reclaim_one_pid(extra_pid, DO_NOTHING)) {
            if (ap_unregister_extra_mpm_process(extra_pid, &old_gen) == 1) {
                mpm_callback(-1, extra_pid, old_gen);
            }
            else {
                /* A process taken from the extras list must be
                 * unregistrable; anything else is a logic error. */
                AP_DEBUG_ASSERT(1 == 0);
            }
        }
        cur_extra = next;
    }
}

以下では、対象の子プロセスが既に終了している(またはpidが不正な)場合は1を返す。まだ動いている場合、actionがDO_NOTHINGなら何もせずに0を返す。

static int reclaim_one_pid(pid_t pid, action_t action)
{
    apr_proc_t proc;
    apr_status_t waitret;
    apr_exit_why_e why;
    int status;

    /* Ensure pid sanity. */
    if (pid < 1) {
        return 1;
    }

    proc.pid = pid;
    waitret = apr_proc_wait(&proc, &status, &why, APR_NOWAIT);
    if (waitret != APR_CHILD_NOTDONE) {
        if (waitret == APR_CHILD_DONE)
            ap_process_child_status(&proc, why, status);
        return 1;
    }

都内の某SIerを2021年6月末で退職します。10年後くらいに自分で見て懐かしむために書いておきます。

どんな会社だった

社員数500人くらいで東京本社と地方にちらほら拠点がある会社でした。吸収合併を重ねて大きくしてる会社という印象でした。

何をやってた

仙台で某ニュースサイトのインフラエンジニアをやってました。サーバの構築だったり運用がメインでたまにコード書くくらいの日々でした。SIっぽいことは何もやってなく技術系のことを存分にできたのでSIあるあるみたいなのが全く無い感じでの終わりとなった。

良かった点

仙台で東京の案件へ参画できた

仙台にいながら東京の案件にリモートで参画できたのがとてもよかったです。2018年の11月とかに転職活動したときはこの条件を満たせる会社っていうのが少なくてこの会社が最適解であったと今でも思っています。(出張前提の参画の募集が多かったけど今のご時世だと多分そんなことはなさそう)

自己研鑽がしやすかった

業務面では技術調査の部分で結構時間が自由に取れてOSSのコードを読む時間だったり技術書を読む時間だったりが取れたりと自己研鑽のしやすい環境だったなと振り返っても感じます。

納期なんかも結構緩めにとってあり「時間ないからここは手抜きで。。」みたいなのが少なく気の済むまで広げていくことも可能でした。(Nginxの機能調査でソース読むみたいなのが普通にやれたりブログネタに困ることはなかった)

福利厚生が良かった

大企業みたいなどでかい福利厚生はないものの「有休の入社後即日付与」や「施策なんかの支援金」みたいなのがあって個人的には便利だった。

有休の即日付与って意外と無かったりするので体調を崩しがちな季節に入って速攻使わせてもらった記憶がある。

残念なのはこの辺のことが福利厚生に書いてないので入社後に「あ、そうなんですね！」みたいな感じになった。他社と僅差で迷ってる際の最後の後押しくらいにはなりそうなので書いてもよさそうとも思った。(やらなくても応募数とか多いみたいなのは聞くので意図的にやってないだけかもしれない。)

社内イベントが楽しかった(コロナ前)

月一で飲み会はあったし芋煮やら食事会的なのが業務時間内にあったりと楽しかった。メンバー間の仲もよくて自分から会社のイベントに行きたい的な感情を前の会社では持つことなかったのでとても良かった。

残念だった点

会社の残念だった点です。(ただこれは転職理由ではないです。)

技術の話があまりできない

技術好きな人だったりレベル高い人ってのは結構いるけど好きな技術が近くて雑談みたいな感じで話せる人が身近にいないのが残念だった。(そもそも探そうとすらしてないだけで何処かにはいたのかもしれないけど)

社内slackの技術のチャンネルもあったけどアクティブな人はあまりいなくてじゃあ社内でやりとりする必要ないよねという思いを持ってしまった。ただ良い面もあってこれきっかけでpythonのコミュニティだったりインフラのコミュニティだったりに行こうってなったりした。

次はどうする

7/1から別の会社で働きます。入社エントリはいずれ。

地方エンジニアの学習日記

興味ある技術の雑なメモだったりを書いてくブログ。たまに日記とガジェット紹介。

【Ruby】入門するURLメモ

公式

環境構築系

バージョンの話

ライブラリ

ざっくり理解

【httpd】graceful shutdownの実装メモ