Learn Zig Series (#69) - Daemonization: Background Services

[IMAGE: https://images.hive.blog/DQmaHuB6qTWHaSpJHQ1S8FCCRmNQuUxTcPZdU4yKHsJ7vEP/zig-banner.png]

What will I learn

How the classic Unix daemonization process works: fork, setsid, fork again, close file descriptors;
How to implement the double-fork pattern in Zig using std.posix;
How PID files prevent multiple daemon instances from running simultaneously;
How to set up file-based logging for daemons that have no terminal;
How to use signals (SIGHUP for config reload, SIGTERM for graceful stop) to control a running daemon;
How systemd-style service files work and when to skip the double-fork entirely;
How to build a self-monitoring health check with a watchdog timer;
How to combine everything into a practical background service.

Requirements

A working modern computer running macOS, Windows or Ubuntu;
An installed Zig 0.14+ distribution (download from ziglang.org);
The ambition to learn Zig programming.

Difficulty

Intermediate

Curriculum (of the `Learn Zig Series`):

Learn Zig Series (#69) - Daemonization: Background Services

Solutions to Episode 68 Exercises

Exercise 1: Unix socket chat server with poll()

const std = @import("std");
const posix = std.posix;
const linux = std.os.linux;
const c = @cImport({
    @cInclude("sys/socket.h");
});

// Filesystem path of the listening Unix domain socket.
const SOCKET_PATH = "/tmp/zig_chat.sock";
// Capacity of the two client tables below.
const MAX_CLIENTS = 16;

// Parallel tables: client_fds[i] and client_pids[i] describe the same client.
// Unused slots hold the fill values -1 (fd) and 0 (pid).
var client_fds: [MAX_CLIENTS]posix.fd_t = [_]posix.fd_t{-1} ** MAX_CLIENTS;
var client_pids: [MAX_CLIENTS]i32 = [_]i32{0} ** MAX_CLIENTS;
// Number of occupied slots at the front of both tables.
var num_clients: usize = 0;

/// Appends a client to the parallel `client_fds` / `client_pids` tables.
///
/// Returns `false` and stores nothing when all `MAX_CLIENTS` slots are
/// occupied; the caller still owns `fd` in that case and must close it.
fn addClient(fd: posix.fd_t, pid: i32) bool {
    if (num_clients >= MAX_CLIENTS) return false;
    client_fds[num_clients] = fd;
    client_pids[num_clients] = pid;
    num_clients += 1;
    return true;
}

/// Closes the client at `idx` and removes it from both tables, shifting the
/// later entries down by one so the occupied slots stay contiguous.
///
/// An `idx` that does not refer to an occupied slot is ignored.
fn removeClient(idx: usize) void {
    if (idx >= num_clients) return;
    posix.close(client_fds[idx]);

    const last = num_clients - 1;
    std.mem.copyForwards(posix.fd_t, client_fds[idx..last], client_fds[idx + 1 .. num_clients]);
    std.mem.copyForwards(i32, client_pids[idx..last], client_pids[idx + 1 .. num_clients]);

    // Reset the slot that just became free to the tables' fill values.
    client_fds[last] = -1;
    client_pids[last] = 0;
    num_clients = last;
}

/// Sends `msg`, prefixed with "[pid N] " for the sender's pid, to every
/// connected client except the sender.
///
/// Delivery is best-effort: write errors on individual clients are ignored.
fn broadcast(sender_idx: usize, msg: []const u8) void {
    var buf: [1200]u8 = undefined;
    const prefix = std.fmt.bufPrint(&buf, "[pid {d}] ", .{client_pids[sender_idx]}) catch return;

    // When prefix + message fit in the buffer, send them as one write so the
    // two parts cannot be separated by another broadcast to the same client.
    const framed: ?[]const u8 = if (msg.len <= buf.len - prefix.len) blk: {
        @memcpy(buf[prefix.len..][0..msg.len], msg);
        break :blk buf[0 .. prefix.len + msg.len];
    } else null;

    for (client_fds[0..num_clients], 0..) |fd, i| {
        if (i == sender_idx) continue;
        if (framed) |frame| {
            _ = posix.write(fd, frame) catch {};
        } else {
            // Oversized message: fall back to two separate writes.
            _ = posix.write(fd, prefix) catch {};
            _ = posix.write(fd, msg) catch {};
        }
    }
}

/// Returns the pid of the process on the other end of the Unix socket `fd`,
/// as reported by the SO_PEERCRED socket option.
///
/// Returns 0 when the option cannot be read, so callers never see an
/// uninitialised value.
fn getPeerPid(fd: posix.fd_t) i32 {
    const UcredT = extern struct { pid: i32, uid: u32, gid: u32 };
    var cred: UcredT = .{ .pid = 0, .uid = 0, .gid = 0 };
    var cred_len: posix.socklen_t = @sizeOf(UcredT);
    const rc = linux.getsockopt(@intCast(fd), c.SOL_SOCKET, c.SO_PEERCRED, @ptrCast(&cred), &cred_len);
    if (linux.E.init(rc) != .SUCCESS) return 0;
    return cred.pid;
}

/// Chat server entry point: binds the Unix socket at `SOCKET_PATH`, then
/// multiplexes the listening socket and all client sockets with one poll()
/// call per loop iteration (2000 ms timeout). Runs until the process is killed.
pub fn main() !void {
    const stdout = std.io.getStdOut().writer();
    // Remove a socket file left behind by a previous run; absence is fine.
    std.fs.cwd().deleteFile(SOCKET_PATH) catch {};

    const server_fd = try posix.socket(posix.AF.UNIX, posix.SOCK.STREAM, 0);
    defer posix.close(server_fd);

    const addr = try std.net.Address.initUnix(SOCKET_PATH);
    try posix.bind(server_fd, &addr.any, addr.getOsSockLen());
    try posix.listen(server_fd, 5);
    try stdout.print("Chat server on {s}. Max {d} clients.\n", .{ SOCKET_PATH, MAX_CLIENTS });

    while (true) {
        var pollfds: [MAX_CLIENTS + 1]linux.pollfd = undefined;
        pollfds[0] = .{ .fd = server_fd, .events = linux.POLL.IN, .revents = 0 };

        // Remember how many clients take part in this poll round. A client
        // accepted below is appended after them and has no initialised
        // pollfd entry until the next round.
        const polled_clients = num_clients;
        for (0..polled_clients) |i| {
            pollfds[i + 1] = .{ .fd = client_fds[i], .events = linux.POLL.IN, .revents = 0 };
        }

        const nfds: linux.nfds_t = @intCast(polled_clients + 1);
        const ready = linux.poll(&pollfds, nfds, 2000);
        // The raw syscall encodes errors as negative values; skip on error or timeout.
        if (@as(isize, @bitCast(@as(usize, ready))) <= 0) continue;

        // new connection?
        if (pollfds[0].revents & linux.POLL.IN != 0) {
            var ca: posix.sockaddr = undefined;
            var cl: posix.socklen_t = @sizeOf(posix.sockaddr);
            if (posix.accept(server_fd, &ca, &cl)) |cfd| {
                const pid = getPeerPid(cfd);
                if (addClient(cfd, pid)) {
                    try stdout.print("[+] pid {d} joined ({d} total)\n", .{ pid, num_clients });
                } else {
                    _ = posix.write(cfd, "server full\n") catch {};
                    posix.close(cfd);
                }
            } else |_| {}
        }

        // Check the clients that were polled. Iterating backwards means a
        // removal only shifts entries that were already handled.
        var i: usize = polled_clients;
        while (i > 0) {
            i -= 1;
            if (pollfds[i + 1].revents & (linux.POLL.IN | linux.POLL.HUP) != 0) {
                var buf: [1024]u8 = undefined;
                // A read error is treated like end-of-stream: drop the client.
                const n = posix.read(client_fds[i], &buf) catch 0;
                if (n == 0) {
                    try stdout.print("[-] pid {d} left\n", .{client_pids[i]});
                    removeClient(i);
                } else {
                    broadcast(i, buf[0..n]);
                }
            }
        }
    }
}

The server uses a single poll() call to multiplex the listening socket and all connected client fds in one array. New connections get their PID from SO_PEERCRED. Messages from any client are broadcast to all others with the sender's PID prefixed. Backwards iteration during removal prevents index shifting bugs.

Exercise 2: File descriptor proxy with privilege separation

const std = @import("std");
const posix = std.posix;
const linux = std.os.linux;
const c = @cImport({
    @cInclude("sys/socket.h");
});

const SOCKET_PATH = "/tmp/zig_fdproxy.sock";

// Exact paths the opener is willing to open on behalf of the worker.
const allowed_paths = [_][]const u8{
    "/etc/hostname",
    "/etc/os-release",
    "/tmp/zig_fdproxy_test.txt",
    "/proc/version",
};

/// Reports whether `path` is byte-for-byte equal to one of `allowed_paths`.
fn isAllowed(path: []const u8) bool {
    return for (allowed_paths) |candidate| {
        if (std.mem.eql(u8, candidate, path)) break true;
    } else false;
}

/// Passes the open descriptor `fd_to_send` to the peer of the Unix socket
/// `sock` as SCM_RIGHTS ancillary data, alongside a one-byte payload ('F').
///
/// Returns `error.SendFailed` when sendmsg reports an error.
fn sendFd(sock: posix.fd_t, fd_to_send: posix.fd_t) !void {
    var data_buf = [_]u8{'F'};
    var iov = [_]posix.iovec{.{ .base = &data_buf, .len = 1 }};

    // Control buffer, zeroed so any padding bytes are defined.
    var cmsg_buf: [64]u8 align(@alignOf(linux.cmsghdr)) = [_]u8{0} ** 64;
    const cmsg: *linux.cmsghdr = @ptrCast(&cmsg_buf);
    cmsg.level = c.SOL_SOCKET;
    cmsg.type = c.SCM_RIGHTS;
    // Header followed directly by a single fd.
    cmsg.len = @intCast(@sizeOf(linux.cmsghdr) + @sizeOf(posix.fd_t));
    const fd_ptr: *posix.fd_t = @ptrCast(@alignCast(@as([*]u8, @ptrCast(cmsg)) + @sizeOf(linux.cmsghdr)));
    fd_ptr.* = fd_to_send;

    const msg = posix.msghdr_const{
        .name = null,
        .namelen = 0,
        .iov = &iov,
        .iovlen = 1,
        .control = &cmsg_buf,
        .controllen = cmsg.len,
        .flags = 0,
    };
    const sent = linux.sendmsg(@intCast(sock), @ptrCast(&msg), 0);
    // The raw syscall encodes errors as negative values.
    if (@as(isize, @bitCast(@as(usize, sent))) < 0) return error.SendFailed;
}

/// Receives one message from the Unix socket `sock` and returns the file
/// descriptor carried in its SCM_RIGHTS ancillary data.
///
/// Errors:
/// - `error.RecvFailed` when recvmsg fails or returns no data.
/// - `error.NoCmsg` when the message carries no SCM_RIGHTS control header
///   (for example a plain-text reply).
fn recvFd(sock: posix.fd_t) !posix.fd_t {
    var data_buf: [1]u8 = undefined;
    var iov = [_]posix.iovec{.{ .base = &data_buf, .len = 1 }};

    // Zeroed so that an absent control message reads as level 0 / type 0.
    var cmsg_buf: [64]u8 align(@alignOf(linux.cmsghdr)) = [_]u8{0} ** 64;
    var msg = posix.msghdr{
        .name = null,
        .namelen = 0,
        .iov = &iov,
        .iovlen = 1,
        .control = &cmsg_buf,
        .controllen = @intCast(cmsg_buf.len),
        .flags = 0,
    };
    const recvd = linux.recvmsg(@intCast(sock), @ptrCast(&msg), 0);
    if (@as(isize, @bitCast(@as(usize, recvd))) <= 0) return error.RecvFailed;

    const cmsg: *linux.cmsghdr = @ptrCast(@alignCast(&cmsg_buf));
    if (cmsg.level != c.SOL_SOCKET or cmsg.type != c.SCM_RIGHTS) return error.NoCmsg;
    // The fd sits directly after the control header.
    const fd_ptr: *const posix.fd_t = @ptrCast(@alignCast(@as([*]const u8, @ptrCast(cmsg)) + @sizeOf(linux.cmsghdr)));
    return fd_ptr.*;
}

/// Worker (child) side: connects to the opener, requests the test file by
/// path, and prints what it reads from the descriptor it gets back.
///
/// Returns `error.RequestDenied` when no descriptor arrives.
fn runWorker() !void {
    std.time.sleep(100 * std.time.ns_per_ms);
    const cli = try posix.socket(posix.AF.UNIX, posix.SOCK.STREAM, 0);
    defer posix.close(cli);
    const ca = try std.net.Address.initUnix(SOCKET_PATH);
    try posix.connect(cli, &ca.any, ca.getOsSockLen());

    // request an allowed file
    _ = try posix.write(cli, "/tmp/zig_fdproxy_test.txt");
    const rfd = recvFd(cli) catch {
        std.debug.print("[worker] denied or error\n", .{});
        return error.RequestDenied;
    };
    defer posix.close(rfd);

    var buf: [512]u8 = undefined;
    const n = try posix.read(rfd, &buf);
    std.debug.print("[worker] got: {s}", .{buf[0..n]});
}

/// Opener (parent) entry point: creates the test file and the listening
/// socket, forks the worker, then serves exactly one request. Paths are
/// checked with `isAllowed` before being opened and sent with `sendFd`.
pub fn main() !void {
    const stdout = std.io.getStdOut().writer();
    std.fs.cwd().deleteFile(SOCKET_PATH) catch {};

    // create test file
    {
        var f = try std.fs.cwd().createFile("/tmp/zig_fdproxy_test.txt", .{});
        defer f.close();
        try f.writeAll("Proxy-served secret content!\n");
    }

    const server_fd = try posix.socket(posix.AF.UNIX, posix.SOCK.STREAM, 0);
    defer posix.close(server_fd);
    const addr = try std.net.Address.initUnix(SOCKET_PATH);
    try posix.bind(server_fd, &addr.any, addr.getOsSockLen());
    try posix.listen(server_fd, 1);

    const pid = try posix.fork();
    if (pid == 0) {
        posix.close(server_fd);
        // The child always leaves through exit() so it never unwinds through
        // the parent's defers (which would close server_fd a second time).
        runWorker() catch |err| {
            if (err != error.RequestDenied) {
                std.debug.print("[worker] error: {s}\n", .{@errorName(err)});
            }
            std.process.exit(1);
        };
        std.process.exit(0);
    }

    var ca2: posix.sockaddr = undefined;
    var cl2: posix.socklen_t = @sizeOf(posix.sockaddr);
    const cli_fd = try posix.accept(server_fd, &ca2, &cl2);
    // Defers run in reverse order: close the connection first (so a worker
    // still waiting for a reply sees end-of-stream), then reap the worker,
    // then remove the socket file. This holds on error paths as well.
    defer std.fs.cwd().deleteFile(SOCKET_PATH) catch {};
    defer _ = posix.waitpid(pid, 0);
    defer posix.close(cli_fd);

    var buf: [256]u8 = undefined;
    const n = try posix.read(cli_fd, &buf);
    const path = buf[0..n];
    try stdout.print("[opener] request for: {s}\n", .{path});

    if (!isAllowed(path)) {
        _ = try posix.write(cli_fd, "DENIED");
        try stdout.print("[opener] denied: {s}\n", .{path});
    } else if (std.fs.cwd().openFile(path, .{})) |file| {
        defer file.close();
        try sendFd(cli_fd, file.handle);
        try stdout.print("[opener] fd sent for: {s}\n", .{path});
    } else |_| {
        _ = posix.write(cli_fd, "open failed") catch {};
    }
}

The opener (parent) checks every requested filename against a hardcoded whitelist before opening it and passing the fd. The worker (child) never touches the filesystem -- it only receives already-open file descriptors. If the path is not in the whitelist, the opener sends a "DENIED" text message instead of an fd. This is exactly the privilege separation model used by OpenSSH.

Exercise 3: Metrics collector on abstract socket

const std = @import("std");
const posix = std.posix;
const linux = std.os.linux;

/// Metrics collector: listens on the abstract Unix socket "zig-metrics" and
/// handles five connections, one message each.
///
/// A message of the form {"name":"X","value":N} adds N to the running
/// sum/count kept for X and is answered with the current average. The
/// special name "dump" prints every average to stdout and is answered with
/// "dumped". Malformed messages are skipped without a reply.
fn runCollector() !void {
    const stdout = std.io.getStdOut().writer();
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    const server_fd = try posix.socket(posix.AF.UNIX, posix.SOCK.STREAM, 0);
    defer posix.close(server_fd);
    const addr = try std.net.Address.initUnix("\x00zig-metrics");
    try posix.bind(server_fd, &addr.any, addr.getOsSockLen());
    try posix.listen(server_fd, 5);
    try stdout.print("[collector] listening on abstract socket 'zig-metrics'\n", .{});

    // Keys are heap copies owned by this map; they are freed in the defer below.
    var averages = std.StringHashMap(struct { sum: f64, count: u64 }).init(allocator);
    defer {
        var it = averages.iterator();
        while (it.next()) |entry| allocator.free(entry.key_ptr.*);
        averages.deinit();
    }

    for (0..5) |_| {
        var ca: posix.sockaddr = undefined;
        var cl: posix.socklen_t = @sizeOf(posix.sockaddr);
        const cfd = posix.accept(server_fd, &ca, &cl) catch continue;
        // Runs at the end of every iteration, including the `continue` paths.
        defer posix.close(cfd);

        var buf: [512]u8 = undefined;
        const n = posix.read(cfd, &buf) catch continue;
        const line = std.mem.trim(u8, buf[0..n], " \n\r\t");

        // minimal JSON parse: find "name":"X" and "value":Y
        const name_start = std.mem.indexOf(u8, line, "\"name\":\"") orelse continue;
        const ns = name_start + 8;
        const name_end = std.mem.indexOfScalarPos(u8, line, ns, '"') orelse continue;
        const name = line[ns..name_end];

        if (std.mem.eql(u8, name, "dump")) {
            var it = averages.iterator();
            while (it.next()) |entry| {
                const avg = entry.value_ptr.sum / @as(f64, @floatFromInt(entry.value_ptr.count));
                try stdout.print("  {s}: avg={d:.2} (n={d})\n", .{ entry.key_ptr.*, avg, entry.value_ptr.count });
            }
            _ = posix.write(cfd, "dumped") catch {};
            continue;
        }

        const val_start = std.mem.indexOf(u8, line, "\"value\":") orelse continue;
        const vs = val_start + 8;
        var ve = vs;
        while (ve < line.len and (line[ve] == '.' or (line[ve] >= '0' and line[ve] <= '9'))) ve += 1;
        const value = std.fmt.parseFloat(f64, line[vs..ve]) catch continue;

        // Look up with the borrowed slice and copy the key only when a new
        // entry is created, so an existing key is neither leaked nor replaced.
        const result = averages.getOrPut(name) catch continue;
        if (result.found_existing) {
            result.value_ptr.sum += value;
            result.value_ptr.count += 1;
        } else {
            result.key_ptr.* = allocator.dupe(u8, name) catch {
                // Do not leave an entry whose key points into `buf`.
                averages.removeByPtr(result.key_ptr);
                continue;
            };
            result.value_ptr.* = .{ .sum = value, .count = 1 };
        }

        const avg = result.value_ptr.sum / @as(f64, @floatFromInt(result.value_ptr.count));
        var resp_buf: [128]u8 = undefined;
        const resp = std.fmt.bufPrint(&resp_buf, "{s}: avg={d:.2}\n", .{ name, avg }) catch "error\n";
        _ = posix.write(cfd, resp) catch {};
    }
}

/// Forks a reporter child that sends four metrics and a final "dump"
/// request (one connection per message), runs the collector in the parent,
/// then reaps the child.
pub fn main() !void {
    const pid = try posix.fork();
    if (pid == 0) {
        // reporter child: send a few metrics then ask for dump
        // (the initial sleep gives the parent time to bind and listen)
        std.time.sleep(200 * std.time.ns_per_ms);
        const metrics = [_][]const u8{
            "{\"name\":\"cpu\",\"value\":42.5}",
            "{\"name\":\"cpu\",\"value\":55.0}",
            "{\"name\":\"mem\",\"value\":71.2}",
            "{\"name\":\"mem\",\"value\":68.8}",
            "{\"name\":\"dump\",\"value\":0}",
        };
        for (metrics) |m| {
            std.time.sleep(50 * std.time.ns_per_ms);
            const fd = posix.socket(posix.AF.UNIX, posix.SOCK.STREAM, 0) catch continue;
            // Closed at the end of each iteration, including failed connects.
            defer posix.close(fd);
            const a = std.net.Address.initUnix("\x00zig-metrics") catch continue;
            posix.connect(fd, &a.any, a.getOsSockLen()) catch continue;
            _ = posix.write(fd, m) catch {};
            // Wait for the reply (its content is not used).
            var buf: [128]u8 = undefined;
            _ = posix.read(fd, &buf) catch {};
        }
        std.process.exit(0);
    }
    runCollector() catch |err| std.debug.print("collector error: {}\n", .{err});
    _ = std.posix.waitpid(pid, 0);
}

The collector parses the JSON manually (looking for "name":"..." and "value":N patterns) and tracks running sums and counts per metric name in a hash map. The dump command triggers a full printout of all averages. Using an abstract socket means no filesystem cleanup and no stale socket files to worry about.

At the end of last episode I mentioned we'd be going deeper into the daemon lifecycle -- how to properly detach from a terminal, create a new session, redirect standard I/O, write PID files. If you've ever written a long-running service (maybe something like the command daemon we built in episode 68?) you know the frustration: you start the process, it works great, you close your terminal and... it dies. Or you accidentally start two copies and they fight over the same socket file. Or it crashes at 3am and nobody notices until morning.

The classic Unix daemon is the answer to ALL of these problems. It's a process that has fully detached from any controlling terminal, runs in its own session, logs to files instead of stdout, writes a PID file so you can find it later, and handles signals for graceful shutdown and config reload. Every major Unix service -- sshd, nginx, postgres, cron -- follows this pattern (or a modern variant of it).

Here we go!

The double-fork pattern: why fork twice?

The double-fork is the traditional Unix daemonization recipe. It sounds weird at first -- why would you fork twice? -- but each step solves a specific problem:

First fork: The parent exits immediately. The child continues. This does two things: it returns control to the shell (so the user gets their prompt back) and it guarantees the child is not a process group leader (because it just got a new PID from fork).
setsid(): The child calls setsid() to create a new session and become its session leader. This detaches it from the controlling terminal completely. No more SIGHUP when the terminal closes, no more terminal signals at all.
Second fork: The session leader forks again and the session leader exits. The grandchild (the actual daemon) is NOT a session leader, which means it can never accidentally acquire a controlling terminal again (on System V systems, only session leaders can acquire a controlling terminal by opening a tty device).
Close file descriptors, chdir to /: The daemon closes stdin, stdout, and stderr (they point to the dead terminal anyway), changes directory to / (so it doesn't hold open any mounted filesystem), and optionally resets the file creation mask.

const std = @import("std");
const posix = std.posix;
const linux = std.os.linux;
const c = @cImport({
    @cInclude("unistd.h");
});

/// Detaches the calling process from its terminal using the double-fork
/// recipe. On return the caller is the grandchild: in a new session, not a
/// session leader, with umask 027, cwd "/" (best effort) and fds 0-2 pointing
/// at /dev/null. The original process and the intermediate child exit with
/// status 0.
///
/// Errors: fork failures, `error.SetsidFailed`, and failures to open
/// /dev/null or to duplicate it onto fds 0-2.
fn daemonize() !void {
    // Step 1: first fork -- parent exits, child continues
    const pid1 = try posix.fork();
    if (pid1 > 0) {
        // parent: exit cleanly so the shell gets its prompt back
        std.process.exit(0);
    }

    // Step 2: create new session (detach from terminal)
    const sid = c.setsid();
    if (sid < 0) return error.SetsidFailed;

    // Step 3: second fork -- session leader exits, grandchild continues
    // the grandchild can never acquire a controlling terminal
    const pid2 = try posix.fork();
    if (pid2 > 0) {
        std.process.exit(0);
    }

    // Step 4: set file creation mask
    _ = linux.umask(0o027);

    // Step 5: change working directory to root
    // so we don't hold any filesystem mount busy
    std.posix.chdir("/") catch {};

    // Step 6: point stdin/stdout/stderr at /dev/null.
    // /dev/null is opened BEFORE the standard fds are replaced: if the open
    // fails, the old fds are still intact and the error can be reported.
    // dup2 closes the target fd itself, so no explicit close(0..2) is needed
    // and there is never a moment where fds 0-2 are unallocated.
    const devnull = try posix.open("/dev/null", .{ .ACCMODE = .RDWR }, 0);
    // If the process was started with some of fds 0-2 closed, open() may
    // have returned one of them; in that case it must stay open.
    defer if (devnull > 2) posix.close(devnull);

    var fd: posix.fd_t = 0;
    while (fd <= 2) : (fd += 1) {
        if (fd != devnull) try posix.dup2(devnull, fd);
    }
}

/// Demo entry point: announces the start on stderr, daemonizes, then writes
/// a start line, five one-second ticks and an exit line to
/// /tmp/zig_daemon_test.log.
pub fn main() !void {
    const stderr = std.io.getStdErr().writer();
    try stderr.print("Starting daemon (pid {d})...\n", .{linux.getpid()});

    try daemonize();

    // we're now a proper daemon -- no terminal, no stdout
    // everything from here must use file-based logging
    var log = try std.fs.cwd().createFile("/tmp/zig_daemon_test.log", .{});
    defer log.close();

    var buf: [128]u8 = undefined;
    const msg = std.fmt.bufPrint(&buf, "Daemon running as pid {d}\n", .{linux.getpid()}) catch return;
    // writeAll retries short writes, so a line is never silently truncated.
    try log.writeAll(msg);

    // do some work
    var i: u32 = 0;
    while (i < 5) : (i += 1) {
        std.time.sleep(1 * std.time.ns_per_s);
        const tick = std.fmt.bufPrint(&buf, "tick {d}\n", .{i}) catch continue;
        log.writeAll(tick) catch {};
    }

    log.writeAll("Daemon exiting cleanly.\n") catch {};
}

When you run this, the program prints "Starting daemon..." and immediately returns to the shell. But the process is still running in the background -- check with ps aux | grep zig or look at /tmp/zig_daemon_test.log after a few seconds. The daemon ticks away writing to its log file, completely detached from your terminal.

A few things to note about the implementation. We close fds 0, 1, 2 and then reopen /dev/null on them. This is important because some library code (or even Zig's standard library) might assume these fds exist. If fd 1 is closed and some code opens a file, that file gets fd 1 -- and then a print to stdout would accidentally write to your data file. Redirecting to /dev/null makes stdout writes silently disappear instead.

PID files: one daemon at a time

A PID file is just a text file containing the process ID of the running daemon. It solves two problems: you can find the daemon later (to send it signals), and you can prevent multiple copies from running simultaneously.

The pattern is simple: on startup, try to create the PID file. If it already exists and the PID inside it refers to a running process, another instance is already active -- refuse to start. If the PID file exists but the process is dead, it's a stale file from a crash -- overwrite it.

const std = @import("std");
const posix = std.posix;
const linux = std.os.linux;

/// A PID file at `path`, used to keep a second daemon instance from starting.
const PidFile = struct {
    path: []const u8,

    /// Claims the PID file for the current process.
    ///
    /// Returns `error.AlreadyRunning` when the file names a process that
    /// still exists. A missing, unreadable or unparsable file, or one naming
    /// a process that no longer exists, is treated as stale and overwritten
    /// with our own pid.
    ///
    /// The check and the write are two separate steps, so two instances
    /// starting at the same moment can still both succeed.
    fn acquire(self: PidFile) !void {
        if (self.readRecordedPid()) |old_pid| {
            // Signal 0 performs the existence/permission check only.
            // EPERM means the process exists but belongs to another user,
            // so it counts as running just like success does.
            switch (linux.E.init(linux.kill(old_pid, 0))) {
                .SUCCESS, .PERM => return error.AlreadyRunning,
                else => {}, // no such process -- stale PID file, overwrite it
            }
        }

        // write our PID
        var f = try std.fs.cwd().createFile(self.path, .{});
        defer f.close();
        var buf: [32]u8 = undefined;
        const pid_str = std.fmt.bufPrint(&buf, "{d}\n", .{linux.getpid()}) catch return error.FormatFailed;
        try f.writeAll(pid_str);
    }

    /// Deletes the PID file; a failure to delete is ignored.
    fn release(self: PidFile) void {
        std.fs.cwd().deleteFile(self.path) catch {};
    }

    /// Returns the positive pid stored in the file, or null when the file
    /// is missing, unreadable, empty or does not contain a positive integer.
    fn readRecordedPid(self: PidFile) ?i32 {
        const file = std.fs.cwd().openFile(self.path, .{}) catch return null;
        defer file.close();
        var buf: [32]u8 = undefined;
        const n = file.read(&buf) catch return null;
        const text = std.mem.trim(u8, buf[0..n], " \n\r\t");
        const pid = std.fmt.parseInt(i32, text, 10) catch return null;
        return if (pid > 0) pid else null;
    }
};

/// Demo entry point: takes the PID file, pretends to work for three seconds,
/// then releases the file on the way out. Exits with status 1 when another
/// instance already holds the PID file.
pub fn main() !void {
    const out = std.io.getStdOut().writer();
    const lock = PidFile{ .path = "/tmp/zig_pidtest.pid" };

    lock.acquire() catch |acquire_err| switch (acquire_err) {
        error.AlreadyRunning => {
            try out.print("Error: another instance is already running!\n", .{});
            std.process.exit(1);
        },
        else => |other| return other,
    };
    // Runs on every return path below, after the final message is printed.
    defer lock.release();

    try out.print("Daemon started, PID file written to {s}\n", .{lock.path});
    try out.print("pid = {d}\n", .{linux.getpid()});

    // stand-in for real work
    std.time.sleep(3 * std.time.ns_per_s);

    try out.print("Daemon stopping, PID file removed.\n", .{});
}

If you run this program twice in parallel, the second instance will print "another instance is already running!" and exit. The kill(pid, 0) call is the standard Unix trick for checking if a process exists without actually sending it a signal -- it returns 0 if the process exists and we are allowed to signal it, and fails with ESRCH if no such process exists. (It can also fail with EPERM, which means the process does exist but belongs to another user.)

NB: This PID file approach is NOT perfect. There's a small race window between checking the file and writing our PID. A truly bulletproof approach uses flock() on the PID file to get an exclusive advisory lock -- if the lock succeeds, you own it. But the simple check-and-write is good enough for the vast majority of daemons. We're not building a database here ;-)

Logging for daemons: because stdout is dead

Once you've daemonized, stdout goes to /dev/null. You need an alternative. There are two main approaches: write directly to a log file, or use the system logger (syslog). File-based logging is simpler and gives you full control. Syslog integrates with the system's logging infrastructure so your daemon's messages show up alongside other system messages in journalctl or /var/log/syslog.

const std = @import("std");
const linux = std.os.linux;
const c = @cImport({
    @cInclude("syslog.h");
});

/// Logger for a process without a terminal: writes each message to an
/// optional log file and/or to syslog. All logging is best-effort; failures
/// are never reported to the caller.
const DaemonLogger = struct {
    file: ?std.fs.File,
    use_syslog: bool,

    /// Creates a logger.
    ///
    /// `path`: log file to append to, or null for no file output. If the
    /// file cannot be opened the logger silently runs without it.
    /// `use_syslog`: when true, opens a syslog connection under the name
    /// "zig-daemon" with the LOG_DAEMON facility.
    fn init(path: ?[]const u8, use_syslog: bool) DaemonLogger {
        var logger = DaemonLogger{
            .file = null,
            .use_syslog = use_syslog,
        };

        if (path) |p| {
            // Keep existing content and position at the end so new lines
            // are added after it.
            logger.file = std.fs.cwd().createFile(p, .{ .truncate = false }) catch null;
            if (logger.file) |*f| {
                f.seekFromEnd(0) catch {};
            }
        }

        if (use_syslog) {
            c.openlog("zig-daemon", c.LOG_PID | c.LOG_NDELAY, c.LOG_DAEMON);
        }

        return logger;
    }

    /// Closes the log file (if any) and the syslog connection (if used).
    fn deinit(self: *DaemonLogger) void {
        if (self.file) |*f| f.close();
        if (self.use_syslog) c.closelog();
    }

    /// Logs a formatted message at INFO level.
    fn info(self: *DaemonLogger, comptime fmt: []const u8, args: anytype) void {
        self.writeLog("INFO", fmt, args);
    }

    /// Logs a formatted message at WARN level.
    fn warn(self: *DaemonLogger, comptime fmt: []const u8, args: anytype) void {
        self.writeLog("WARN", fmt, args);
    }

    /// Logs a formatted message at ERROR level.
    fn err(self: *DaemonLogger, comptime fmt: []const u8, args: anytype) void {
        self.writeLog("ERROR", fmt, args);
    }

    /// Formats the message once and sends it to every enabled sink.
    ///
    /// File lines look like "[<unix seconds>] [<level>] <message>".
    /// For syslog, "ERROR" maps to LOG_ERR, "WARN" to LOG_WARNING and every
    /// other level to LOG_INFO. Messages longer than 512 bytes are dropped.
    fn writeLog(self: *DaemonLogger, level: []const u8, comptime fmt: []const u8, args: anytype) void {
        // 512 message bytes plus the terminating 0 that syslog needs, so the
        // same formatted buffer serves both sinks.
        var buf: [513]u8 = undefined;
        const msg = std.fmt.bufPrintZ(&buf, fmt, args) catch return;

        // write to file
        if (self.file) |*f| {
            var line_buf: [600]u8 = undefined;
            const ts = std.time.timestamp();
            const line = std.fmt.bufPrint(&line_buf, "[{d}] [{s}] {s}\n", .{ ts, level, msg }) catch return;
            // writeAll retries short writes so a line is never cut off.
            f.writeAll(line) catch {};
        }

        // write to syslog
        if (self.use_syslog) {
            const priority: c_int = if (std.mem.eql(u8, level, "ERROR"))
                c.LOG_ERR
            else if (std.mem.eql(u8, level, "WARN"))
                c.LOG_WARNING
            else
                c.LOG_INFO;

            // "%s" keeps any '%' inside the message from being interpreted.
            c.syslog(priority, "%s", msg.ptr);
        }
    }
};

/// Demo entry point: emits one message per level through a logger that
/// writes to both /tmp/zig_daemon.log and syslog, then tells the user where
/// to look.
pub fn main() !void {
    var log = DaemonLogger.init("/tmp/zig_daemon.log", true);
    defer log.deinit();

    const my_pid = linux.getpid();
    log.info("daemon starting, pid={d}", .{my_pid});
    log.warn("this is a warning message", .{});
    log.err("something went wrong: code={d}", .{42});
    log.info("daemon shutting down", .{});

    const out = std.io.getStdOut().writer();
    try out.writeAll("Log written to /tmp/zig_daemon.log\n");
    try out.writeAll("Check syslog: journalctl -t zig-daemon --no-pager\n");
}

The logger writes to both a file and syslog simultaneously. In production you'd typically pick one or the other -- syslog if you want integration with journalctl and log rotation handled by the system, file-based if you want full control over the format and location. The C interop for syslog is straightforward: openlog sets the program name and facility, syslog writes individual messages, closelog cleans up.

Having said that, most modern daemons skip syslog entirely and just write to stderr -- because systemd captures stderr and routes it to the journal anyway. If your daemon will be managed by systemd (which we'll discuss shortly), you can just keep writing to stderr and let systemd handle the rest. The file-based logger is most useful when you're NOT running under systemd.

Signal-based control: SIGHUP and SIGTERM

A well-behaved daemon responds to two signals at minimum:

SIGTERM: graceful shutdown. Clean up resources, close connections, remove the PID file, exit.
SIGHUP: reload configuration. Re-read config files without restarting the entire process.

These conventions go back decades. kill -TERM tells a daemon to stop. kill -HUP tells it to reload. Every sysadmin on the planet knows this, and your daemon should respect it.

We covered signal handling in detail in episode 67, so this should look familiar:

const std = @import("std");
const posix = std.posix;
const linux = std.os.linux;

// Set by the main loop when a SIGTERM byte is read from the pipe.
var should_stop: bool = false;
var should_reload: bool = false;
// Write end of the self-pipe; -1 until main() has created the pipe.
var sig_pipe_fd: posix.fd_t = -1;

/// Signal handler: forwards the signal number as a single byte through the
/// self-pipe so the main loop can handle it outside signal context.
/// Does nothing while the pipe has not been set up; write errors are ignored.
fn signalHandler(sig: c_int) callconv(.c) void {
    if (sig_pipe_fd < 0) return;
    const byte = [_]u8{@intCast(@as(u32, @bitCast(sig)))};
    _ = posix.write(sig_pipe_fd, &byte) catch {};
}

/// Runtime settings of the demo daemon; the field defaults are the values
/// used at startup.
const Config = struct {
    interval_ms: u64 = 2000,
    log_path: []const u8 = "/tmp/zig_daemon_ctrl.log",
    verbose: bool = false,

    /// Returns the configuration used after a reload. Nothing is read from
    /// disk here: the result is the defaults with a 1500 ms interval and
    /// verbose output switched on.
    fn load() Config {
        var reloaded: Config = .{};
        reloaded.interval_ms = 1500;
        reloaded.verbose = true;
        return reloaded;
    }
};

/// Signal-controlled demo daemon. Installs `signalHandler` for SIGTERM and
/// SIGHUP, then loops on poll() over the self-pipe: the poll timeout is the
/// work interval, SIGHUP swaps in `Config.load()`, SIGTERM ends the loop.
pub fn main() !void {
    const stdout = std.io.getStdOut().writer();

    // self-pipe for signal delivery (episode 67 pattern)
    const pipe_fds = try posix.pipe();
    // Released on every exit path, including errors from the prints below.
    defer posix.close(pipe_fds[0]);
    defer posix.close(pipe_fds[1]);
    sig_pipe_fd = pipe_fds[1];

    // Make the write end non-blocking so the handler can never block on a
    // full pipe.
    const flags = linux.fcntl(pipe_fds[1], linux.F.GETFL, 0);
    _ = linux.fcntl(pipe_fds[1], linux.F.SETFL, flags | @as(u32, @bitCast(linux.O{ .NONBLOCK = true })));

    var sa: linux.Sigaction = .{
        .handler = .{ .handler = signalHandler },
        .mask = linux.empty_sigset,
        .flags = linux.SA.RESTART,
    };
    _ = linux.sigaction(linux.SIG.TERM, &sa, null);
    _ = linux.sigaction(linux.SIG.HUP, &sa, null);

    var config = Config{};
    try stdout.print("Daemon pid {d}. Send SIGHUP to reload, SIGTERM to stop.\n", .{linux.getpid()});
    try stdout.print("  kill -HUP {d}   (reload)\n", .{linux.getpid()});
    try stdout.print("  kill -TERM {d}  (stop)\n", .{linux.getpid()});

    var tick: u32 = 0;
    while (!should_stop) {
        var pollfds = [_]linux.pollfd{.{
            .fd = pipe_fds[0],
            .events = linux.POLL.IN,
            .revents = 0,
        }};

        // Returns early when a signal byte arrives, otherwise after one
        // work interval.
        _ = linux.poll(&pollfds, 1, @intCast(config.interval_ms));

        if (pollfds[0].revents & linux.POLL.IN != 0) {
            var buf: [16]u8 = undefined;
            const n = posix.read(pipe_fds[0], &buf) catch 0;
            // Each byte in the pipe is one delivered signal number.
            for (buf[0..n]) |sig| {
                switch (sig) {
                    linux.SIG.TERM => {
                        try stdout.print("[daemon] SIGTERM received, shutting down...\n", .{});
                        should_stop = true;
                    },
                    linux.SIG.HUP => {
                        try stdout.print("[daemon] SIGHUP received, reloading config...\n", .{});
                        config = Config.load();
                        try stdout.print("[daemon] new interval: {d}ms, verbose: {}\n", .{ config.interval_ms, config.verbose });
                    },
                    else => {},
                }
            }
        }

        if (!should_stop) {
            tick += 1;
            if (config.verbose) {
                try stdout.print("[daemon] tick {d} (interval {d}ms)\n", .{ tick, config.interval_ms });
            }
        }
    }

    try stdout.print("[daemon] stopped after {d} ticks. Goodbye!\n", .{tick});
}

This is the exact self-pipe pattern from episode 67 -- the signal handler writes a byte to the pipe, the main loop polls on the pipe read end and dispatches based on which signal byte it receives. The poll timeout doubles as the daemon's work interval, so each tick either does work or handles a signal.

The config reload is simplistic here (just returns hardcoded values), but in a real daemon you'd read a TOML or JSON file, validate the new values, and swap them in. The key insight is that SIGHUP doesn't restart the process -- it reloads configuration in-place. This means no downtime, no connection drops, no lost state.

The systemd question: do you even need double-fork?

If you're running on a modern Linux system with systemd, you might not need the double-fork at all. Systemd can manage the process lifecycle for you: start on boot, restart on crash, capture logs, handle dependencies. All you need is a service file:

# /etc/systemd/system/my-zig-daemon.service
[Unit]
Description=My Zig Daemon
After=network.target

[Service]
Type=simple
ExecStart=/usr/local/bin/my-zig-daemon
ExecReload=/bin/kill -HUP $MAINPID
Restart=on-failure
RestartSec=5
User=daemon
Group=daemon
WorkingDirectory=/tmp

# security hardening
NoNewPrivileges=yes
ProtectSystem=strict
ProtectHome=yes

[Install]
WantedBy=multi-user.target

With Type=simple, systemd expects the process to run in the foreground -- no forking at all. Systemd itself handles the "daemonization": it starts the process, monitors it, captures its stdout/stderr to the journal, restarts it on failure, and sends SIGTERM when you run systemctl stop.

This means for systemd-managed services, your Zig daemon is just a normal program that runs a loop:

const std = @import("std");
const posix = std.posix;
const linux = std.os.linux;

/// Shutdown flag shared between `termHandler` and the main loop.
/// Written with an atomic store in the handler and read with an atomic load
/// in `main`.
var should_stop: bool = false;

/// SIGTERM handler: records that shutdown was requested and returns.
/// The only work done here is a single atomic store; the main loop notices
/// the flag on its next iteration.
fn termHandler(sig: c_int) callconv(.c) void {
    _ = sig; // the handler is only installed for SIGTERM, so the number is not needed
    @atomicStore(bool, &should_stop, true, .release);
}

/// Foreground service loop for a systemd `Type=simple` unit.
///
/// Installs `termHandler` for SIGTERM, then prints a tick to stderr every two
/// seconds until the handler sets `should_stop`. There is no fork, no PID file
/// and no log file here; everything is written to stderr.
///
/// Returns `error.SigactionFailed` if the handler cannot be installed, or any
/// error from writing to stderr.
pub fn main() !void {
    const stderr = std.io.getStdErr().writer();

    const sa: linux.Sigaction = .{
        .handler = .{ .handler = termHandler },
        .mask = linux.empty_sigset,
        .flags = 0,
    };
    // Without the handler SIGTERM would kill the process outright and the
    // final "stopped" line below would never be printed, so fail loudly.
    if (linux.sigaction(linux.SIG.TERM, &sa, null) != 0) return error.SigactionFailed;

    try stderr.print("Daemon starting (pid {d})\n", .{linux.getpid()});

    var tick: u32 = 0;
    while (!@atomicLoad(bool, &should_stop, .acquire)) {
        tick += 1;
        try stderr.print("Working... tick {d}\n", .{tick});
        std.time.sleep(2 * std.time.ns_per_s);
    }

    try stderr.print("Daemon stopped after {d} ticks\n", .{tick});
}

That's it. No fork, no setsid, no /dev/null, no PID file. Systemd handles all of it. The writes to stderr show up in journalctl -u my-zig-daemon automatically.

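So when DO you need the double-fork? When you can't rely on a service manager to supervise the process for you -- embedded systems, BSDs, older Linux distros without systemd, or when your daemon needs to be self-contained and portable across Unix variants. (macOS is a special case: launchd uses a different mechanism entirely and, like systemd, expects the program it launches to stay in the foreground, so don't double-fork under launchd either.) The classic daemonization code works everywhere. The systemd approach only works on systemd systems.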

Health checking: knowing your daemon is actually alive

A daemon that crashes silently at 3am is worse than one that crashes loudly. You need health checks -- periodic self-monitoring that detects when something is wrong and either fixes it or reports it.

The simplest health check is a watchdog timer: the daemon must "kick" the watchdog periodically. If it fails to kick within the timeout, something is stuck (deadlock, infinite loop, blocked I/O):

const std = @import("std");
const linux = std.os.linux;
const posix = std.posix;

/// Simple watchdog: the daemon calls `kick` after each successful work cycle,
/// and `isHealthy` reports whether the last kick is recent enough.
/// Each kick is also mirrored to a health file so external monitors can read
/// the timestamp.
const Watchdog = struct {
    /// Unix time, in seconds, of the most recent kick (or of `init`).
    last_kick: i64,
    /// Maximum allowed gap between kicks, in seconds.
    timeout_s: i64,
    /// Path of the file that receives the timestamp on every kick.
    health_file: []const u8,

    /// Creates a watchdog whose clock starts now. Does not touch the file system.
    fn init(timeout_s: i64, health_file: []const u8) Watchdog {
        return .{
            .last_kick = std.time.timestamp(),
            .timeout_s = timeout_s,
            .health_file = health_file,
        };
    }

    /// Records "still alive": updates `last_kick` and rewrites the health file
    /// with the new timestamp followed by a newline.
    /// File errors are ignored on purpose: failing to publish the timestamp
    /// must not take the daemon down.
    fn kick(self: *Watchdog) void {
        self.last_kick = std.time.timestamp();

        // write timestamp to health file (external monitors can check this)
        const f = std.fs.cwd().createFile(self.health_file, .{}) catch return;
        defer f.close();
        var buf: [64]u8 = undefined;
        const msg = std.fmt.bufPrint(&buf, "{d}\n", .{self.last_kick}) catch return;
        // writeAll rather than write: a single write may be short.
        f.writeAll(msg) catch {};
    }

    /// Returns true while fewer than `timeout_s` seconds have passed since the
    /// last kick.
    fn isHealthy(self: *const Watchdog) bool {
        return (std.time.timestamp() - self.last_kick) < self.timeout_s;
    }

    /// Removes the health file; a missing file is not an error.
    fn cleanup(self: *const Watchdog) void {
        std.fs.cwd().deleteFile(self.health_file) catch {};
    }
};

/// Stand-in for one unit of real work.
/// Iteration 7 pauses for 500 ms to imitate a slow operation; every iteration
/// reports success.
fn doWork(iteration: u32) !bool {
    const slow_iteration: u32 = 7;
    const is_slow = iteration == slow_iteration;
    if (is_slow) std.time.sleep(500 * std.time.ns_per_ms);
    return true;
}

/// Watchdog demo: runs twelve work cycles one second apart, kicking the
/// watchdog after each successful cycle and reporting its state on stdout.
/// The health file is removed when `main` returns.
pub fn main() !void {
    const stdout = std.io.getStdOut().writer();

    var wd = Watchdog.init(10, "/tmp/zig_daemon_health");
    defer wd.cleanup();

    try stdout.print("Daemon with watchdog (timeout: {d}s)\n", .{wd.timeout_s});
    try stdout.print("Health file: {s}\n", .{wd.health_file});

    var i: u32 = 0;
    while (i < 12) : (i += 1) {
        // do the actual work; a failed cycle is reported and not kicked
        _ = doWork(i) catch {
            try stdout.print("[{d}] work failed!\n", .{i});
            continue;
        };

        // Judge the cycle BEFORE kicking. Checking right after the kick would
        // always see an age of zero, so the warning could never fire.
        const since_last_kick = std.time.timestamp() - wd.last_kick;
        const healthy = wd.isHealthy();

        // kick the watchdog after successful work
        wd.kick();

        if (healthy) {
            try stdout.print("[{d}] healthy (last kick {d}s ago)\n", .{ i, since_last_kick });
        } else {
            try stdout.print("[{d}] WARNING: watchdog timeout exceeded!\n", .{i});
        }

        std.time.sleep(1 * std.time.ns_per_s);
    }

    try stdout.print("Done. Cleaning up health file.\n", .{});
}

The health file approach is nice because external monitoring tools (cron scripts, Nagios, Prometheus) can just check the timestamp in that file. If it's older than the timeout, the daemon is stuck. Systemd has its own watchdog mechanism (WatchdogSec= in the service file) where the daemon calls sd_notify(0, "WATCHDOG=1") periodically -- but that requires speaking the systemd notification socket protocol.

Putting it all together: a practical background service

Let's combine everything -- double-fork, PID file, logging, signal handling, and health checking -- into a complete daemon that does actual work. Our daemon will periodically check a file for new entries and process them (think of it as a simple job queue):

const std = @import("std");
const posix = std.posix;
const linux = std.os.linux;
const c = @cImport({
    @cInclude("unistd.h");
});

const PID_FILE = "/tmp/zig_service.pid";
const LOG_FILE = "/tmp/zig_service.log";
const HEALTH_FILE = "/tmp/zig_service.health";
const QUEUE_FILE = "/tmp/zig_service_queue.txt";

/// Write end of the self-pipe used to forward signals to the main loop.
/// Stays at -1 until `main` has created the pipe.
var sig_pipe_fd: posix.fd_t = -1;

/// Signal handler for the self-pipe: forwards the signal number to the main
/// loop as a single byte written to `sig_pipe_fd`.
/// Does nothing while the pipe is not open. A failed write (for example a full
/// non-blocking pipe) is dropped, since nothing more can be done from here.
fn signalHandler(sig: c_int) callconv(.c) void {
    if (sig_pipe_fd < 0) return;
    const byte = [_]u8{@intCast(@as(u32, @bitCast(sig)))};
    _ = posix.write(sig_pipe_fd, &byte) catch 0;
}

/// Appends one line of the form `[<unix seconds>] [<level>] <message>\n` to `log`.
///
/// `fmt` and `args` are formatted into a 512-byte buffer. A message that does
/// not fit is replaced by a short placeholder, so the event still appears in
/// the log. Write errors are ignored: logging is best effort and must never
/// stop the daemon.
fn logMessage(log: *std.fs.File, level: []const u8, comptime fmt: []const u8, args: anytype) void {
    var msg_buf: [512]u8 = undefined;
    const msg: []const u8 = std.fmt.bufPrint(&msg_buf, fmt, args) catch "(message too long to format)";

    var line_buf: [600]u8 = undefined;
    const line = std.fmt.bufPrint(&line_buf, "[{d}] [{s}] {s}\n", .{
        std.time.timestamp(),
        level,
        msg,
    }) catch return;

    // writeAll rather than write: a single write may be short and would
    // leave a truncated line in the log.
    log.writeAll(line) catch {};
}

/// Writes this process's PID, followed by a newline, to `PID_FILE`.
///
/// Before writing, an existing PID file is checked. If it names another
/// process that still exists, `error.AlreadyRunning` is returned and the file
/// is left alone. A missing, unreadable, unparsable or stale PID file is
/// simply overwritten.
///
/// NOTE(review): the check-then-write is not atomic; two daemons started at
/// the same instant could both pass the check.
fn writePid() !void {
    const self_pid = linux.getpid();

    var old_buf: [32]u8 = undefined;
    if (std.fs.cwd().readFile(PID_FILE, &old_buf)) |contents| {
        const text = std.mem.trim(u8, contents, " \r\n\t");
        if (std.fmt.parseInt(posix.pid_t, text, 10)) |old_pid| {
            if (old_pid > 0 and old_pid != self_pid) {
                // Signal 0 delivers nothing; it only reports whether the
                // process exists. PermissionDenied still means "exists".
                const alive = if (posix.kill(old_pid, 0)) |_|
                    true
                else |err|
                    err == error.PermissionDenied;
                if (alive) return error.AlreadyRunning;
            }
        } else |_| {}
    } else |_| {}

    const f = try std.fs.cwd().createFile(PID_FILE, .{});
    defer f.close();
    var buf: [32]u8 = undefined;
    const s = std.fmt.bufPrint(&buf, "{d}\n", .{self_pid}) catch return error.FormatFailed;
    try f.writeAll(s);
}

/// Rewrites `HEALTH_FILE` with the current Unix time in seconds followed by a
/// newline, so external monitors can see when the daemon last completed a cycle.
/// Errors are ignored on purpose: a failed health update must not stop the daemon.
fn kickWatchdog() void {
    const f = std.fs.cwd().createFile(HEALTH_FILE, .{}) catch return;
    defer f.close();
    var buf: [32]u8 = undefined;
    const s = std.fmt.bufPrint(&buf, "{d}\n", .{std.time.timestamp()}) catch return;
    // writeAll rather than write: a single write may be short.
    f.writeAll(s) catch {};
}

/// Processes every job in `QUEUE_FILE` and returns how many were handled.
///
/// Each non-blank line is one job; surrounding spaces, tabs and carriage
/// returns are trimmed. "Processing" here means logging the job at INFO level.
/// Once the whole file has been read and at least one job was handled, the
/// queue file is deleted.
///
/// The file is streamed line by line, so a queue larger than the line buffer
/// is handled in full. Lines longer than the buffer are skipped with a warning.
/// If reading fails part-way, the file is kept so nothing is lost, although
/// jobs already handled will be seen again on the next call.
/// Returns 0 when the queue file does not exist or cannot be opened.
///
/// NOTE(review): a producer appending between the final read and the delete
/// can still lose jobs; claiming the file with a rename first would close
/// that window.
fn processQueue(log: *std.fs.File) u32 {
    const file = std.fs.cwd().openFile(QUEUE_FILE, .{}) catch return 0;
    defer file.close();

    var buffered = std.io.bufferedReader(file.reader());
    const reader = buffered.reader();

    var count: u32 = 0;
    var line_buf: [4096]u8 = undefined;
    while (true) {
        const maybe_line = reader.readUntilDelimiterOrEof(&line_buf, '\n') catch |err| switch (err) {
            error.StreamTooLong => {
                logMessage(log, "WARN", "skipping queue line longer than {d} bytes", .{line_buf.len});
                // Discard the rest of the oversized line so it is not
                // mistaken for a separate job.
                reader.skipUntilDelimiterOrEof('\n') catch return count;
                continue;
            },
            // Read failure: keep the queue file for the next tick.
            else => return count,
        };
        const line = maybe_line orelse break; // end of file

        const trimmed = std.mem.trim(u8, line, " \r\t");
        if (trimmed.len == 0) continue;
        logMessage(log, "INFO", "processing job: {s}", .{trimmed});
        count += 1;
    }

    if (count > 0) {
        // clear the queue after processing
        std.fs.cwd().deleteFile(QUEUE_FILE) catch {};
    }
    return count;
}

/// Detaches the process from its terminal using the classic double-fork.
///
/// Steps: fork and let the parent exit, start a new session with `setsid`,
/// fork again and let the intermediate process exit, set the umask to 0o027,
/// change directory to "/", and point stdin/stdout/stderr at /dev/null.
/// Only the final child returns from this function.
///
/// Returns an error if either fork, `setsid`, opening /dev/null or `dup2` fails.
fn daemonize() !void {
    // First fork: the original process exits, handing control back to the shell.
    const first_pid = try posix.fork();
    if (first_pid > 0) std.process.exit(0);

    // New session: no controlling terminal any more.
    if (c.setsid() < 0) return error.SetsidFailed;

    // Second fork: the survivor is not a session leader, so it cannot
    // accidentally acquire a terminal again.
    const second_pid = try posix.fork();
    if (second_pid > 0) std.process.exit(0);

    _ = linux.umask(0o027);
    // Best effort: a failed chdir does not prevent the daemon from running.
    posix.chdir("/") catch {};

    // Open /dev/null FIRST and dup it over the standard descriptors. This does
    // not depend on fd 0 being the lowest free descriptor, and the process is
    // never left without stdin/stdout/stderr.
    const devnull = try posix.open("/dev/null", .{ .ACCMODE = .RDWR }, 0);
    const std_fds = [_]posix.fd_t{ 0, 1, 2 };
    for (std_fds) |fd| {
        if (fd != devnull) {
            _ = try posix.dup2(devnull, fd);
        }
    }
    if (devnull > 2) posix.close(devnull);
}

/// Entry point of the complete background service.
///
/// Sequence: announce on stderr, daemonize, open the log file for appending,
/// write the PID file, set up the self-pipe and the SIGTERM/SIGHUP handlers,
/// then loop. Each pass waits up to three seconds for a signal byte. SIGTERM
/// ends the loop and SIGHUP is only logged. Otherwise the queue is processed
/// and the health file refreshed. On exit the PID and health files are removed
/// and the pipe is closed.
pub fn main() !void {
    const stderr = std.io.getStdErr().writer();
    try stderr.print("Starting service daemon...\n", .{});

    try daemonize();

    // open log file (append: keep earlier runs, continue at the end)
    var log = try std.fs.cwd().createFile(LOG_FILE, .{ .truncate = false });
    defer log.close();
    log.seekFromEnd(0) catch {};

    // write PID file
    writePid() catch {
        logMessage(&log, "ERROR", "failed to write PID file", .{});
        return;
    };
    defer std.fs.cwd().deleteFile(PID_FILE) catch {};
    defer std.fs.cwd().deleteFile(HEALTH_FILE) catch {};

    // self-pipe for signals
    const pipe_fds = try posix.pipe();
    defer {
        // Detach the handler from the pipe before closing it, so a late
        // signal cannot write into a closed (or reused) descriptor.
        sig_pipe_fd = -1;
        posix.close(pipe_fds[0]);
        posix.close(pipe_fds[1]);
    }
    sig_pipe_fd = pipe_fds[1];

    // The write end must be non-blocking: the signal handler must never
    // block on a full pipe.
    const flags = linux.fcntl(pipe_fds[1], linux.F.GETFL, 0);
    _ = linux.fcntl(pipe_fds[1], linux.F.SETFL, flags | @as(u32, @bitCast(linux.O{ .NONBLOCK = true })));

    const sa: linux.Sigaction = .{
        .handler = .{ .handler = signalHandler },
        .mask = linux.empty_sigset,
        .flags = linux.SA.RESTART,
    };
    if (linux.sigaction(linux.SIG.TERM, &sa, null) != 0 or
        linux.sigaction(linux.SIG.HUP, &sa, null) != 0)
    {
        logMessage(&log, "ERROR", "failed to install signal handlers", .{});
        return;
    }

    logMessage(&log, "INFO", "daemon started, pid={d}", .{linux.getpid()});
    kickWatchdog();

    var should_stop = false;
    var total_jobs: u64 = 0;
    var tick: u32 = 0;

    while (!should_stop) {
        var pollfds = [_]linux.pollfd{.{
            .fd = pipe_fds[0],
            .events = linux.POLL.IN,
            .revents = 0,
        }};

        // check every 3 seconds; posix.poll retries when interrupted by a
        // signal, so a signal is handled below instead of causing an early tick
        _ = posix.poll(&pollfds, 3000) catch |err| {
            logMessage(&log, "ERROR", "poll failed: {s}", .{@errorName(err)});
            break;
        };

        // handle signals
        if ((pollfds[0].revents & linux.POLL.IN) != 0) {
            var buf: [16]u8 = undefined;
            const n = posix.read(pipe_fds[0], &buf) catch 0;
            for (buf[0..n]) |sig| {
                switch (sig) {
                    linux.SIG.TERM => {
                        logMessage(&log, "INFO", "SIGTERM received, stopping", .{});
                        should_stop = true;
                    },
                    linux.SIG.HUP => {
                        logMessage(&log, "INFO", "SIGHUP received, reloading config", .{});
                    },
                    else => {},
                }
            }
        }

        if (!should_stop) {
            tick += 1;
            const processed = processQueue(&log);
            total_jobs += processed;
            kickWatchdog();

            if (processed > 0) {
                logMessage(&log, "INFO", "tick {d}: processed {d} jobs (total: {d})", .{ tick, processed, total_jobs });
            }
        }
    }

    logMessage(&log, "INFO", "daemon stopped, processed {d} total jobs", .{total_jobs});
}

You can test it by building and running (it will daemonize immediately), then:

# check it's running
cat /tmp/zig_service.pid

# add some jobs (printf is used instead of "echo -e", which not every shell supports)
printf 'build project\nrun tests\ndeploy staging\n' > /tmp/zig_service_queue.txt

# wait a few seconds, check the log
cat /tmp/zig_service.log

# reload config (does nothing special in this demo)
kill -HUP $(cat /tmp/zig_service.pid)

# graceful shutdown
kill -TERM $(cat /tmp/zig_service.pid)

# verify it cleaned up
ls /tmp/zig_service.pid  # should be gone
cat /tmp/zig_service.log  # should show shutdown message

This is a production-ready daemon skeleton. It handles all the lifecycle concerns: proper detachment from the terminal, single-instance enforcement via PID file, file-based logging, signal handling for stop and reload, health monitoring via watchdog file, and graceful cleanup on exit. You could drop your actual business logic into the processQueue function and have a real service running.

The concepts from the last several episodes -- fork (ep64), signals (ep67), Unix sockets (ep68) -- all come together here. A daemon is really just the culmination of Unix process management: fork to detach, signals to control, sockets or files for communication, and disciplined resource cleanup. Next up we'll look at timers and scheduling -- how to make your daemon do things at precise intervals, handle timeouts, and integrate with the system clock.

Exercises

Add a Unix socket control interface to the complete daemon from the last section. The daemon should listen on /tmp/zig_service.sock in addition to watching the queue file. Clients should be able to send status (returns pid, uptime, and total jobs processed), queue (adds a job directly without writing to the queue file -- store in an in-memory array of up to 32 entries), and stop (triggers graceful shutdown). Use poll() to multiplex the signal pipe and the socket listener in the same event loop. Verify with echo "status" | socat - UNIX-CONNECT:/tmp/zig_service.sock.
Implement log rotation inside the daemon. The daemon should track how many bytes it has written to the log file. When the log exceeds 10,000 bytes, rename the current log to zig_service.log.1 (overwriting any existing .1), open a fresh zig_service.log, and continue writing. Also respond to SIGUSR1 by forcing an immediate rotation regardless of size. This is similar to how nginx handles log rotation: there an external tool renames the file and nginx reopens its logs on SIGUSR1, whereas our daemon performs the rename itself, on both the size threshold and the signal.
Write a daemon supervisor -- a separate program that monitors the daemon's health file. The supervisor reads /tmp/zig_service.health every 5 seconds. If the timestamp inside is older than 15 seconds, the health check has failed. On first failure, the supervisor logs a warning. On three consecutive failures, the supervisor reads the PID file, kills the daemon with SIGTERM, waits for it to exit, and restarts it by exec-ing the daemon binary. Log all supervisor actions to /tmp/zig_supervisor.log. This is a simplified version of what systemd's WatchdogSec does.

Alright, dat was 'm!

The classic double-fork pattern detaches a process from its controlling terminal by forking twice, calling setsid between forks, and redirecting standard fds to /dev/null
Each step in the double-fork solves a specific problem: first fork returns control to the shell, setsid creates a new session, second fork prevents accidental terminal acquisition
PID files provide single-instance enforcement and let you find the daemon later for signal delivery -- check for stale PIDs with kill(pid, 0)
Daemons need file-based logging (or syslog) because stdout is dead after daemonization -- the DaemonLogger pattern supports both simultaneously
SIGTERM for graceful shutdown and SIGHUP for config reload are the standard Unix daemon signal conventions -- use the self-pipe pattern from episode 67 to handle them safely
Modern systemd-managed services can skip the double-fork entirely -- systemd handles process supervision, log capture, restart-on-failure, and watchdog monitoring
Health files with timestamps let external monitors detect stuck daemons -- write the current timestamp on every successful work cycle
Combining fork, PID files, logging, signals, and health checking creates a production-ready daemon skeleton that handles all lifecycle concerns

Thanks for reading!

@scipio