systemd简易笔记

Linux服务治理

需要阅读

了解为什么要将二进制先拷到 /usr/local/bin,不然就会被SELinux拦截

chap-security-enhanced_linux-troubleshooting

查看审计日志

grep "go_notify" /var/log/audit/audit.log

Systemd Unit Type

  1. simple
    即使服务的可执行文件无法启动,systemctl start 也不会报错(fork 之后即视为启动成功)

  2. exec
    和simple差不多,区别是可执行文件无法启动时,
    systemctl start 会报错

  3. forking
    可能跟某个历史遗产有关,文档上不建议用,建议使用notify,notify-reload

  4. oneshot
    专为一次执行,无须长期驻留

  5. dbus
    dbus应用的话直接Type=dbus最合适,其能根据dbusname作判断active依据
    #whatisd-bus

  6. notify
    如果服务需要加载一会儿,然后再通知systemd自己已激活,需要用这个,支持reloading的服务,则用下面的

  7. notify-reload

  8. idle
    待所有活动作业都派发完后才执行,最多延迟5秒,超时后仍会启动

其详解文档位置

systemd.service.html#Type=

systemd服务,Type=notify

如果普通的main进程,运行后就能获得反馈,不需要时间加载配置什么的,直接使用Type=exec,比simple更好

# Unit for the go_notify demo program shown in the Go example below.
[Unit]
Description=practice go notify
After=network.target

[Service]
# The demo announces readiness itself by sending READY=1.
Type=notify
# Start-up budget; the demo sleeps 5s (initDuration) before sending READY=1.
TimeoutStartSec=12
# Uncomment to enable the watchdog; the demo then sends WATCHDOG=1 every interval/2.
#WatchdogSec=10s
#Restart=on-failure
ExecStart=/usr/local/bin/go_notify
# Stop budget; the demo's simulated cleanup sleeps 1s before exiting.
TimeoutStopSec=5s

[Install]
WantedBy=multi-user.target

go示例

package main

import (
        "context"
        "log"
        "os"
        "os/signal"
        "syscall"
        "time"

        "github.com/coreos/go-systemd/v22/daemon"
)

const (
        // Simulated initialization time; main sleeps this long before sending READY=1.
        initDuration = 5 * time.Second
        // Interval between iterations of the business loop.
        workInterval = 2 * time.Second
)

// main is a demo Type=notify service. It simulates a slow start-up, sends
// READY=1 to systemd, starts a watchdog keep-alive goroutine when the watchdog
// is enabled, and then logs a heartbeat every workInterval until SIGINT or
// SIGTERM arrives.
func main() {
        log.SetPrefix("notify-demo ")
        log.Println("starting...")

        // 1. Without a notification socket there is nobody to notify, so bail
        //    out immediately (this is what happens when run outside systemd).
        sock := os.Getenv("NOTIFY_SOCKET")
        if len(sock) == 0 {
                log.Fatal("NOTIFY_SOCKET not set, run me under systemd")
        }
        log.Printf("NOTIFY_SOCKET=%s", sock)

        // 2. Simulate a heavyweight initialization phase.
        log.Printf("初始化中,预计 %v ...", initDuration)
        time.Sleep(initDuration)

        // 3. Tell systemd the service is ready; both an error and an
        //    undelivered notification are fatal.
        switch ok, err := daemon.SdNotify(false, daemon.SdNotifyReady); {
        case err != nil:
                log.Fatalf("SdNotify(READY=1) failed: %v", err)
        case !ok:
                log.Fatal("systemd 没有收到 READY=1(返回值=false)")
        }
        log.Println("已向 systemd 发送 READY=1")

        // 4. Watchdog. A positive interval means the unit file enabled it
        //    (WatchdogSec=); the keep-alive goroutine started below then sends
        //    WATCHDOG=1 every interval/2.
        wdInterval, err := daemon.SdWatchdogEnabled(false)
        if err != nil {
                log.Fatalf("SdWatchdogEnabled error: %v", err)
        }
        wdEnabled := wdInterval > 0
        if !wdEnabled {
                log.Println("看门狗未启用(单元文件没写 WatchdogSec=)")
        } else {
                log.Printf("看门狗已启用,周期 %v,将周期性发送 WATCHDOG=1", wdInterval)
        }

        // 5. Business loop plus signal handling.
        ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
        defer stop()

        heartbeat := time.NewTicker(workInterval)
        defer heartbeat.Stop()

        if wdEnabled {
                // Feed the watchdog from its own goroutine so the business
                // loop cannot delay the keep-alives.
                go watchdogLoop(wdInterval)
        }

        for {
                select {
                case <-heartbeat.C:
                        log.Println("业务心跳:doing useful work...")
                case <-ctx.Done():
                        log.Println("收到信号,开始优雅退出...")
                        // Place to close connections, flush buffers, etc.
                        time.Sleep(1 * time.Second) // simulated cleanup
                        log.Println("bye~")
                        return
                }
        }
}

// watchdogLoop sends WATCHDOG=1 to systemd every interval/2 and logs any
// notification that returns an error or is reported as not sent. It never
// returns; it is meant to run in its own goroutine for the life of the process.
func watchdogLoop(interval time.Duration) {
        ticker := time.NewTicker(interval / 2)
        defer ticker.Stop()
        for range ticker.C {
                delivered, err := daemon.SdNotify(false, daemon.SdNotifyWatchdog)
                switch {
                case err != nil:
                        log.Printf("WATCHDOG=1 发送失败: %v", err)
                case !delivered:
                        log.Println("WATCHDOG=1 未被 systemd 接收")
                }
        }
}

control group

  1. A Linux SysAdmin’s introduction to cgroups
    cgroups-part-one
  2. How to manage cgroups with CPUShares
    cgroups-part-two
  3. Managing cgroups the hard way-manually
    cgroups-part-three
  4. Managing cgroups with systemd
    cgroups-part-four

以及第四节的资源看一下。

null

逆天,systemd这整套体系,如此之大。

systemd.resource-control.html#Options

示例

#!/usr/bin/env python3
"""Demo service: logs a heartbeat every 10 seconds until SIGTERM is received.

Log records always go to stderr. They are also written to LOG_FILE when that
file can be opened; an unwritable log file is not fatal.
"""
import time
import logging
import signal
import sys

LOG_FILE = '/var/log/demo-py-service.log'

# The stream handler is always available; the file handler is best-effort.
# FileHandler opens the file on construction, so an unwritable path (for
# example when running as an unprivileged user) would otherwise abort start-up.
handlers = [logging.StreamHandler()]
file_log_error = None
try:
    handlers.insert(0, logging.FileHandler(LOG_FILE))
except OSError as exc:
    file_log_error = exc

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=handlers
)

if file_log_error is not None:
    logging.warning("cannot open %s (%s); logging to stderr only",
                    LOG_FILE, file_log_error)

# Set by the SIGTERM handler; checked once per loop iteration.
shutdown = False


def _term(signum, frame):
    """SIGTERM handler: ask the main loop to exit after its current sleep."""
    global shutdown
    shutdown = True


signal.signal(signal.SIGTERM, _term)

while not shutdown:
    logging.info("demo-py-service is alive")
    time.sleep(10)
logging.info("demo-py-service exiting")
# Unit for the Python demo script above, used to try out cgroup resource limits.
[Unit]
Description=Demo Cgroup python
After=network.target

[Service]
Type=exec
ExecStart=/usr/bin/python3 /usr/local/bin/cgroup_py.py
Restart=on-failure
RestartSec=5s

# NOTE(review): the script opens /var/log/demo-py-service.log; confirm this user can write there.
User=nobody
Group=nobody

# Resource limits; see man systemd.resource-control (OPTIONS) for the available settings.
MemoryMax=50M
CPUQuota=20%

StandardOutput=journal
StandardError=journal

[Install]
WantedBy=multi-user.target

关于限制方面的字段设置,需要参考 man systemd.resource-control OPTIONS

systemd-resolved

Systemd-resolved

man systemd-resolved
man resolved.conf
man resolvectl
man nss-resolve

systemd-resolved is a system service that provides network name resolution to local applications. It implements a caching and validating DNS/DNSSEC stub resolver, as well as an LLMNR and MulticastDNS resolver and responder. Local applications may submit network name resolution requests via three interfaces.

我之前写毕业设计的时候,改 /etc/resolv.conf 文件,改动总是过一会儿就失效,让我非常恼火。然后发现,文档里写着,这个文件是由 systemd-resolved 管理的,不要直接修改它。

毕业设计的内容,其实就是控制dns解析。然后需要改dns服务器。

# This is /run/systemd/resolve/stub-resolv.conf managed by man:systemd-resolved(8).
# Do not edit.
#
# This file might be symlinked as /etc/resolv.conf. If you're looking at
# /etc/resolv.conf and seeing this text, you have followed the symlink.
#
# This is a dynamic resolv.conf file for connecting local clients to the
# internal DNS stub resolver of systemd-resolved. This file lists all
# configured search domains.
#
# Run "resolvectl status" to see details about the uplink DNS servers
# currently in use.
#
# Third party programs should typically not access this file directly, but only
# through the symlink at /etc/resolv.conf. To manage man:resolv.conf(5) in a
# different way, replace this symlink by a static file or a different symlink.
#
# See man:systemd-resolved.service(8) for details about the supported modes of
# operation for /etc/resolv.conf.

nameserver 127.0.0.53
options edns0 trust-ad
search .

go-daemon-template

systemd, cgroup, go 综合起来的一个项目,作systemd刻意练习

示例

# Unit for the go-daemon-template program shown below.
[Unit]
Description=GD
After=network.target

[Service]
Type=exec
ExecStart=/usr/local/bin/go-daemon-template
Restart=on-failure
RestartSec=5s

User=nobody
Group=nobody

# Resource limits; see man systemd.resource-control (OPTIONS).
# The program's /query endpoint can allocate memory on request, presumably to exercise this limit.
MemoryMax=500M
#CPUQuota=60%

StandardOutput=journal
StandardError=journal

[Install]
WantedBy=multi-user.target

go-daemon-template 可以用htop查看,cgroup生效情况

package main

import (
        "fmt"
        "log"
        "net/http"
        "runtime"
        "strconv"
        "sync"
)

// Package-level state for the simulated CPU and memory load.
var (
        // cpuStopCh is closed to tell every running cpuBurner to stop.
        // It is created here so it is ready before main runs.
        cpuStopCh = make(chan struct{})
        // cpuWg tracks the running cpuBurner goroutines.
        cpuWg sync.WaitGroup

        // memBuf keeps the allocated blocks reachable so they stay in memory.
        memBuf [][]byte
        // memMu is held by setMemory while it replaces memBuf.
        memMu sync.Mutex
)

// cpuBurner spins in a busy loop until cpuStopCh is closed, then reports
// completion on cpuWg. One running cpuBurner is meant to keep one P busy.
func cpuBurner() {
        defer cpuWg.Done()
        for {
                // Poll the stop channel between bursts without blocking.
                select {
                case <-cpuStopCh:
                        return
                default:
                }
                // Pure computation burst; contains no blocking call.
                for n := 0; n < 1e6; n++ {
                }
        }
}

// cpuMu serializes setCPU. HTTP handlers run concurrently, and two overlapping
// calls would otherwise close cpuStopCh twice (a panic) and race on replacing it.
var cpuMu sync.Mutex

// setCPU replaces the current simulated CPU load with cores busy-looping
// goroutines. It first stops and waits for every running cpuBurner, then
// starts the new set. A negative cores is treated as 0, because a negative
// WaitGroup counter would panic.
func setCPU(cores int) {
        cpuMu.Lock()
        defer cpuMu.Unlock()

        if cores < 0 {
                cores = 0
        }

        // Stop the previous burners and wait until all of them have exited, so
        // none of them is still reading cpuStopCh when it is replaced below.
        close(cpuStopCh)
        cpuWg.Wait()

        // Fresh stop channel for the new generation of burners.
        cpuStopCh = make(chan struct{})
        cpuWg.Add(cores)
        for i := 0; i < cores; i++ {
                go cpuBurner()
        }
}

// setMemory replaces the simulated memory load with mb MiB. The previous
// allocation is dropped and collected before the new one is made, and the new
// memory is written once so that it is actually backed rather than lazily
// allocated. memMu is held for the whole operation.
func setMemory(mb int) {
        memMu.Lock()
        defer memMu.Unlock()

        // Release the previous allocation before requesting the new one.
        memBuf = nil
        runtime.GC()

        const mib = 1024 * 1024 // bytes per MiB
        buf := make([]byte, mb*mib)
        // Touch every byte to avoid lazy allocation.
        for i := range buf {
                buf[i] = 0
        }

        // Keep the allocation as 1 MiB slices, which makes later growing or
        // shrinking easier.
        var blocks [][]byte
        for start := 0; start < mb*mib; start += mib {
                blocks = append(blocks, buf[start:start+mib])
        }
        memBuf = blocks
}

// queryHandler serves /query. The optional query parameters cpu and memory
// (non-negative integers) set the number of busy-looping goroutines and the
// number of MiB held in memory. A parameter that is absent or empty leaves
// that load unchanged. An invalid value is answered with 400 Bad Request and
// nothing is changed. On success the response echoes the parameters and
// reports the current heap allocation.
func queryHandler(w http.ResponseWriter, r *http.Request) {
        q := r.URL.Query()
        cpuStr := q.Get("cpu")
        memStr := q.Get("memory")

        // Validate every supplied parameter before applying any of them, so a
        // request rejected with 400 never leaves the load half-changed.
        var cores, mb int
        if cpuStr != "" {
                n, err := strconv.Atoi(cpuStr)
                if err != nil || n < 0 {
                        http.Error(w, "cpu must be non-negative integer", http.StatusBadRequest)
                        return
                }
                cores = n
        }
        if memStr != "" {
                n, err := strconv.Atoi(memStr)
                if err != nil || n < 0 {
                        http.Error(w, "memory must be non-negative integer", http.StatusBadRequest)
                        return
                }
                mb = n
        }

        // Apply the requested changes.
        if cpuStr != "" {
                setCPU(cores)
        }
        if memStr != "" {
                setMemory(mb)
        }

        // Report the current state.
        var m runtime.MemStats
        runtime.ReadMemStats(&m)
        fmt.Fprintf(w, "ok: cpu=%s  memory=%s MB  alloc=%.1f MB\n",
                cpuStr, memStr, float64(m.Alloc)/1024/1024)
}

// main registers the /query handler and serves HTTP on :8080 until the
// server fails, at which point the error is logged and the process exits.
func main() {
        // Use as many scheduler threads as there are CPU cores so the
        // requested number of cores can actually be saturated.
        runtime.GOMAXPROCS(runtime.NumCPU())

        http.HandleFunc("/query", queryHandler)
        log.Println("listen :8080  e.g.  /query?cpu=2&memory=500")

        // ListenAndServe only returns on failure, always with a non-nil error.
        err := http.ListenAndServe(":8080", nil)
        log.Fatal(err)
}

发表评论: