Linux服务治理

需要阅读

了解为什么要将二进制先拷到 /usr/local/bin，不然就会被SELinux拦截

chap-security-enhanced_linux-troubleshooting

查看审计日志

grep "go_notify" /var/log/audit/audit.log

Systemd Unit Type

simple
即使服务的二进制无法执行（例如文件不存在，或 User= 指定的用户不存在），systemctl start 也不会报错
exec
和simple差不多
但二进制无法执行时，systemctl start 会报错
forking
可能跟某个历史遗产有关，文档上不建议用，建议使用notify,notify-reload
oneshot
专为一次执行，无须长期驻留
dbus
dbus应用的话直接Type=dbus最合适，其能根据dbusname作判断active依据
#whatisd-bus
notify
如果服务需要加载一会儿，然后再通知systemd自己已激活，需要用这个，支持reloading的服务，则用下面的
notify-reload
idle
待所有工作都激活后才执行，可强制设置时间

其详解文档位置

systemd.service.html#Type=

systemd服务，Type=notify

如果普通的main进程，运行后就能获得反馈，不需要时间加载配置什么的，直接使用Type=exec，比simple更好

[Unit]
Description=practice go notify
After=network.target

[Service]
# Type=notify: the unit counts as started only after the service sends READY=1.
Type=notify
# Must be longer than the time the service needs before it sends READY=1.
TimeoutStartSec=12
# Uncomment to enable the watchdog (the directive is WatchdogSec=, not WatchdogSecs=).
#WatchdogSec=10s
#Restart=on-failure
ExecStart=/usr/local/bin/go_notify
TimeoutStopSec=5s

[Install]
WantedBy=multi-user.target

go示例

package main

import (
        "context"
        "log"
        "os"
        "os/signal"
        "syscall"
        "time"

        "github.com/coreos/go-systemd/v22/daemon"
)

const (
        // Simulated initialization time spent before READY=1 is sent.
        initDuration = 5 * time.Second
        // Interval between iterations of the main work loop.
        workInterval = 2 * time.Second
)

func main() {
	log.SetPrefix("notify-demo ")
	log.Println("starting...")

	// Step 1: refuse to run without NOTIFY_SOCKET, i.e. outside of systemd.
	switch socket := os.Getenv("NOTIFY_SOCKET"); socket {
	case "":
		log.Fatal("NOTIFY_SOCKET not set, run me under systemd")
	default:
		log.Printf("NOTIFY_SOCKET=%s", socket)
	}

	// Step 2: pretend to perform a slow initialization.
	log.Printf("初始化中，预计 %v ...", initDuration)
	time.Sleep(initDuration)

	// Step 3: report readiness to systemd.
	ready, err := daemon.SdNotify(false, daemon.SdNotifyReady)
	switch {
	case err != nil:
		log.Fatalf("SdNotify(READY=1) failed: %v", err)
	case !ready:
		log.Fatal("systemd 没有收到 READY=1（返回值=false）")
	}
	log.Println("已向 systemd 发送 READY=1")

	// Step 4: watchdog. With WatchdogSec=10s in the unit file, systemd expects
	// a WATCHDOG=1 message at least once every 10s; this program sends one
	// every interval/2 to leave some margin.
	wdInterval, err := daemon.SdWatchdogEnabled(false)
	if err != nil {
		log.Fatalf("SdWatchdogEnabled error: %v", err)
	}
	watchdogOn := wdInterval > 0
	if watchdogOn {
		log.Printf("看门狗已启用，周期 %v，将周期性发送 WATCHDOG=1", wdInterval)
	} else {
		log.Println("看门狗未启用（单元文件没写 WatchdogSec=）")
	}

	// Step 5: main work loop plus signal handling.
	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer stop()

	heartbeat := time.NewTicker(workInterval)
	defer heartbeat.Stop()

	if watchdogOn {
		// Keep-alives are sent from a dedicated goroutine.
		go watchdogLoop(wdInterval)
	}

	for {
		select {
		case <-heartbeat.C:
			log.Println("业务心跳：doing useful work...")
			continue
		case <-ctx.Done():
		}

		// Only reached once SIGINT/SIGTERM has cancelled ctx.
		log.Println("收到信号，开始优雅退出...")
		// Place for closing connections, flushing data, and so on.
		time.Sleep(time.Second) // simulated cleanup
		log.Println("bye~")
		return
	}
}

// watchdogLoop sends WATCHDOG=1 to systemd once every interval/2.
// It never returns; failures are logged and the loop keeps going.
func watchdogLoop(interval time.Duration) {
	ticker := time.NewTicker(interval / 2)
	defer ticker.Stop()

	for range ticker.C {
		delivered, err := daemon.SdNotify(false, daemon.SdNotifyWatchdog)
		switch {
		case err != nil:
			log.Printf("WATCHDOG=1 发送失败: %v", err)
		case !delivered:
			log.Println("WATCHDOG=1 未被 systemd 接收")
		}
	}
}

control group

A Linux SysAdmin’s introduction to cgroups
cgroups-part-one
How to manage cgroups with CPUShares
cgroups-part-two
Managing cgroups the hard way-manually
cgroups-part-three
Managing cgroups with systemd
cgroups-part-four

以及第四节的资源看一下。

null

逆天，systemd这整套体系，如此之大。

systemd.resource-control.html#Options

示例

#!/usr/bin/env python3
"""Heartbeat service used to try out systemd resource limits.

Logs one line every HEARTBEAT_SECONDS until SIGTERM arrives, then logs a
final line and exits with status 0.
"""
import logging
import signal
import sys
import threading

LOG_FILE = '/var/log/demo-py-service.log'
HEARTBEAT_SECONDS = 10

# Set by the SIGTERM handler. The main loop waits on it instead of sleeping,
# so a shutdown request wakes the loop immediately rather than after the
# remainder of the heartbeat interval.
_stop = threading.Event()

shutdown = False


def _term(signum, frame):
    """SIGTERM handler: request shutdown and wake the main loop."""
    global shutdown
    shutdown = True
    _stop.set()


def _configure_logging():
    """Log to stderr, and additionally to LOG_FILE when it can be opened.

    An unprivileged account may not be allowed to create files under
    /var/log; in that case the service keeps running with stderr logging
    only instead of crashing at start-up.
    """
    handlers = [logging.StreamHandler()]
    file_error = None
    try:
        handlers.insert(0, logging.FileHandler(LOG_FILE))
    except OSError as exc:
        file_error = exc

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s [%(levelname)s] %(message)s',
        handlers=handlers,
    )
    if file_error is not None:
        logging.warning("cannot open %s (%s); logging to stderr only",
                        LOG_FILE, file_error)


def main():
    """Run the heartbeat loop until SIGTERM is received; return the exit code."""
    _configure_logging()
    signal.signal(signal.SIGTERM, _term)

    while not shutdown:
        logging.info("demo-py-service is alive")
        # Returns early as soon as _term() sets the event.
        _stop.wait(HEARTBEAT_SECONDS)
    logging.info("demo-py-service exiting")
    return 0


if __name__ == '__main__':
    sys.exit(main())

[Unit]
Description=Demo Cgroup python
After=network.target

[Service]
Type=exec
ExecStart=/usr/bin/python3 /usr/local/bin/cgroup_py.py
Restart=on-failure
RestartSec=5s

# NOTE(review): the demo script opens /var/log/demo-py-service.log;
# confirm that the `nobody` account is allowed to write there.
User=nobody
Group=nobody

# Resource limits applied through the unit's cgroup
# (see systemd.resource-control, OPTIONS).
MemoryMax=50M
CPUQuota=20%

StandardOutput=journal
StandardError=journal

[Install]
WantedBy=multi-user.target

关于限制方面的字段设置，需要参考 man systemd.resource-control OPTIONS

systemd-resolved

Systemd-resolved

man systemd-resolved
man resolved.conf
man resolvectl
man nss-resolve

systemd-resolved is a system service that provides network name resolution to local applications. It implements a caching and validating DNS/DNSSEC stub resolver, as well as an LLMNR and MulticastDNS resolver and responder. Local applications may submit network name resolution requests via three interfaces: the native bus API, the glibc getaddrinfo() API (through nss-resolve), and a local DNS stub listener on 127.0.0.53.

我之前写毕业设计的时候，直接改 /etc/resolv.conf 文件，但改动总是过一会儿就失效，让我非常恼火。后来发现文件里写着：这个文件由 systemd-resolved 管理，不要直接修改它。

毕业设计的内容，其实就是控制dns解析。然后需要改dns服务器。

# This is /run/systemd/resolve/stub-resolv.conf managed by man:systemd-resolved(8).
# Do not edit.
#
# This file might be symlinked as /etc/resolv.conf. If you're looking at
# /etc/resolv.conf and seeing this text, you have followed the symlink.
#
# This is a dynamic resolv.conf file for connecting local clients to the
# internal DNS stub resolver of systemd-resolved. This file lists all
# configured search domains.
#
# Run "resolvectl status" to see details about the uplink DNS servers
# currently in use.
#
# Third party programs should typically not access this file directly, but only
# through the symlink at /etc/resolv.conf. To manage man:resolv.conf(5) in a
# different way, replace this symlink by a static file or a different symlink.
#
# See man:systemd-resolved.service(8) for details about the supported modes of
# operation for /etc/resolv.conf.

nameserver 127.0.0.53
options edns0 trust-ad
search .

go-daemon-template

systemd, cgroup, go 综合起来的一个项目，作systemd刻意练习

示例

[Unit]
Description=GD
After=network.target

[Service]
Type=exec
ExecStart=/usr/local/bin/go-daemon-template
Restart=on-failure
RestartSec=5s

User=nobody
Group=nobody

# Resource limits applied through the unit's cgroup; the CPU quota is
# currently disabled.
MemoryMax=500M
#CPUQuota=60%

StandardOutput=journal
StandardError=journal

[Install]
WantedBy=multi-user.target

go-daemon-template 可以用htop查看，cgroup生效情况

package main

import (
        "fmt"
        "log"
        "net/http"
        "runtime"
        "strconv"
        "sync"
)

// Shared state of the synthetic load generators.
var (
	// cpuMu serializes setCPU calls. HTTP handlers run concurrently, and two
	// overlapping calls would otherwise close cpuStopCh twice (a panic) and
	// race on replacing it.
	cpuMu sync.Mutex
	// cpuStopCh is closed to tell every running cpuBurner to exit.
	cpuStopCh = make(chan struct{})
	// cpuWg tracks the cpuBurner goroutines that are still running.
	cpuWg sync.WaitGroup
	// memBuf keeps the allocated memory ballast reachable.
	memBuf [][]byte
	// memMu guards memBuf.
	memMu sync.Mutex
)

// cpuBurner spins until the stop channel that was current when it started
// is closed, keeping one scheduler P busy in the meantime.
func cpuBurner() {
	defer cpuWg.Done()
	// Read the channel once: setCPU replaces cpuStopCh only after every
	// burner of the previous generation has finished.
	stop := cpuStopCh
	for {
		select {
		case <-stop:
			return
		default:
			// Pure computation between two checks of the stop channel.
			for i := 0; i < 1e6; i++ {
			}
		}
	}
}

// setCPU stops all running burners and starts cores new ones.
// A negative cores value is treated as zero. Safe for concurrent use.
func setCPU(cores int) {
	if cores < 0 {
		cores = 0
	}

	cpuMu.Lock()
	defer cpuMu.Unlock()

	// Stop the previous generation and wait until it is gone.
	close(cpuStopCh)
	cpuWg.Wait()

	// Start the new generation on a fresh stop channel.
	cpuStopCh = make(chan struct{})
	cpuWg.Add(cores)
	for i := 0; i < cores; i++ {
		go cpuBurner()
	}
}

// setMemory replaces the retained memory ballast with a new one of mb MiB.
func setMemory(mb int) {
	const mib = 1024 * 1024

	memMu.Lock()
	defer memMu.Unlock()

	// Drop the previous ballast and run a collection before allocating again.
	memBuf = nil
	runtime.GC()

	ballast := make([]byte, mb*mib)
	// Write every byte once; the intent is to avoid a lazily backed allocation.
	for i := range ballast {
		ballast[i] = 0
	}

	// Keep the ballast reachable as mb views of 1 MiB each.
	for n := 0; n < mb; n++ {
		lo := n * mib
		memBuf = append(memBuf, ballast[lo:lo+mib])
	}
}

// parseOptionalCount parses an optional non-negative integer query value.
// An empty raw string means the parameter was not supplied and yields
// (0, false, nil). name is only used in the error message.
func parseOptionalCount(name, raw string) (int, bool, error) {
	if raw == "" {
		return 0, false, nil
	}
	n, err := strconv.Atoi(raw)
	if err != nil || n < 0 {
		return 0, false, fmt.Errorf("%s must be non-negative integer", name)
	}
	return n, true, nil
}

// queryHandler serves /query?cpu=N&memory=M.
//
// Both parameters are optional. cpu is the number of busy-loop goroutines to
// run and memory is the number of MiB to hold. Every supplied parameter is
// validated before anything is applied, so a request rejected with 400 leaves
// the current load untouched. On success the reply echoes the raw parameters
// and reports the current heap allocation.
func queryHandler(w http.ResponseWriter, r *http.Request) {
	q := r.URL.Query()
	cpuStr, memStr := q.Get("cpu"), q.Get("memory")

	cores, hasCPU, err := parseOptionalCount("cpu", cpuStr)
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	mb, hasMem, err := parseOptionalCount("memory", memStr)
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	// Apply only after the whole request has been validated.
	if hasCPU {
		setCPU(cores)
	}
	if hasMem {
		setMemory(mb)
	}

	// Report the current state.
	var m runtime.MemStats
	runtime.ReadMemStats(&m)
	fmt.Fprintf(w, "ok: cpu=%s  memory=%s MB  alloc=%.1f MB\n",
		cpuStr, memStr, float64(m.Alloc)/1024/1024)
}

// main registers the /query endpoint on the default mux and serves it on
// :8080 until the server fails.
func main() {
	// Allow as many simultaneously running goroutines as there are CPUs so
	// the burners can occupy every core.
	runtime.GOMAXPROCS(runtime.NumCPU())

	http.Handle("/query", http.HandlerFunc(queryHandler))
	log.Println("listen :8080  e.g.  /query?cpu=2&memory=500")

	err := http.ListenAndServe(":8080", nil)
	log.Fatal(err)
}

卡bug的个人网站

务实路

systemd简易笔记

Linux服务治理

systemd服务，Type=notify

control group

systemd-resolved

go-daemon-template