Linux服务治理
需要阅读
了解为什么要将二进制先拷到 /usr/local/bin,不然就会被SELinux拦截
chap-security-enhanced_linux-troubleshooting
查看审计日志
grep "go_notify" /var/log/audit/audit.log
Systemd Unit Type
simple
使用 systemctl start 时不会报错
exec
和 simple 差不多,但使用 systemctl start 时会报错
forking
可能跟某个历史遗产有关,文档上不建议用,建议使用 notify、notify-reload
oneshot
专为一次执行,无须长期驻留
dbus
dbus 应用的话直接 Type=dbus 最合适,其能根据 D-Bus 名称(dbus name)作为判断 active 的依据
#whatisd-bus
notify
如果服务需要加载一会儿,然后再通知 systemd 自己已激活,需要用这个;支持 reloading 的服务,则用下面的 notify-reload
idle
待所有工作都激活后才执行,可强制设置时间
其详解文档位置
systemd服务,Type=notify
如果普通的main进程,运行后就能获得反馈,不需要时间加载配置什么的,直接使用Type=exec,比simple更好
# Unit file for the go_notify demo binary; presumably the Go notify example in these notes.
[Unit]
Description=practice go notify
After=network.target
[Service]
# Type=notify: the service itself tells systemd when it is ready (READY=1).
Type=notify
# Start-up deadline. The Go example simulates about 5 s of initialization before reporting ready.
TimeoutStartSec=12
# Uncomment to enable the watchdog. The directive is spelled WatchdogSec= (singular),
# matching what the Go example checks for.
#WatchdogSec=10s
#Restart=on-failure
ExecStart=/usr/local/bin/go_notify
# Stop deadline. The Go example simulates about 1 s of cleanup after receiving the signal.
TimeoutStopSec=5s
[Install]
WantedBy=multi-user.target
go示例
package main
import (
"context"
"log"
"os"
"os/signal"
"syscall"
"time"
"github.com/coreos/go-systemd/v22/daemon"
)
const (
// initDuration is the simulated initialization time.
initDuration = 5 * time.Second
// workInterval is the interval between iterations of the work loop.
workInterval = 2 * time.Second
)
// main runs the Type=notify demo: it simulates a slow start-up, reports
// readiness to systemd, optionally starts the watchdog pinger, and then logs a
// heartbeat every workInterval until SIGINT or SIGTERM arrives.
func main() {
	log.SetPrefix("notify-demo ")
	log.Println("starting...")

	// 1. Exit immediately when NOTIFY_SOCKET is not set, i.e. when the
	//    program was not started by systemd.
	socketPath := os.Getenv("NOTIFY_SOCKET")
	if socketPath == "" {
		log.Fatal("NOTIFY_SOCKET not set, run me under systemd")
	}
	log.Printf("NOTIFY_SOCKET=%s", socketPath)

	// 2. Simulate a heavyweight initialization phase.
	log.Printf("初始化中,预计 %v ...", initDuration)
	time.Sleep(initDuration)

	// 3. Report readiness. Both an error and an undelivered message are fatal.
	delivered, err := daemon.SdNotify(false, daemon.SdNotifyReady)
	switch {
	case err != nil:
		log.Fatalf("SdNotify(READY=1) failed: %v", err)
	case !delivered:
		log.Fatal("systemd 没有收到 READY=1(返回值=false)")
	}
	log.Println("已向 systemd 发送 READY=1")

	// 4. Watchdog: a positive interval means the unit file enabled it, in
	//    which case watchdogLoop sends WATCHDOG=1 at half that interval.
	wdInterval, err := daemon.SdWatchdogEnabled(false)
	if err != nil {
		log.Fatalf("SdWatchdogEnabled error: %v", err)
	}
	watchdogOn := wdInterval > 0
	if !watchdogOn {
		log.Println("看门狗未启用(单元文件没写 WatchdogSec=)")
	} else {
		log.Printf("看门狗已启用,周期 %v,将周期性发送 WATCHDOG=1", wdInterval)
	}

	// 5. Work loop plus signal handling.
	ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer cancel()

	heartbeat := time.NewTicker(workInterval)
	defer heartbeat.Stop()

	if watchdogOn {
		// Ping the watchdog from its own goroutine.
		go watchdogLoop(wdInterval)
	}

loop:
	for {
		select {
		case <-heartbeat.C:
			log.Println("业务心跳:doing useful work...")
		case <-ctx.Done():
			break loop
		}
	}

	log.Println("收到信号,开始优雅退出...")
	// Connections would be closed and buffers flushed here; the sleep
	// stands in for that cleanup work.
	time.Sleep(1 * time.Second)
	log.Println("bye~")
}
// watchdogLoop sends WATCHDOG=1 to systemd every interval/2 and logs any
// failed or undelivered notification. It never returns.
func watchdogLoop(interval time.Duration) {
	pinger := time.NewTicker(interval / 2)
	defer pinger.Stop()

	for range pinger.C {
		delivered, err := daemon.SdNotify(false, daemon.SdNotifyWatchdog)
		switch {
		case err != nil:
			log.Printf("WATCHDOG=1 发送失败: %v", err)
		case !delivered:
			log.Println("WATCHDOG=1 未被 systemd 接收")
		}
	}
}
control group
- A Linux SysAdmin’s introduction to cgroups
cgroups-part-one - How to manage cgroups with CPUShares
cgroups-part-two - Managing cgroups the hard way-manually
cgroups-part-three - Managing cgroups with systemd
cgroups-part-four
以及第四节的资源看一下。

逆天,systemd这整套体系,如此之大。
systemd.resource-control.html#Options
示例
#!/usr/bin/env python3
"""Minimal long-running service for trying out systemd resource limits.

Logs a heartbeat every 10 seconds until SIGTERM is received, then logs a
final message and exits.
"""
import logging
import signal
import sys
import time

LOG_FILE = '/var/log/demo-py-service.log'

# FileHandler opens its file as soon as it is constructed. When the service
# runs as an unprivileged user the file may not be writable, so fall back to
# the stream handler alone instead of crashing at start-up.
handlers = [logging.StreamHandler()]
log_file_error = None
try:
    handlers.insert(0, logging.FileHandler(LOG_FILE))
except OSError as exc:
    log_file_error = exc

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=handlers,
)
if log_file_error is not None:
    logging.warning("cannot open %s (%s); logging to the stream handler only",
                    LOG_FILE, log_file_error)

# Set by the SIGTERM handler; checked by the main loop on every iteration.
shutdown = False


def _term(signum, frame):
    """SIGTERM handler: request that the main loop stop."""
    global shutdown
    shutdown = True


signal.signal(signal.SIGTERM, _term)

while not shutdown:
    logging.info("demo-py-service is alive")
    time.sleep(10)

logging.info("demo-py-service exiting")
# Unit file that runs cgroup_py.py under memory and CPU limits.
[Unit]
Description=Demo Cgroup python
After=network.target
[Service]
Type=exec
ExecStart=/usr/bin/python3 /usr/local/bin/cgroup_py.py
Restart=on-failure
RestartSec=5s
# NOTE(review): the Python example in these notes also logs to
# /var/log/demo-py-service.log; confirm that user "nobody" can write that file.
User=nobody
Group=nobody
# Resource limits; for the available fields see man systemd.resource-control, OPTIONS.
MemoryMax=50M
CPUQuota=20%
# Send stdout and stderr to the journal.
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target
关于限制方面的字段设置,需要参考 man systemd.resource-control OPTIONS
systemd-resolved
man systemd-resolved
man resolved.conf
man resolvectl
man nss-resolve
systemd-resolved is a system service that provides network name resolution to local applications. It implements a caching and validating DNS/DNSSEC stub resolver, as well as an LLMNR and MulticastDNS resolver and responder. Local applications may submit network name resolution requests via three interfaces.
我之前写毕业设计的时候,修改 /etc/resolv.conf 文件,改动总是过一会儿就失效,让我非常恼火。后来发现,文件开头的注释里写着:这个文件由 systemd-resolved 管理,不要直接修改它。
毕业设计的内容,其实就是控制dns解析。然后需要改dns服务器。
# This is /run/systemd/resolve/stub-resolv.conf managed by man:systemd-resolved(8).
# Do not edit.
#
# This file might be symlinked as /etc/resolv.conf. If you're looking at
# /etc/resolv.conf and seeing this text, you have followed the symlink.
#
# This is a dynamic resolv.conf file for connecting local clients to the
# internal DNS stub resolver of systemd-resolved. This file lists all
# configured search domains.
#
# Run "resolvectl status" to see details about the uplink DNS servers
# currently in use.
#
# Third party programs should typically not access this file directly, but only
# through the symlink at /etc/resolv.conf. To manage man:resolv.conf(5) in a
# different way, replace this symlink by a static file or a different symlink.
#
# See man:systemd-resolved.service(8) for details about the supported modes of
# operation for /etc/resolv.conf.
nameserver 127.0.0.53
options edns0 trust-ad
search .
go-daemon-template
systemd, cgroup, go 综合起来的一个项目,作systemd刻意练习
示例
# Unit file that runs the go-daemon-template binary under a memory limit.
[Unit]
Description=GD
After=network.target
[Service]
Type=exec
ExecStart=/usr/local/bin/go-daemon-template
Restart=on-failure
RestartSec=5s
User=nobody
Group=nobody
# Resource limits; the CPU quota is currently disabled.
MemoryMax=500M
#CPUQuota=60%
# Send stdout and stderr to the journal.
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target
可以用 htop 查看 go-daemon-template 的 cgroup 限制生效情况
package main
import (
"fmt"
"log"
"net/http"
"runtime"
"strconv"
"sync"
)
// Package-level state shared by the CPU and memory load generators.
var (
	// cpuStopCh is closed to tell every running cpuBurner to stop. It is
	// created at package initialization so it is never nil.
	cpuStopCh = make(chan struct{})
	// cpuWg tracks the running cpuBurner goroutines.
	cpuWg sync.WaitGroup
	// memBuf keeps the allocated memory chunks referenced so they stay live.
	memBuf [][]byte
	// memMu guards memBuf.
	memMu sync.Mutex
)
// cpuBurner spins in a busy loop until cpuStopCh is closed, then marks itself
// done on cpuWg.
func cpuBurner() {
	defer cpuWg.Done()
	for {
		// Non-blocking check for the stop signal between bursts.
		select {
		case <-cpuStopCh:
			return
		default:
		}
		// Pure computation: one burst of empty iterations.
		for n := 0; n < 1e6; n++ {
		}
	}
}
// cpuMu serializes setCPU calls. HTTP handlers run concurrently, and without
// it two overlapping calls could both close cpuStopCh (a run-time panic) or
// race on replacing it.
var cpuMu sync.Mutex

// setCPU replaces the current CPU load with cores busy-looping goroutines.
//
// It stops all running cpuBurner goroutines, waits for them to finish, and
// only then installs a fresh stop channel and starts the new burners, so old
// and new burners never share a channel. cores must be non-negative; 0 simply
// stops the load.
func setCPU(cores int) {
	cpuMu.Lock()
	defer cpuMu.Unlock()

	// Stop the previous generation and wait until every burner has exited.
	close(cpuStopCh)
	cpuWg.Wait()

	// Start the new generation on a fresh stop channel.
	cpuStopCh = make(chan struct{})
	cpuWg.Add(cores)
	for i := 0; i < cores; i++ {
		go cpuBurner()
	}
}
// setMemory replaces the memory held in memBuf with mb MiB of freshly
// allocated memory, stored as 1 MiB chunks. It holds memMu for the whole call.
func setMemory(mb int) {
	memMu.Lock()
	defer memMu.Unlock()

	// Drop the previous allocation and run a collection.
	memBuf = nil
	runtime.GC()

	// Allocate the new amount in one piece.
	const mib = 1024 * 1024
	backing := make([]byte, mb*mib)

	// Write every byte once to avoid lazy allocation.
	for off := range backing {
		backing[off] = 0
	}

	// Keep the memory referenced as 1 MiB chunks.
	for rest := backing; len(rest) > 0; rest = rest[mib:] {
		memBuf = append(memBuf, rest[:mib])
	}
}
// queryHandler serves /query. It understands two optional query parameters:
//
//	cpu    - number of busy-looping goroutines to run (non-negative integer)
//	memory - amount of memory to hold, in MB (non-negative integer)
//
// Both parameters are validated before either is applied, so a request that
// is rejected with 400 leaves the current load untouched. On success it
// replies with the requested values and the current heap allocation.
func queryHandler(w http.ResponseWriter, r *http.Request) {
	q := r.URL.Query()
	cpuStr := q.Get("cpu")
	memStr := q.Get("memory")

	// --- Validate ---
	var cores, mb int
	if cpuStr != "" {
		n, err := strconv.Atoi(cpuStr)
		if err != nil || n < 0 {
			http.Error(w, "cpu must be non-negative integer", http.StatusBadRequest)
			return
		}
		cores = n
	}
	if memStr != "" {
		n, err := strconv.Atoi(memStr)
		if err != nil || n < 0 {
			http.Error(w, "memory must be non-negative integer", http.StatusBadRequest)
			return
		}
		mb = n
	}

	// --- Apply: only the parameters that were actually supplied ---
	if cpuStr != "" {
		setCPU(cores)
	}
	if memStr != "" {
		setMemory(mb)
	}

	// Report the current state.
	var m runtime.MemStats
	runtime.ReadMemStats(&m)
	fmt.Fprintf(w, "ok: cpu=%s memory=%s MB alloc=%.1f MB\n",
		cpuStr, memStr, float64(m.Alloc)/1024/1024)
}
// main registers the /query handler on the default mux and serves HTTP on
// :8080 until the server fails.
func main() {
	// Set GOMAXPROCS to the number of CPUs so the burners can occupy all of them.
	runtime.GOMAXPROCS(runtime.NumCPU())

	http.HandleFunc("/query", queryHandler)

	log.Println("listen :8080 e.g. /query?cpu=2&memory=500")
	if err := http.ListenAndServe(":8080", nil); err != nil {
		log.Fatal(err)
	}
}