From 0e5c3d6c65e754226bd127ee261ac5e631986b90 Mon Sep 17 00:00:00 2001 From: iceBear67 Date: Wed, 3 Jun 2026 15:31:27 +0800 Subject: [PATCH] snapshot orchestration --- .gitignore | 3 + USAGE.md | 114 +++++++++++++++++++++++++++ composer.py | 221 +++++++++++++++++++++++++++++++++++++--------------- 3 files changed, 276 insertions(+), 62 deletions(-) create mode 100644 .gitignore create mode 100644 USAGE.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..05a6a02 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.venv +.idea/* +__pycache__/** diff --git a/USAGE.md b/USAGE.md new file mode 100644 index 0000000..b6fdeeb --- /dev/null +++ b/USAGE.md @@ -0,0 +1,114 @@ +# Composer + +多租户 Docker Compose 编排工具。扫描 `users/` 下的用户目录,校验每个用户的 `compose.yml`,生成注入网络配置的快照,然后启动所有服务。 + +## 目录结构 + +``` +/ +├── users/ +│ ├── 10-alice/ # 优先级-用户名 +│ │ ├── compose.yml # 必需 +│ │ ├── .env # 可选(明文) +│ │ ├── .env.gpg # 可选(加密,二选一即可) +│ │ ├── app/ +│ │ │ └── Dockerfile +│ │ └── ... +│ └── 20-bob/ +│ └── ... +└── snapshots/ # 自动生成(每次覆盖) + ├── 10-alice/ + │ └── compose.yml # 已注入网络配置的版本 + └── 20-bob/ +``` + +- 目录名格式:`<数字优先级>-<用户名>` +- 用户名:`[a-z0-9]+` +- 优先级数字越小越先处理 + +## CLI 参数 + +| 参数 | 必需 | 默认值 | 说明 | +|------|------|--------|------| +| `--root` | | `.` | 项目根目录 | +| `--network` | | `cloud` | 注入到 services 的外部网络名 | +| `--volume-parent` | ✓ | | 卷挂载路径白名单前缀 | +| `--dry` | | `false` | 只打印操作,不实际执行 | + +## compose.yml 约束 + +### 允许的顶层字段 + +只有 `services` 被识别。如果 YAML 中有 `networks`、`volumes`、`version` 等额外顶层键,会触发 **WARN** —— 它们会被 Pydantic 静默丢弃,可能与网络注入冲突。 + +### 允许的 service 字段 + +`build` `image` `volumes` `command` `depends_on` `entrypoint` `env_file` `environment` + +`build` 可以是字符串(指向含 Dockerfile 的目录)或对象 `{context, dockerfile, args}`。其他键同样触发 WARN。 + +### 卷约束 + +所有 volume 的宿主机路径必须以 `--volume-parent` 为前缀,且只允许 `ro` 模式。 + +### 环境文件 + +- 支持明文 `.env` 或 GPG 加密 `.env.gpg` +- 只校验明文文件的内容格式(`KEY=value` 或 `export KEY=value`) +- 如果只存在 `.env.gpg` 则跳过内容校验 + +## 运行流程 + +``` +1. 扫描 users/ → 解析优先级和用户名 +2. 
加载每个 compose.yml → Pydantic 校验 +3. 检测注入干扰字段(WARN) +4. 校验 service 引用、Dockerfile 存在性、卷路径、env_file +5. ── 有错则退出 ── +6. 为每个用户创建 snapshots/<优先级>-<用户名>/ +7. 拷贝所有文件:compose.yml 注入网络配置,.gpg 文件解密 +8. ── 快照失败则退出 ── +9. 在每个快照目录执行 docker compose up --remove-orphans --build --detach +``` + +## 网络注入 + +快照中的 `compose.yml` 会被注入: + +```yaml +networks: + cloud: # --network 参数值 + external: true + alice: # 用户名 + name: alice + +services: + app: + networks: [cloud, alice] +``` + +每个 service 的 `networks` 会被**覆盖**为 `[<网络名>, <用户名>]`。 + +## GPG 解密 + +快照拷贝时,用户目录顶层的 `.gpg` 后缀文件会调用系统 `gpg --decrypt --batch` 解密,输出到去掉 `.gpg` 后缀的路径,这些原 `.gpg` 文件不进入快照;子目录会被原样递归拷贝,其中的 `.gpg` 文件不会被解密。 + +要求运行环境已配置好 GPG 密钥,且能无交互解密。 + +## Dry-run 模式 + +`--dry` 执行全部校验,但只打印操作摘要: + +``` +[dry] would snapshot users/10-alice -> snapshots/10-alice +[dry] inject networks: .networks.cloud.external=true, ... +[dry] would run: docker compose -f snapshots/10-alice/compose.yml up ... +``` + +不创建目录、不写文件、不解密、不启动容器。 + +## 错误处理 + +- **校验阶段有错** → 打印错误并 `exit 1`(不创建快照) +- **快照创建失败**(单个用户)→ 标记错误,跳过该用户,继续处理其他用户;全部完成后 `exit 1` +- **compose up 失败**(单个用户)→ 标记错误,继续启动其他用户;全部完成后 `exit 1` diff --git a/composer.py b/composer.py index f88e448..0859f72 100644 --- a/composer.py +++ b/composer.py @@ -1,24 +1,21 @@ import argparse -import subprocess -from collections import defaultdict -from dataclasses import dataclass import os import re +import shutil +import subprocess +import sys -import docker from pydantic import BaseModel, ValidationError import yaml -@dataclass class BuildSpec(BaseModel): context: str dockerfile: str - args: dict[str, str] = defaultdict + args: dict[str, str] = {} -@dataclass class Service(BaseModel): build: str | BuildSpec | None = None image: str | None = None @@ -30,17 +27,15 @@ class Service(BaseModel): environment: dict[str, str] | None = None -@dataclass class ComposeSpec(BaseModel): services: dict[str, Service] parser = argparse.ArgumentParser() -parser.add_argument("--dry", type=bool, default=False) -parser.add_argument("--root", default=".", type=str, required=True) 
+parser.add_argument("--dry", action="store_true", default=False) +parser.add_argument("--root", default=".", type=str) parser.add_argument("--network", default="cloud", type=str) parser.add_argument("--volume-parent", type=str, required=True) -parser.add_argument("--lock", type=str) args = parser.parse_args() dry_run = args.dry @@ -61,7 +56,7 @@ userNamePattern = r"^[a-z0-9]+$" users: list[tuple[int, str]] = [] for userDir in os.listdir(f"{root}/users"): - spl = userDir.split("-") + spl = userDir.split("-", 1) if len(spl) != 2: err(f"ERR: Valid priority isn't set for userDir {spl}") continue @@ -77,23 +72,6 @@ users.sort(key=lambda x: x[0]) serviceNamePattern = r"^[a-z0-9]+$" -userWorkDir: dict[str, str] = {} -userComposeFiles: dict[str, ComposeSpec] = {} - -for prio, name in users: - path = f"{root}/users/{prio}-{name}/compose.yml" - with open(path) as csf: - data = yaml.safe_load(csf) - try: - spec = ComposeSpec(**data) - except ValidationError as e: - err(f"Cannot validate compose spec at {path}") - print(e.errors()) - continue - userComposeFiles[name] = spec - userWorkDir[name] = f"{root}/users/{prio}-{name}/" - - def validate_compose_spec(spec: ComposeSpec, workdir: str): invalid_services = [serviceName for serviceName, _ in spec.services.items() if not re.match(serviceNamePattern, serviceName)] @@ -101,16 +79,68 @@ def validate_compose_spec(spec: ComposeSpec, workdir: str): err(f"ERR: Invalid service names: {', '.join([x for x in invalid_services])}") for invalid_service in invalid_services: spec.services.pop(invalid_service) - for key, spec in spec.services.items(): - validate_service_spec(spec, workdir) + for key, svc in spec.services.items(): + validate_service_spec(svc, workdir, key) - depended_services = [(spec.depends_on or []) for name, spec in spec.services.items()] + depended_services = [(svc.depends_on or []) for _name, svc in spec.services.items()] depended_services = [item for sublist in depended_services for item in sublist] for serviceName in 
depended_services: if serviceName not in spec.services: err(f"ERR: Service {serviceName} is depended on but not defined in the compose spec") + +# Fields recognized by our Pydantic models — anything else in the YAML +# is silently dropped by Pydantic and could interfere with our injection. +_TOP_LEVEL_KNOWN = {"services"} +_SERVICE_KNOWN = {"build", "image", "volumes", "command", "depends_on", + "entrypoint", "env_file", "environment"} +_BUILD_KNOWN = {"context", "dockerfile", "args"} + + +def detect_injected_fields(path_: str, data: dict): + """Warn about YAML keys not captured by the Pydantic model — these would + be silently dropped and could interfere with network injection.""" + if not isinstance(data, dict): + return + extra_toplevel = set(data.keys()) - _TOP_LEVEL_KNOWN + if extra_toplevel: + err(f"WARN: {path_} has unexpected top-level keys (ignored by model): " + f"{', '.join(sorted(extra_toplevel))}. " + f"These may interfere with injected networks/config.") + services = data.get("services") + if not isinstance(services, dict): + return + for svc_name, svc_data in services.items(): + if not isinstance(svc_data, dict): + continue + extra_svc = set(svc_data.keys()) - _SERVICE_KNOWN + if extra_svc: + err(f"WARN: {path_} service '{svc_name}' has unexpected keys " + f"(ignored by model): {', '.join(sorted(extra_svc))}") + build = svc_data.get("build") + if isinstance(build, dict): + extra_build = set(build.keys()) - _BUILD_KNOWN + if extra_build: + err(f"WARN: {path_} service '{svc_name}' build block has " + f"unexpected keys: {', '.join(sorted(extra_build))}") + + +for prio, name in users: + workdir = f"{root}/users/{prio}-{name}/" + path = f"{workdir}compose.yml" + with open(path) as csf: + data = yaml.safe_load(csf) + detect_injected_fields(path, data) + try: + spec = ComposeSpec(**data) + except ValidationError as e: + err(f"Cannot validate compose spec at {path}") + print(e.errors()) + continue + validate_compose_spec(spec, workdir) + + def 
validate_env_file(path_: str) -> bool: try: with open(path_, "r") as f: @@ -134,46 +164,113 @@ def validate_env_file(path_: str) -> bool: return False -def validate_service_spec(serv: Service, workdir: str): +def validate_service_spec(serv: Service, workdir: str, name: str): if serv.image is None and serv.build is None: - err(f"ERR: Service {serv.image} doesn't have an image or build spec") + err(f"ERR: Service {name} doesn't have an image or build spec") if isinstance(serv.build, str) and not os.path.exists(f"{workdir}/{serv.build}/Dockerfile"): - err(f"ERR: Dockerfile doesn't exist at {workdir}/{serv.build}/Dockerfile") - for volume in serv.volumes: + err(f"ERR: Service {name}: Dockerfile doesn't exist at {workdir}/{serv.build}/Dockerfile") + for volume in (serv.volumes or []): spl = volume.split(":") if (len(spl) != 2 and len(spl) != 3) or \ not os.path.normpath(spl[0]).startswith(vol_parent) or \ (len(spl) == 3 and spl[2] != "ro"): - err(f"ERR: Invalid volume spec {volume} in service {serv.image}") - continue - if serv.env_file and not os.path.exists(f"{workdir}/{serv.env_file}"): - err(f"ERR: env_file {serv.env_file} doesn't exist in {workdir}") - elif serv.env_file: + err(f"ERR: Invalid volume spec {volume} in service {name}") + if serv.env_file and not os.path.exists(f"{workdir}/{serv.env_file}") \ + and not os.path.exists(f"{workdir}/{serv.env_file}.gpg"): + err(f"ERR: Service {name}: env_file {serv.env_file} (or .gpg) doesn't exist in {workdir}") + elif serv.env_file and os.path.exists(f"{workdir}/{serv.env_file}"): validate_env_file(f"{workdir}/{serv.env_file}") if isinstance(serv.build, BuildSpec) and not os.path.exists(f"{workdir}/{serv.build.dockerfile}"): - err(f"ERR: Dockerfile {serv.build.dockerfile} doesn't exist in {workdir}") + err(f"ERR: Service {name}: Dockerfile {serv.build.dockerfile} doesn't exist in {workdir}") -dk = docker.from_env() +if has_error: + print("Errors found during validation. 
Exiting.") + sys.exit(1) -def orchestrate(user: str, spec: ComposeSpec, workdir: str): - for name, serv in spec.services.items(): - if isinstance(serv.build, str): - serv.build = BuildSpec(context=serv.build, dockerfile=f"{serv.build}/Dockerfile") - if serv.build: - command = [ - "docker", "buildx", "build", "-t", f"{user}-{name}:latest" - ] - build_args = [["--build-arg", f"{k}={v}"] for k, v in serv.build.args.items()] - command += [arg for build_arg in build_args for arg in build_arg] - command += ["-f", serv.build.dockerfile, serv.build.context] - print(f"Building image for {user}:{name} with command: {' '.join(command)}") - if not dry_run: - try: - subprocess.run(command, check=True) - except subprocess.CalledProcessError as e: - err(f"ERR: Failed to build image for {user}:{name} with error: {e}") - continue +# ── Snapshot: copy userdir + inject networks into compose.yml ────────── +snapshot_root = f"{root}/snapshots" +print(f"Snapshot target: {snapshot_root}") + +snapshot_dirs: list[tuple[str, str]] = [] # (dst_dir, name) for compose-up later + +for prio, name in users: + src_dir = f"{root}/users/{prio}-{name}" + dst_dir = f"{snapshot_root}/{prio}-{name}" + + if dry_run: + print(f" [dry] would snapshot {src_dir} -> {dst_dir}") + print(f" [dry] inject networks: .networks.{network}.external=true, " + f".networks.{name}.name={name}, services[].networks=[{network}, {name}]") + snapshot_dirs.append((dst_dir, name)) + continue + + print(f" Snapshotting {src_dir} -> {dst_dir}") + try: + if os.path.exists(dst_dir): + shutil.rmtree(dst_dir) + os.makedirs(dst_dir, exist_ok=True) + + for item in os.listdir(src_dir): + src_item = os.path.join(src_dir, item) + dst_item = os.path.join(dst_dir, item) + if item == "compose.yml": + # Rewrite with injected network configuration + with open(src_item) as f: + compose_data = yaml.safe_load(f) + compose_data.setdefault("networks", {}) + compose_data["networks"][network] = {"external": True} + compose_data["networks"][name] = 
{"name": name} + for svc_name, svc in (compose_data.get("services") or {}).items(): + svc["networks"] = [network, name] + print(f" Injected networks into service '{svc_name}': " + f"[{network}, {name}]") + with open(dst_item, "w") as f: + yaml.dump(compose_data, f, default_flow_style=False, sort_keys=False) + print(f" Wrote {dst_item} (networks injected)") + elif item.endswith(".gpg"): + # Decrypt .gpg file → output without .gpg suffix + plain_name = item[:-4] # strip ".gpg" + dst_plain = os.path.join(dst_dir, plain_name) + print(f" Decrypting {item} -> {plain_name}") + subprocess.run( + ["gpg", "--decrypt", "--batch", "--output", dst_plain, src_item], + check=True, + ) + elif os.path.isdir(src_item): + shutil.copytree(src_item, dst_item) else: - print("Build skipped due to dry run mode.") + shutil.copy2(src_item, dst_item) + + snapshot_dirs.append((dst_dir, name)) + print(f" Done snapshotting {name}") + except Exception as e: + err(f"ERR: Snapshot failed for {name}: {e}") + +# ── Compose up in each snapshot ──────────────────────────────────────── +if has_error: + print("Snapshot errors detected — skipping compose up.") + sys.exit(1) + +if dry_run: + for dst_dir, name in snapshot_dirs: + print(f" [dry] would run: docker compose -f {dst_dir}/compose.yml " + f"up --remove-orphans --build --detach") +else: + for dst_dir, name in snapshot_dirs: + print(f" Starting services for {name} in {dst_dir} …") + try: + subprocess.run( + ["docker", "compose", "-f", f"{dst_dir}/compose.yml", + "up", "--remove-orphans", "--build", "--detach"], + cwd=dst_dir, check=True, + ) + print(f" ✓ {name} started") + except subprocess.CalledProcessError as e: + err(f"ERR: docker compose up failed for {name}: {e}") + +if has_error: + print("Errors during compose up. Exiting.") + sys.exit(1) +