Coverage for bzfs_main/detect.py: 98%
193 statements
« prev ^ index » next coverage.py v7.10.2, created at 2025-08-06 13:30 +0000
« prev ^ index » next coverage.py v7.10.2, created at 2025-08-06 13:30 +0000
1# Copyright 2024 Wolfgang Hoschek AT mac DOT com
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#
15"""Detection of ZFS features and system capabilities on local and remote hosts."""
17from __future__ import annotations
18import platform
19import re
20import subprocess
21import sys
22import time
23from dataclasses import dataclass, field
24from subprocess import DEVNULL, PIPE
25from typing import (
26 TYPE_CHECKING,
27)
29from bzfs_main.connection import (
30 DEDICATED,
31 SHARED,
32 ConnectionPools,
33 run_ssh_command,
34 try_ssh_command,
35)
36from bzfs_main.utils import (
37 LOG_TRACE,
38 PROG_NAME,
39 die,
40 list_formatter,
41)
43if TYPE_CHECKING: # pragma: no cover - for type hints only
44 from bzfs_main.bzfs import Job
45 from bzfs_main.configuration import Params, Remote
# Sentinel values and capability keys used by the detection helpers in this module.
DISABLE_PRG: str = "-"  # a *_program option equal to this value switches that program off
DUMMY_DATASET: str = "dummy"  # root dataset name that identifies the synthetic dummy remote
ZFS_VERSION_IS_AT_LEAST_2_1_0: str = "zfs>=2.1.0"  # available_programs key set when zfs is 2.1.0 or newer
ZFS_VERSION_IS_AT_LEAST_2_2_0: str = "zfs>=2.2.0"  # available_programs key set when zfs is 2.2.0 or newer
54#############################################################################
@dataclass(frozen=True)
class RemoteConfCacheItem:
    """Immutable per-remote cache entry holding the connection pools, detected programs and zpool features."""

    connection_pools: ConnectionPools  # pools that get reused when the same remote is visited again
    available_programs: dict[str, str]  # programs detected for the remote
    zpool_features: dict[str, str]  # zpool properties detected for the remote
    timestamp_nanos: int = field(default_factory=time.monotonic_ns)  # creation time, taken from the monotonic clock
def detect_available_programs(job: Job) -> None:
    """Detects programs, zpool features and connection pools for local and remote hosts.

    Results are written to ``job.params`` (``available_programs``, ``zpool_features``, ``connection_pools``) and, per
    remote, to ``job.remote_conf_cache``. Calls ``die()`` if ZFS delegation is required but switched off, if the default
    shell of a host is csh-style, if sudo is required but unavailable, or if --preserve-properties is combined with
    'zfs send --props' while the destination ZFS is older than 2.2.0.
    """
    p = job.params
    log = p.log
    available_programs: dict[str, dict[str, str]] = p.available_programs
    if "local" not in available_programs:
        # Probe the local host once; each line printed by the probe script becomes one key of the mapping.
        cmd: list[str] = [p.shell_program_local, "-c", _find_available_programs(p)]
        available_programs["local"] = dict.fromkeys(
            subprocess.run(cmd, stdin=DEVNULL, stdout=PIPE, stderr=sys.stderr, text=True).stdout.splitlines(), ""
        )
        cmd = [p.shell_program_local, "-c", "exit"]
        if subprocess.run(cmd, stdin=DEVNULL, stdout=PIPE, stderr=sys.stderr, text=True).returncode != 0:
            _disable_program(p, "sh", ["local"])  # the local shell cannot even run a trivial command

    for r in [p.dst, p.src]:
        loc: str = r.location
        remote_conf_cache_key = r.cache_key()
        cache_item: RemoteConfCacheItem | None = job.remote_conf_cache.get(remote_conf_cache_key)
        if cache_item is not None:
            # startup perf: cache avoids ssh connect setup and feature detection roundtrips on revisits to same site
            p.connection_pools[loc] = cache_item.connection_pools
            if time.monotonic_ns() - cache_item.timestamp_nanos < p.remote_conf_cache_ttl_nanos:
                available_programs[loc] = cache_item.available_programs
                p.zpool_features[loc] = cache_item.zpool_features
                continue  # cache hit, skip remote detection
        else:
            p.connection_pools[loc] = ConnectionPools(
                r, {SHARED: r.max_concurrent_ssh_sessions_per_tcp_connection, DEDICATED: 1}
            )
        _detect_zpool_features(job, r)
        _detect_available_programs_remote(job, r, available_programs, r.ssh_user_host)
        job.remote_conf_cache[remote_conf_cache_key] = RemoteConfCacheItem(
            p.connection_pools[loc], available_programs[loc], p.zpool_features[loc]
        )
        if r.use_zfs_delegation and p.zpool_features[loc].get("delegation") == "off":
            die(
                f"Permission denied as ZFS delegation is disabled for {r.location} "
                f"dataset: {r.basis_root_dataset}. Manually enable it via 'sudo zpool set delegation=on {r.pool}'"
            )

    # Programs that the user explicitly switched off are removed everywhere, even if they were detected.
    locations = ["src", "dst", "local"]
    configured_programs = (
        (p.compression_program, "zstd"),
        (p.mbuffer_program, "mbuffer"),
        (p.ps_program, "ps"),
        (p.pv_program, "pv"),
        (p.shell_program, "sh"),
        (p.sudo_program, "sudo"),
        (p.zpool_program, "zpool"),
    )
    for configured, name in configured_programs:
        if configured == DISABLE_PRG:
            _disable_program(p, name, locations)

    # Maps a key of available_programs back to the remote it was detected on, for accurate error messages.
    remotes_by_location: dict[str, Remote] = {remote.location: remote for remote in (p.src, p.dst)}

    # The probe script reports some facts as "<name>-<value>" lines; split those into proper name/value entries.
    for key, programs in available_programs.items():
        for program in list(programs.keys()):
            if program.startswith("uname-"):
                # uname-Linux foo 5.15.0-69-generic #76-Ubuntu SMP Fri Mar 17 17:19:29 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux
                # uname-FreeBSD freebsd 14.1-RELEASE FreeBSD 14.1-RELEASE releng/14.1-n267679-10e31f0946d8 GENERIC amd64
                # uname-SunOS solaris 5.11 11.4.42.111.0 i86pc i386 i86pc # https://blogs.oracle.com/solaris/post/building-open-source-software-on-oracle-solaris-114-cbe-release
                # uname-SunOS solaris 5.11 11.4.0.15.0 i86pc i386 i86pc
                # uname-Darwin foo 23.6.0 Darwin Kernel Version 23.6.0: Mon Jul 29 21:13:04 PDT 2024; root:xnu-10063.141.2~1/RELEASE_ARM64_T6020 arm64
                programs.pop(program)
                uname: str = program[len("uname-") :]
                programs["uname"] = uname
                log.log(LOG_TRACE, f"available_programs[{key}][uname]: %s", uname)
                programs["os"] = uname.split(" ")[0]  # Linux|FreeBSD|SunOS|Darwin
                log.log(LOG_TRACE, f"available_programs[{key}][os]: %s", programs["os"])
            elif program.startswith("default_shell-"):
                programs.pop(program)
                default_shell: str = program[len("default_shell-") :]
                programs["default_shell"] = default_shell
                log.log(LOG_TRACE, f"available_programs[{key}][default_shell]: %s", default_shell)
                # Validate against the remote this entry belongs to, rather than whichever remote a previous loop
                # happened to visit last. "local" has no Remote of its own and keeps reporting the source remote.
                _validate_default_shell(default_shell, remotes_by_location.get(key, p.src))
            elif program.startswith("getconf_cpu_count-"):
                programs.pop(program)
                getconf_cpu_count: str = program[len("getconf_cpu_count-") :]
                programs["getconf_cpu_count"] = getconf_cpu_count
                log.log(LOG_TRACE, f"available_programs[{key}][getconf_cpu_count]: %s", getconf_cpu_count)

    for key, programs in available_programs.items():
        log.debug(f"available_programs[{key}]: %s", list_formatter(programs, separator=", "))

    for r in [p.dst, p.src]:
        if r.sudo and not p.is_program_available("sudo", r.location):
            die(f"{p.sudo_program} CLI is not available on {r.location} host: {r.ssh_user_host or 'localhost'}")

    if (
        len(p.args.preserve_properties) > 0
        and any(prop in p.zfs_send_program_opts for prop in ["--props", "-p"])
        and not p.is_program_available(ZFS_VERSION_IS_AT_LEAST_2_2_0, p.dst.location)
    ):
        die(
            "Cowardly refusing to proceed as --preserve-properties is unreliable on destination ZFS < 2.2.0 when using "
            "'zfs send --props'. Either upgrade destination ZFS, or remove '--props' from --zfs-send-program-opt(s)."
        )
def _disable_program(p: Params, program: str, locations: list[str]) -> None:
    """Drops ``program`` from the available programs of every given location; an absent program is not an error."""
    registry = p.available_programs
    for loc in locations:
        registry[loc].pop(program, None)
def _find_available_programs(p: Params) -> str:
    """Builds a POSIX shell script that prints one line per program found on the host.

    The script sticks to `if` statements rather than `&&`, and to `printf` rather than `echo`, for maximum compatibility
    across shells. Besides plain program names it prints the default shell, a CPU count and the `uname -a` output, each
    prefixed with a tag.
    """
    # (command to look up, name to print if the command exists), in output order
    probes = (
        ("echo", "echo"),
        (p.zpool_program, "zpool"),
        (p.ssh_program, "ssh"),
        (p.shell_program, "sh"),
        (p.sudo_program, "sudo"),
        (p.compression_program, "zstd"),
        (p.mbuffer_program, "mbuffer"),
        (p.pv_program, "pv"),
        (p.ps_program, "ps"),
    )
    script: list[str] = ["printf 'default_shell-%s\n' \"$SHELL\""]
    script.extend(f"if command -v {prog} > /dev/null; then printf '{label}\n'; fi" for prog, label in probes)
    # CPU count: prefer psrinfo, fall back to getconf
    cpu_count_probe = (
        f"if command -v {p.psrinfo_program} > /dev/null; then "
        f"printf 'getconf_cpu_count-'; {p.psrinfo_program} -p; "
        f"elif command -v {p.getconf_program} > /dev/null; then "
        f"printf 'getconf_cpu_count-'; {p.getconf_program} _NPROCESSORS_ONLN; "
        "fi"
    )
    script.append(cpu_count_probe)
    uname_probe = f"if command -v {p.uname_program} > /dev/null; then printf 'uname-'; {p.uname_program} -a || true; fi"
    script.append(uname_probe)
    return "; ".join(script)
def _detect_available_programs_remote(job: Job, remote: Remote, available_programs: dict, ssh_user_host: str) -> None:
    """Finds out which CLI tools exist on ``remote`` and records them under ``available_programs[remote.location]``.

    First determines the zfs version via 'zfs --version', then runs the program probe script through the configured
    shell. If the shell cannot be used, a minimal set of programs is assumed instead.
    """
    p = job.params
    log = p.log
    location = remote.location
    fallback_programs = {"zpool": None, "sudo": None}
    available_programs[location] = {}
    version_output: str | None = None
    try:
        # on Linux, 'zfs --version' returns with zero status and prints the correct info
        # on FreeBSD, 'zfs --version' always prints the same (correct) info as Linux, but nonetheless sometimes
        # returns with non-zero status (sometimes = if the zfs kernel module is not loaded)
        # on Solaris, 'zfs --version' returns with non-zero status without printing useful info as the --version
        # option is not known there
        version_output = run_ssh_command(job, remote, LOG_TRACE, print_stderr=False, cmd=[p.zfs_program, "--version"])
        assert version_output
    except (FileNotFoundError, PermissionError):  # location is local and program file was not found
        die(f"{p.zfs_program} CLI is not available on {location} host: {ssh_user_host or 'localhost'}")
    except subprocess.CalledProcessError as e:
        if "unrecognized command '--version'" in e.stderr and "run: zfs help" in e.stderr:
            available_programs[location]["zfs"] = "notOpenZFS"  # solaris-11.4 zfs does not know --version flag
        elif e.stdout.startswith("zfs"):
            version_output = e.stdout  # FreeBSD if the zfs kernel module is not loaded
            assert version_output
        else:
            die(f"{p.zfs_program} CLI is not available on {location} host: {ssh_user_host or 'localhost'}")

    if version_output:
        first_line: str = version_output.splitlines()[0]
        assert first_line.startswith("zfs")
        # Example: zfs-2.1.5~rc5-ubuntu3 -> 2.1.5, zfswin-2.2.3rc5 -> 2.2.3
        raw_version: str = first_line.split("-")[1].strip()
        parsed = re.fullmatch(r"(\d+\.\d+\.\d+).*", raw_version)
        assert parsed, "Unparsable zfs version string: " + raw_version
        zfs_version: str = parsed.group(1)
        available_programs[location]["zfs"] = zfs_version
        capability_thresholds = (
            ("2.1.0", ZFS_VERSION_IS_AT_LEAST_2_1_0),
            ("2.2.0", ZFS_VERSION_IS_AT_LEAST_2_2_0),
        )
        for minimum, capability in capability_thresholds:
            if is_version_at_least(zfs_version, minimum):
                available_programs[location][capability] = True
    log.log(LOG_TRACE, f"available_programs[{location}][zfs]: %s", available_programs[location]["zfs"])

    if p.shell_program != DISABLE_PRG:
        try:
            probe_cmd: list[str] = [p.shell_program, "-c", _find_available_programs(p)]
            probe_output = run_ssh_command(job, remote, LOG_TRACE, cmd=probe_cmd)
            available_programs[location].update(dict.fromkeys(probe_output.splitlines()))
            return
        except (FileNotFoundError, PermissionError) as e:  # location is local and shell program file was not found
            if e.filename != p.shell_program:
                raise
        except subprocess.CalledProcessError:
            pass  # the shell exists but the probe failed; fall through to the minimal assumptions
        log.warning("%s", f"Failed to find {p.shell_program} on {location}. Continuing with minimal assumptions...")
    available_programs[location].update(fallback_programs)
def is_solaris_zfs(p: Params, remote: Remote) -> bool:
    """Tells whether ``remote`` runs Solaris ZFS, decided by the location of the remote."""
    location = remote.location
    return is_solaris_zfs_location(p, location)
def is_solaris_zfs_location(p: Params, location: str) -> bool:
    """Tells whether the host at ``location`` runs Solaris ZFS.

    For "local" this checks the operating system of the current process; for any other location it checks whether
    zfs was recorded as "notOpenZFS" in ``p.available_programs``.
    """
    if location != "local":
        detected_zfs = p.available_programs[location].get("zfs")
        return detected_zfs == "notOpenZFS"
    return platform.system() == "SunOS"
def is_dummy(r: Remote) -> bool:
    """Tells whether ``r`` points at the synthetic dummy dataset, i.e. its root dataset equals ``DUMMY_DATASET``."""
    root_dataset = r.root_dataset
    return root_dataset == DUMMY_DATASET
def _detect_zpool_features(job: Job, remote: Remote) -> None:
    """Stores the zpool capabilities of ``remote`` in ``job.params.zpool_features[remote.location]``.

    Only "feature@..." properties and the "delegation" property are kept. The dummy remote gets an empty mapping. If
    'zpool get' yields no output, the pool is checked for existence via 'zfs list', and ``die()`` is called if the
    pool cannot be listed.
    """
    params = job.params
    location = remote.location
    log = params.log
    params.zpool_features.pop(location, None)
    if is_dummy(remote):
        params.zpool_features[location] = {}
        return

    output_lines: list[str] = []
    detected: dict[str, str] = {}
    if params.zpool_program != DISABLE_PRG:
        zpool_cmd: list[str] = params.split_args(f"{params.zpool_program} get -Hp -o property,value all", remote.pool)
        try:
            output_lines = run_ssh_command(job, remote, LOG_TRACE, check=False, cmd=zpool_cmd).splitlines()
        except (FileNotFoundError, PermissionError) as e:
            if e.filename != params.zpool_program:
                raise
            log.warning(
                "%s", f"Failed to detect zpool features on {location}: {remote.pool}. Continuing with minimal assumptions ..."
            )
        else:
            # each line is "<property>\t<value>"
            for output_line in output_lines:
                fields = output_line.split("\t", 1)
                name, value = fields[0], fields[1]
                if name.startswith("feature@") or name == "delegation":
                    detected[name] = value

    if not output_lines:
        # no zpool output at all: make sure the pool exists before continuing with an empty feature set
        zfs_cmd: list[str] = params.split_args(f"{params.zfs_program} list -t filesystem -Hp -o name -s name", remote.pool)
        if try_ssh_command(job, remote, LOG_TRACE, cmd=zfs_cmd) is None:
            die(f"Pool does not exist for {location} dataset: {remote.basis_root_dataset}. Manually create the pool first!")
    params.zpool_features[location] = detected
def is_zpool_feature_enabled_or_active(p: Params, remote: Remote, feature: str) -> bool:
    """Tells whether ``feature`` is recorded as "active" or "enabled" for the location of ``remote``."""
    state = p.zpool_features[remote.location].get(feature)
    return state == "active" or state == "enabled"
def are_bookmarks_enabled(p: Params, remote: Remote) -> bool:
    """Tells whether both bookmark related zpool features are active or enabled on ``remote``."""
    required_features = ("feature@bookmark_v2", "feature@bookmark_written")
    return all(is_zpool_feature_enabled_or_active(p, remote, name) for name in required_features)
def is_caching_snapshots(p: Params, remote: Remote) -> bool:
    """Tells whether snapshot caching is switched on and supported by ``remote``.

    Support requires zfs >= 2.2.0 at the location of the remote plus an active or enabled extensible_dataset feature.
    """
    # Each later check only runs if all earlier ones passed.
    verdict = p.is_caching_snapshots
    if verdict:
        verdict = p.is_program_available(ZFS_VERSION_IS_AT_LEAST_2_2_0, remote.location)
    if verdict:
        verdict = is_zpool_feature_enabled_or_active(p, remote, "feature@extensible_dataset")
    return verdict
def is_version_at_least(version_str: str, min_version_str: str) -> bool:
    """Checks if the version string is at least the minimum version string.

    Both arguments are dot separated sequences of integers, e.g. "2.1.5". Versions with a different number of
    components are compared as if the shorter one were padded with zeros, so "2.1" equals "2.1.0".

    Raises ValueError if a component is not an integer.
    """
    version = [int(part) for part in version_str.split(".")]
    min_version = [int(part) for part in min_version_str.split(".")]
    # Pad to equal length; otherwise sequence comparison would rank "2.1" below "2.1.0".
    width = max(len(version), len(min_version))
    version += [0] * (width - len(version))
    min_version += [0] * (width - len(min_version))
    return version >= min_version
def _validate_default_shell(path_to_default_shell: str, r: Remote) -> None:
    """Calls ``die()`` if the given default shell path ends in /csh or /tcsh; does nothing otherwise."""
    uses_csh_style_shell = path_to_default_shell.endswith(("/csh", "/tcsh"))
    if not uses_csh_style_shell:
        return
    # On some old FreeBSD systems the default shell is still csh. Also see https://www.grymoire.com/unix/CshTop10.txt
    host = r.ssh_user_host or "localhost"
    die(
        f"Cowardly refusing to proceed because {PROG_NAME} is not compatible with csh-style quoting of special "
        f"characters. The safe workaround is to first manually set 'sh' instead of '{path_to_default_shell}' as "
        f"the default shell of the Unix user on {r.location} host: {host}, like so: "
        "chsh -s /bin/sh YOURUSERNAME"
    )