Coverage for bzfs_main/utils.py: 100%

723 statements  

coverage.py v7.11.0, created at 2025-11-07 04:44 +0000

# Copyright 2024 Wolfgang Hoschek AT mac DOT com
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Collection of helper functions used across bzfs; includes environment variable parsing, process management, and
lightweight concurrency primitives.

Everything in this module relies only on the standard library, so other modules remain dependency-free. Each utility
favors simple, predictable behavior on all supported platforms.
"""

from __future__ import (
    annotations,
)
import argparse
import base64
import bisect
import collections
import contextlib
import errno
import hashlib
import logging
import os
import platform
import pwd
import random
import re
import signal
import stat
import subprocess
import sys
import threading
import time
import types
from collections import (
    defaultdict,
    deque,
)
from collections.abc import (
    ItemsView,
    Iterable,
    Iterator,
    Sequence,
)
from concurrent.futures import (
    Executor,
    Future,
    ThreadPoolExecutor,
)
from datetime import (
    datetime,
    timedelta,
    timezone,
    tzinfo,
)
from subprocess import (
    DEVNULL,
    PIPE,
)
from typing import (
    IO,
    Any,
    Callable,
    Final,
    Generic,
    Literal,
    NoReturn,
    Protocol,
    TextIO,
    TypeVar,
    cast,
)

# constants:
PROG_NAME: Final[str] = "bzfs"
ENV_VAR_PREFIX: Final[str] = PROG_NAME + "_"
DIE_STATUS: Final[int] = 3
DESCENDANTS_RE_SUFFIX: Final[str] = r"(?:/.*)?"  # also match descendants of a matching dataset
LOG_STDERR: Final[int] = (logging.INFO + logging.WARNING) // 2  # custom log level is halfway in between
LOG_STDOUT: Final[int] = (LOG_STDERR + logging.INFO) // 2  # custom log level is halfway in between
LOG_DEBUG: Final[int] = logging.DEBUG
LOG_TRACE: Final[int] = logging.DEBUG // 2  # custom log level is halfway in between
SNAPSHOT_FILTERS_VAR: Final[str] = "snapshot_filters_var"
YEAR_WITH_FOUR_DIGITS_REGEX: Final[re.Pattern] = re.compile(r"[1-9][0-9][0-9][0-9]")  # empty shall not match nonempty target
UNIX_TIME_INFINITY_SECS: Final[int] = 2**64  # billions of years and to be extra safe, larger than the largest ZFS GUID
DONT_SKIP_DATASET: Final[str] = ""
SHELL_CHARS: Final[str] = '"' + "'`~!@#$%^&*()+={}[]|;<>?,\\"
FILE_PERMISSIONS: Final[int] = stat.S_IRUSR | stat.S_IWUSR  # rw------- (user read + write)
DIR_PERMISSIONS: Final[int] = stat.S_IRWXU  # rwx------ (user read + write + execute)
UMASK: Final[int] = (~DIR_PERMISSIONS) & 0o777  # so intermediate dirs created by os.makedirs() have stricter permissions
UNIX_DOMAIN_SOCKET_PATH_MAX_LENGTH: Final[int] = 107 if platform.system() == "Linux" else 103  # see Google for 'sun_path'

RegexList = list[tuple[re.Pattern[str], bool]]  # Type alias


def getenv_any(key: str, default: str | None = None) -> str | None:
    """Returns the value of environment variable ``ENV_VAR_PREFIX + key``; all shell environment variable names used for
    configuration start with this prefix."""
    return os.getenv(ENV_VAR_PREFIX + key, default)


def getenv_int(key: str, default: int) -> int:
    """Returns environment variable ``key`` as int with ``default`` fallback."""
    return int(cast(str, getenv_any(key, str(default))))


def getenv_bool(key: str, default: bool = False) -> bool:
    """Returns environment variable ``key`` as bool with ``default`` fallback."""
    return cast(str, getenv_any(key, str(default))).lower().strip() == "true"


def cut(field: int, separator: str = "\t", *, lines: list[str]) -> list[str]:
    """Retains only column number 'field' in a list of TSV/CSV lines; analogous to the Unix 'cut' CLI command."""
    assert lines is not None
    assert isinstance(lines, list)
    assert len(separator) == 1
    if field == 1:
        return [line[0 : line.index(separator)] for line in lines]
    elif field == 2:
        return [line[line.index(separator) + 1 :] for line in lines]
    else:
        raise ValueError(f"Invalid field value: {field}")
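

# Illustrative usage sketch, not part of utils.py: cut() splits TSV lines into one of two columns.
# The sample lines below are hypothetical.
def _demo_cut() -> None:
    lines = ["tank/data\t12345", "tank/home\t67890"]
    assert cut(1, lines=lines) == ["tank/data", "tank/home"]  # column before the first separator
    assert cut(2, lines=lines) == ["12345", "67890"]  # column after the first separator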


def drain(iterable: Iterable[Any]) -> None:
    """Consumes all items in the iterable, effectively draining it."""
    for _ in iterable:
        _ = None  # help gc (iterable can block)


K_ = TypeVar("K_")
V_ = TypeVar("V_")
R_ = TypeVar("R_")


def shuffle_dict(dictionary: dict[K_, V_], rand: random.Random = random.SystemRandom()) -> dict[K_, V_]:  # noqa: B008
    """Returns a new dict with items shuffled randomly."""
    items: list[tuple[K_, V_]] = list(dictionary.items())
    rand.shuffle(items)
    return dict(items)


def sorted_dict(dictionary: dict[K_, V_]) -> dict[K_, V_]:
    """Returns a new dict with items sorted primarily by key and secondarily by value."""
    return dict(sorted(dictionary.items()))


def tail(file: str, n: int, errors: str | None = None) -> Sequence[str]:
    """Returns the last ``n`` lines of ``file`` without following symlinks."""
    if not os.path.isfile(file):
        return []
    with open_nofollow(file, "r", encoding="utf-8", errors=errors, check_owner=False) as fd:
        return deque(fd, maxlen=n)


NAMED_CAPTURING_GROUP: Final[re.Pattern[str]] = re.compile(r"^" + re.escape("(?P<") + r"[^\W\d]\w*" + re.escape(">"))


def replace_capturing_groups_with_non_capturing_groups(regex: str) -> str:
    """Replaces regex capturing groups with non-capturing groups for better matching performance (unless it's tricky).

    Unnamed capturing groups example: '(.*/)?tmp(foo|bar)(?!public)\\(' --> '(?:.*/)?tmp(?:foo|bar)(?!public)\\('
    Aka replaces parenthesis '(' followed by a char other than question mark '?', but not preceded by a backslash,
    with the replacement string '(?:'

    Named capturing group example: '(?P<name>abc)' --> '(?:abc)'
    Aka replaces '(?P<' followed by a valid name followed by '>', but not preceded by a backslash,
    with the replacement string '(?:'

    Also see https://docs.python.org/3/howto/regex.html#non-capturing-and-named-groups
    """
    if "(" in regex and (
        "[" in regex  # literal left square bracket
        or "\\N{LEFT SQUARE BRACKET}" in regex  # named Unicode escape for '['
        or "\\x5b" in regex  # hex escape for '[' (lowercase)
        or "\\x5B" in regex  # hex escape for '[' (uppercase)
        or "\\u005b" in regex  # 4-digit Unicode escape for '[' (lowercase)
        or "\\u005B" in regex  # 4-digit Unicode escape for '[' (uppercase)
        or "\\U0000005b" in regex  # 8-digit Unicode escape for '[' (lowercase)
        or "\\U0000005B" in regex  # 8-digit Unicode escape for '[' (uppercase)
        or "\\133" in regex  # octal escape for '['
    ):
        # Conservative fallback to minimize code complexity: skip the rewrite entirely in the rare case where the regex might
        # contain a pathological regex character class that contains parenthesis, or when '[' is expressed via escapes.
        # Rewriting a regex is a performance optimization; correctness comes first.
        return regex

    i = len(regex) - 2
    while i >= 0:
        i = regex.rfind("(", 0, i + 1)
        if i >= 0 and (i == 0 or regex[i - 1] != "\\"):
            if regex[i + 1] != "?":
                regex = f"{regex[0:i]}(?:{regex[i + 1:]}"  # unnamed capturing group
            else:  # potentially a valid named capturing group
                regex = regex[0:i] + NAMED_CAPTURING_GROUP.sub(repl="(?:", string=regex[i:], count=1)
        i -= 1
    return regex
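

# Illustrative sketch, not part of utils.py: how the capturing-group rewrite transforms patterns.
def _demo_replace_capturing_groups() -> None:
    assert replace_capturing_groups_with_non_capturing_groups("(.*/)?tmp(foo|bar)") == "(?:.*/)?tmp(?:foo|bar)"
    assert replace_capturing_groups_with_non_capturing_groups("(?P<name>abc)") == "(?:abc)"
    assert replace_capturing_groups_with_non_capturing_groups(r"\(literal\)") == r"\(literal\)"  # escaped parens untouched
    assert replace_capturing_groups_with_non_capturing_groups("[(]") == "[(]"  # '[' triggers the conservative fallback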


def get_home_directory() -> str:
    """Reliably detects home dir without using HOME env var."""
    # thread-safe version of: os.environ.pop('HOME', None); os.path.expanduser('~')
    return pwd.getpwuid(os.getuid()).pw_dir


def human_readable_bytes(num_bytes: float, separator: str = " ", precision: int | None = None) -> str:
    """Formats 'num_bytes' as a human-readable size; for example "567 MiB"."""
    sign = "-" if num_bytes < 0 else ""
    s = abs(num_bytes)
    units = ("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB", "RiB", "QiB")
    n = len(units) - 1
    i = 0
    while s >= 1024 and i < n:
        s /= 1024
        i += 1
    formatted_num = human_readable_float(s) if precision is None else f"{s:.{precision}f}"
    return f"{sign}{formatted_num}{separator}{units[i]}"


def human_readable_duration(duration: float, unit: str = "ns", separator: str = "", precision: int | None = None) -> str:
    """Formats a duration in human units, automatically scaling as needed; for example "567ms"."""
    sign = "-" if duration < 0 else ""
    t = abs(duration)
    units = ("ns", "μs", "ms", "s", "m", "h", "d")
    i = units.index(unit)
    if t < 1 and t != 0:
        nanos = (1, 1_000, 1_000_000, 1_000_000_000, 60 * 1_000_000_000, 60 * 60 * 1_000_000_000, 3600 * 24 * 1_000_000_000)
        t *= nanos[i]
        i = 0
    while t >= 1000 and i < 3:
        t /= 1000
        i += 1
    if i >= 3:
        while t >= 60 and i < 5:
            t /= 60
            i += 1
        if i >= 5:
            while t >= 24 and i < len(units) - 1:
                t /= 24
                i += 1
    formatted_num = human_readable_float(t) if precision is None else f"{t:.{precision}f}"
    return f"{sign}{formatted_num}{separator}{units[i]}"


def human_readable_float(number: float) -> str:
    """Formats ``number`` with a variable precision depending on magnitude.

    This design mirrors the way humans round values when scanning logs.

    If the number has one digit before the decimal point (0 <= abs(number) < 10):
        Round and use two decimals after the decimal point (e.g., 3.14559 --> "3.15").

    If the number has two digits before the decimal point (10 <= abs(number) < 100):
        Round and use one decimal after the decimal point (e.g., 12.36 --> "12.4").

    If the number has three or more digits before the decimal point (abs(number) >= 100):
        Round and use zero decimals after the decimal point (e.g., 123.556 --> "124").

    Ensures no unnecessary trailing zeroes are retained: Example: 1.500 --> "1.5", 1.00 --> "1"
    """
    abs_number = abs(number)
    precision = 2 if abs_number < 10 else 1 if abs_number < 100 else 0
    if precision == 0:
        return str(round(number))
    result = f"{number:.{precision}f}"
    assert "." in result
    result = result.rstrip("0").rstrip(".")  # Remove trailing zeros and trailing decimal point if empty
    return "0" if result == "-0" else result


def percent(number: int, total: int, print_total: bool = False) -> str:
    """Returns percentage string of ``number`` relative to ``total``."""
    tot: str = f"/{total}" if print_total else ""
    return f"{number}{tot}={'inf' if total == 0 else human_readable_float(100 * number / total)}%"
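

# Illustrative sketch, not part of utils.py: expected outputs of the human-readable formatters.
def _demo_human_readable_formatters() -> None:
    assert human_readable_bytes(567 * 1024 * 1024) == "567 MiB"
    assert human_readable_bytes(1536, precision=1) == "1.5 KiB"
    assert human_readable_duration(567_000_000) == "567ms"  # the default input unit is nanoseconds
    assert human_readable_duration(90, unit="s") == "1.5m"
    assert percent(3, 12, print_total=True) == "3/12=25%"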


def open_nofollow(
    path: str,
    mode: str = "r",
    buffering: int = -1,
    encoding: str | None = None,
    errors: str | None = None,
    newline: str | None = None,
    *,
    perm: int = FILE_PERMISSIONS,
    check_owner: bool = True,
    **kwargs: Any,
) -> IO[Any]:
    """Behaves exactly like built-in open(), except that it refuses to follow symlinks, i.e. raises OSError with
    errno.ELOOP/EMLINK if the basename of the path is a symlink.

    Also, can specify permissions on O_CREAT, and verify ownership.
    """
    if not mode:
        raise ValueError("Must have exactly one of create/read/write/append mode and at most one plus")
    flags = {
        "r": os.O_RDONLY,
        "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        "a": os.O_WRONLY | os.O_CREAT | os.O_APPEND,
        "x": os.O_WRONLY | os.O_CREAT | os.O_EXCL,
    }.get(mode[0])
    if flags is None:
        raise ValueError(f"invalid mode {mode!r}")
    if "+" in mode:  # enable read-write access for r+, w+, a+, x+
        flags = (flags & ~os.O_WRONLY) | os.O_RDWR  # clear os.O_WRONLY and set os.O_RDWR while preserving all other flags
    flags |= os.O_NOFOLLOW | os.O_CLOEXEC
    fd: int = os.open(path, flags=flags, mode=perm)
    try:
        if check_owner:
            st_uid: int = os.fstat(fd).st_uid
            if st_uid != os.geteuid():  # verify ownership is current effective UID
                raise PermissionError(errno.EPERM, f"{path!r} is owned by uid {st_uid}, not {os.geteuid()}", path)
        return os.fdopen(fd, mode, buffering=buffering, encoding=encoding, errors=errors, newline=newline, **kwargs)
    except Exception:
        try:
            os.close(fd)
        except OSError:
            pass
        raise
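

# Illustrative sketch, not part of utils.py: writing a private file that must not be a symlink.
# The path is hypothetical; the defaults enforce rw------- permissions on create and same-UID ownership.
def _demo_open_nofollow() -> None:
    with open_nofollow("/tmp/bzfs_demo_state.txt", "w", encoding="utf-8") as f:  # raises OSError for a symlink
        f.write("example\n")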


def close_quietly(fd: int) -> None:
    """Closes the given file descriptor while silently swallowing any OSError that might arise as part of this."""
    if fd >= 0:
        try:
            os.close(fd)
        except OSError:
            pass


P = TypeVar("P")


def find_match(
    seq: Sequence[P],
    predicate: Callable[[P], bool],
    start: int | None = None,
    end: int | None = None,
    reverse: bool = False,
    raises: bool | str | Callable[[], str] = False,
) -> int:
    """Returns the integer index within seq of the first item (or last item if reverse==True) that matches the given
    predicate condition. If no matching item is found, returns -1 or raises ValueError, depending on the raises parameter,
    which is a bool indicating whether to raise an error, or a string containing the error message, but can also be a
    Callable/lambda in order to support efficient deferred generation of error messages. Analogous to str.find(), including
    slicing semantics with parameters start and end. For example, seq can be a list, tuple or str.

    Example usage:
        lst = ["a", "b", "-c", "d"]
        i = find_match(lst, lambda arg: arg.startswith("-"), start=1, end=3, reverse=True)
        if i >= 0:
            ...
        i = find_match(lst, lambda arg: arg.startswith("-"), raises=f"Tag {tag} not found in {file}")
        i = find_match(lst, lambda arg: arg.startswith("-"), raises=lambda: f"Tag {tag} not found in {file}")
    """
    offset: int = 0 if start is None else start if start >= 0 else len(seq) + start
    if start is not None or end is not None:
        seq = seq[start:end]
    for i, item in enumerate(reversed(seq) if reverse else seq):
        if predicate(item):
            if reverse:
                return len(seq) - i - 1 + offset
            else:
                return i + offset
    if raises is False or raises is None:
        return -1
    if raises is True:
        raise ValueError("No matching item found in sequence")
    if callable(raises):
        raises = raises()
    raise ValueError(raises)
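

# Illustrative sketch, not part of utils.py: str.find()-style searching with a predicate.
def _demo_find_match() -> None:
    lst = ["a", "b", "-c", "d"]
    assert find_match(lst, lambda arg: arg.startswith("-")) == 2  # index of the first match
    assert find_match(lst, lambda arg: arg.startswith("-"), start=1, end=3, reverse=True) == 2  # slice semantics
    assert find_match(lst, lambda arg: arg.startswith("x")) == -1  # no match; raises=False by default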


def is_descendant(dataset: str, of_root_dataset: str) -> bool:
    """Returns True if ZFS ``dataset`` lies under ``of_root_dataset`` in the dataset hierarchy, or is the same."""
    return dataset == of_root_dataset or dataset.startswith(of_root_dataset + "/")


def has_duplicates(sorted_list: list[Any]) -> bool:
    """Returns True if any adjacent items within the given sorted sequence are equal."""
    return any(a == b for a, b in zip(sorted_list, sorted_list[1:]))


def has_siblings(sorted_datasets: list[str], is_test_mode: bool = False) -> bool:
    """Returns whether the (sorted) list of ZFS input datasets contains any siblings."""
    assert (not is_test_mode) or sorted_datasets == sorted(sorted_datasets), "List is not sorted"
    assert (not is_test_mode) or not has_duplicates(sorted_datasets), "List contains duplicates"
    skip_dataset: str = DONT_SKIP_DATASET
    parents: set[str] = set()
    for dataset in sorted_datasets:
        assert dataset
        parent = os.path.dirname(dataset)
        if parent in parents:
            return True  # I have a sibling if my parent already has another child
        parents.add(parent)
        if is_descendant(dataset, of_root_dataset=skip_dataset):
            continue
        if skip_dataset != DONT_SKIP_DATASET:
            return True  # I have a sibling if I am a root dataset and another root dataset already exists
        skip_dataset = dataset
    return False
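

# Illustrative sketch, not part of utils.py: sibling detection over sorted dataset lists.
def _demo_has_siblings() -> None:
    assert not has_siblings(["tank", "tank/foo", "tank/foo/bar"])  # a single chain of datasets has no siblings
    assert has_siblings(["tank", "tank/bar", "tank/foo"])  # 'tank/bar' and 'tank/foo' share the parent 'tank'
    assert has_siblings(["pool1", "pool2"])  # two root datasets are siblings of each other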


def dry(msg: str, is_dry_run: bool) -> str:
    """Prefixes ``msg`` with 'Dry' when in dry-run mode."""
    return "Dry " + msg if is_dry_run else msg


def relativize_dataset(dataset: str, root_dataset: str) -> str:
    """Converts an absolute dataset path to one relative to ``root_dataset``.

    Example: root_dataset=tank/foo, dataset=tank/foo/bar/baz --> relative_path=/bar/baz.
    """
    return dataset[len(root_dataset) :]


def dataset_paths(dataset: str) -> Iterator[str]:
    """Enumerates all paths of a valid ZFS dataset name; Example: "a/b/c" --> yields "a", "a/b", "a/b/c"."""
    i: int = 0
    while i >= 0:
        i = dataset.find("/", i)
        if i < 0:
            yield dataset
        else:
            yield dataset[:i]
            i += 1
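

# Illustrative sketch, not part of utils.py: enumerating the ancestor paths of a dataset name.
def _demo_dataset_paths() -> None:
    assert list(dataset_paths("a/b/c")) == ["a", "a/b", "a/b/c"]
    assert list(dataset_paths("tank")) == ["tank"]  # a root dataset yields only itself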


def replace_prefix(s: str, old_prefix: str, new_prefix: str) -> str:
    """In a string s, replaces a leading old_prefix string with new_prefix; assumes the leading string is present."""
    assert s.startswith(old_prefix)
    return new_prefix + s[len(old_prefix) :]


def replace_in_lines(lines: list[str], old: str, new: str, count: int = -1) -> None:
    """Replaces ``old`` with ``new`` in-place for every string in ``lines``."""
    for i in range(len(lines)):
        lines[i] = lines[i].replace(old, new, count)


TAPPEND = TypeVar("TAPPEND")


def append_if_absent(lst: list[TAPPEND], *items: TAPPEND) -> list[TAPPEND]:
    """Appends items to list if they are not already present."""
    for item in items:
        if item not in lst:
            lst.append(item)
    return lst


def xappend(lst: list[TAPPEND], *items: TAPPEND | Iterable[TAPPEND]) -> list[TAPPEND]:
    """Appends each of the items to the given list if the item is "truthy", for example not None and not an empty string;
    if an item is an iterable, does so recursively, flattening the output."""
    for item in items:
        if isinstance(item, str) or not isinstance(item, collections.abc.Iterable):
            if item:
                lst.append(item)
        else:
            xappend(lst, *item)
    return lst


def is_included(name: str, include_regexes: RegexList, exclude_regexes: RegexList) -> bool:
    """Returns True if the name matches at least one of the include regexes but none of the exclude regexes; else False.

    A regex that starts with a `!` is a negation - the regex matches if the regex without the `!` prefix does not match.
    """
    for regex, is_negation in exclude_regexes:
        is_match = regex.fullmatch(name) if regex.pattern != ".*" else True
        if is_negation:
            is_match = not is_match
        if is_match:
            return False

    for regex, is_negation in include_regexes:
        is_match = regex.fullmatch(name) if regex.pattern != ".*" else True
        if is_negation:
            is_match = not is_match
        if is_match:
            return True

    return False


def compile_regexes(regexes: list[str], suffix: str = "") -> RegexList:
    """Compiles regex strings and keeps track of negations."""
    assert isinstance(regexes, list)
    compiled_regexes: RegexList = []
    for regex in regexes:
        if suffix:  # disallow non-trailing end-of-str symbol in dataset regexes to ensure descendants will also match
            if regex.endswith("\\$"):
                pass  # trailing literal $ is ok
            elif regex.endswith("$"):
                regex = regex[0:-1]  # ok because all users of compile_regexes() call re.fullmatch()
            elif "$" in regex:
                raise re.error("Must not use non-trailing '$' character", regex)
        if is_negation := regex.startswith("!"):
            regex = regex[1:]
        regex = replace_capturing_groups_with_non_capturing_groups(regex)
        if regex != ".*" or not (suffix.startswith("(") and suffix.endswith(")?")):
            regex = f"{regex}{suffix}"
        compiled_regexes.append((re.compile(regex), is_negation))
    return compiled_regexes
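

# Illustrative sketch, not part of utils.py: combining compile_regexes() with is_included(), using
# DESCENDANTS_RE_SUFFIX so that a dataset regex also matches descendants of matching datasets.
def _demo_dataset_filtering() -> None:
    includes = compile_regexes(["tank/home.*"], suffix=DESCENDANTS_RE_SUFFIX)
    excludes = compile_regexes(["!.*"], suffix=DESCENDANTS_RE_SUFFIX)  # the negated '.*' excludes nothing
    assert is_included("tank/home/alice", includes, excludes)
    assert not is_included("tank/var", includes, excludes)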


def list_formatter(iterable: Iterable[Any], separator: str = " ", lstrip: bool = False) -> Any:
    """Lazy formatter joining items with ``separator`` used to avoid overhead in disabled log levels."""

    class CustomListFormatter:
        """Formatter object that joins items when converted to ``str``."""

        def __str__(self) -> str:
            s = separator.join(map(str, iterable))
            return s.lstrip() if lstrip else s

    return CustomListFormatter()


def pretty_print_formatter(obj_to_format: Any) -> Any:
    """Lazy pprint formatter used to avoid overhead in disabled log levels."""

    class PrettyPrintFormatter:
        """Formatter that pretty-prints the object on conversion to ``str``."""

        def __str__(self) -> str:
            import pprint  # lazy import for startup perf

            return pprint.pformat(vars(obj_to_format))

    return PrettyPrintFormatter()


def stderr_to_str(stderr: Any) -> str:
    """Workaround for https://github.com/python/cpython/issues/87597."""
    return str(stderr) if not isinstance(stderr, bytes) else stderr.decode("utf-8", errors="replace")


def xprint(log: logging.Logger, value: Any, run: bool = True, end: str = "\n", file: TextIO | None = None) -> None:
    """Optionally logs ``value`` at stdout/stderr level."""
    if run and value:
        value = value if end else str(value).rstrip()
        level = LOG_STDOUT if file is sys.stdout else LOG_STDERR
        log.log(level, "%s", value)


def sha256_hex(text: str) -> str:
    """Returns the sha256 hex string for the given text."""
    return hashlib.sha256(text.encode()).hexdigest()


def sha256_urlsafe_base64(text: str, padding: bool = True) -> str:
    """Returns the URL-safe base64-encoded sha256 value for the given text."""
    digest: bytes = hashlib.sha256(text.encode()).digest()
    s: str = base64.urlsafe_b64encode(digest).decode()
    return s if padding else s.rstrip("=")


def sha256_128_urlsafe_base64(text: str) -> str:
    """Returns the left half portion of the unpadded URL-safe base64-encoded sha256 value for the given text."""
    s: str = sha256_urlsafe_base64(text, padding=False)
    return s[: len(s) // 2]


def sha256_85_urlsafe_base64(text: str) -> str:
    """Returns the left one third portion of the unpadded URL-safe base64-encoded sha256 value for the given text."""
    s: str = sha256_urlsafe_base64(text, padding=False)
    return s[: len(s) // 3]


def urlsafe_base64(
    value: int, max_value: int = 2**64 - 1, padding: bool = True, byteorder: Literal["little", "big"] = "big"
) -> str:
    """Returns the URL-safe base64 string encoding of the int value, assuming it is contained in the range [0..max_value]."""
    assert 0 <= value <= max_value
    max_bytes: int = (max_value.bit_length() + 7) // 8
    value_bytes: bytes = value.to_bytes(max_bytes, byteorder)
    s: str = base64.urlsafe_b64encode(value_bytes).decode()
    return s if padding else s.rstrip("=")
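

# Illustrative sketch, not part of utils.py: fixed-width, URL-safe encodings for digests and IDs.
def _demo_urlsafe_encodings() -> None:
    assert len(sha256_urlsafe_base64("x", padding=False)) == 43  # 32 digest bytes --> 43 unpadded base64 chars
    assert len(sha256_128_urlsafe_base64("x")) == 21  # the left half of the 43 unpadded chars
    assert urlsafe_base64(0, max_value=2**64 - 1) == "AAAAAAAAAAA="  # 8 zero bytes, big-endian, padded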


def die(msg: str, exit_code: int = DIE_STATUS, parser: argparse.ArgumentParser | None = None) -> NoReturn:
    """Exits the program with ``exit_code`` after logging ``msg``."""
    if parser is None:
        ex = SystemExit(msg)
        ex.code = exit_code
        raise ex
    else:
        parser.error(msg)


def subprocess_run(*args: Any, **kwargs: Any) -> subprocess.CompletedProcess:
    """Drop-in replacement for subprocess.run() that mimics its behavior, except that it enhances cleanup on TimeoutExpired
    and provides optional child PID tracking."""
    input_value = kwargs.pop("input", None)
    timeout = kwargs.pop("timeout", None)
    check = kwargs.pop("check", False)
    subprocesses: Subprocesses | None = kwargs.pop("subprocesses", None)
    if input_value is not None:
        if kwargs.get("stdin") is not None:
            raise ValueError("input and stdin are mutually exclusive")
        kwargs["stdin"] = subprocess.PIPE

    pid: int | None = None
    try:
        with subprocess.Popen(*args, **kwargs) as proc:
            pid = proc.pid
            if subprocesses is not None:
                subprocesses.register_child_pid(pid)
            try:
                stdout, stderr = proc.communicate(input_value, timeout=timeout)
            except BaseException as e:
                try:
                    if isinstance(e, subprocess.TimeoutExpired):
                        terminate_process_subtree(root_pids=[proc.pid])  # send SIGTERM to child process and its descendants
                finally:
                    proc.kill()
                raise
            else:
                exitcode: int | None = proc.poll()
                assert exitcode is not None
                if check and exitcode:
                    raise subprocess.CalledProcessError(exitcode, proc.args, output=stdout, stderr=stderr)
                return subprocess.CompletedProcess(proc.args, exitcode, stdout, stderr)
    finally:
        if subprocesses is not None and isinstance(pid, int):
            subprocesses.unregister_child_pid(pid)
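

# Illustrative sketch, not part of utils.py: subprocess_run() accepts the usual subprocess.run() kwargs,
# plus the optional 'subprocesses' tracker shown in the Subprocesses class below.
def _demo_subprocess_run() -> None:
    result = subprocess_run(["echo", "hello"], stdout=PIPE, stderr=PIPE, text=True, timeout=10, check=True)
    assert result.stdout.strip() == "hello"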


def terminate_process_subtree(
    except_current_process: bool = True, root_pids: list[int] | None = None, sig: signal.Signals = signal.SIGTERM
) -> None:
    """For each root PID: Sends the given signal to the root PID and all its descendant processes."""
    current_pid: int = os.getpid()
    root_pids = [current_pid] if root_pids is None else root_pids
    all_pids: list[list[int]] = _get_descendant_processes(root_pids)
    assert len(all_pids) == len(root_pids)
    for i, pids in enumerate(all_pids):
        root_pid = root_pids[i]
        if root_pid == current_pid:
            pids += [] if except_current_process else [current_pid]
        else:
            pids.insert(0, root_pid)
        for pid in pids:
            with contextlib.suppress(OSError):
                os.kill(pid, sig)


def _get_descendant_processes(root_pids: list[int]) -> list[list[int]]:
    """For each root PID, returns the list of all descendant process IDs for the given root PID, on POSIX systems."""
    if len(root_pids) == 0:
        return []
    cmd: list[str] = ["ps", "-Ao", "pid,ppid"]
    try:
        lines: list[str] = subprocess.run(cmd, stdin=DEVNULL, stdout=PIPE, text=True, check=True).stdout.splitlines()
    except PermissionError:
        # degrade gracefully in sandbox environments that deny executing `ps` entirely
        return [[] for _ in root_pids]
    procs: dict[int, list[int]] = defaultdict(list)
    for line in lines[1:]:  # all lines except the header line
        splits: list[str] = line.split()
        assert len(splits) == 2
        pid = int(splits[0])
        ppid = int(splits[1])
        procs[ppid].append(pid)

    def recursive_append(ppid: int, descendants: list[int]) -> None:
        """Recursively collect descendant PIDs starting from ``ppid``."""
        for child_pid in procs[ppid]:
            descendants.append(child_pid)
            recursive_append(child_pid, descendants)

    all_descendants: list[list[int]] = []
    for root_pid in root_pids:
        descendants: list[int] = []
        recursive_append(root_pid, descendants)
        all_descendants.append(descendants)
    return all_descendants


@contextlib.contextmanager
def termination_signal_handler(
    termination_event: threading.Event,
    termination_handler: Callable[[], None] = lambda: terminate_process_subtree(),
) -> Iterator[None]:
    """Context manager that installs SIGINT/SIGTERM handlers that set ``termination_event`` and, by default, terminate all
    descendant processes."""
    assert termination_event is not None

    def _handler(_sig: int, _frame: object) -> None:
        termination_event.set()
        termination_handler()

    previous_int_handler = signal.signal(signal.SIGINT, _handler)  # install new signal handler
    previous_term_handler = signal.signal(signal.SIGTERM, _handler)  # install new signal handler
    try:
        yield  # run body of context manager
    finally:
        signal.signal(signal.SIGINT, previous_int_handler)  # restore original signal handler
        signal.signal(signal.SIGTERM, previous_term_handler)  # restore original signal handler


#############################################################################
class Subprocesses:
    """Provides per-job tracking of child PIDs so a job can safely terminate only the subprocesses it spawned itself; used
    when multiple jobs run concurrently within the same Python process."""

    def __init__(self) -> None:
        self._lock: Final[threading.Lock] = threading.Lock()
        self._child_pids: Final[dict[int, None]] = {}  # a set that preserves insertion order

    def subprocess_run(self, *args: Any, **kwargs: Any) -> subprocess.CompletedProcess:
        """Wrapper around utils.subprocess_run() that auto-registers/unregisters child PIDs for per-job termination."""
        return subprocess_run(*args, **kwargs, subprocesses=self)

    def register_child_pid(self, pid: int) -> None:
        """Registers a child PID as managed by this instance."""
        with self._lock:
            self._child_pids[pid] = None

    def unregister_child_pid(self, pid: int) -> None:
        """Unregisters a child PID that has exited or is no longer tracked."""
        with self._lock:
            self._child_pids.pop(pid, None)

    def terminate_process_subtrees(self, sig: signal.Signals = signal.SIGTERM) -> None:
        """Sends the given signal to all tracked child PIDs and their descendants, ignoring errors for dead PIDs."""
        with self._lock:
            pids: list[int] = list(self._child_pids)
            self._child_pids.clear()
        terminate_process_subtree(root_pids=pids, sig=sig)


#############################################################################
def pid_exists(pid: int) -> bool | None:
    """Returns True if a process with PID exists, False if not, or None on error."""
    if pid <= 0:
        return False
    try:  # with signal=0, no signal is actually sent, but error checking is still performed
        os.kill(pid, 0)  # ... which can be used to check for process existence on POSIX systems
    except OSError as err:
        if err.errno == errno.ESRCH:  # No such process
            return False
        if err.errno == errno.EPERM:  # Operation not permitted
            return True
        return None
    return True


def nprefix(s: str) -> str:
    """Returns a canonical snapshot prefix with trailing underscore."""
    return sys.intern(s + "_")


def ninfix(s: str) -> str:
    """Returns a canonical infix with trailing underscore when not empty."""
    return sys.intern(s + "_") if s else ""


def nsuffix(s: str) -> str:
    """Returns a canonical suffix with leading underscore when not empty."""
    return sys.intern("_" + s) if s else ""

772 

773 

774def format_dict(dictionary: dict[Any, Any]) -> str: 

775 """Returns a formatted dictionary using repr for consistent output.""" 

776 return f'"{dictionary}"' 

777 

778 

779def format_obj(obj: object) -> str: 

780 """Returns a formatted str using repr for consistent output.""" 

781 return f'"{obj}"' 

782 

783 

784def validate_dataset_name(dataset: str, input_text: str) -> None: 

785 """'zfs create' CLI does not accept dataset names that are empty or start or end in a slash, etc.""" 

786 # Also see https://github.com/openzfs/zfs/issues/439#issuecomment-2784424 

787 # and https://github.com/openzfs/zfs/issues/8798 

788 # and (by now no longer accurate): https://docs.oracle.com/cd/E26505_01/html/E37384/gbcpt.html 

789 invalid_chars: str = SHELL_CHARS 

790 if ( 

791 dataset in ("", ".", "..") 

792 or dataset.startswith(("/", "./", "../")) 

793 or dataset.endswith(("/", "/.", "/..")) 

794 or any(substring in dataset for substring in ("//", "/./", "/../")) 

795 or any(char in invalid_chars or (char.isspace() and char != " ") for char in dataset) 

796 or not dataset[0].isalpha() 

797 ): 

798 die(f"Invalid ZFS dataset name: '{dataset}' for: '{input_text}'") 

799 

800 

801def validate_property_name(propname: str, input_text: str) -> str: 

802 """Checks that the ZFS property name contains no spaces or shell chars.""" 

803 invalid_chars: str = SHELL_CHARS 

804 if not propname or any(char.isspace() or char in invalid_chars for char in propname): 

805 die(f"Invalid ZFS property name: '{propname}' for: '{input_text}'") 

806 return propname 

807 

808 

809def validate_is_not_a_symlink(msg: str, path: str, parser: argparse.ArgumentParser | None = None) -> None: 

810 """Checks that the given path is not a symbolic link.""" 

811 if os.path.islink(path): 

812 die(f"{msg}must not be a symlink: {path}", parser=parser) 

813 

814 

815def validate_file_permissions(path: str, mode: int) -> None: 

816 """Verify permissions and that ownership is current effective UID.""" 

817 stats: os.stat_result = os.stat(path, follow_symlinks=False) 

818 st_uid: int = stats.st_uid 

819 if st_uid != os.geteuid(): # verify ownership is current effective UID 

820 die(f"{path!r} is owned by uid {st_uid}, not {os.geteuid()}") 

821 st_mode = stat.S_IMODE(stats.st_mode) 

822 if st_mode != mode: 

823 die( 

824 f"{path!r} has permissions {st_mode:03o} aka {stat.filemode(st_mode)[1:]}, " 

825 f"not {mode:03o} aka {stat.filemode(mode)[1:]})" 

826 ) 

827 

828 

829def parse_duration_to_milliseconds(duration: str, regex_suffix: str = "", context: str = "") -> int: 

830 """Parses human duration strings like '5m' or '2 hours' to milliseconds.""" 

831 unit_milliseconds: dict[str, int] = { 

832 "milliseconds": 1, 

833 "millis": 1, 

834 "seconds": 1000, 

835 "secs": 1000, 

836 "minutes": 60 * 1000, 

837 "mins": 60 * 1000, 

838 "hours": 60 * 60 * 1000, 

839 "days": 86400 * 1000, 

840 "weeks": 7 * 86400 * 1000, 

841 "months": round(30.5 * 86400 * 1000), 

842 "years": 365 * 86400 * 1000, 

843 } 

844 match = re.fullmatch( 

845 r"(\d+)\s*(milliseconds|millis|seconds|secs|minutes|mins|hours|days|weeks|months|years)" + regex_suffix, 

846 duration, 

847 ) 

848 if not match: 

849 if context: 

850 die(f"Invalid duration format: {duration} within {context}") 

851 else: 

852 raise ValueError(f"Invalid duration format: {duration}") 

853 assert match 

854 quantity: int = int(match.group(1)) 

855 unit: str = match.group(2) 

856 return quantity * unit_milliseconds[unit] 

857 

858 

859def unixtime_fromisoformat(datetime_str: str) -> int: 

860 """Converts ISO 8601 datetime string into UTC Unix time seconds.""" 

861 return int(datetime.fromisoformat(datetime_str).timestamp()) 

862 

863 

864def isotime_from_unixtime(unixtime_in_seconds: int) -> str: 

865 """Converts UTC Unix time seconds into ISO 8601 datetime string.""" 

866 tz: tzinfo = timezone.utc 

867 dt: datetime = datetime.fromtimestamp(unixtime_in_seconds, tz=tz) 

868 return dt.isoformat(sep="_", timespec="seconds") 

869 

870 

871def current_datetime( 

872 tz_spec: str | None = None, 

873 now_fn: Callable[[tzinfo | None], datetime] | None = None, 

874) -> datetime: 

875 """Returns current time in ``tz_spec`` timezone or local timezone.""" 

876 if now_fn is None: 

877 now_fn = datetime.now 

878 return now_fn(get_timezone(tz_spec)) 

879 

880 

881def get_timezone(tz_spec: str | None = None) -> tzinfo | None: 

882 """Returns timezone from spec or local timezone if unspecified.""" 

883 tz: tzinfo | None 

884 if tz_spec is None: 

885 tz = None 

886 elif tz_spec == "UTC": 

887 tz = timezone.utc 

888 else: 

889 if match := re.fullmatch(r"([+-])(\d\d):?(\d\d)", tz_spec): 

890 sign, hours, minutes = match.groups() 

891 offset: int = int(hours) * 60 + int(minutes) 

892 offset = -offset if sign == "-" else offset 

893 tz = timezone(timedelta(minutes=offset)) 

894 elif "/" in tz_spec: 

895 from zoneinfo import ZoneInfo # lazy import for startup perf 

896 

897 tz = ZoneInfo(tz_spec) 

898 else: 

899 raise ValueError(f"Invalid timezone specification: {tz_spec}") 

900 return tz 
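

# Illustrative sketch, not part of utils.py: accepted duration strings and timezone specs.
def _demo_time_parsing() -> None:
    assert parse_duration_to_milliseconds("5mins") == 5 * 60 * 1000
    assert get_timezone("UTC") is timezone.utc
    assert get_timezone("+05:30") == timezone(timedelta(hours=5, minutes=30))
    assert get_timezone("America/New_York") is not None  # IANA names containing '/' resolve via zoneinfo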


###############################################################################
class SnapshotPeriods:  # thread-safe
    """Parses snapshot suffix strings and converts between durations."""

    def __init__(self) -> None:
        """Initializes lookup tables of suffixes and corresponding millis."""
        self.suffix_milliseconds: Final[dict[str, int]] = {
            "yearly": 365 * 86400 * 1000,
            "monthly": round(30.5 * 86400 * 1000),
            "weekly": 7 * 86400 * 1000,
            "daily": 86400 * 1000,
            "hourly": 60 * 60 * 1000,
            "minutely": 60 * 1000,
            "secondly": 1000,
            "millisecondly": 1,
        }
        self.period_labels: Final[dict[str, str]] = {
            "yearly": "years",
            "monthly": "months",
            "weekly": "weeks",
            "daily": "days",
            "hourly": "hours",
            "minutely": "minutes",
            "secondly": "seconds",
            "millisecondly": "milliseconds",
        }
        self._suffix_regex0: Final[re.Pattern] = re.compile(rf"([1-9][0-9]*)?({'|'.join(self.suffix_milliseconds.keys())})")
        self._suffix_regex1: Final[re.Pattern] = re.compile("_" + self._suffix_regex0.pattern)

    def suffix_to_duration0(self, suffix: str) -> tuple[int, str]:
        """Parses a suffix like '10minutely' to (10, 'minutely')."""
        return self._suffix_to_duration(suffix, self._suffix_regex0)

    def suffix_to_duration1(self, suffix: str) -> tuple[int, str]:
        """Like :meth:`suffix_to_duration0` but expects an underscore prefix."""
        return self._suffix_to_duration(suffix, self._suffix_regex1)

    @staticmethod
    def _suffix_to_duration(suffix: str, regex: re.Pattern) -> tuple[int, str]:
        """Example: Converts '2hourly' to (2, 'hourly') and 'hourly' to (1, 'hourly')."""
        if match := regex.fullmatch(suffix):
            duration_amount: int = int(match.group(1)) if match.group(1) else 1
            assert duration_amount > 0
            duration_unit: str = match.group(2)
            return duration_amount, duration_unit
        else:
            return 0, ""

    def label_milliseconds(self, snapshot: str) -> int:
        """Returns the duration encoded in the ``snapshot`` suffix, in milliseconds."""
        i = snapshot.rfind("_")
        snapshot = "" if i < 0 else snapshot[i + 1 :]
        duration_amount, duration_unit = self._suffix_to_duration(snapshot, self._suffix_regex0)
        return duration_amount * self.suffix_milliseconds.get(duration_unit, 0)
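

# Illustrative sketch, not part of utils.py: parsing period suffixes of snapshot names.
def _demo_snapshot_periods() -> None:
    periods = SnapshotPeriods()
    assert periods.suffix_to_duration0("10minutely") == (10, "minutely")
    assert periods.suffix_to_duration0("hourly") == (1, "hourly")  # a missing amount defaults to 1
    assert periods.label_milliseconds("bzfs_2024-01-01_daily") == 86400 * 1000  # parses the part after the last '_'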


#############################################################################
class JobStats:
    """Simple thread-safe counters summarizing job progress."""

    def __init__(self, jobs_all: int) -> None:
        assert jobs_all >= 0
        self.lock: Final[threading.Lock] = threading.Lock()
        self.jobs_all: int = jobs_all
        self.jobs_started: int = 0
        self.jobs_completed: int = 0
        self.jobs_failed: int = 0
        self.jobs_running: int = 0
        self.sum_elapsed_nanos: int = 0
        self.started_job_names: Final[set[str]] = set()

    def submit_job(self, job_name: str) -> str:
        """Counts a job submission."""
        with self.lock:
            self.jobs_started += 1
            self.jobs_running += 1
            self.started_job_names.add(job_name)
            return str(self)

    def complete_job(self, failed: bool, elapsed_nanos: int) -> str:
        """Counts a job completion."""
        assert elapsed_nanos >= 0
        with self.lock:
            self.jobs_running -= 1
            self.jobs_completed += 1
            self.jobs_failed += 1 if failed else 0
            self.sum_elapsed_nanos += elapsed_nanos
            msg = str(self)
            assert self.sum_elapsed_nanos >= 0, msg
            assert self.jobs_running >= 0, msg
            assert self.jobs_failed >= 0, msg
            assert self.jobs_failed <= self.jobs_completed, msg
            assert self.jobs_completed <= self.jobs_started, msg
            assert self.jobs_started <= self.jobs_all, msg
            return msg

    def __repr__(self) -> str:
        def pct(number: int) -> str:
            """Returns percentage string relative to total jobs."""
            return percent(number, total=self.jobs_all, print_total=True)

        al, started, completed, failed = self.jobs_all, self.jobs_started, self.jobs_completed, self.jobs_failed
        running = self.jobs_running
        t = "avg_completion_time:" + human_readable_duration(self.sum_elapsed_nanos / max(1, completed))
        return f"all:{al}, started:{pct(started)}, completed:{pct(completed)}, failed:{pct(failed)}, running:{running}, {t}"


#############################################################################
class Comparable(Protocol):
    """Partial ordering protocol."""

    def __lt__(self, other: Any) -> bool:  # pragma: no cover - behavior defined by implementer
        ...


T = TypeVar("T", bound=Comparable)  # Generic type variable for elements stored in a SmallPriorityQueue


class SmallPriorityQueue(Generic[T]):
    """A priority queue that can handle updates to the priority of any element that is already contained in the queue, and
    does so very efficiently if there are a small number of elements in the queue (no more than thousands), as is the case
    for us.

    Could be implemented using a SortedList via https://github.com/grantjenks/python-sortedcontainers or using an indexed
    priority queue via https://github.com/nvictus/pqdict. But, to avoid an external dependency, it is actually implemented
    using a simple yet effective binary search-based sorted list that can handle updates to the priority of elements that
    are already contained in the queue, via removal of the element, followed by update of the element, followed by
    (re)insertion. Duplicate elements (if any) are maintained in their order of insertion relative to other duplicates.
    """

    def __init__(self, reverse: bool = False) -> None:
        """Creates an empty queue; sort order flips when ``reverse`` is True."""
        self._lst: Final[list[T]] = []
        self._reverse: Final[bool] = reverse

    def clear(self) -> None:
        """Removes all elements from the queue."""
        self._lst.clear()

    def push(self, element: T) -> None:
        """Inserts ``element`` while maintaining sorted order."""
        bisect.insort(self._lst, element)

    def pop(self) -> T:
        """Removes and returns the smallest (or largest if reverse == True) element from the queue."""
        return self._lst.pop() if self._reverse else self._lst.pop(0)

    def peek(self) -> T:
        """Returns the smallest (or largest if reverse == True) element without removing it."""
        return self._lst[-1] if self._reverse else self._lst[0]

    def remove(self, element: T) -> bool:
        """Removes the first occurrence of ``element`` and returns True if it was present."""
        lst = self._lst
        i = bisect.bisect_left(lst, element)
        is_contained = i < len(lst) and lst[i] == element
        if is_contained:
            del lst[i]  # is an optimized memmove()
        return is_contained

    def __len__(self) -> int:
        """Returns the number of queued elements."""
        return len(self._lst)

    def __contains__(self, element: T) -> bool:
        """Returns ``True`` if ``element`` is present."""
        lst = self._lst
        i = bisect.bisect_left(lst, element)
        return i < len(lst) and lst[i] == element

    def __iter__(self) -> Iterator[T]:
        """Iterates over queued elements in priority order."""
        return reversed(self._lst) if self._reverse else iter(self._lst)

    def __repr__(self) -> str:
        """Representation showing queue contents in current order."""
        return repr(list(reversed(self._lst))) if self._reverse else repr(self._lst)
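

# Illustrative sketch, not part of utils.py: updating an element's priority via remove() + push().
def _demo_small_priority_queue() -> None:
    queue: SmallPriorityQueue[int] = SmallPriorityQueue()
    for priority in (5, 1, 3):
        queue.push(priority)
    assert queue.peek() == 1
    assert queue.remove(3)  # update an element's priority by removing it ...
    queue.push(2)  # ... and re-inserting it with its new priority
    assert [queue.pop(), queue.pop(), queue.pop()] == [1, 2, 5]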


###############################################################################
class SortedInterner(Generic[T]):
    """Same as sys.intern() except that it isn't global and that it assumes the input list is sorted (for binary search)."""

    def __init__(self, sorted_list: list[T]) -> None:
        self._lst: Final[list[T]] = sorted_list

    def interned(self, element: T) -> T:
        """Returns the interned (aka deduped) item if an equal item is contained, else returns the non-interned item."""
        lst = self._lst
        i = binary_search(lst, element)
        return lst[i] if i >= 0 else element

    def __contains__(self, element: T) -> bool:
        """Returns ``True`` if ``element`` is present."""
        return binary_search(self._lst, element) >= 0


def binary_search(sorted_list: list[T], item: T) -> int:
    """Java-style binary search; Returns index >= 0 if an equal item is found in the list, else '-insertion_point - 1'; If
    it returns index >= 0, the index will be the left-most index in case multiple such equal items are contained."""
    i = bisect.bisect_left(sorted_list, item)
    return i if i < len(sorted_list) and sorted_list[i] == item else -i - 1
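

# Illustrative sketch, not part of utils.py: decoding the Java-style return convention of binary_search().
def _demo_binary_search() -> None:
    lst = ["a", "c", "c", "e"]
    assert binary_search(lst, "c") == 1  # the left-most index of an equal item
    assert binary_search(lst, "b") == -2  # not found: -insertion_point - 1, i.e. would insert at index 1
    assert binary_search(lst, "z") == -5  # would insert at index 4, the end of the list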


###############################################################################
S = TypeVar("S")


class Interner(Generic[S]):
    """Same as sys.intern() except that it isn't global and can also be used for types other than str."""

    def __init__(self, items: Iterable[S] = frozenset()) -> None:
        self._items: Final[dict[S, S]] = {v: v for v in items}

    def intern(self, item: S) -> S:
        """Interns the given item."""
        return self._items.setdefault(item, item)

    def interned(self, item: S) -> S:
        """Returns the interned (aka deduped) item if an equal item is contained, else returns the non-interned item."""
        return self._items.get(item, item)

    def __contains__(self, item: S) -> bool:
        return item in self._items

1130 

1131############################################################################# 

1132class SynchronizedBool: 

1133 """Thread-safe wrapper around a regular bool.""" 

1134 

1135 def __init__(self, val: bool) -> None: 

1136 assert isinstance(val, bool) 

1137 self._lock: Final[threading.Lock] = threading.Lock() 

1138 self._value: bool = val 

1139 

1140 @property 

1141 def value(self) -> bool: 

1142 """Returns the current boolean value.""" 

1143 with self._lock: 

1144 return self._value 

1145 

1146 @value.setter 

1147 def value(self, new_value: bool) -> None: 

1148 """Atomically assign ``new_value``.""" 

1149 with self._lock: 

1150 self._value = new_value 

1151 

1152 def get_and_set(self, new_value: bool) -> bool: 

1153 """Swaps in ``new_value`` and return the previous value.""" 

1154 with self._lock: 

1155 old_value = self._value 

1156 self._value = new_value 

1157 return old_value 

1158 

1159 def compare_and_set(self, expected_value: bool, new_value: bool) -> bool: 

1160 """Sets to ``new_value`` only if current value equals ``expected_value``.""" 

1161 with self._lock: 

1162 eq: bool = self._value == expected_value 

1163 if eq: 

1164 self._value = new_value 

1165 return eq 

1166 

1167 def __bool__(self) -> bool: 

1168 return self.value 

1169 

1170 def __repr__(self) -> str: 

1171 return repr(self.value) 

1172 

1173 def __str__(self) -> str: 

1174 return str(self.value) 

1175 

1176 


#############################################################################
K = TypeVar("K")
V = TypeVar("V")


class SynchronizedDict(Generic[K, V]):
    """Thread-safe wrapper around a regular dict."""

    def __init__(self, val: dict[K, V]) -> None:
        assert isinstance(val, dict)
        self._lock: Final[threading.Lock] = threading.Lock()
        self._dict: Final[dict[K, V]] = val

    def __getitem__(self, key: K) -> V:
        with self._lock:
            return self._dict[key]

    def __setitem__(self, key: K, value: V) -> None:
        with self._lock:
            self._dict[key] = value

    def __delitem__(self, key: K) -> None:
        with self._lock:
            self._dict.pop(key)

    def __contains__(self, key: K) -> bool:
        with self._lock:
            return key in self._dict

    def __len__(self) -> int:
        with self._lock:
            return len(self._dict)

    def __repr__(self) -> str:
        with self._lock:
            return repr(self._dict)

    def __str__(self) -> str:
        with self._lock:
            return str(self._dict)

    def get(self, key: K, default: V | None = None) -> V | None:
        """Returns ``self[key]`` or ``default`` if missing."""
        with self._lock:
            return self._dict.get(key, default)

    def pop(self, key: K, default: V | None = None) -> V | None:
        """Removes ``key`` and returns its value, or ``default`` if missing."""
        with self._lock:
            return self._dict.pop(key, default)

    def clear(self) -> None:
        """Removes all items atomically."""
        with self._lock:
            self._dict.clear()

    def items(self) -> ItemsView[K, V]:
        """Returns a snapshot of dictionary items."""
        with self._lock:
            return self._dict.copy().items()


#############################################################################
class InterruptibleSleep:
    """Provides a sleep(timeout) function that can be interrupted by another thread; The underlying lock is configurable."""

    def __init__(self, lock: threading.Lock | None = None) -> None:
        self._is_stopping: bool = False
        self._lock: Final[threading.Lock] = lock if lock is not None else threading.Lock()
        self._condition: Final[threading.Condition] = threading.Condition(self._lock)

    def sleep(self, duration_nanos: int) -> bool:
        """Delays the current thread by the given number of nanoseconds; Returns True if the sleep got interrupted;
        Equivalent to threading.Event.wait()."""
        end_time_nanos: int = time.monotonic_ns() + duration_nanos
        with self._lock:
            while not self._is_stopping:
                diff_nanos: int = end_time_nanos - time.monotonic_ns()
                if diff_nanos <= 0:
                    return False
                self._condition.wait(timeout=diff_nanos / 1_000_000_000)  # release, then block until notified or timeout
            return True

    def interrupt(self) -> None:
        """Wakes sleeping threads and makes any future sleep()s a no-op; Equivalent to threading.Event.set()."""
        with self._lock:
            if not self._is_stopping:
                self._is_stopping = True
                self._condition.notify_all()

    def reset(self) -> None:
        """Makes any future sleep()s no longer a no-op; Equivalent to threading.Event.clear()."""
        with self._lock:
            self._is_stopping = False
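

# Illustrative sketch, not part of utils.py: a blocking wait that another thread can cut short.
def _demo_interruptible_sleep() -> None:
    sleeper = InterruptibleSleep()
    threading.Timer(0.05, sleeper.interrupt).start()  # interrupt from another thread after 50 milliseconds
    interrupted = sleeper.sleep(10 * 1_000_000_000)  # request a 10 second sleep, expressed in nanoseconds
    assert interrupted  # woke early because interrupt() was called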


#############################################################################
class SynchronousExecutor(Executor):
    """Executor that runs tasks inline in the calling thread, sequentially."""

    def __init__(self) -> None:
        self._shutdown: bool = False

    def submit(self, fn: Callable[..., R_], /, *args: Any, **kwargs: Any) -> Future[R_]:
        """Executes `fn(*args, **kwargs)` immediately and returns its Future."""
        future: Future[R_] = Future()
        if self._shutdown:
            raise RuntimeError("cannot schedule new futures after shutdown")
        try:
            result: R_ = fn(*args, **kwargs)
        except BaseException as exc:
            future.set_exception(exc)
        else:
            future.set_result(result)
        return future

    def shutdown(self, wait: bool = True, *, cancel_futures: bool = False) -> None:
        """Prevents new submissions; no worker resources to join/cleanup."""
        self._shutdown = True

    @classmethod
    def executor_for(cls, max_workers: int) -> Executor:
        """Factory returning a SynchronousExecutor if 0 <= max_workers <= 1; else a ThreadPoolExecutor."""
        return cls() if 0 <= max_workers <= 1 else ThreadPoolExecutor(max_workers=max_workers)

1302 

1303############################################################################# 

1304class _XFinally(contextlib.AbstractContextManager): 

1305 """Context manager ensuring cleanup code executes after ``with`` blocks.""" 

1306 

1307 def __init__(self, cleanup: Callable[[], None]) -> None: 

1308 """Records the callable to run upon exit.""" 

1309 self._cleanup: Final = cleanup # Zero-argument callable executed after the `with` block exits. 

1310 

1311 def __exit__( 

1312 self, exc_type: type[BaseException] | None, exc: BaseException | None, tb: types.TracebackType | None 

1313 ) -> Literal[False]: 

1314 """Runs cleanup and propagate any exceptions appropriately.""" 

1315 try: 

1316 self._cleanup() 

1317 except BaseException as cleanup_exc: 

1318 if exc is None: 

1319 raise # No main error --> propagate cleanup error normally 

1320 # Both failed 

1321 # if sys.version_info >= (3, 11): 

1322 # raise ExceptionGroup("main error and cleanup error", [exc, cleanup_exc]) from None 

1323 # <= 3.10: attach so it shows up in traceback but doesn't mask 

1324 exc.__context__ = cleanup_exc 

1325 return False # reraise original exception 

1326 return False # propagate main exception if any 

1327 

1328 

1329def xfinally(cleanup: Callable[[], None]) -> _XFinally: 

1330 """Usage: with xfinally(lambda: cleanup()): ... 

1331 Returns a context manager that guarantees that cleanup() runs on exit and guarantees any error in cleanup() will never 

1332 mask an exception raised earlier inside the body of the `with` block, while still surfacing both problems when possible. 

1333 

1334 Problem it solves 

1335 ----------------- 

1336 A naive ``try ... finally`` may lose the original exception: 

1337 

1338 try: 

1339 work() 

1340 finally: 

1341 cleanup() # <-- if this raises an exception, it replaces the real error! 

1342 

1343 `_XFinally` preserves exception priority: 

1344 

1345 * Body raises, cleanup succeeds --> original body exception is re-raised. 

1346 * Body raises, cleanup also raises --> re-raises body exception; cleanup exception is linked via ``__context__``. 

1347 * Body succeeds, cleanup raises --> cleanup exception propagates normally. 

1348 

1349 Example: 

1350 ------- 

1351 >>> with xfinally(lambda: release_resources()): # doctest: +SKIP 

1352 ... run_tasks() 

1353 

1354 The single *with* line replaces verbose ``try/except/finally`` boilerplate while preserving full error information. 

1355 """ 

1356 return _XFinally(cleanup)