Coverage for bzfs_main/incremental_send_steps.py: 100%
46 statements
« prev ^ index » next coverage.py v7.10.2, created at 2025-08-06 13:30 +0000
« prev ^ index » next coverage.py v7.10.2, created at 2025-08-06 13:30 +0000
# Copyright 2024 Wolfgang Hoschek AT mac DOT com
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Computes efficient incremental ZFS send/receive steps.

This helper derives a minimal sequence of 'zfs send' commands from a list of snapshots and GUIDs to include. It favors fewer
send steps for performance and works around resume and bookmark limitations.
"""
21from __future__ import annotations
def incremental_send_steps(
    src_snapshots: list[str],
    src_guids: list[str],
    included_guids: set[str],
    is_resume: bool,
    force_convert_I_to_i: bool,  # noqa: N803
) -> list[tuple[str, str, str, list[str]]]:
    """Computes steps to incrementally replicate the given src snapshots with the given src_guids such that we
    include intermediate src snapshots that pass the policy specified by --{include,exclude}-snapshot-*
    (represented here by included_guids), using an optimal series of -i/-I send/receive steps that skip
    excluded src snapshots. The steps are optimal in the sense that no solution with fewer steps exists. A step
    corresponds to a single ZFS send/receive operation. Fewer steps translate to better performance, especially
    when sending many small snapshots. For example, 1 step that sends 100 small snapshots in a single operation is
    much faster than 100 steps that each send only 1 such snapshot per ZFS send/receive operation.

    Example: skip hourly snapshots and only include daily snapshots for replication
    Example: [d1, h1, d2, d3, d4] (d is daily, h is hourly) --> [d1, d2, d3, d4] via
    -i d1:d2 (i.e. exclude h1; '-i' and ':' indicate 'skip intermediate snapshots')
    -I d2-d4 (i.e. also include d3; '-I' and '-' indicate 'include intermediate snapshots')

    * The force_convert_I_to_i param is necessary as a work-around for https://github.com/openzfs/zfs/issues/16394
    * The 'zfs send' CLI with a bookmark as starting snapshot does not (yet) support including intermediate
      src_snapshots via -I flag per https://github.com/openzfs/zfs/issues/12415. Thus, if the replication source
      is a bookmark we convert a -I step to a -i step followed by zero or more -i/-I steps.
    * The is_resume param is necessary as 'zfs send -t' does not support sending more than a single snapshot
      on resuming a previously interrupted 'zfs receive -s'. Thus, here too, we convert a -I step to a -i step
      followed by zero or more -i/-I steps.

    Args:
        src_snapshots: Source snapshot (or bookmark) names; an entry without "@" is treated as a bookmark.
        src_guids: GUIDs parallel to src_snapshots; must have the same length.
        included_guids: GUIDs of the entries that shall be replicated.
        is_resume: If true, the very first emitted step never uses -I and sends exactly one snapshot.
        force_convert_I_to_i: If true, every -I step is expanded into consecutive -i steps.

    Returns:
        A list of steps ``(flag, from_name, to_name, sent)`` where flag is "-i" or "-I" and ``sent`` is the slice of
        src_snapshots after from_name up to and including to_name for "-I", or just ``[to_name]`` for "-i".
    """

    def append_run(i: int) -> int:
        """Appends the run of included entries src_snapshots[start..i] as one or more send steps.

        Returns the index the caller continues from; the caller's trailing ``i += 1`` then lands on the entry
        that becomes the start of the next iteration.
        """
        # Only the very first emitted step is subject to the 'zfs send -t' single-snapshot restriction.
        is_not_resume: bool = len(steps) > 0 or not is_resume
        starts_at_snapshot: bool = "@" in src_snapshots[start]
        if i - start > 1 and (not force_convert_I_to_i) and starts_at_snapshot and is_not_resume:
            steps.append(("-I", src_snapshots[start], src_snapshots[i], src_snapshots[start + 1 : i + 1]))
        elif starts_at_snapshot and is_not_resume:
            for j in range(start, i):  # convert -I step to -i steps
                steps.append(("-i", src_snapshots[j], src_snapshots[j + 1], src_snapshots[j + 1 : j + 2]))
        else:  # it's a bookmark src or zfs send -t; convert -I step to -i step followed by zero or more -i/-I steps
            steps.append(("-i", src_snapshots[start], src_snapshots[start + 1], src_snapshots[start + 1 : start + 2]))
            i = start + 1  # the remainder of the run is handled by the next iteration of the caller's loop
        return i - 1

    assert len(src_guids) == len(src_snapshots)
    steps: list[tuple[str, str, str, list[str]]] = []
    guids: list[str] = src_guids
    n = len(guids)
    i = 0
    while i < n and guids[i] not in included_guids:  # skip hourlies
        i += 1

    while i < n:
        assert guids[i] in included_guids  # it's a daily
        start = i
        i += 1
        while i < n and guids[i] in included_guids:  # skip dailies
            i += 1
        if i < n:
            if i - start == 1:
                # it's a single daily (that was already replicated) followed by an hourly
                i += 1
                while i < n and guids[i] not in included_guids:  # skip hourlies
                    i += 1
                if i < n:
                    assert start != i
                    # jump over the excluded hourlies straight to the next daily
                    steps.append(("-i", src_snapshots[start], src_snapshots[i], src_snapshots[i : i + 1]))
                    i -= 1  # compensate for the trailing increment so that this daily starts the next iteration
            else:  # it's a run of more than one daily
                i -= 1
                assert start != i
                i = append_run(i)
        else:  # finish up run of trailing dailies
            i -= 1
            if start != i:
                i = append_run(i)
        i += 1
    return steps
def send_step_to_str(step: tuple[str, str, str] | tuple[str, str, str, list[str]]) -> str:
    """Returns a readable representation of an incremental send step.

    Accepts both the bare ``(flag, from_name, to_name)`` form and the 4-tuple form produced by
    incremental_send_steps(), which additionally carries the list of sent snapshots. The result is simply the
    tuple's standard string form.
    """
    return str(step)