Coverage for bzfs_main/incremental_send_steps.py: 100%

50 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-11-07 04:44 +0000

1# Copyright 2024 Wolfgang Hoschek AT mac DOT com 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# 

"""Computes efficient incremental ZFS send/receive steps.

This helper derives a minimal sequence of 'zfs send' commands from a list of snapshots and GUIDs to include. It favors fewer
send steps for performance and works around resume and bookmark limitations.
"""

20 

21from __future__ import ( 

22 annotations, 

23) 

24 

25 

def incremental_send_steps(
    src_snapshots: list[str],
    src_guids: list[str],
    included_guids: set[str],
    is_resume: bool,
    force_convert_I_to_i: bool,  # noqa: N803
) -> list[tuple[str, str, str, list[str]]]:
    """Computes steps to incrementally replicate the given src snapshots with the given src_guids such that we include
    intermediate src snapshots that pass the policy specified by --{include,exclude}-snapshot-* (represented here by
    included_guids), using an optimal series of -i/-I send/receive steps that skip excluded src snapshots. The steps are
    optimal in the sense that no solution with fewer steps exists. A step corresponds to a single ZFS send/receive operation.
    Fewer steps translate to better performance, especially when sending many small snapshots. For example, 1 step that sends
    100 small snapshots in a single operation is much faster than 100 steps that each send only 1 such snapshot per ZFS
    send/receive operation.

    Examples that skip hourly snapshots and only include daily snapshots for replication:

    Example A where d1 is the latest common snapshot:
    src = [d1, h1, d2, d3, d4] (d is daily, h is hourly) --> dst = [d1, d2, d3, d4] via
    -i d1:d2 (i.e. exclude h1; '-i' and ':' indicate 'skip intermediate snapshots')
    -I d2-d4 (i.e. also include d3; '-I' and '-' indicate 'include intermediate snapshots')

    Example B where h0 is the latest common snapshot:
    src = [h0, m0, d1, h1, d2, d3, d4] (d is daily, h is hourly) --> dst = [h0, d1, d2, d3, d4] via
    -i h0:d1 (i.e. exclude m0; '-i' and ':' indicate 'skip intermediate snapshots')
    -i d1:d2 (i.e. exclude h1; '-i' and ':' indicate 'skip intermediate snapshots')
    -I d2-d4 (i.e. also include d3; '-I' and '-' indicate 'include intermediate snapshots')

    Example C where h0 is the latest common snapshot:
    src = [h0, m0] (d is daily, h is hourly) --> dst = [h0] via returning an empty list

    * The force_convert_I_to_i param is necessary as a work-around for https://github.com/openzfs/zfs/issues/16394.
    * The 'zfs send' CLI with a bookmark as starting snapshot does not (yet) support including intermediate
      src_snapshots via -I flag per https://github.com/openzfs/zfs/issues/12415. Thus, if the replication source
      is a bookmark we convert a -I step to a -i step followed by zero or more -i/-I steps.
    * The is_resume param is necessary as 'zfs send -t' does not support sending more than a single snapshot on
      resuming a previously interrupted 'zfs receive -s'; for example, see https://github.com/openzfs/zfs/issues/16764.
      Thus, here too, we convert a -I step to a -i step followed by zero or more -i/-I steps.

    Args:
        src_snapshots: [0] = the latest common snapshot (which may be a bookmark), followed by all src snapshots
            (that are not a bookmark) that are more recent than that.
        src_guids: The guid of each item in src_snapshots; must have the same length as src_snapshots.
        included_guids: The guid of each snapshot (not bookmark!) that is included by --include/exclude-snapshot-*.
        is_resume: If True, the very first emitted step is restricted to a single-snapshot -i step.
        force_convert_I_to_i: If True, every -I run is emitted as a chain of single-snapshot -i steps instead.

    Returns:
        A list of (flag, from_snapshot, to_snapshot, to_snapshots) tuples where flag is "-i" or "-I". The 4th tuple
        item lists the to_snapshots for that step, aiding trace/logging. The list is empty if there is nothing to send.
    """

    def append_run(i: int) -> int:
        """Appends the run of included snapshots src_snapshots[start..i] as one or more send steps.

        Returns the index from which the caller shall continue; the caller adds 1 to it, so returning ``start``
        makes the main loop re-examine the remainder of the run beginning at ``start + 1``.
        """
        # Only the very first emitted step can be the continuation of an interrupted 'zfs receive -s'.
        is_not_resume: bool = len(steps) > 0 or not is_resume
        starts_at_snapshot: bool = "@" in src_snapshots[start]  # bookmarks contain '#' rather than '@'
        if i - start > 1 and (not force_convert_I_to_i) and starts_at_snapshot and is_not_resume:
            steps.append(("-I", src_snapshots[start], src_snapshots[i], src_snapshots[start + 1 : i + 1]))
        elif starts_at_snapshot and is_not_resume:
            for j in range(start, i):  # convert -I step to -i steps
                steps.append(("-i", src_snapshots[j], src_snapshots[j + 1], src_snapshots[j + 1 : j + 2]))
        else:  # it's a bookmark src or zfs send -t; convert -I step to -i step followed by zero or more -i/-I steps
            steps.append(("-i", src_snapshots[start], src_snapshots[start + 1], src_snapshots[start + 1 : start + 2]))
            i = start + 1  # the rest of the run is handled by subsequent iterations of the main loop
        return i - 1

    assert len(src_guids) == len(src_snapshots)
    assert isinstance(included_guids, set)
    steps: list[tuple[str, str, str, list[str]]] = []
    guids: list[str] = src_guids
    n = len(guids)
    i = 0
    start = i
    while i < n and guids[i] not in included_guids:  # skip hourlies
        i += 1
    if i < n and i != start:
        # the latest common snapshot is an hourly or a bookmark, followed by zero or more hourlies, followed by a daily
        steps.append(("-i", src_snapshots[start], src_snapshots[i], src_snapshots[i : i + 1]))

    while i < n:
        assert guids[i] in included_guids  # it's a daily
        start = i
        i += 1
        while i < n and guids[i] in included_guids:  # skip dailies
            i += 1
        if i < n:
            if i - start == 1:
                # it's a single daily (that was already replicated) followed by an hourly
                i += 1
                while i < n and guids[i] not in included_guids:  # skip hourlies
                    i += 1
                if i < n:
                    assert start != i
                    steps.append(("-i", src_snapshots[start], src_snapshots[i], src_snapshots[i : i + 1]))
                    i -= 1  # compensate for the unconditional increment below so the next pass starts at this daily
            else:  # it's a run of more than one daily
                i -= 1
                assert start != i
                i = append_run(i)
        else:  # finish up run of trailing dailies
            i -= 1
            if start != i:
                i = append_run(i)
        i += 1
    return steps

124 

125 

126def send_step_to_str(step: tuple[str, str, str]) -> str: 

127 """Returns a readable representation of an incremental send step.""" 

128 # return str(step[1]) + ('-' if step[0] == '-I' else ':') + str(step[2]) 

129 return str(step)