Coverage for bzfs_main/incremental_send_steps.py: 100%

46 statements  

« prev     ^ index     » next       coverage.py v7.10.2, created at 2025-08-06 13:30 +0000

1# Copyright 2024 Wolfgang Hoschek AT mac DOT com 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# 

15"""Computes efficient incremental ZFS send/receive steps. 

16 

17This helper derives a minimal sequence of 'zfs send' commands from a list of snapshots and GUIDs to include. It favors fewer 

18send steps for performance and works around resume and bookmark limitations. 

19""" 

20 

21from __future__ import annotations 

22 

23 

def incremental_send_steps(
    src_snapshots: list[str],
    src_guids: list[str],
    included_guids: set[str],
    is_resume: bool,
    force_convert_I_to_i: bool,  # noqa: N803
) -> list[tuple[str, str, str, list[str]]]:
    """Plans the fewest possible 'zfs send' -i/-I steps that replicate exactly the included src snapshots.

    src_snapshots and src_guids are parallel lists; a snapshot is "included" if its GUID is in included_guids (which
    stands for the policy given by --{include,exclude}-snapshot-*). Each returned step is a tuple
    (flag, from_snapshot, to_snapshot, snapshots_sent_by_this_step) and corresponds to a single ZFS send/receive
    operation. No solution with fewer steps exists; fewer steps means better performance, in particular with many small
    snapshots: one step that sends 100 small snapshots is much faster than 100 steps that send one snapshot each.

    Example: replicate dailies only and skip hourlies (d is daily, h is hourly):
        [d1, h1, d2, d3, d4] --> [d1, d2, d3, d4] via
        -i d1:d2 (excludes h1; '-i' and ':' mean 'skip intermediate snapshots')
        -I d2-d4 (also includes d3; '-I' and '-' mean 'include intermediate snapshots')

    Situations in which a -I step is replaced by a -i step followed by zero or more -i/-I steps:
    * force_convert_I_to_i is set; this is a work-around for https://github.com/openzfs/zfs/issues/16394
    * The step would start at a bookmark (name without '@'); 'zfs send' does not (yet) support -I with a bookmark as
      starting point, per https://github.com/openzfs/zfs/issues/12415
    * is_resume is set and no step has been planned yet; 'zfs send -t' cannot send more than a single snapshot when
      resuming a previously interrupted 'zfs receive -s'
    """
    assert len(src_guids) == len(src_snapshots)
    assert len(included_guids) >= 0
    plan: list[tuple[str, str, str, list[str]]] = []
    total = len(src_guids)

    def is_included(idx: int) -> bool:
        """Returns True if the snapshot at position idx passes the include policy."""
        return src_guids[idx] in included_guids

    def emit_run(first: int, last: int) -> int:
        """Plans the steps for the run of included snapshots first..last (first < last); returns the position at which
        the caller shall resume scanning."""
        origin = src_snapshots[first]
        if "@" not in origin or (is_resume and not plan):
            # bookmark origin or 'zfs send -t': only a single-snapshot -i step is possible here; the remainder of the
            # run is planned on the next iteration of the caller, starting from the snapshot just sent
            plan.append(("-i", origin, src_snapshots[first + 1], src_snapshots[first + 1 : first + 2]))
            return first + 1
        if last - first > 1 and not force_convert_I_to_i:
            plan.append(("-I", origin, src_snapshots[last], src_snapshots[first + 1 : last + 1]))
        else:  # spell the run out as a chain of -i steps between neighbours
            for k in range(first, last):
                plan.append(("-i", src_snapshots[k], src_snapshots[k + 1], src_snapshots[k + 1 : k + 2]))
        return last

    pos = 0
    while pos < total and not is_included(pos):  # skip leading hourlies
        pos += 1

    while pos < total:
        assert is_included(pos)  # every iteration starts at a daily
        first = pos
        end = first + 1
        while end < total and is_included(end):  # extend the run of dailies
            end += 1
        last = end - 1
        if last > first:  # a run of more than one daily, trailing or followed by an hourly
            pos = emit_run(first, last)
            continue
        if end >= total:  # a lone trailing daily; there is nothing left to send from it
            break
        # a lone daily followed by an hourly: jump over the hourlies straight to the next daily, if any
        nxt = end + 1
        while nxt < total and not is_included(nxt):
            nxt += 1
        if nxt < total:
            plan.append(("-i", src_snapshots[first], src_snapshots[nxt], src_snapshots[nxt : nxt + 1]))
        pos = nxt
    return plan

102 

103 

def send_step_to_str(step: tuple[str, str, str] | tuple[str, str, str, list[str]]) -> str:
    """Returns a readable representation of an incremental send step.

    Accepts both the legacy 3-tuple (flag, from_snapshot, to_snapshot) and the 4-tuple that additionally carries the list
    of snapshots sent by the step. The result is simply the tuple's own str() form.
    """
    return str(step)