Package duplicity :: Module selection
[hide private]
[frames | no frames]

Source Code for Module duplicity.selection

  1  # -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*- 
  2  # 
  3  # Copyright 2002 Ben Escoto <ben@emerose.org> 
  4  # Copyright 2007 Kenneth Loafman <kenneth@loafman.com> 
  5  # 
  6  # This file is part of duplicity. 
  7  # 
  8  # Duplicity is free software; you can redistribute it and/or modify it 
  9  # under the terms of the GNU General Public License as published by the 
 10  # Free Software Foundation; either version 2 of the License, or (at your 
 11  # option) any later version. 
 12  # 
 13  # Duplicity is distributed in the hope that it will be useful, but 
 14  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
 16  # General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with duplicity; if not, write to the Free Software Foundation, 
 20  # Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 21   
 22  import os #@UnusedImport 
 23  import re #@UnusedImport 
 24  import stat #@UnusedImport 
 25  import sys 
 26   
 27  from duplicity.path import * #@UnusedWildImport 
 28  from duplicity import log #@Reimport 
 29  from duplicity import globals #@Reimport 
 30  from duplicity import diffdir 
 31  from duplicity import util #@Reimport 
 32   
 33  """Iterate exactly the requested files in a directory 
 34   
 35  Parses includes and excludes to yield correct files.  More 
 36  documentation on what this code does can be found on the man page. 
 37   
 38  """ 
 39   
class SelectError(Exception):
    """Base class for errors raised while setting up or using Select."""
43
class FilePrefixError(SelectError):
    """Raised when a file specification does not begin with the root prefix."""
47
class GlobbingError(SelectError):
    """Raised when a glob string cannot be parsed."""
51 52
class Select:
    """Iterate appropriate Paths in given directory

    This class acts as an iterator on account of its next() method
    (installed by set_iter).  Basically, it just goes through all the
    files in a directory in order (depth-first) and subjects each file
    to a bunch of tests (selection functions) in order.  The first test
    that includes or excludes the file means that the file gets
    included (iterated) or excluded.  The default is include, so with
    no tests we would just iterate all the files in the directory in
    order.

    The one complication to this is that sometimes we don't know
    whether or not to include a directory until we examine its
    contents.  For instance, if we want to include all the **.py
    files.  If /home/ben/foo.py exists, we should also include /home
    and /home/ben, but if these directories contain no **.py files,
    they shouldn't be included.  For this reason, a test may not
    include or exclude a directory, but merely "scan" it.  If later a
    file in the directory gets included, so does the directory.

    As mentioned above, each test takes the form of a selection
    function.  The selection function takes a path, and returns:

    None - means the test has nothing to say about the related file
    0 - the file is excluded by the test
    1 - the file is included
    2 - the test says the file (must be directory) should be scanned

    Also, a selection function f has a variable f.exclude which should
    be true iff f could potentially exclude some file.  This is used
    to signal an error if the last function only includes, which would
    be redundant and presumably isn't what the user intends.  That
    error message also reads f.name, a human-readable description of
    the selection function.

    """
    # This re should not match normal filenames, but usually just globs.
    # A string counts as a glob when it contains one of "*", "?" or "[",
    # or when it starts with "ignorecase:" (matched case-insensitively).
    glob_re = re.compile("(.*[*?[]|ignorecase\\:)", re.I | re.S)
90 - def __init__(self, path):
91 """Initializer, called with Path of root directory""" 92 assert isinstance(path, Path), str(path) 93 self.selection_functions = [] 94 self.rootpath = path 95 self.prefix = self.rootpath.name
96
97 - def set_iter(self):
98 """Initialize generator, prepare to iterate.""" 99 self.rootpath.setdata() # this may have changed since Select init 100 self.iter = self.Iterate(self.rootpath) 101 self.next = self.iter.next 102 self.__iter__ = lambda: self 103 return self
104
    def Iterate(self, path):
        """Return iterator yielding paths in path

        This function looks a bit more complicated than it needs to be
        because it avoids extra recursion (and no extra function calls
        for non-directory files) while still doing the "directory
        scanning" bit.

        The root path itself is yielded first (if it exists).  After
        that, directories are walked with an explicit stack of
        generators instead of recursion.  Directories that Select()
        marks as "scan" (2) are held back on delayed_path_stack and are
        only yielded once something beneath them is actually included.

        """
        def error_handler(exc, path, filename):
            # Handed to robust.check_common_error below; presumably it is
            # invoked when building the child Path fails -- confirm
            # against duplicity.robust.  It only logs and yields None.
            fullpath = os.path.join(path.name, filename)
            try:
                mode = os.stat(fullpath)[stat.ST_MODE]
                if stat.S_ISSOCK(mode):
                    log.Info(_("Skipping socket %s") % fullpath,
                             log.InfoCode.skipping_socket,
                             util.escape(fullpath))
                else:
                    log.Warn(_("Error initializing file %s") % fullpath,
                             log.WarningCode.cannot_iterate,
                             util.escape(fullpath))
            except OSError:
                # Even the stat failed, so the file type cannot be reported.
                log.Warn(_("Error accessing possibly locked file %s") % fullpath,
                         log.WarningCode.cannot_stat,
                         util.escape(fullpath))
            return None

        def diryield(path):
            """Generate relevant files in directory path

            Returns (path, num) where num == 0 means path should be
            generated normally, num == 1 means the path is a directory
            and should be included iff something inside is included.

            """
            # todo: get around circular dependency issue by importing here
            from duplicity import robust #@Reimport
            for filename in robust.listpath(path):
                new_path = robust.check_common_error(
                    error_handler, Path.append, (path, filename))
                # make sure file is read accessible
                if (new_path and new_path.type in ["reg", "dir"]
                    and not os.access(new_path.name, os.R_OK)):
                    log.Warn(_("Error accessing possibly locked file %s") % new_path.name,
                             log.WarningCode.cannot_read,
                             util.escape(new_path.name))
                    # Count the unreadable entry when statistics are being kept.
                    if diffdir.stats:
                        diffdir.stats.Errors +=1
                    new_path = None
                elif new_path:
                    s = self.Select(new_path)
                    if s == 1:
                        yield (new_path, 0)
                    elif s == 2 and new_path.isdir():
                        # "scan" only makes sense for directories
                        yield (new_path, 1)

        if not path.type:
            # base doesn't exist
            log.Warn(_("Warning: base %s doesn't exist, continuing") %
                     path.name)
            return
        log.Debug(_("Selecting %s") % path.name)
        yield path
        if not path.isdir():
            return
        # One generator per directory currently being walked, innermost last.
        diryield_stack = [diryield(path)]
        # Scanned-but-not-yet-included directories.  Any included entry
        # clears this list, so its entries always belong to the innermost
        # generators on diryield_stack.
        delayed_path_stack = []

        while diryield_stack:
            try:
                subpath, val = diryield_stack[-1].next()
            except StopIteration:
                # Innermost directory is exhausted; if it was still delayed
                # then nothing inside it was included, so drop it unseen.
                diryield_stack.pop()
                if delayed_path_stack:
                    delayed_path_stack.pop()
                continue
            if val == 0:
                if delayed_path_stack:
                    # Something is included: emit every pending ancestor
                    # first, outermost to innermost.
                    for delayed_path in delayed_path_stack:
                        log.Log(_("Selecting %s") % delayed_path.name, 6)
                        yield delayed_path
                    del delayed_path_stack[:]
                log.Debug(_("Selecting %s") % subpath.name)
                yield subpath
                if subpath.isdir():
                    diryield_stack.append(diryield(subpath))
            elif val == 1:
                # Descend into the directory but hold it back for now.
                delayed_path_stack.append(subpath)
                diryield_stack.append(diryield(subpath))
195 - def Select(self, path):
196 """Run through the selection functions and return dominant val 0/1/2""" 197 if not self.selection_functions: 198 return 1 199 scan_pending = False 200 for sf in self.selection_functions[:-1]: 201 result = sf(path) 202 if result is 2: 203 scan_pending = True 204 if result in [0, 1]: 205 return result 206 if scan_pending: 207 return 2 208 sf = self.selection_functions[-1] 209 result = sf(path) 210 if result is not None: 211 return result 212 else: 213 return 1
214
215 - def ParseArgs(self, argtuples, filelists):
216 """Create selection functions based on list of tuples 217 218 The tuples are created when the initial commandline arguments 219 are read. They have the form (option string, additional 220 argument) except for the filelist tuples, which should be 221 (option-string, (additional argument, filelist_fp)). 222 223 """ 224 filelists_index = 0 225 try: 226 for opt, arg in argtuples: 227 if opt == "--exclude": 228 self.add_selection_func(self.glob_get_sf(arg, 0)) 229 elif opt == "--exclude-if-present": 230 self.add_selection_func(self.present_get_sf(arg, 0)) 231 elif opt == "--exclude-device-files": 232 self.add_selection_func(self.devfiles_get_sf()) 233 elif opt == "--exclude-filelist": 234 self.add_selection_func(self.filelist_get_sf( 235 filelists[filelists_index], 0, arg)) 236 filelists_index += 1 237 elif opt == "--exclude-globbing-filelist": 238 map(self.add_selection_func, 239 self.filelist_globbing_get_sfs(filelists[filelists_index], 0, arg)) 240 filelists_index += 1 241 elif opt == "--exclude-other-filesystems": 242 self.add_selection_func(self.other_filesystems_get_sf(0)) 243 elif opt == "--exclude-regexp": 244 self.add_selection_func(self.regexp_get_sf(arg, 0)) 245 elif opt == "--include": 246 self.add_selection_func(self.glob_get_sf(arg, 1)) 247 elif opt == "--include-filelist": 248 self.add_selection_func(self.filelist_get_sf( 249 filelists[filelists_index], 1, arg)) 250 filelists_index += 1 251 elif opt == "--include-globbing-filelist": 252 map(self.add_selection_func, 253 self.filelist_globbing_get_sfs(filelists[filelists_index], 1, arg)) 254 filelists_index += 1 255 elif opt == "--include-regexp": 256 self.add_selection_func(self.regexp_get_sf(arg, 1)) 257 else: 258 assert 0, "Bad selection option %s" % opt 259 except SelectError, e: 260 self.parse_catch_error(e) 261 assert filelists_index == len(filelists) 262 self.parse_last_excludes()
263
    def parse_catch_error(self, exc):
        """Deal with selection error exc

        A FilePrefixError or GlobbingError is reported through
        log.FatalError with the matching error code.  Anything else is
        re-raised with a bare "raise", so this method has to be called
        from inside the except block that caught exc.
        """
        if isinstance(exc, FilePrefixError):
            # NOTE(review): this literal was reconstructed from a listing
            # that collapsed whitespace; confirm line indentation and
            # spacing against the translated message id before merging.
            log.FatalError(_(
"""Fatal Error: The file specification
%s
cannot match any files in the base directory
%s
Useful file specifications begin with the base directory or some
pattern (such as '**') which matches the base directory.""") %
            (exc, self.prefix), log.ErrorCode.file_prefix_error)
        elif isinstance(exc, GlobbingError):
            log.FatalError(_("Fatal Error while processing expression\n"
                             "%s") % exc, log.ErrorCode.globbing_error)
        else:
            raise
280
    def parse_last_excludes(self):
        """Exit with error if last selection function isn't an exclude

        Nothing happens when there are no selection functions.
        Otherwise the last function's exclude attribute must be true;
        if it is not, a fatal error naming that function is logged,
        because a trailing include-only rule is redundant with the
        include-by-default behaviour.
        """
        if (self.selection_functions and
            not self.selection_functions[-1].exclude):
            # NOTE(review): this literal was reconstructed from a listing
            # that collapsed whitespace; confirm line indentation and
            # spacing against the translated message id before merging.
            log.FatalError(_(
"""Last selection expression:
%s
only specifies that files be included. Because the default is to
include all files, the expression is redundant. Exiting because this
probably isn't what you meant.""") %
            (self.selection_functions[-1].name,),
            log.ErrorCode.redundant_inclusion)
293
294 - def add_selection_func(self, sel_func, add_to_start = None):
295 """Add another selection function at the end or beginning""" 296 if add_to_start: 297 self.selection_functions.insert(0, sel_func) 298 else: 299 self.selection_functions.append(sel_func)
300
301 - def filelist_get_sf(self, filelist_fp, inc_default, filelist_name):
302 """Return selection function by reading list of files 303 304 The format of the filelist is documented in the man page. 305 filelist_fp should be an (open) file object. 306 inc_default should be true if this is an include list, 307 false for an exclude list. 308 filelist_name is just a string used for logging. 309 310 """ 311 log.Notice(_("Reading filelist %s") % filelist_name) 312 tuple_list, something_excluded = \ 313 self.filelist_read(filelist_fp, inc_default, filelist_name) 314 log.Notice(_("Sorting filelist %s") % filelist_name) 315 tuple_list.sort() 316 i = [0] # We have to put index in list because of stupid scoping rules 317 318 def selection_function(path): 319 while 1: 320 if i[0] >= len(tuple_list): 321 return None 322 include, move_on = \ 323 self.filelist_pair_match(path, tuple_list[i[0]]) 324 if move_on: 325 i[0] += 1 326 if include is None: 327 continue # later line may match 328 return include
329 330 selection_function.exclude = something_excluded or inc_default == 0 331 selection_function.name = "Filelist: " + filelist_name 332 return selection_function 333
334 - def filelist_read(self, filelist_fp, include, filelist_name):
335 """Read filelist from fp, return (tuplelist, something_excluded)""" 336 prefix_warnings = [0] 337 def incr_warnings(exc): 338 """Warn if prefix is incorrect""" 339 prefix_warnings[0] += 1 340 if prefix_warnings[0] < 6: 341 log.Warn(_("Warning: file specification '%s' in filelist %s\n" 342 "doesn't start with correct prefix %s. Ignoring.") % 343 (exc, filelist_name, self.prefix)) 344 if prefix_warnings[0] == 5: 345 log.Warn(_("Future prefix errors will not be logged."))
346 347 something_excluded, tuple_list = None, [] 348 separator = globals.null_separator and "\0" or "\n" 349 for line in filelist_fp.read().split(separator): 350 if not line: 351 continue # skip blanks 352 try: 353 tuple = self.filelist_parse_line(line, include) 354 except FilePrefixError, exc: 355 incr_warnings(exc) 356 continue 357 tuple_list.append(tuple) 358 if not tuple[1]: 359 something_excluded = 1 360 if filelist_fp not in (sys.stdin,) and filelist_fp.close(): 361 log.Warn(_("Error closing filelist %s") % filelist_name) 362 return (tuple_list, something_excluded) 363
364 - def filelist_parse_line(self, line, include):
365 """Parse a single line of a filelist, returning a pair 366 367 pair will be of form (index, include), where index is another 368 tuple, and include is 1 if the line specifies that we are 369 including a file. The default is given as an argument. 370 prefix is the string that the index is relative to. 371 372 """ 373 line = line.strip() 374 if line[:2] == "+ ": 375 # Check for "+ "/"- " syntax 376 include = 1 377 line = line[2:] 378 elif line[:2] == "- ": 379 include = 0 380 line = line[2:] 381 382 if not line.startswith(self.prefix): 383 raise FilePrefixError(line) 384 line = line[len(self.prefix):] # Discard prefix 385 index = tuple(filter(lambda x: x, line.split("/"))) # remove empties 386 return (index, include)
387
388 - def filelist_pair_match(self, path, pair):
389 """Matches a filelist tuple against a path 390 391 Returns a pair (include, move_on). include is None if the 392 tuple doesn't match either way, and 0/1 if the tuple excludes 393 or includes the path. 394 395 move_on is true if the tuple cannot match a later index, and 396 so we should move on to the next tuple in the index. 397 398 """ 399 index, include = pair 400 if include == 1: 401 if index < path.index: 402 return (None, True) 403 if index == path.index: 404 return (1, True) 405 elif index[:len(path.index)] == path.index: 406 return (1, False) # /foo/bar implicitly includes /foo 407 else: 408 return (None, False) # path greater, not initial sequence 409 elif include == 0: 410 if path.index[:len(index)] == index: 411 return (0, False) # /foo implicitly excludes /foo/bar 412 elif index < path.index: 413 return (None, True) 414 else: 415 return (None, False) # path greater, not initial sequence 416 else: 417 assert 0, "Include is %s, should be 0 or 1" % (include,)
418
419 - def filelist_globbing_get_sfs(self, filelist_fp, inc_default, list_name):
420 """Return list of selection functions by reading fileobj 421 422 filelist_fp should be an open file object 423 inc_default is true iff this is an include list 424 list_name is just the name of the list, used for logging 425 See the man page on --[include/exclude]-globbing-filelist 426 427 """ 428 log.Notice(_("Reading globbing filelist %s") % list_name) 429 separator = globals.null_separator and "\0" or "\n" 430 for line in filelist_fp.read().split(separator): 431 if not line: # skip blanks 432 continue 433 if line[0] == "#": # skip comments 434 continue 435 if line[:2] == "+ ": 436 yield self.glob_get_sf(line[2:], 1) 437 elif line[:2] == "- ": 438 yield self.glob_get_sf(line[2:], 0) 439 else: 440 yield self.glob_get_sf(line, inc_default)
441
442 - def other_filesystems_get_sf(self, include):
443 """Return selection function matching files on other filesystems""" 444 assert include == 0 or include == 1 445 root_devloc = self.rootpath.getdevloc() 446 def sel_func(path): 447 if path.exists() and path.getdevloc() != root_devloc: 448 return include 449 else: 450 return None
451 sel_func.exclude = not include 452 sel_func.name = "Match other filesystems" 453 return sel_func 454
455 - def regexp_get_sf(self, regexp_string, include):
456 """Return selection function given by regexp_string""" 457 assert include == 0 or include == 1 458 try: 459 regexp = re.compile(regexp_string) 460 except Exception: 461 log.Warn(_("Error compiling regular expression %s") % regexp_string) 462 raise 463 464 def sel_func(path): 465 if regexp.search(path.name): 466 return include 467 else: 468 return None
469 470 sel_func.exclude = not include 471 sel_func.name = "Regular expression: %s" % regexp_string 472 return sel_func 473
474 - def devfiles_get_sf(self):
475 """Return a selection function to exclude all dev files""" 476 if self.selection_functions: 477 log.Warn(_("Warning: exclude-device-files is not the first " 478 "selector.\nThis may not be what you intended")) 479 def sel_func(path): 480 if path.isdev(): 481 return 0 482 else: 483 return None
484 sel_func.exclude = 1 485 sel_func.name = "Exclude device files" 486 return sel_func 487
488 - def glob_get_sf(self, glob_str, include):
489 """Return selection function given by glob string""" 490 assert include == 0 or include == 1 491 if glob_str == "**": 492 sel_func = lambda path: include 493 elif not self.glob_re.match(glob_str): 494 # normal file 495 sel_func = self.glob_get_filename_sf(glob_str, include) 496 else: 497 sel_func = self.glob_get_normal_sf(glob_str, include) 498 499 sel_func.exclude = not include 500 sel_func.name = "Command-line %s glob: %s" % \ 501 (include and "include" or "exclude", glob_str) 502 return sel_func
503
504 - def present_get_sf(self, filename, include):
505 """Return selection function given by existence of a file in a directory""" 506 assert include == 0 or include == 1 507 508 def exclude_sel_func(path): 509 if path.append(filename).exists(): 510 return 0 511 else: 512 return None
513 514 if include == 0: 515 sel_func = exclude_sel_func 516 else: 517 log.FatalError("--include-if-present not implemented (would it make sense?).", 518 log.ErrorCode.not_implemented) 519 520 sel_func.exclude = not include 521 sel_func.name = "Command-line %s filename: %s" % \ 522 (include and "include-if-present" or "exclude-if-present", filename) 523 return sel_func 524
525 - def glob_get_filename_sf(self, filename, include):
526 """Get a selection function given a normal filename 527 528 Some of the parsing is better explained in 529 filelist_parse_line. The reason this is split from normal 530 globbing is things are a lot less complicated if no special 531 globbing characters are used. 532 533 """ 534 if not filename.startswith(self.prefix): 535 raise FilePrefixError(filename) 536 index = tuple(filter(lambda x: x, 537 filename[len(self.prefix):].split("/"))) 538 return self.glob_get_tuple_sf(index, include)
539
540 - def glob_get_tuple_sf(self, tuple, include):
541 """Return selection function based on tuple""" 542 def include_sel_func(path): 543 if (path.index == tuple[:len(path.index)] or 544 path.index[:len(tuple)] == tuple): 545 return 1 # /foo/bar implicitly matches /foo, vice-versa 546 else: 547 return None
548 549 def exclude_sel_func(path): 550 if path.index[:len(tuple)] == tuple: 551 return 0 # /foo excludes /foo/bar, not vice-versa 552 else: 553 return None 554 555 if include == 1: 556 sel_func = include_sel_func 557 elif include == 0: 558 sel_func = exclude_sel_func 559 sel_func.exclude = not include 560 sel_func.name = "Tuple select %s" % (tuple,) 561 return sel_func 562
563 - def glob_get_normal_sf(self, glob_str, include):
564 """Return selection function based on glob_str 565 566 The basic idea is to turn glob_str into a regular expression, 567 and just use the normal regular expression. There is a 568 complication because the selection function should return '2' 569 (scan) for directories which may contain a file which matches 570 the glob_str. So we break up the glob string into parts, and 571 any file which matches an initial sequence of glob parts gets 572 scanned. 573 574 Thanks to Donovan Baarda who provided some code which did some 575 things similar to this. 576 577 """ 578 if glob_str.lower().startswith("ignorecase:"): 579 re_comp = lambda r: re.compile(r, re.I | re.S) 580 glob_str = glob_str[len("ignorecase:"):] 581 else: 582 re_comp = lambda r: re.compile(r, re.S) 583 584 # matches what glob matches and any files in directory 585 glob_comp_re = re_comp("^%s($|/)" % self.glob_to_re(glob_str)) 586 587 if glob_str.find("**") != -1: 588 glob_str = glob_str[:glob_str.find("**")+2] # truncate after ** 589 590 scan_comp_re = re_comp("^(%s)$" % 591 "|".join(self.glob_get_prefix_res(glob_str))) 592 593 def include_sel_func(path): 594 if glob_comp_re.match(path.name): 595 return 1 596 elif scan_comp_re.match(path.name): 597 return 2 598 else: 599 return None
600 601 def exclude_sel_func(path): 602 if glob_comp_re.match(path.name): 603 return 0 604 else: 605 return None 606 607 # Check to make sure prefix is ok 608 if not include_sel_func(self.rootpath): 609 raise FilePrefixError(glob_str) 610 611 if include: 612 return include_sel_func 613 else: 614 return exclude_sel_func 615
616 - def glob_get_prefix_res(self, glob_str):
617 """Return list of regexps equivalent to prefixes of glob_str""" 618 glob_parts = glob_str.split("/") 619 if "" in glob_parts[1:-1]: 620 # "" OK if comes first or last, as in /foo/ 621 raise GlobbingError("Consecutive '/'s found in globbing string " 622 + glob_str) 623 624 prefixes = map(lambda i: "/".join(glob_parts[:i+1]), 625 range(len(glob_parts))) 626 # we must make exception for root "/", only dir to end in slash 627 if prefixes[0] == "": 628 prefixes[0] = "/" 629 return map(self.glob_to_re, prefixes)
630
631 - def glob_to_re(self, pat):
632 """Returned regular expression equivalent to shell glob pat 633 634 Currently only the ?, *, [], and ** expressions are supported. 635 Ranges like [a-z] are also currently unsupported. There is no 636 way to quote these special characters. 637 638 This function taken with minor modifications from efnmatch.py 639 by Donovan Baarda. 640 641 """ 642 i, n, res = 0, len(pat), '' 643 while i < n: 644 c, s = pat[i], pat[i:i+2] 645 i = i+1 646 if s == '**': 647 res = res + '.*' 648 i = i + 1 649 elif c == '*': 650 res = res + '[^/]*' 651 elif c == '?': 652 res = res + '[^/]' 653 elif c == '[': 654 j = i 655 if j < n and pat[j] in '!^': 656 j = j+1 657 if j < n and pat[j] == ']': 658 j = j+1 659 while j < n and pat[j] != ']': 660 j = j+1 661 if j >= n: 662 res = res + '\\[' # interpret the [ literally 663 else: 664 # Deal with inside of [..] 665 stuff = pat[i:j].replace('\\','\\\\') 666 i = j+1 667 if stuff[0] in '!^': 668 stuff = '^' + stuff[1:] 669 res = res + '[' + stuff + ']' 670 else: 671 res = res + re.escape(c) 672 return res
673