1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import os
23 import re
24 import stat
25 import sys
26
27 from duplicity.path import *
28 from duplicity import log
29 from duplicity import globals
30 from duplicity import diffdir
31 from duplicity import util
32
33 """Iterate exactly the requested files in a directory
34
35 Parses includes and excludes to yield correct files. More
36 documentation on what this code does can be found on the man page.
37
38 """
39
class SelectError(Exception):
    """Base class for every error raised while setting up a selection."""


class FilePrefixError(SelectError):
    """A file specification does not start with the expected prefix."""


class GlobbingError(SelectError):
    """A glob string could not be parsed."""
51
52
54 """Iterate appropriate Paths in given directory
55
56 This class acts as an iterator on account of its next() method.
57 Basically, it just goes through all the files in a directory in
58 order (depth-first) and subjects each file to a bunch of tests
59 (selection functions) in order. The first test that includes or
60 excludes the file means that the file gets included (iterated) or
61 excluded. The default is include, so with no tests we would just
62 iterate all the files in the directory in order.
63
64 The one complication to this is that sometimes we don't know
65 whether or not to include a directory until we examine its
66 contents. For instance, if we want to include all the **.py
67 files. If /home/ben/foo.py exists, we should also include /home
68 and /home/ben, but if these directories contain no **.py files,
69 they shouldn't be included. For this reason, a test may not
70 include or exclude a directory, but merely "scan" it. If later a
71 file in the directory gets included, so does the directory.
72
73 As mentioned above, each test takes the form of a selection
74 function. The selection function takes a path, and returns:
75
76 None - means the test has nothing to say about the related file
77 0 - the file is excluded by the test
78 1 - the file is included
79 2 - the test says the file (must be directory) should be scanned
80
81 Also, a selection function f has a variable f.exclude which should
82 be true iff f could potentially exclude some file. This is used
83 to signal an error if the last function only includes, which would
84 be redundant and presumably isn't what the user intends.
85
86 """
87
# Matches strings that need glob handling: anything containing one of the
# metacharacters * ? [ or starting with the "ignorecase:" prefix.  Strings
# that do not match are treated as plain filenames by glob_get_sf.  The "["
# inside the character class is escaped so that newer versions of the re
# module do not warn about a possible nested set; the set of matched
# strings is unchanged.
glob_re = re.compile(r"(.*[*?\[]|ignorecase\:)", re.I | re.S)
89
def __init__(self, path):
    """Create a selector rooted at path with no selection functions yet.

    path must be a Path instance for the root directory.  Its name is
    stored as self.prefix, the string file specifications are checked
    against elsewhere in this class.

    """
    assert isinstance(path, Path), str(path)
    self.rootpath = path
    self.prefix = path.name
    self.selection_functions = []
96
def set_iter(self):
    """Start a fresh iteration over the root path and return self.

    The root path's data is refreshed with setdata() first, then the
    generator from self.Iterate() is stored on self.iter and its next
    method is exposed as self.next, with self.__iter__ returning self.

    """
    root = self.rootpath
    root.setdata()
    walker = self.Iterate(root)
    self.iter = walker
    self.next = walker.next
    self.__iter__ = lambda: self
    return self
104
def Iterate(self, path):
    """Return a generator yielding the selected paths at and below path.

    If path.type is not set a warning is logged and nothing is
    yielded.  Otherwise path itself is yielded first; if it is a
    directory its entries are then walked depth-first.  The walk keeps
    an explicit stack of per-directory generators instead of
    recursing, so non-directory files cost no extra function calls.

    Directories for which self.Select() returns 2 ("scan") are parked
    on a delayed stack.  They are yielded, outermost first, as soon as
    something beneath them is selected, and are discarded when their
    generator runs dry without that happening.

    """
    def error_handler(exc, path, filename):
        """Log why filename inside path could not be set up; return None."""
        fullpath = os.path.join(path.name, filename)
        try:
            mode = os.stat(fullpath)[stat.ST_MODE]
            if stat.S_ISSOCK(mode):
                log.Info(_("Skipping socket %s") % fullpath,
                         log.InfoCode.skipping_socket,
                         util.escape(fullpath))
            else:
                log.Warn(_("Error initializing file %s") % fullpath,
                         log.WarningCode.cannot_iterate,
                         util.escape(fullpath))
        except OSError:
            log.Warn(_("Error accessing possibly locked file %s") % fullpath,
                     log.WarningCode.cannot_stat,
                     util.escape(fullpath))
        return None

    def diryield(path):
        """Generate relevant files in directory path

        Yields (path, num) where num == 0 means path should be
        generated normally, num == 1 means the path is a directory
        and should be included iff something inside is included.

        """
        # Imported here rather than at module level, as in the original.
        from duplicity import robust
        for filename in robust.listpath(path):
            new_path = robust.check_common_error(
                error_handler, Path.append, (path, filename))

            # Regular files and directories must be readable to be used.
            if (new_path and new_path.type in ["reg", "dir"]
                    and not os.access(new_path.name, os.R_OK)):
                log.Warn(_("Error accessing possibly locked file %s") % new_path.name,
                         log.WarningCode.cannot_read,
                         util.escape(new_path.name))
                if diffdir.stats:
                    diffdir.stats.Errors += 1
                new_path = None
            elif new_path:
                s = self.Select(new_path)
                if s == 1:
                    yield (new_path, 0)
                elif s == 2 and new_path.isdir():
                    yield (new_path, 1)

    if not path.type:
        # The base path has no type, i.e. it does not exist.
        log.Warn(_("Warning: base %s doesn't exist, continuing") %
                 path.name)
        return
    log.Debug(_("Selecting %s") % path.name)
    yield path
    if not path.isdir():
        return
    diryield_stack = [diryield(path)]
    delayed_path_stack = []

    while diryield_stack:
        try:
            # next() builtin (Python 2.6+) instead of the Python-2-only
            # generator .next() method.
            subpath, val = next(diryield_stack[-1])
        except StopIteration:
            # Directory exhausted: drop it, together with its pending
            # "scan" entry if one is still waiting.
            diryield_stack.pop()
            if delayed_path_stack:
                delayed_path_stack.pop()
            continue
        if val == 0:
            if delayed_path_stack:
                # Something inside was selected, so every scanned
                # ancestor must now be emitted before it.
                for delayed_path in delayed_path_stack:
                    log.Log(_("Selecting %s") % delayed_path.name, 6)
                    yield delayed_path
                del delayed_path_stack[:]
            log.Debug(_("Selecting %s") % subpath.name)
            yield subpath
            if subpath.isdir():
                diryield_stack.append(diryield(subpath))
        elif val == 1:
            delayed_path_stack.append(subpath)
            diryield_stack.append(diryield(subpath))
194
def Select(self, path):
    """Run through the selection functions and return dominant val 0/1/2

    With no selection functions the path is included (1).  Every
    function except the last is tried in order: the first 0 or 1
    wins immediately.  If none of them decides but at least one asked
    for a scan (2), 2 is returned without consulting the last
    function.  Otherwise the last function's answer is used, with
    None treated as include (1).

    """
    if not self.selection_functions:
        return 1
    scan_pending = False
    for sf in self.selection_functions[:-1]:
        result = sf(path)
        # Compare by value: "is 2" only worked because of CPython's
        # small-integer caching.
        if result == 2:
            scan_pending = True
        if result in [0, 1]:
            return result
    if scan_pending:
        return 2
    result = self.selection_functions[-1](path)
    if result is None:
        return 1
    return result
214
def ParseArgs(self, argtuples, filelists):
    """Create selection functions based on list of tuples

    argtuples is a sequence of (option string, additional argument)
    pairs built from the command line.  filelists is a sequence of
    open file objects; each filelist option consumes the next one in
    order, and all of them must be consumed.

    A SelectError raised while building a selection function is
    handed to self.parse_catch_error().  Afterwards
    self.parse_last_excludes() checks the final selection function.

    """
    filelists_index = 0
    try:
        for opt, arg in argtuples:
            if opt == "--exclude":
                self.add_selection_func(self.glob_get_sf(arg, 0))
            elif opt == "--exclude-if-present":
                self.add_selection_func(self.present_get_sf(arg, 0))
            elif opt == "--exclude-device-files":
                self.add_selection_func(self.devfiles_get_sf())
            elif opt == "--exclude-filelist":
                self.add_selection_func(self.filelist_get_sf(
                    filelists[filelists_index], 0, arg))
                filelists_index += 1
            elif opt == "--exclude-globbing-filelist":
                # Explicit loop: map() used only for its side effects
                # does nothing where map() is lazy.
                for sf in self.filelist_globbing_get_sfs(
                        filelists[filelists_index], 0, arg):
                    self.add_selection_func(sf)
                filelists_index += 1
            elif opt == "--exclude-other-filesystems":
                self.add_selection_func(self.other_filesystems_get_sf(0))
            elif opt == "--exclude-regexp":
                self.add_selection_func(self.regexp_get_sf(arg, 0))
            elif opt == "--include":
                self.add_selection_func(self.glob_get_sf(arg, 1))
            elif opt == "--include-filelist":
                self.add_selection_func(self.filelist_get_sf(
                    filelists[filelists_index], 1, arg))
                filelists_index += 1
            elif opt == "--include-globbing-filelist":
                for sf in self.filelist_globbing_get_sfs(
                        filelists[filelists_index], 1, arg):
                    self.add_selection_func(sf)
                filelists_index += 1
            elif opt == "--include-regexp":
                self.add_selection_func(self.regexp_get_sf(arg, 1))
            else:
                assert 0, "Bad selection option %s" % opt
    except SelectError as e:
        # "except X as e" is valid from Python 2.6 on; the older
        # "except X, e" spelling is a syntax error on Python 3.
        self.parse_catch_error(e)
    assert filelists_index == len(filelists)
    self.parse_last_excludes()
263
def parse_catch_error(self, exc):
    """Report the selection error exc, or re-raise it if unrecognised.

    FilePrefixError and GlobbingError are passed to log.FatalError
    with their own error codes.  Anything else is re-raised with a
    bare raise, so this must be called from inside the except block
    that caught exc.

    """
    if isinstance(exc, FilePrefixError):
        # NOTE(review): interior whitespace of this message may have been
        # lost in the listing this was restored from -- confirm against
        # the translation catalog before relying on it.
        message = _(
"""Fatal Error: The file specification
%s
cannot match any files in the base directory
%s
Useful file specifications begin with the base directory or some
pattern (such as '**') which matches the base directory.""") % (exc, self.prefix)
        log.FatalError(message, log.ErrorCode.file_prefix_error)
        return
    if isinstance(exc, GlobbingError):
        message = _("Fatal Error while processing expression\n"
                    "%s") % exc
        log.FatalError(message, log.ErrorCode.globbing_error)
        return
    raise
280
def parse_last_excludes(self):
    """Report a fatal error if the last selection function cannot exclude.

    Nothing happens when there are no selection functions or when the
    last one has a true exclude attribute.

    """
    if not self.selection_functions:
        return
    final_sf = self.selection_functions[-1]
    if final_sf.exclude:
        return
    # NOTE(review): interior whitespace of this message may have been
    # lost in the listing this was restored from -- confirm against the
    # translation catalog before relying on it.
    log.FatalError(_(
"""Last selection expression:
%s
only specifies that files be included. Because the default is to
include all files, the expression is redundant. Exiting because this
probably isn't what you meant.""") %
                   (final_sf.name,),
                   log.ErrorCode.redundant_inclusion)
293
def add_selection_func(self, sel_func, add_to_start=None):
    """Register sel_func, last by default or first if add_to_start is true."""
    if not add_to_start:
        self.selection_functions.append(sel_func)
        return
    self.selection_functions.insert(0, sel_func)
300
def filelist_get_sf(self, filelist_fp, inc_default, filelist_name):
    """Build a single selection function from the contents of a filelist.

    filelist_fp should be an (open) file object.
    inc_default should be true if this is an include list,
    false for an exclude list.
    filelist_name is just a string used for logging.

    The entries are read with self.filelist_read() and sorted.  The
    returned function keeps a cursor into the sorted entries that only
    moves forward, advancing whenever self.filelist_pair_match() says
    the current entry cannot match a later path.

    """
    log.Notice(_("Reading filelist %s") % filelist_name)
    entries, something_excluded = self.filelist_read(
        filelist_fp, inc_default, filelist_name)
    log.Notice(_("Sorting filelist %s") % filelist_name)
    entries.sort()
    # One-element list so the closure below can update the position.
    cursor = [0]

    def selection_function(path):
        while cursor[0] < len(entries):
            include, move_on = self.filelist_pair_match(
                path, entries[cursor[0]])
            if not move_on:
                return include
            cursor[0] += 1
            if include is not None:
                return include
            # No verdict from this entry; a later one may still match.
        return None

    selection_function.exclude = something_excluded or inc_default == 0
    selection_function.name = "Filelist: " + filelist_name
    return selection_function
333
def filelist_read(self, filelist_fp, include, filelist_name):
    """Read filelist from fp, return (tuplelist, something_excluded)

    The whole file is read and split on NUL when
    globals.null_separator is set, otherwise on newline; empty
    entries are skipped.  Each entry is parsed with
    self.filelist_parse_line() using include as the default.  Entries
    that raise FilePrefixError are skipped with a warning; at most
    five such warnings are logged.

    something_excluded is 1 if any parsed entry is an exclude and
    None otherwise.  filelist_fp is closed unless it is sys.stdin.

    """
    prefix_warnings = [0]

    def incr_warnings(exc):
        """Warn if prefix is incorrect"""
        prefix_warnings[0] += 1
        if prefix_warnings[0] < 6:
            log.Warn(_("Warning: file specification '%s' in filelist %s\n"
                       "doesn't start with correct prefix %s. Ignoring.") %
                     (exc, filelist_name, self.prefix))
            if prefix_warnings[0] == 5:
                log.Warn(_("Future prefix errors will not be logged."))

    something_excluded, tuple_list = None, []
    separator = "\0" if globals.null_separator else "\n"
    for line in filelist_fp.read().split(separator):
        if not line:
            continue
        try:
            # Named "entry" so the builtin tuple is not shadowed.
            entry = self.filelist_parse_line(line, include)
        except FilePrefixError as exc:
            incr_warnings(exc)
            continue
        tuple_list.append(entry)
        if not entry[1]:
            something_excluded = 1
    # close() is only reported when it returns a true value.
    if filelist_fp not in (sys.stdin,) and filelist_fp.close():
        log.Warn(_("Error closing filelist %s") % filelist_name)
    return (tuple_list, something_excluded)
363
def filelist_parse_line(self, line, include):
    """Turn one filelist line into an (index, include) pair.

    The line is stripped first.  A leading "+ " or "- " overrides the
    include default passed in (1 or 0 respectively).  What remains
    must start with self.prefix, otherwise FilePrefixError is raised.
    index is the tuple of non-empty "/"-separated components that
    follow the prefix.

    """
    line = line.strip()
    marker, remainder = line[:2], line[2:]
    if marker == "+ ":
        include, line = 1, remainder
    elif marker == "- ":
        include, line = 0, remainder

    if not line.startswith(self.prefix):
        raise FilePrefixError(line)
    relative = line[len(self.prefix):]
    components = [part for part in relative.split("/") if part]
    return (tuple(components), include)
387
def filelist_pair_match(self, path, pair):
    """Compare one filelist (index, include) pair with path.

    Returns (include, move_on).  include is 1 or 0 when the pair
    includes or excludes path and None when it says nothing about it.
    move_on is true when the pair's index sorts before path.index (or
    equals it, for an include), so the caller should go on to the
    next pair.

    An include pair also matches every ancestor of its index; an
    exclude pair also matches everything below its index.

    """
    index, include = pair
    if include == 1:
        if index < path.index:
            return (None, True)
        if index == path.index:
            return (1, True)
        if index[:len(path.index)] == path.index:
            # path is an ancestor of the included entry
            return (1, False)
        return (None, False)
    if include == 0:
        if path.index[:len(index)] == index:
            # path is the excluded entry or lies beneath it
            return (0, False)
        return (None, index < path.index)
    assert 0, "Include is %s, should be 0 or 1" % (include,)
418
def filelist_globbing_get_sfs(self, filelist_fp, inc_default, list_name):
    """Yield one glob selection function per entry read from filelist_fp.

    filelist_fp should be an open file object
    inc_default is true iff this is an include list
    list_name is just the name of the list, used for logging
    See the man page on --[include/exclude]-globbing-filelist

    This is a generator.  Entries are separated by NUL when
    globals.null_separator is set, otherwise by newline.  Empty
    entries and entries starting with "#" are skipped.  A leading
    "+ " or "- " forces include or exclude for that entry.

    """
    log.Notice(_("Reading globbing filelist %s") % list_name)
    separator = globals.null_separator and "\0" or "\n"
    for line in filelist_fp.read().split(separator):
        if not line or line[0] == "#":
            continue
        marker = line[:2]
        if marker == "+ ":
            yield self.glob_get_sf(line[2:], 1)
        elif marker == "- ":
            yield self.glob_get_sf(line[2:], 0)
        else:
            yield self.glob_get_sf(line, inc_default)
441
def other_filesystems_get_sf(self, include):
    """Build a selection function for paths on a different device.

    The root path's device location is read once, here.  The returned
    function gives include (0 or 1) for an existing path whose
    getdevloc() differs from it, and None otherwise.

    """
    assert include == 0 or include == 1
    root_devloc = self.rootpath.getdevloc()

    def sel_func(path):
        if not path.exists():
            return None
        if path.getdevloc() == root_devloc:
            return None
        return include

    sel_func.exclude = not include
    sel_func.name = "Match other filesystems"
    return sel_func
454
def regexp_get_sf(self, regexp_string, include):
    """Build a selection function from a regular expression.

    The returned function gives include (0 or 1) when the compiled
    expression is found anywhere in path.name, and None otherwise.
    If the expression does not compile, a warning is logged and the
    original exception is re-raised.

    """
    assert include == 0 or include == 1
    try:
        regexp = re.compile(regexp_string)
    except Exception:
        log.Warn(_("Error compiling regular expression %s") % regexp_string)
        raise

    def sel_func(path):
        if regexp.search(path.name) is None:
            return None
        return include

    sel_func.exclude = not include
    sel_func.name = "Regular expression: %s" % regexp_string
    return sel_func
473
def devfiles_get_sf(self):
    """Build a selection function that excludes every device file.

    A warning is logged if other selection functions have already
    been registered, since this selector is then not the first one.

    """
    if self.selection_functions:
        log.Warn(_("Warning: exclude-device-files is not the first "
                   "selector.\nThis may not be what you intended"))

    def sel_func(path):
        if not path.isdev():
            return None
        return 0

    sel_func.exclude = 1
    sel_func.name = "Exclude device files"
    return sel_func
487
def glob_get_sf(self, glob_str, include):
    """Build the selection function for a command-line glob string.

    "**" matches everything.  A string that self.glob_re matches is
    handled as a real glob by glob_get_normal_sf(); anything else is
    handled as a plain filename by glob_get_filename_sf().  The
    result is tagged with exclude and name attributes.

    """
    assert include == 0 or include == 1
    if glob_str == "**":
        def sel_func(path):
            return include
    elif self.glob_re.match(glob_str):
        sel_func = self.glob_get_normal_sf(glob_str, include)
    else:
        sel_func = self.glob_get_filename_sf(glob_str, include)

    sel_func.exclude = not include
    kind = include and "include" or "exclude"
    sel_func.name = "Command-line %s glob: %s" % (kind, glob_str)
    return sel_func
503
def present_get_sf(self, filename, include):
    """Build a selection function keyed on a marker file in a directory.

    For include == 0 the returned function gives 0 for any path whose
    child named filename exists, and None otherwise.  The include
    variant is not implemented and is reported through
    log.FatalError.

    """
    assert include == 0 or include == 1

    def exclude_sel_func(path):
        if not path.append(filename).exists():
            return None
        return 0

    if include != 0:
        # NOTE(review): sel_func stays unbound on this branch, so the
        # lines below rely on log.FatalError not returning -- confirm.
        log.FatalError("--include-if-present not implemented (would it make sense?).",
                       log.ErrorCode.not_implemented)
    else:
        sel_func = exclude_sel_func

    sel_func.exclude = not include
    option = include and "include-if-present" or "exclude-if-present"
    sel_func.name = "Command-line %s filename: %s" % (option, filename)
    return sel_func
524
def glob_get_filename_sf(self, filename, include):
    """Build a selection function for a filename without glob characters.

    filename must start with self.prefix, otherwise FilePrefixError
    is raised.  The rest is split on "/" into a tuple of non-empty
    components and handed to glob_get_tuple_sf().

    """
    if not filename.startswith(self.prefix):
        raise FilePrefixError(filename)
    relative = filename[len(self.prefix):]
    index = tuple([part for part in relative.split("/") if part])
    return self.glob_get_tuple_sf(index, include)
539
def glob_get_tuple_sf(self, tuple, include):
    """Build a selection function that matches paths against an index tuple.

    The include variant returns 1 for the entry itself, everything
    beneath it and each of its ancestors.  The exclude variant
    returns 0 for the entry and everything beneath it.  Both return
    None for any other path.

    """
    def include_sel_func(path):
        candidate = path.index
        if candidate == tuple[:len(candidate)]:
            # candidate is the entry or one of its ancestors
            return 1
        if candidate[:len(tuple)] == tuple:
            # candidate lies beneath the entry
            return 1
        return None

    def exclude_sel_func(path):
        if path.index[:len(tuple)] != tuple:
            return None
        return 0

    if include == 1:
        sel_func = include_sel_func
    elif include == 0:
        sel_func = exclude_sel_func
    sel_func.exclude = not include
    sel_func.name = "Tuple select %s" % (tuple,)
    return sel_func
562
def glob_get_normal_sf(self, glob_str, include):
    """Build a selection function from a glob string with metacharacters.

    The glob is turned into a regular expression by glob_to_re().  A
    path matches when the expression matches path.name in full or up
    to a "/" (so files inside a matched directory match as well).

    For includes, a directory that may contain a match must be
    scanned: the glob is cut off after its first "**", split into
    prefixes by glob_get_prefix_res(), and any path matching one of
    those prefixes gets 2 ("scan").

    An "ignorecase:" prefix (any case) is removed and makes both
    expressions case-insensitive.  FilePrefixError is raised when the
    root path neither matches nor needs scanning.

    """
    flags = re.S
    if glob_str.lower().startswith("ignorecase:"):
        flags = re.I | re.S
        glob_str = glob_str[len("ignorecase:"):]

    # Matches whatever the glob matches, plus anything inside it.
    glob_comp_re = re.compile("^%s($|/)" % self.glob_to_re(glob_str), flags)

    double_star = glob_str.find("**")
    if double_star != -1:
        # Nothing after the first "**" matters for scanning.
        glob_str = glob_str[:double_star + 2]

    prefix_res = self.glob_get_prefix_res(glob_str)
    scan_comp_re = re.compile("^(%s)$" % "|".join(prefix_res), flags)

    def include_sel_func(path):
        if glob_comp_re.match(path.name):
            return 1
        if scan_comp_re.match(path.name):
            return 2
        return None

    def exclude_sel_func(path):
        if glob_comp_re.match(path.name):
            return 0
        return None

    # The glob is useless unless the root itself matches or is scanned.
    if not include_sel_func(self.rootpath):
        raise FilePrefixError(glob_str)

    if include:
        return include_sel_func
    return exclude_sel_func
615
def glob_get_prefix_res(self, glob_str):
    """Return list of regexps equivalent to prefixes of glob_str

    glob_str is split on "/" and each leading run of components is
    joined back together and converted with glob_to_re().  An empty
    first prefix (glob_str starts with "/") is replaced by "/".
    GlobbingError is raised when an empty component appears anywhere
    but first or last, i.e. for consecutive slashes.

    """
    glob_parts = glob_str.split("/")
    if "" in glob_parts[1:-1]:
        # "" is fine first or last, as in "/foo/"
        raise GlobbingError("Consecutive '/'s found in globbing string "
                            + glob_str)

    # Real lists rather than map(): the first element is replaced below
    # and callers get a list as documented, even where map() is lazy.
    prefixes = ["/".join(glob_parts[:end])
                for end in range(1, len(glob_parts) + 1)]
    if prefixes[0] == "":
        # The root directory is the only one whose name ends in a slash.
        prefixes[0] = "/"
    return [self.glob_to_re(prefix) for prefix in prefixes]
630
def glob_to_re(self, pat):
    """Translate the shell glob pat into regular-expression source text.

    "**" becomes ".*", "*" becomes "[^/]*" and "?" becomes "[^/]".
    A "[...]" group is copied into a regex character class with
    backslashes doubled and a leading "!" or "^" written as "^"; a
    "[" with no closing "]" is taken literally.  Every other
    character is passed through re.escape().  There is no way to
    quote the special characters.

    This function taken with minor modifications from efnmatch.py
    by Donovan Baarda.

    """
    pieces = []
    pos, length = 0, len(pat)
    while pos < length:
        char = pat[pos]
        if pat.startswith('**', pos):
            pieces.append('.*')
            pos += 2
            continue
        pos += 1
        if char == '*':
            pieces.append('[^/]*')
        elif char == '?':
            pieces.append('[^/]')
        elif char != '[':
            pieces.append(re.escape(char))
        else:
            # Look for the closing bracket.  A "]" directly after the
            # "[" (or after a leading "!"/"^") belongs to the set.
            end = pos
            if end < length and pat[end] in '!^':
                end += 1
            if end < length and pat[end] == ']':
                end += 1
            while end < length and pat[end] != ']':
                end += 1
            if end >= length:
                # Unterminated: treat the "[" as an ordinary character.
                pieces.append('\\[')
            else:
                stuff = pat[pos:end].replace('\\', '\\\\')
                pos = end + 1
                if stuff[0] in '!^':
                    stuff = '^' + stuff[1:]
                pieces.append('[' + stuff + ']')
    return ''.join(pieces)
673