1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10   
 11   
 12   
 13   
 14   
 15   
 16   
 17   
 18   
 19   
 20   
 21   
 22   
 23   
 24   
 25   
 26   
 27   
 28   
 29   
 30   
 31   
 32   
 33   
 34   
 35   
 36   
 37   
 38   
 39  """ 
 40  Provides an extension to split up large files in staging directories. 
 41   
 42  When this extension is executed, it will look through the configured Cedar 
 43  Backup staging directory for files exceeding a specified size limit, and split 
 44  them down into smaller files using the 'split' utility.  Any directory which 
 45  has already been split (as indicated by the C{cback.split} file) will be 
 46  ignored. 
 47   
 48  This extension requires a new configuration section <split> and is intended 
 49  to be run immediately after the standard stage action or immediately before the 
 50  standard store action.  Aside from its own configuration, it requires the 
 51  options and staging configuration sections in the standard Cedar Backup 
 52  configuration file. 
 53   
 54  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
 55  """ 
 56   
 57   
 58   
 59   
 60   
 61   
 62  import os 
 63  import re 
 64  import logging 
 65   
 66   
 67  from CedarBackup2.util import resolveCommand, executeCommand, changeOwnership 
 68  from CedarBackup2.xmlutil import createInputDom, addContainerNode 
 69  from CedarBackup2.xmlutil import readFirstChild 
 70  from CedarBackup2.actions.util import findDailyDirs, writeIndicatorFile, getBackupFiles 
 71  from CedarBackup2.config import ByteQuantity, readByteQuantity, addByteQuantityNode 
 72   
 73   
 74   
 75   
 76   
 77   
 78  logger = logging.getLogger("CedarBackup2.log.extend.split") 
 79   
 80  SPLIT_COMMAND = [ "split", ] 
 81  SPLIT_INDICATOR = "cback.split" 
 89   
 90     """ 
 91     Class representing split configuration. 
 92   
 93     Split configuration is used for splitting staging directories. 
 94   
 95     The following restrictions exist on data in this class: 
 96   
 97        - The size limit must be a ByteQuantity 
 98        - The split size must be a ByteQuantity 
 99   
100     @sort: __init__, __repr__, __str__, __cmp__, sizeLimit, splitSize 
101     """ 
102   
103 -   def __init__(self, sizeLimit=None, splitSize=None): 
 104        """ 
105        Constructor for the C{SplitCOnfig} class. 
106   
107        @param sizeLimit: Size limit of the files, in bytes 
108        @param splitSize: Size that files exceeding the limit will be split into, in bytes 
109   
110        @raise ValueError: If one of the values is invalid. 
111        """ 
112        self._sizeLimit = None 
113        self._splitSize = None 
114        self.sizeLimit = sizeLimit 
115        self.splitSize = splitSize 
 116   
118        """ 
119        Official string representation for class instance. 
120        """ 
121        return "SplitConfig(%s, %s)" % (self.sizeLimit, self.splitSize) 
 122   
124        """ 
125        Informal string representation for class instance. 
126        """ 
127        return self.__repr__() 
 128   
130        """ 
131        Definition of equals operator for this class. 
132        Lists within this class are "unordered" for equality comparisons. 
133        @param other: Other object to compare to. 
134        @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other. 
135        """ 
136        if other is None: 
137           return 1 
138        if self.sizeLimit != other.sizeLimit: 
139           if self.sizeLimit < other.sizeLimit: 
140              return -1 
141           else: 
142              return 1 
143        if self.splitSize != other.splitSize: 
144           if self.splitSize < other.splitSize: 
145              return -1 
146           else: 
147              return 1 
148        return 0 
 149   
151        """ 
152        Property target used to set the size limit. 
153        If not C{None}, the value must be a C{ByteQuantity} object. 
154        @raise ValueError: If the value is not a C{ByteQuantity} 
155        """ 
156        if value is None: 
157           self._sizeLimit = None 
158        else: 
159           if not isinstance(value, ByteQuantity): 
160              raise ValueError("Value must be a C{ByteQuantity} object.") 
161           self._sizeLimit = value 
 162   
164        """ 
165        Property target used to get the size limit. 
166        """ 
167        return self._sizeLimit 
 168   
170        """ 
171        Property target used to set the split size. 
172        If not C{None}, the value must be a C{ByteQuantity} object. 
173        @raise ValueError: If the value is not a C{ByteQuantity} 
174        """ 
175        if value is None: 
176           self._splitSize = None 
177        else: 
178           if not isinstance(value, ByteQuantity): 
179              raise ValueError("Value must be a C{ByteQuantity} object.") 
180           self._splitSize = value 
 181   
183        """ 
184        Property target used to get the split size. 
185        """ 
186        return self._splitSize 
 187   
188     sizeLimit = property(_getSizeLimit, _setSizeLimit, None, doc="Size limit, as a ByteQuantity") 
189     splitSize = property(_getSplitSize, _setSplitSize, None, doc="Split size, as a ByteQuantity") 
 190   
197   
198     """ 
199     Class representing this extension's configuration document. 
200   
201     This is not a general-purpose configuration object like the main Cedar 
202     Backup configuration object.  Instead, it just knows how to parse and emit 
203     split-specific configuration values.  Third parties who need to read and 
204     write configuration related to this extension should access it through the 
205     constructor, C{validate} and C{addConfig} methods. 
206   
207     @note: Lists within this class are "unordered" for equality comparisons. 
208   
209     @sort: __init__, __repr__, __str__, __cmp__, split, validate, addConfig 
210     """ 
211   
212 -   def __init__(self, xmlData=None, xmlPath=None, validate=True): 
 213        """ 
214        Initializes a configuration object. 
215   
216        If you initialize the object without passing either C{xmlData} or 
217        C{xmlPath} then configuration will be empty and will be invalid until it 
218        is filled in properly. 
219   
220        No reference to the original XML data or original path is saved off by 
221        this class.  Once the data has been parsed (successfully or not) this 
222        original information is discarded. 
223   
224        Unless the C{validate} argument is C{False}, the L{LocalConfig.validate} 
225        method will be called (with its default arguments) against configuration 
226        after successfully parsing any passed-in XML.  Keep in mind that even if 
227        C{validate} is C{False}, it might not be possible to parse the passed-in 
228        XML document if lower-level validations fail. 
229   
230        @note: It is strongly suggested that the C{validate} option always be set 
231        to C{True} (the default) unless there is a specific need to read in 
232        invalid configuration from disk.   
233   
234        @param xmlData: XML data representing configuration. 
235        @type xmlData: String data. 
236   
237        @param xmlPath: Path to an XML file on disk. 
238        @type xmlPath: Absolute path to a file on disk. 
239   
240        @param validate: Validate the document after parsing it. 
241        @type validate: Boolean true/false. 
242   
243        @raise ValueError: If both C{xmlData} and C{xmlPath} are passed-in. 
244        @raise ValueError: If the XML data in C{xmlData} or C{xmlPath} cannot be parsed. 
245        @raise ValueError: If the parsed configuration document is not valid. 
246        """ 
247        self._split = None 
248        self.split = None 
249        if xmlData is not None and xmlPath is not None: 
250           raise ValueError("Use either xmlData or xmlPath, but not both.") 
251        if xmlData is not None: 
252           self._parseXmlData(xmlData) 
253           if validate: 
254              self.validate() 
255        elif xmlPath is not None: 
256           xmlData = open(xmlPath).read() 
257           self._parseXmlData(xmlData) 
258           if validate: 
259              self.validate() 
 260   
262        """ 
263        Official string representation for class instance. 
264        """ 
265        return "LocalConfig(%s)" % (self.split) 
 266   
268        """ 
269        Informal string representation for class instance. 
270        """ 
271        return self.__repr__() 
 272   
274        """ 
275        Definition of equals operator for this class. 
276        Lists within this class are "unordered" for equality comparisons. 
277        @param other: Other object to compare to. 
278        @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other. 
279        """ 
280        if other is None: 
281           return 1 
282        if self.split != other.split: 
283           if self.split < other.split: 
284              return -1 
285           else: 
286              return 1 
287        return 0 
 288   
290        """ 
291        Property target used to set the split configuration value. 
292        If not C{None}, the value must be a C{SplitConfig} object. 
293        @raise ValueError: If the value is not a C{SplitConfig} 
294        """ 
295        if value is None: 
296           self._split = None 
297        else: 
298           if not isinstance(value, SplitConfig): 
299              raise ValueError("Value must be a C{SplitConfig} object.") 
300           self._split = value 
 301   
303        """ 
304        Property target used to get the split configuration value. 
305        """ 
306        return self._split 
 307   
308     split = property(_getSplit, _setSplit, None, "Split configuration in terms of a C{SplitConfig} object.") 
309   
311        """ 
312        Validates configuration represented by the object. 
313   
314        Split configuration must be filled in.  Within that, both the size limit 
315        and split size must be filled in. 
316   
317        @raise ValueError: If one of the validations fails. 
318        """ 
319        if self.split is None: 
320           raise ValueError("Split section is required.") 
321        if self.split.sizeLimit is None: 
322           raise ValueError("Size limit must be set.") 
323        if self.split.splitSize is None: 
324           raise ValueError("Split size must be set.") 
 325   
327        """ 
328        Adds a <split> configuration section as the next child of a parent. 
329   
330        Third parties should use this function to write configuration related to 
331        this extension. 
332   
333        We add the following fields to the document:: 
334   
335           sizeLimit      //cb_config/split/size_limit 
336           splitSize      //cb_config/split/split_size 
337   
338        @param xmlDom: DOM tree as from C{impl.createDocument()}. 
339        @param parentNode: Parent that the section should be appended to. 
340        """ 
341        if self.split is not None: 
342           sectionNode = addContainerNode(xmlDom, parentNode, "split") 
343           addByteQuantityNode(xmlDom, sectionNode, "size_limit", self.split.sizeLimit) 
344           addByteQuantityNode(xmlDom, sectionNode, "split_size", self.split.splitSize) 
 345   
347        """ 
348        Internal method to parse an XML string into the object. 
349   
350        This method parses the XML document into a DOM tree (C{xmlDom}) and then 
351        calls a static method to parse the split configuration section. 
352   
353        @param xmlData: XML data to be parsed 
354        @type xmlData: String data 
355   
356        @raise ValueError: If the XML cannot be successfully parsed. 
357        """ 
358        (xmlDom, parentNode) = createInputDom(xmlData) 
359        self._split = LocalConfig._parseSplit(parentNode) 
 360   
361     @staticmethod 
363        """ 
364        Parses a split configuration section. 
365         
366        We read the following individual fields:: 
367   
368           sizeLimit      //cb_config/split/size_limit 
369           splitSize      //cb_config/split/split_size 
370   
371        @param parent: Parent node to search beneath. 
372   
373        @return: C{SplitConfig} object or C{None} if the section does not exist. 
374        @raise ValueError: If some filled-in value is invalid. 
375        """ 
376        split = None 
377        section = readFirstChild(parent, "split") 
378        if section is not None: 
379           split = SplitConfig() 
380           split.sizeLimit = readByteQuantity(section, "size_limit") 
381           split.splitSize = readByteQuantity(section, "split_size") 
382        return split 
  383   
384   
385   
386   
387   
388   
389   
390   
391   
392   
393 -def executeAction(configPath, options, config): 
 419   
420   
421   
422   
423   
424   
def _splitDailyDir(dailyDir, sizeLimit, splitSize, backupUser, backupGroup):
   """
   Splits every oversized backup file found in a daily staging directory.

   The candidate files are whatever C{getBackupFiles(dailyDir)} returns.  Each
   one whose size on disk is strictly greater than C{sizeLimit.bytes} is
   handed to L{_splitFile} with C{removeSource=True}; all other files are left
   untouched.

   @note: Indicator files (i.e. C{"cback.store"}, C{"cback.stage"}, etc.) are
   presumably filtered out by C{getBackupFiles}; no such filtering happens in
   this function itself.

   @param dailyDir: Daily directory to split
   @param sizeLimit: Size limit, as a C{ByteQuantity} (its C{bytes} value is compared)
   @param splitSize: Split size, as a C{ByteQuantity} (passed through to L{_splitFile})
   @param backupUser: User that target files should be owned by
   @param backupGroup: Group that target files should be owned by

   @raise ValueError: If a file to be split does not exist (see L{_splitFile}).
   @raise IOError: If there is a problem splitting or removing a file (see L{_splitFile}).
   """
   logger.debug("Begin splitting contents of [%s]." % dailyDir)
   for backupFile in getBackupFiles(dailyDir):
      fileSize = float(os.stat(backupFile).st_size)
      if not fileSize > sizeLimit.bytes:
         continue   # within the limit, nothing to do for this file
      _splitFile(backupFile, splitSize, backupUser, backupGroup, removeSource=True)
   logger.debug("Completed splitting contents of [%s]." % dailyDir)
 449   
450   
451   
452   
453   
454   
def _splitFile(sourcePath, splitSize, backupUser, backupGroup, removeSource=False):
   """
   Splits the source file into chunks of the indicated size.

   The C{split} command is run from within the source file's directory, so
   the chunks are created next to the source file.  They are named
   C{<filename>_NNNNN}, where C{NNNNN} is a five-digit, zero-padded numeric
   suffix starting at C{00000}.  The number of chunks is taken from the last
   line of the verbose output of C{split}, which names the last chunk created.

   Every chunk is checked for existence and passed to C{changeOwnership} with
   the indicated backup user and group.  If C{removeSource} is C{True}, then
   the source file will be removed after it is successfully split.  The
   working directory in effect on entry is always restored on exit.

   @param sourcePath: Absolute path of the source file to split
   @param splitSize: Size of each chunk, as a C{ByteQuantity} (its C{bytes} value is truncated to an integer)
   @param backupUser: User that target files should be owned by
   @param backupGroup: Group that target files should be owned by
   @param removeSource: Indicates whether to remove the source file

   @raise ValueError: If the source path does not exist.
   @raise IOError: If there is a problem accessing, splitting or removing the source file.
   """
   cwd = os.getcwd()
   try:
      if not os.path.exists(sourcePath):
         raise ValueError("Source path [%s] does not exist." % sourcePath)
      dirname = os.path.dirname(sourcePath)
      filename = os.path.basename(sourcePath)
      prefix = "%s_" % filename
      chunkBytes = int(splitSize.bytes)   # split wants a whole number of bytes
      os.chdir(dirname)   # split writes its output files relative to the working directory
      command = resolveCommand(SPLIT_COMMAND)
      args = [ "--verbose", "--numeric-suffixes", "--suffix-length=5", "--bytes=%d" % chunkBytes, filename, prefix, ]
      (result, output) = executeCommand(command, args, returnOutput=True, ignoreStderr=False)
      if result != 0:
         raise IOError("Error [%d] calling split for [%s]." % (result, sourcePath))
      if not output:
         # Nothing to parse; report it the same way as an unparseable line.
         raise IOError("Unable to parse output from split command.")
      # The last verbose line names the last chunk created.  Older versions of
      # split open the name with a backtick, newer ones with an apostrophe.
      # The prefix is escaped because file names may hold regex metacharacters.
      pattern = re.compile(r"creating file [`']%s(\d+)'" % re.escape(prefix))
      match = pattern.search(output[-1])
      if match is None:
         raise IOError("Unable to parse output from split command.")
      lastIndex = int(match.group(1))
      # Suffixes run from 0 through lastIndex inclusive, so the last chunk
      # must be checked and re-owned as well.
      for index in range(0, lastIndex + 1):
         path = "%s%05d" % (prefix, index)
         if not os.path.exists(path):
            raise IOError("After call to split, expected file [%s] does not exist." % path)
         changeOwnership(path, backupUser, backupGroup)
      if removeSource and os.path.exists(sourcePath):
         try:
            os.remove(sourcePath)
         except (IOError, OSError):
            raise IOError("Failed to remove file [%s] after splitting it." % (sourcePath))
         logger.debug("Completed removing old file [%s]." % sourcePath)
   finally:
      os.chdir(cwd)
 504