##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
#
# Copyright (c) Digital Creations.  All rights reserved.
#
# This license has been certified as Open Source(tm).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions in source code must retain the above copyright
#    notice, this list of conditions, and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions, and the following disclaimer in
#    the documentation and/or other materials provided with the
#    distribution.
#
# 3. Digital Creations requests that attribution be given to Zope
#    in any manner possible. Zope includes a "Powered by Zope"
#    button that is installed by default. While it is not a license
#    violation to remove this button, it is requested that the
#    attribution remain. A significant investment has been put
#    into Zope, and this effort will continue if the Zope community
#    continues to grow. This is one way to assure that growth.
#
# 4. All advertising materials and documentation mentioning
#    features derived from or use of this software must display
#    the following acknowledgement:
#
#      "This product includes software developed by Digital Creations
#      for use in the Z Object Publishing Environment
#      (http://www.zope.org/)."
#
#    In the event that the product being advertised includes an
#    intact Zope distribution (with copyright and license included)
#    then this clause is waived.
#
# 5. Names associated with Zope or Digital Creations must not be used to
#    endorse or promote products derived from this software without
#    prior written permission from Digital Creations.
#
# 6. Modified redistributions of any form whatsoever must retain
#    the following acknowledgment:
#
#      "This product includes software developed by Digital Creations
#      for use in the Z Object Publishing Environment
#      (http://www.zope.org/)."
#
#    Intact (re-)distributions of any official Zope release do not
#    require an external acknowledgement.
#
# 7. Modifications are encouraged but must be packaged separately as
#    patches to official Zope releases.  Distributions that do not
#    clearly separate the patches from the original work must be clearly
#    labeled as unofficial distributions.  Modifications which do not
#    carry the name Zope may be packaged in any form, as long as they
#    conform to all of the clauses above.
#
#
# Disclaimer
#
#   THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
#   EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
#   PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
#   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
#   USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
#   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
#   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
#   SUCH DAMAGE.
#
#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations.  Specific
# attributions are listed in the accompanying credits file.
#
##############################################################################
'''
PartitionedFile module

$Id$

Allows you to work with multiple files as if they were a single
file.  The resulting logical file can be arbitrarily large or span
multiple physical devices.  This is one way to overcome 2 GB file size
limitations.

Use at your own risk!

The module is written so that it can be imported by both Python 2 and
Python 3.  Under Python 3, open partitions in binary mode: the default
bufsize of 0 (unbuffered) is only accepted by the built-in open() for
binary files.
'''

import os
# No longer used by this module; kept so the module namespace is unchanged.
import string
from os.path import getsize, exists

try:
    from posix import fsync
    posixfsync = fsync
except ImportError:
    # No posix.fsync on this platform; fsync requests become no-ops.
    posixfsync = None

try:
    long
except NameError:
    # Python 3 has no separate long type; int is already unbounded.
    long = int

# Byte-string constants.  On Python 2 these are ordinary strings; on
# Python 3 they are bytes, which is what binary-mode files read and write.
_NUL = '\0'.encode('latin-1')
_BYTE_ENDINGS = ('\n'.encode('latin-1'), '\r'.encode('latin-1'))
_TEXT_ENDINGS = ('\n', '\r')


def _endsWithNewline(line):
    '''
    Returns a true value if the given line (text or byte string)
    ends with a line feed or a carriage return.
    An empty line never does.
    '''
    last = line[-1:]
    if isinstance(last, type(_NUL)):
        return last in _BYTE_ENDINGS
    return last in _TEXT_ENDINGS


class DefaultPartitionNamingStrategy:
    '''
    Defines a strategy for naming the successive partitions
    in a partitioned file.  Use the open(), remove(), unlink() and
    rename() methods of an instance instead of the built-in open()
    and the os.* operations.

    partlen is the length passed to every PartitionedFile this
    strategy opens; PartitionedFile.write() starts a new partition
    once the last one has grown to that many bytes.
    '''

    def __init__(self, partlen=long(2) ** 29):
        self.partlen = partlen

    def getPartitionName(self, filename, fileno):
        '''
        Computes a partition filename from the base filename and
        the partition number.  Partition 0 uses the base filename
        itself; partition N uses "<filename>.N".
        '''
        # Override to use a different naming strategy.
        if fileno == 0:
            return filename
        else:
            return '%s.%d' % (filename, fileno)

    def open(self, name, mode='r', bufsize=0):
        '''Opens the partitioned file and returns a PartitionedFile.'''
        return PartitionedFile(self, name, mode, bufsize, self.partlen)

    def remove(self, filename):
        '''
        Removes each partition, starting with partition 0 and
        stopping at the first partition name that does not exist.
        '''
        fileno = 0
        name = self.getPartitionName(filename, fileno)
        while exists(name):
            os.remove(name)
            fileno = fileno + 1
            name = self.getPartitionName(filename, fileno)

    unlink = remove

    def rename(self, oldname, newname):
        '''
        Renames (or moves) each partition, starting with partition 0
        and stopping at the first old partition name that does not
        exist.
        '''
        fileno = 0
        oname = self.getPartitionName(oldname, fileno)
        while exists(oname):
            os.rename(oname, self.getPartitionName(newname, fileno))
            fileno = fileno + 1
            oname = self.getPartitionName(oldname, fileno)


class PartitionedFileState:
    '''
    This class is intended to be a mostly immutable object
    that holds the state of a set of partitions.  Instances are replaced
    as more partitions are located.

    Attributes:
      files       tuple of open file objects, one per located partition
      names       tuple of the corresponding file names
      fileno      index of the current partition (this one is mutable)
      boundaries  tuple giving the logical offset at which each
                  partition starts
      allLocated  true once it is known that no further partitions exist
      sizeOfLast  cached size of the last partition, or -1 if unknown
    '''

    def __init__(self, files, names, fileno):
        self.names = names
        self.files = files
        self.fileno = fileno
        self.allLocated = 0
        self.sizeOfLast = -1
        # Flush first so that the sizes reported by the file system
        # include any data still sitting in the file objects' buffers.
        for f in files:
            f.flush()
        sizes = []
        boundaries = []
        boundary = long(0)
        for name in names:
            size = getsize(name)
            sizes.append(size)
            boundaries.append(boundary)
            boundary = boundary + size
        # Note that the size of the last file is not stored in __sizes:
        # the last partition is the only one expected to change size.
        self.__sizes = tuple(sizes[:-1])
        self.boundaries = tuple(boundaries)

    def file(self):
        '''
        Returns the file object corresponding to the current partition.
        '''
        return self.files[self.fileno]

    def sizeOfLastPartition(self, cacheable):
        '''
        Returns the size of the last located partition, regardless of
        which partition is current.  If cacheable is true the cached
        value is used when available and the computed value is cached;
        otherwise the file is flushed and its size is read from the
        file system every time.
        '''
        if cacheable and self.sizeOfLast >= 0:
            return self.sizeOfLast
        self.files[-1].flush()
        size = getsize(self.names[-1])
        if cacheable:
            self.sizeOfLast = size
        return size

    def size(self, cacheable):
        '''
        Returns the size of the current partition.  Special computation
        is needed for the last partition (see sizeOfLastPartition).
        '''
        fileno = self.fileno
        sizes = self.__sizes
        if fileno < len(sizes):
            return sizes[fileno]
        return self.sizeOfLastPartition(cacheable)

    def boundary(self):
        '''
        Returns the position of the current partition within the
        logical file.
        '''
        return self.boundaries[self.fileno]

    def invalidateSizeOfLast(self):
        '''
        Receives notification that the size of the last partition
        is now unknown.
        '''
        self.sizeOfLast = -1


class PartitionedFile:
    '''
    Reads and writes a file in multiple partitions.
    "Partitions" in this context do not refer to hard drive partitions,
    but rather to multiple files which may or may not be located
    on different physical media.

    Partitions are named by the given strategy object (anything with a
    getPartitionName(name, fileno) method) and are opened lazily as
    reading, writing or seeking reaches them.
    '''

    def __init__(self, strategy, name, mode='r', bufsize=0,
                 partlen=long(2) ** 29):
        '''
        Opens the first partition of the logical file.

        strategy  object providing getPartitionName(name, fileno)
        name      base file name of the logical file
        mode      mode string passed to the built-in open() for
                  every partition
        bufsize   buffer size passed to the built-in open()
        partlen   size a partition may reach before write() starts
                  a new one

        In a mode containing 'w', all existing partitions are truncated
        and every partition but the first is deleted.
        '''
        self.__strategy = strategy
        self.name = name
        self.mode = mode
        self.__bufsize = bufsize
        self.__partlen = long(partlen)
        # Provide the two extra fields required by the File interface.
        self.closed = 0
        self.softspace = 0
        if 'a' in mode or 'w' in mode or '+' in mode:
            # In write mode we can make the assumption
            # that the size of the file will not change
            # unless the change is made by this object.
            # This is an optimization that can be disabled.
            self.__controlledsize = 1
        else:
            self.__controlledsize = 0
        # Open the first partition.
        filename = self.__strategy.getPartitionName(name, 0)
        f = open(filename, mode, bufsize)
        # Create a state with only one file open.
        self.__state = PartitionedFileState((f,), (filename,), 0)
        if 'w' in mode:
            # Truncate all partitions.
            self.truncate(long(0))

    def __seekToNextPartition(self, writeMode=0, last=0):
        '''
        Moves the fileno pointer forward.  Modifies self.__state.
        In non-writeMode, returns None if no more partitions exist.
        In writeMode, never returns None.

        If last is true, the search starts from the last located
        partition instead of the current one, so the call can only
        locate (or create) a new partition.
        '''
        state = self.__state
        files = state.files
        lastfileno = len(files) - 1
        if last:
            fileno = lastfileno
        else:
            fileno = state.fileno
        if fileno < lastfileno:
            # Move to the next file, already opened.
            fileno = fileno + 1
            f = files[fileno]
            f.seek(0)
            state.fileno = fileno
            return f
        # Try to open or create the next partition.
        if not writeMode and state.allLocated:
            # All partitions have been located.
            return None
        fileno = len(files)
        name = self.__strategy.getPartitionName(self.name, fileno)
        if writeMode or exists(name):
            names = state.names
            try:
                f = open(name, self.mode, self.__bufsize)
            except IOError:
                if 'r' in self.mode and '+' in self.mode:
                    # Creation not allowed in r+ mode.
                    # Try to create using w+.
                    mode = self.mode.replace('r', 'w')
                    f = open(name, mode, self.__bufsize)
                else:
                    raise
            # The new state measures every partition again and makes
            # the newly opened partition the current one.
            newstate = PartitionedFileState(
                files + (f,), names + (name,), fileno)
            self.__state = newstate
            return f
        else:
            # No more partitions have been created.  If
            # the size is controlled, we don't have to search for
            # partitions anymore: set allLocated.
            if self.__controlledsize:
                state.allLocated = 1
            return None

    def close(self):
        '''Closes all partitions.'''
        oldstate = self.__state
        self.__state = PartitionedFileState((), (), 0)
        for f in oldstate.files:
            f.close()
        self.closed = 1

    def flush(self):
        '''Flushes all partitions.'''
        for f in self.__state.files:
            f.flush()

    def isatty(self):
        '''Always returns 0.'''
        return 0

    def fileno(self):
        '''Returns the fileno of the first partition.'''
        return self.__state.files[0].fileno()

    def read(self, size=-1):
        '''
        Reads up to (size) bytes, continuing into following partitions
        when the current one is exhausted.  A negative size reads to
        the end of the logical file.
        '''
        size = long(size)
        if size < 0:
            # Use the one negative value every file implementation accepts.
            size = -1
        state = self.__state
        file = state.file()
        data = file.read(size)
        while ((size < 0 or len(data) < size) and
               file.tell() >= state.size(self.__controlledsize)):
            # Possibly didn't read everything.
            # Read from the next partition also.
            file = self.__seekToNextPartition()
            if file is None:
                break
            state = self.__state
            if size < 0:
                remaining = -1
            else:
                remaining = size - len(data)
            data = data + file.read(remaining)
        return data

    def readline(self, size=-1):
        '''
        Reads one line.  If size is non-negative, at most (size) bytes
        are returned, even when the line continues in the next
        partition.
        '''
        size = long(size)
        if size < 0:
            size = -1
        state = self.__state
        file = state.file()
        data = file.readline(size)
        # Possible bug: if the underlying implementation
        # recognizes only \n but not \r as a line terminator,
        # the following will make it inconsistent.
        while (not _endsWithNewline(data) and
               (size < 0 or len(data) < size) and
               file.tell() >= state.size(self.__controlledsize)):
            # Possibly didn't read the whole line.
            # Read from the next file also.
            file = self.__seekToNextPartition()
            if file is None:
                break
            state = self.__state
            # Only ask for what is left of the caller's limit.
            if size < 0:
                remaining = -1
            else:
                remaining = size - len(data)
            data = data + file.readline(remaining)
        return data

    def readlines(self, sizehint=-1):
        '''
        Reads all lines in the logical file, starting at the current
        position.  sizehint is passed to each partition's readlines(),
        but every remaining partition is always read.
        Because of memory constraints, this may not be a wise
        thing to do...
        '''
        state = self.__state
        file = state.file()
        data = file.readlines(sizehint)
        while 1:
            file = self.__seekToNextPartition()
            if file is None:
                break
            toAppend = file.readlines(sizehint)
            # Possible newline bug here too (see readline).
            if data and toAppend and not _endsWithNewline(data[-1]):
                # The last line was split across two partitions:
                # fix the broken string.
                data = data[:-1] + [data[-1] + toAppend[0]] + toAppend[1:]
            else:
                # Normal list concatenation.
                data = data + toAppend
        return data

    def seek(self, offset, whence=0):
        '''
        Seeks to the given position in the file.  When seeking
        beyond the end of the file in write mode, pads with
        zero bytes.

        whence is 0 for an absolute offset, 1 for an offset relative
        to the current position and 2 for an offset relative to the
        end of the logical file (so a negative offset moves back from
        the end, as with ordinary file objects).

        Raises IOError('Invalid argument') for any other whence or if
        the resulting absolute offset is negative.
        '''
        offset = long(offset)
        if whence == 1:
            # Relative positioning.
            offset = self.tell() + offset
        elif whence == 2:
            # Seek from end.  Every partition has to be located
            # before the total length is known.
            state = self.__state
            if not state.allLocated:
                while self.__seekToNextPartition(last=1) is not None:
                    pass
                state = self.__state
            # The current partition is not necessarily the last one,
            # so ask for the size of the last partition explicitly.
            total = (state.boundaries[-1] +
                     state.sizeOfLastPartition(self.__controlledsize))
            offset = total + offset
        elif whence != 0:
            raise IOError('Invalid argument')
        # else absolute positioning.
        if offset < 0:
            raise IOError('Invalid argument')

        # offset is now absolute.
        # Figure out which file to move into.
        state = self.__state
        boundaries = state.boundaries
        fileno = state.fileno
        boundary = boundaries[fileno]
        if offset < boundary:
            # Move to a lower partition number.  This terminates
            # because the first boundary is 0 and offset is >= 0.
            while 1:
                fileno = fileno - 1
                boundary = boundaries[fileno]
                if offset >= boundary:
                    # Found it.
                    break
        elif offset > boundary:
            writable = ('+' in self.mode or 'w' in self.mode or
                        'a' in self.mode)
            if 'b' in self.mode:
                pad = _NUL
            else:
                pad = chr(0)
            # Move to a higher partition number as necessary.
            while 1:
                state = self.__state
                boundaries = state.boundaries
                if fileno < len(boundaries) - 1:
                    # This partition will not change in size.
                    if offset < boundaries[fileno + 1]:
                        # We've found the right partition.
                        break
                    else:
                        fileno = fileno + 1
                else:
                    # This is the last partition that has been located.
                    boundary = boundaries[fileno]
                    sizeOfLast = state.sizeOfLastPartition(
                        self.__controlledsize)
                    if offset - boundary <= sizeOfLast:
                        # The offset is located in the last known
                        # partition.
                        break
                    else:
                        # Try seeking into the next partition, if any.
                        state.fileno = fileno
                        if self.__seekToNextPartition() is not None:
                            # There is another partition that
                            # had not been located until now.
                            fileno = self.__state.fileno
                        elif offset - boundary <= self.__partlen:
                            # The last partition has been located
                            # and it can expand.
                            break
                        elif writable:
                            # Write access.
                            # Emulate the behavior of seeking beyond
                            # the end of a file by padding with zeros
                            # and creating the next partition.
                            file = state.file()
                            if sizeOfLast < self.__partlen:
                                file.seek(self.__partlen - 1)
                                file.write(pad)
                            self.__seekToNextPartition(1)
                            fileno = self.__state.fileno
                        else:
                            # Read-only.
                            break
            state = self.__state
            boundaries = state.boundaries
            boundary = boundaries[fileno]
        # fileno now indicates which partition.
        file = state.files[fileno]
        file.seek(offset - boundary)
        state.fileno = fileno

    def tell(self):
        '''
        Returns the current absolute file offset.
        '''
        state = self.__state
        return state.boundary() + state.file().tell()

    def truncate(self, size=-1):
        '''
        Truncates the file at the given size, or at the current
        position if size is negative.  Partitions that start at or
        beyond the new size are closed and deleted, except that the
        first partition is always retained.
        '''
        if size < 0:
            size = self.tell()
        else:
            size = long(size)
        origFileno = self.__state.fileno
        # Locate all partitions.
        while self.__seekToNextPartition() is not None:
            pass
        state = self.__state
        files = state.files
        names = state.names
        boundaries = state.boundaries
        fileCount = len(files)
        # Determine which partitions to delete
        # and which partition to truncate.
        idx = 0
        while idx < fileCount:
            if boundaries[idx] >= size:
                # Remove all partitions starting with this one.
                break
            idx = idx + 1
        filenoToTruncate = max(idx - 1, 0)
        if idx < fileCount:
            # Delete excess files.
            newFileno = min(origFileno, filenoToTruncate)
            retainedFiles = max(idx, 1)  # Always retain the first file.
            self.__state = PartitionedFileState(
                files[:retainedFiles], names[:retainedFiles], newFileno)
            # Note that "files" and "names" are members
            # of the *old* state.
            for f in files[retainedFiles:]:
                f.close()
            for name in names[retainedFiles:]:
                os.remove(name)
        else:
            # Locating the partitions moved the pointer; put it back.
            self.__state.fileno = origFileno
        # Truncate one file.
        file = files[filenoToTruncate]
        boundary = boundaries[filenoToTruncate]
        file.truncate(size - boundary)
        self.__state.invalidateSizeOfLast()

    def write(self, str):
        '''
        Writes data to the file.  Automatically partitions as needed:
        existing partitions are overwritten in place, the last
        partition grows up to the partition length, and new
        partitions are created for whatever remains.
        '''
        state = self.__state
        while len(str) > 0:
            file = state.file()
            size = state.size(self.__controlledsize)
            pos = file.tell()
            if pos + len(str) <= size:
                # We're only overwriting former contents.
                # Just write.
                file.write(str)
                return
            createNew = 0
            if self.__seekToNextPartition() is None:
                # All partitions have been located and
                # file and size refer to the last partition.
                # We will be changing the size of the last
                # partition.
                state.invalidateSizeOfLast()
                if size < self.__partlen:
                    # Allow expansion up to partlen.
                    size = self.__partlen
                    if pos + len(str) <= size:
                        # We're not writing beyond partlen,
                        # so just write and return.
                        file.write(str)
                        return
                createNew = 1
            # Write a portion of the string.  "file" is still the
            # partition we started in, even if the pointer moved on.
            splitpos = int(size - pos)
            file.seek(pos)
            file.write(str[:splitpos])
            if createNew:
                # Create a new partition.
                self.__seekToNextPartition(1)
            str = str[splitpos:]
            state = self.__state

    def writelines(self, list):
        '''
        Outputs all lines in a list of strings.
        '''
        # Do we need to optimize this?
        for line in list:
            self.write(line)
            # According to the spec, doesn't add a newline.

    def fsync(self):
        '''
        Requests an immediate flush of all data to physical media.
        Does nothing if the platform provides no fsync.
        '''
        if posixfsync is not None:
            state = self.__state
            for f in state.files:
                # fsync only covers data the operating system has
                # already received, so empty our own buffer first.
                f.flush()
                posixfsync(f.fileno())


class PartitionedFileOperations:
    '''
    Directs file operations to either the standard library or
    PartitionedFile based on the largeFileStrategy argument.
    Needed by ZODB.FileStorage.

    pstrat is the partition naming strategy used whenever
    largeFileStrategy is true.
    '''

    def __init__(self, pstrat):
        self.pstrat = pstrat

    def open(self, name, mode='r', bufsize=0, largeFileStrategy=0):
        '''
        Opens a file.  Set largeFileStrategy to 1 to enable opening
        the file in a way that can handle extra large files.
        '''
        if largeFileStrategy:
            return self.pstrat.open(name, mode, bufsize)
        else:
            # Inside a method, "open" is the built-in function,
            # not this method.
            return open(name, mode, bufsize)

    def rename(self, oldname, newname, largeFileStrategy=0):
        '''Renames a plain file or every partition of a large file.'''
        if largeFileStrategy:
            self.pstrat.rename(oldname, newname)
        else:
            os.rename(oldname, newname)

    def remove(self, name, largeFileStrategy=0):
        '''Removes a plain file or every partition of a large file.'''
        if largeFileStrategy:
            self.pstrat.remove(name)
        else:
            os.remove(name)

    def unlink(self, name, largeFileStrategy=0):
        '''Same as remove(), using the unlink spelling.'''
        if largeFileStrategy:
            self.pstrat.unlink(name)
        else:
            os.unlink(name)

    def fsync(self, file):
        '''
        Synchronizes the file to physical media on a best-effort
        basis: uses the file's own fsync() method if it has one,
        otherwise the platform fsync (if any) on its descriptor.
        '''
        if hasattr(file, 'fsync'):
            file.fsync()
        elif posixfsync is not None:
            try:
                posixfsync(file.fileno())
            except (AttributeError, TypeError, ValueError,
                    EnvironmentError):
                # Best effort only: the object may have no usable
                # descriptor, or the OS may refuse to sync it.
                pass


if __name__ == '__main__':
    # Test the seek, write, and read functions.
    # This loops until interrupted.
    import random
    generator = random.Random()

    while 1:
        pstrat = DefaultPartitionNamingStrategy(partlen=991)
        try:
            os.mkdir('test')
        except OSError:
            # Most likely the directory is left over from a previous run.
            pass
        # Notice how easy it is to open a PartitionedFile:
        file = pstrat.open('test/parts', 'wb')
        # Here's how you do the same thing without partitions:
        # file = open('test/parts', 'wb')

        # Randomly shove data in the file, storing the same
        # data in a string at the same time.  Then read
        # the file contents and see if it compares.
        # Do this repeatedly.
        MAXPOS = 12349
        compare = _NUL[:0]
        blank = _NUL * MAXPOS
        for n in range(500):
            s = (('%04d' % n) * 495).encode('latin-1')
            location = int(generator.random() * MAXPOS)
            compare = ((compare + blank)[:location] + s +
                       compare[location + len(s):])
            file.seek(location)
            file.write(s)
        file.close()

        file = pstrat.open('test/parts', 'rb')
        # file = open('test/parts', 'rb')
        if file.read() == compare:
            print('Passed.')
        else:
            print('FAILED! ' * 10)
        file.close()