##############################################################################
# 
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
# 
# Copyright (c) Digital Creations.  All rights reserved.
# 
# This license has been certified as Open Source(tm).
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 
# 1. Redistributions in source code must retain the above copyright
#    notice, this list of conditions, and the following disclaimer.
# 
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions, and the following disclaimer in
#    the documentation and/or other materials provided with the
#    distribution.
# 
# 3. Digital Creations requests that attribution be given to Zope
#    in any manner possible. Zope includes a "Powered by Zope"
#    button that is installed by default. While it is not a license
#    violation to remove this button, it is requested that the
#    attribution remain. A significant investment has been put
#    into Zope, and this effort will continue if the Zope community
#    continues to grow. This is one way to assure that growth.
# 
# 4. All advertising materials and documentation mentioning
#    features derived from or use of this software must display
#    the following acknowledgement:
# 
#      "This product includes software developed by Digital Creations
#      for use in the Z Object Publishing Environment
#      (http://www.zope.org/)."
# 
#    In the event that the product being advertised includes an
#    intact Zope distribution (with copyright and license included)
#    then this clause is waived.
# 
# 5. Names associated with Zope or Digital Creations must not be used to
#    endorse or promote products derived from this software without
#    prior written permission from Digital Creations.
# 
# 6. Modified redistributions of any form whatsoever must retain
#    the following acknowledgment:
# 
#      "This product includes software developed by Digital Creations
#      for use in the Z Object Publishing Environment
#      (http://www.zope.org/)."
# 
#    Intact (re-)distributions of any official Zope release do not
#    require an external acknowledgement.
# 
# 7. Modifications are encouraged but must be packaged separately as
#    patches to official Zope releases.  Distributions that do not
#    clearly separate the patches from the original work must be clearly
#    labeled as unofficial distributions.  Modifications which do not
#    carry the name Zope may be packaged in any form, as long as they
#    conform to all of the clauses above.
# 
# 
# Disclaimer
# 
#   THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
#   EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
#   PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
#   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
#   USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
#   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
#   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
#   SUCH DAMAGE.
# 
# 
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations.  Specific
# attributions are listed in the accompanying credits file.
# 
##############################################################################

'''
PartitionedFile module
$Id$

Allows you to work with multiple files as if they were a single file.
The resulting logical file can be arbitrarily large or span multiple
physical devices.  This is one way to overcome 2 GB file size limitations.

Use at your own risk!
'''

from os.path import getsize, exists
import os, string

# posixfsync is the platform's fsync() function, or None when the posix
# module (or its fsync function) cannot be imported.
try:
    from posix import fsync
    posixfsync = fsync
except ImportError:
    # Only a failed import means "not available"; any other error
    # should surface instead of being silently swallowed.
    posixfsync = None


class DefaultPartitionNamingStrategy:
    '''
    Defines a strategy for naming the successive partitions
    in a partitioned file.  Use the methods of this class instead
    of open() and the os.* operations.

    Partition 0 keeps the base filename; partition N (N > 0) is named
    "<filename>.<N>".  Partitions are expected to be numbered without
    gaps: remove() and rename() stop at the first missing partition.
    '''
    def __init__(self, partlen=long(2) ** 29):
        '''
        partlen is the maximum length of each partition; it is handed
        to every PartitionedFile created by open().
        '''
        # long(2) ** 29 is the same long integer as the literal 2L**29.
        self.partlen = partlen

    def getPartitionName(self, filename, fileno):
        '''
        Computes a partition filename from the base filename and the
        partition number.
        '''
        # Override to use a different naming strategy.
        if fileno == 0:
            return filename
        else:
            return '%s.%d' % (filename, fileno)

    def open(self, name, mode='r', bufsize=0):
        '''Opens the logical file "name" and returns a PartitionedFile.'''
        return PartitionedFile(self, name, mode, bufsize, self.partlen)

    def remove(self, filename):
        '''Removes each partition, stopping at the first missing one.'''
        fileno = 0
        while 1:
            name = self.getPartitionName(filename, fileno)
            if not exists(name):
                break
            os.remove(name)
            fileno = fileno + 1

    unlink = remove

    def rename(self, oldname, newname):
        '''
        Renames (or moves) each partition.

        If the destination previously had more partitions than the
        source, the surplus destination partitions are removed;
        otherwise they would be read back as trailing data of the
        renamed file.  Nothing is touched when the source has no
        partitions at all.
        '''
        fileno = 0
        while 1:
            oname = self.getPartitionName(oldname, fileno)
            if not exists(oname):
                break
            nname = self.getPartitionName(newname, fileno)
            os.rename(oname, nname)
            fileno = fileno + 1

        if fileno > 0:
            # fileno is now the number of partitions that were moved.
            # Anything at or beyond that index under the new name is a
            # leftover of the file that was just replaced.
            while 1:
                stale = self.getPartitionName(newname, fileno)
                if not exists(stale):
                    break
                os.remove(stale)
                fileno = fileno + 1


class PartitionedFileState:
    '''Holds the state of a set of located partitions: the open file
    objects, their names, the index of the current partition and the
    offset of each partition within the logical file.

    The object is intended to be mostly immutable; a new instance
    replaces the old one whenever more partitions are located.
    '''

    def __init__(self, files, names, fileno):
        '''
        files  -- sequence of open file objects, one per partition.
        names  -- the matching sequence of partition filenames.
        fileno -- index of the current partition.
        '''
        self.names = names
        self.files = files
        self.fileno = fileno
        self.allLocated = 0
        self.sizeOfLast = -1

        # Push buffered data out so that getsize() reports current sizes.
        for f in files:
            f.flush()

        # boundaries[i] is the logical offset at which partition i starts.
        partsizes = []
        starts = []
        offset = long(0)
        for name in names:
            starts.append(offset)
            length = getsize(name)
            partsizes.append(length)
            offset = offset + length
        self.boundaries = tuple(starts)
        # Note that the size of the last file is not stored in __sizes.
        self.__sizes = tuple(partsizes[:-1])

    def file(self):
        '''Returns the file object corresponding to the current partition.
        '''
        return self.files[self.fileno]

    def size(self, cacheable):
        '''
        Returns the size of the current partition.  Sizes of all but
        the last partition were recorded at construction time; the last
        partition is measured on demand and, when cacheable is true,
        the measurement is remembered until invalidateSizeOfLast().
        '''
        known = self.__sizes
        current = self.fileno
        if current < len(known):
            return known[current]
        if cacheable and self.sizeOfLast >= 0:
            return self.sizeOfLast
        # Flush before measuring so that pending writes are counted.
        self.files[-1].flush()
        measured = getsize(self.names[-1])
        if cacheable:
            self.sizeOfLast = measured
        return measured

    def boundary(self):
        '''Returns the position of the current partition within
        the logical file.
        '''
        return self.boundaries[self.fileno]

    def invalidateSizeOfLast(self):
        '''Receives notification that the size of the last
        partition is now unknown.
        '''
        self.sizeOfLast = -1


class PartitionedFile:
    '''
    Reads and writes a file in multiple partitions.
    "Partitions" in this context do not refer to
    hard drive partitions, but rather to multiple files
    which may or may not be located on different
    physical media.

    The object follows the built-in file interface.  Partition names
    come from strategy.getPartitionName(); a partition may grow to
    partlen bytes before the next one is created.
    '''

    def __init__(self, strategy, name, mode='r', bufsize=0,
                 partlen=long(2) ** 29):
        '''
        strategy -- object providing getPartitionName(name, fileno).
        name     -- base filename of the logical file.
        mode     -- open() mode, used for every partition.
        bufsize  -- open() buffer size, used for every partition.
        partlen  -- maximum length of one partition.
        '''
        self.__strategy = strategy
        self.name = name
        self.mode = mode
        self.__bufsize = bufsize
        self.__partlen = long(partlen)
        if 'a' in mode or 'w' in mode or '+' in mode:
            # In write mode we can make the assumption
            # that the size of the file will not change
            # unless the change is made by this object.
            # This is an optimization that can be disabled.
            self.__controlledsize = 1
        else:
            self.__controlledsize = 0
        # Open the first partition.
        filename = self.__strategy.getPartitionName(name, 0)
        f = open(filename, mode, bufsize)
        # Create a state with only one file open.
        self.__state = PartitionedFileState((f,), (filename,), 0)
        if 'w' in mode:
            # Truncate all partitions.
            self.truncate(0)
        # Provide the two extra fields required by the File interface.
        self.closed = 0
        self.softspace = 0

    def __seekToNextPartition(self, writeMode=0, last=0):
        '''
        Moves the fileno pointer forward.  Modifies self.__state.
        In non-writeMode, returns None if no more partitions exist.
        In writeMode, never returns None.
        If last is true, starts from the last located partition
        instead of the current one.
        '''
        state = self.__state
        files = state.files
        lastfileno = len(files) - 1
        if last:
            fileno = lastfileno
        else:
            fileno = state.fileno
        if fileno < lastfileno:
            # Move to the next file, already opened.
            fileno = fileno + 1
            f = files[fileno]
            f.seek(0)
            state.fileno = fileno
            return f

        # Try to open or create the next partition.
        if not writeMode and state.allLocated:
            # All partitions have been located.
            return None
        fileno = len(files)
        name = self.__strategy.getPartitionName(self.name, fileno)
        if writeMode or exists(name):
            names = state.names
            try:
                f = open(name, self.mode, self.__bufsize)
            except IOError:
                if 'r' in self.mode and '+' in self.mode:
                    # Creation not allowed in r+ mode.
                    # Try to create using w+.
                    mode = string.replace(self.mode, 'r', 'w')
                    f = open(name, mode, self.__bufsize)
                else:
                    raise
            # The new state makes the new partition the current one.
            newstate = PartitionedFileState(
                files + (f,), names + (name,), fileno)
            self.__state = newstate
            return f
        else:
            # No more partitions have been created.  If
            # the size is controlled, we don't have to search for
            # partitions anymore: set allLocated.
            if self.__controlledsize:
                state.allLocated = 1
            return None

    def __sizeOfLastPartition(self):
        '''
        Returns the size of the last located partition, whichever
        partition is current.
        '''
        # state.size() measures the *current* partition, so point the
        # state at the last partition for the duration of the call.
        state = self.__state
        current = state.fileno
        state.fileno = len(state.files) - 1
        try:
            return state.size(self.__controlledsize)
        finally:
            state.fileno = current

    def close(self):
        '''Closes all partitions.'''
        oldstate = self.__state
        self.__state = PartitionedFileState((), (), 0)
        for f in oldstate.files:
            f.close()
        self.closed = 1

    def flush(self):
        '''Flushes all partitions.'''
        for f in self.__state.files:
            f.flush()

    def isatty(self):
        '''Always returns 0.'''
        return 0

    def fileno(self):
        '''Returns the fileno of the first partition.'''
        return self.__state.files[0].fileno()

    def read(self, size=-1):
        '''
        Reads up to (size) bytes, continuing into the following
        partitions as needed.  A negative size reads to the end of
        the logical file.
        '''
        size = long(size)
        state = self.__state
        f = state.file()
        data = f.read(size)
        while (size < 0 or len(data) < size) and \
              f.tell() >= state.size(self.__controlledsize):
            # Possibly didn't read everything.
            # Read from the next partition also.
            f = self.__seekToNextPartition()
            if f is None:
                break
            state = self.__state
            if size < 0:
                remaining = -1
            else:
                remaining = size - len(data)
            data = data + f.read(remaining)
        return data

    def readline(self, size=-1):
        '''
        Reads one line.  If size is non-negative, at most size bytes
        are returned, even when the line spans several partitions.
        '''
        size = long(size)
        state = self.__state
        f = state.file()
        data = f.readline(size)
        lastchar = data[-1:]
        # Possible bug: if the underlying implementation
        # recognizes only \n but not \r as a line terminator,
        # the following will make it inconsistent.
        while lastchar not in ('\n', '\r') and \
              (size < 0 or len(data) < size) and \
              f.tell() >= state.size(self.__controlledsize):
            # Possibly didn't read the whole line.
            # Read from the next file also.
            f = self.__seekToNextPartition()
            if f is None:
                break
            state = self.__state
            # Only ask for what is still allowed by the size limit.
            if size < 0:
                remaining = -1
            else:
                remaining = size - len(data)
            data = data + f.readline(remaining)
            lastchar = data[-1:]
        return data

    def readlines(self, sizehint=-1):
        '''
        Reads lines from the current position.  Without a positive
        sizehint all remaining lines of the logical file are read;
        because of memory constraints, this may not be a wise thing
        to do...  With a positive sizehint, reading stops once at
        least that many bytes have been collected.
        '''
        # Built on readline() so that lines broken across a partition
        # boundary are joined and no part of a partition is skipped
        # when reading stops early.
        lines = []
        total = 0
        while 1:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            total = total + len(line)
            if sizehint > 0 and total >= sizehint:
                break
        return lines

    def seek(self, offset, whence=0):
        '''
        Seeks to the given position in the file.  whence is 0 for an
        absolute offset, 1 for an offset relative to the current
        position and 2 for an offset relative to the end of the
        logical file (normally zero or negative).  When seeking beyond
        the end of the file in write mode, pads with zero bytes.
        Raises IOError for an unknown whence or a negative position.
        '''
        offset = long(offset)
        if whence == 1:
            # Relative positioning.
            offset = self.tell() + offset
        elif whence == 2:
            # Seek from end: locate every partition first.
            if not self.__state.allLocated:
                while self.__seekToNextPartition(last=1) is not None:
                    pass
            state = self.__state
            total = state.boundaries[-1] + self.__sizeOfLastPartition()
            offset = total + offset
        # else absolute positioning.
        elif whence != 0:
            raise IOError('Invalid argument')
        if offset < 0:
            raise IOError('Invalid argument')

        # offset is now absolute.
        # Figure out which file to move into.
        state = self.__state
        boundaries = state.boundaries
        fileno = state.fileno
        boundary = boundaries[fileno]
        if offset < boundary:
            # Move to a lower partition number.  boundaries[0] is 0
            # and offset is not negative, so this terminates.
            while 1:
                fileno = fileno - 1
                boundary = boundaries[fileno]
                if offset >= boundary:
                    # Found it.
                    break
        elif offset > boundary:
            # Move to a higher partition number as necessary.
            while 1:
                state = self.__state
                boundaries = state.boundaries

                if fileno < len(boundaries) - 1:
                    # This partition will not change in size.
                    if offset < boundaries[fileno + 1]:
                        # We've found the right partition.
                        break
                    else:
                        fileno = fileno + 1
                else:
                    # This is the last partition that has been located.
                    boundary = boundaries[fileno]
                    sizeOfLast = self.__sizeOfLastPartition()
                    if offset - boundary <= sizeOfLast:
                        # The offset is located in the last known partition.
                        break
                    else:
                        # Try seeking into the next partition, if any.
                        state.fileno = fileno
                        if self.__seekToNextPartition() is not None:
                            # There is another partition that
                            # had not been located until now.
                            fileno = self.__state.fileno
                        elif offset - boundary <= self.__partlen:
                            # The last partition has been located
                            # and it can expand.
                            break
                        elif ('+' in self.mode or 'w' in self.mode
                              or 'a' in self.mode):
                            # Write access.
                            # Emulate the behavior of seeking beyond
                            # the end of a file by padding with zeros
                            # and creating the next partition.
                            f = state.file()
                            if sizeOfLast < self.__partlen:
                                f.seek(self.__partlen - 1)
                                f.write(chr(0))
                            self.__seekToNextPartition(1)
                            fileno = self.__state.fileno
                        else:
                            # Read-only.
                            break

        state = self.__state
        boundaries = state.boundaries
        boundary = boundaries[fileno]

        # fileno now indicates which partition.
        f = state.files[fileno]
        f.seek(offset - boundary)
        state.fileno = fileno

    def tell(self):
        '''
        Returns the current absolute file offset.
        '''
        state = self.__state
        return state.boundary() + state.file().tell()

    def truncate(self, size=-1):
        '''
        Truncates the logical file to size bytes, or at the current
        position when size is negative.  Partitions that start at or
        beyond the new size are closed and deleted, except that the
        first partition is always retained.
        '''
        if size < 0:
            size = self.tell()
        else:
            size = long(size)
        origFileno = self.__state.fileno
        # Locate all partitions.
        while self.__seekToNextPartition() is not None:
            pass

        state = self.__state
        files = state.files
        names = state.names
        boundaries = state.boundaries
        fileCount = len(files)
        idx = 0
        # Determine which partitions to delete
        # and which partition to truncate.
        while idx < fileCount:
            if boundaries[idx] >= size:
                # Remove all partitions starting with this one.
                break
            idx = idx + 1
        filenoToTruncate = max(idx - 1, 0)
        if idx < fileCount:
            # Delete excess files.
            newFileno = min(origFileno, filenoToTruncate)
            retainedFiles = max(idx, 1)   # Always retain the first file.
            self.__state = PartitionedFileState(
                files[:retainedFiles], names[:retainedFiles], newFileno)
            # Note that "files" and "names" are members
            # of the *old* state.
            for f in files[retainedFiles:]:
                f.close()
            for name in names[retainedFiles:]:
                os.remove(name)
        else:
            # Locating the partitions moved the pointer; restore it.
            self.__state.fileno = origFileno
        # Truncate one file.
        f = files[filenoToTruncate]
        boundary = boundaries[filenoToTruncate]
        f.truncate(size - boundary)
        self.__state.invalidateSizeOfLast()

    def write(self, str):
        '''
        Writes data to the file.  Automatically partitions as needed.
        '''
        data = str
        state = self.__state
        while len(data) > 0:
            f = state.file()
            size = state.size(self.__controlledsize)
            pos = f.tell()
            if pos + len(data) <= size:
                # We're only overwriting former contents.
                # Just write.
                f.write(data)
                return
            createNew = 0
            if self.__seekToNextPartition() is None:
                # All partitions have been located and
                # f and size refer to the last partition.
                # We will be changing the size of the last
                # partition.
                state.invalidateSizeOfLast()
                if size < self.__partlen:
                    # Allow expansion up to partlen.
                    size = self.__partlen
                    if pos + len(data) <= size:
                        # We're not writing beyond partlen,
                        # so just write and return.
                        f.write(data)
                        return
                createNew = 1
            # Write the portion that fits into this partition.
            splitpos = int(size - pos)
            f.seek(pos)
            f.write(data[:splitpos])
            if createNew:
                # Create a new partition; it becomes the current one.
                self.__seekToNextPartition(1)
            data = data[splitpos:]
            state = self.__state

    def writelines(self, list):
        '''
        Outputs all lines in a list of strings.
        '''
        # Do we need to optimize this?
        for line in list:
            self.write(line)  # According to the spec, doesn't add a newline.

    def fsync(self):
        '''
        Requests an immediate flush of all data to physical media.
        Does nothing when no fsync function is available.
        '''
        if posixfsync is not None:
            for f in self.__state.files:
                # fsync() only covers data already handed to the
                # operating system, so empty the file's buffer first.
                f.flush()
                posixfsync(f.fileno())


class PartitionedFileOperations:
    '''
    Dispatches file operations either to the standard library or to a
    partition naming strategy, depending on the largeFileStrategy
    argument of each call.
    Needed by ZODB.FileStorage.
    '''

    def __init__(self, pstrat):
        '''pstrat is the strategy used when largeFileStrategy is true.'''
        self.pstrat = pstrat

    def open(self, name, mode='r', bufsize=0, largeFileStrategy=0):
        '''
        Opens a file.  Set largeFileStrategy to 1 to open it through
        the strategy, in a way that can handle extra large files.
        '''
        if not largeFileStrategy:
            # Inside this method "open" is the built-in function; the
            # method itself is only reachable as an attribute.
            return open(name, mode, bufsize)
        return self.pstrat.open(name, mode, bufsize)

    def rename(self, oldname, newname, largeFileStrategy=0):
        '''Renames a file through the strategy or through os.rename.'''
        if largeFileStrategy:
            renamer = self.pstrat.rename
        else:
            renamer = os.rename
        renamer(oldname, newname)

    def remove(self, name, largeFileStrategy=0):
        '''Removes a file through the strategy or through os.remove.'''
        if largeFileStrategy:
            remover = self.pstrat.remove
        else:
            remover = os.remove
        remover(name)

    def unlink(self, name, largeFileStrategy=0):
        '''Unlinks a file through the strategy or through os.unlink.'''
        if largeFileStrategy:
            unlinker = self.pstrat.unlink
        else:
            unlinker = os.unlink
        unlinker(name)

    def fsync(self, file):
        '''
        Asks for the file's data to be written to physical media.
        Objects with their own fsync() method handle the request
        themselves; for other objects this is a best-effort call that
        ignores any failure.
        '''
        if hasattr(file, 'fsync'):
            file.fsync()
            return
        if posixfsync is None:
            return
        try:
            posixfsync(file.fileno())
        except:
            pass


if __name__ == '__main__':
    # Test the seek, write, and read functions.  The test repeats
    # until it is interrupted, rebuilding the file on every pass.
    # The random module replaces whrandom, which newer interpreters
    # no longer provide.
    import random
    while 1:
        # A small, odd partition length makes writes cross
        # partition boundaries often.
        pstrat = DefaultPartitionNamingStrategy(partlen=991)
        try:
            os.mkdir('test')
        except OSError:
            # The directory is left over from an earlier pass.
            pass

        # Notice how easy it is to open a PartitionedFile:
        file = pstrat.open('test/parts', 'wb')

        # Here's how you do the same thing without partitions:
        # file = open('test/parts', 'wb')

        # Randomly shove data in the file, storing the same
        # data in a string at the same time.  Then read
        # the file contents and see if it compares.
        # Do this repeatedly.
        MAXPOS = 12349
        compare = ''
        blank = '\0' * MAXPOS
        for n in range(500):
            s = ('%04d' % n) * 495
            location = int(random.random() * MAXPOS)
            # Pad the expected contents with zero bytes up to the
            # write position, as seeking beyond the end does.
            compare = (compare + blank)[:location] + s + \
                      compare[location + len(s):]
            file.seek(location)
            file.write(s)
        file.close()
        # Read back in binary mode, matching how the data was written.
        file = pstrat.open('test/parts', 'rb')
        # file = open('test/parts', 'rb')
        if file.read() == compare:
            print('Passed.')
        else:
            print('FAILED! ' * 10)
        file.close()