#!/usr/bin/python
"""salvage_data

This is a tool for recovering data from a partially failing drive.  It will
start at the beginning of the device, and start reading sequentially until it
hits a problem, and will then split the remaining work to avoid bad sections as
long as possible.
"""
import sys

class WorkQueue(object):
    """Queue of (start, end) extents that are still left to attempt.

    Extents are treated as half-open ranges: a split produces
    (start, middle) and (middle, end), which share the middle offset.
    pop() takes from the tail of the underlying list, so anything appended
    is attempted next, while anything inserted at the head is deferred.
    """
    def __init__(self, start, end):
        """Seeds the queue with the single extent (start, end)."""
        self.queue = [(start, end)]

    def is_empty(self):
        """Lets us know when we are done"""
        return not self.queue

    def pop(self):
        """Returns a (start, end) tuple of the next section to attempt"""
        return self.queue.pop()

    def push(self, start, end, split=False):
        """Adds the start, end section to the queue, splitting if told to.

        Empty or inverted sections (start >= end) are ignored.  When split
        is true the section is cut at its midpoint: the upper half is
        appended so it is attempted next, and the lower half is inserted at
        the head of the list so it is put off until later.
        """
        if start >= end:
            # nothing to queue
            return
        if not split:
            self.queue.append((start, end))
            return
        # Floor division keeps the midpoint an integer offset; true division
        # would put a float such as 2.5 into the queue.
        middle = (start + end) // 2
        if start < middle:
            self.queue.insert(0, (start, middle))
        if middle < end:
            self.queue.append((middle, end))

def recover(drive, start, end, output):
    """Reads the range start..end of drive piece by piece, logging to output.

    drive must support seek() and read(); output must support write() and
    flush().  One record is written and flushed per attempt:

      "E <offset>"           the seek or read raised IOError
      "S <offset>"           the read returned nothing
      "D <offset> <length>"  followed by the data itself on the next line

    After a failure or an empty read, the rest of the extent (skipping one
    position) is split in two and requeued.  After a short read the
    remainder is split as well; a full read requeues the remainder whole.
    """
    extent_size = 1024*1024 # Upper bound on a single read
    pending = WorkQueue(start, end)

    while not pending.is_empty():
        offset, limit = pending.pop()
        # never ask for more than what is left of this extent
        want = min(extent_size, limit - offset)

        read_failed = False
        try:
            drive.seek(offset)
            data = drive.read(want)
        except IOError:
            read_failed = True

        if read_failed or not data:
            # "E": the drive reported an error at this offset.
            # "S": nothing came back without an error being raised.
            record = "E %s\n" if read_failed else "S %s\n"
            output.write(record % offset)
            output.flush()
            pending.push(offset+1, limit, True)
            continue

        got = len(data)
        output.write("D %s %s\n%s\n" % (offset, got, data))
        output.flush()
        # a short read probably means trouble right after it, so split
        # whatever remains of the extent
        pending.push(offset+got, limit, got < want)

def main(args):
    """Takes the device to read as the first argument.  May optionally specify
    the start and end offsets to recover.

    args is the argument list without the program name.  start defaults to
    0 and end defaults to the size of the device, found by seeking to its
    end.  The recovery log goes to standard output; a one-line summary of
    the range being read goes to standard error.

    Prints usage and exits with status 1 when no device is given.  An offset
    that is not an integer raises ValueError from int().
    """
    if len(args) < 1:
        # Fill the %s placeholder with the program name; without the
        # formatting step a literal "%s" is printed.
        usage = (
            "Syntax error: %s <inputfile> [start [end]]\n"
            "Reads from the specified input file (device)\n"
            "Optionally starts at the specified start offset and ends at the "
            "specified end offset.\n"
            "If not specified, start defaults to 0, and end defaults to the "
            "end of the device.\n"
        ) % sys.argv[0]
        sys.stderr.write(usage)
        sys.exit(1)

    device = args[0]

    # Open in binary mode: the device holds raw bytes, and text mode may
    # decode them or translate line endings.  The with-block makes sure the
    # handle is closed however the recovery ends.
    with open(device, 'rb') as drive:
        # determine device size (whence=2 seeks relative to the end)
        drive.seek(0, 2)
        end = drive.tell()

        # see if the user has specified a range to recover; a missing
        # argument raises IndexError and leaves the default in place
        start = 0
        try:
            start = int(args[1])
            end = int(args[2])
        except IndexError:
            pass

        sys.stderr.write("Reading %s from %s to %s\n" % (device, start, end))
        sys.stderr.flush()

        output = sys.stdout

        recover(drive, start, end, output)

if __name__ == "__main__":
    # Run as a script: hand everything after the program name to main().
    cli_args = sys.argv[1:]
    main(cli_args)
