#!/usr/bin/python
"""salvage_data

This is a tool for recovering data from a partially failing drive.

It will start at the beginning of the device, and start reading sequentially
until it hits a problem, and will then split the remaining work to avoid bad
sections as long as possible.
"""

import collections
import io
import sys


class WorkQueue(object):
    """Queue for the extents left to attempt to read.

    Extents are ``(start, end)`` offset pairs with ``end`` exclusive.  The
    next extent to attempt is always taken from the right-hand end of the
    queue.
    """

    def __init__(self, start, end):
        # A deque gives O(1) insertion at both ends; push() uses both.
        self.queue = collections.deque([(start, end)])

    def is_empty(self):
        """Lets us know when we are done"""
        return not self.queue

    def pop(self):
        """Returns a (start, end) tuple of the next section to attempt"""
        return self.queue.pop()

    def push(self, start, end, split=False):
        """Adds the start, end section to the queue, splitting if told to.

        Without ``split`` the extent becomes the next one popped.  With
        ``split`` the extent is halved: the upper half becomes the next one
        popped, while the lower half (adjacent to whatever made the caller
        ask for a split) goes to the far end and is attempted last.
        Empty extents (``start >= end``) are ignored.
        """
        if start >= end:
            # nothing to queue
            return
        if not split:
            self.queue.append((start, end))
            return
        # Floor division keeps offsets integral; true division would turn
        # them into floats on Python 3 and break seek().
        middle = (start + end) // 2
        if start < middle:
            self.queue.appendleft((start, middle))
        if middle < end:
            self.queue.append((middle, end))


def _write_record(output, header, payload=None):
    """Write one log record to output and flush it immediately.

    ``header`` is the text line introducing the record.  ``payload``, when
    given, is the data that was read; it is followed by a newline.  Byte
    oriented sinks receive the header ASCII-encoded and the payload
    untouched; any other sink gets plain string formatting.
    """
    if isinstance(output, (io.RawIOBase, io.BufferedIOBase)):
        record = header.encode("ascii")
        if payload is not None:
            record += payload + b"\n"
    else:
        record = header
        if payload is not None:
            record = "%s%s\n" % (header, payload)
    output.write(record)
    output.flush()


def recover(drive, start, end, output, extent_size=1024 * 1024):
    """Recovers data from the provided file object within the given start
    and end, and writes the log to the output file object.

    The log consists of three record types:

    * ``D <offset> <length>`` followed by the data read and a newline
    * ``E <offset>`` when seeking to or reading at the offset raised IOError
    * ``S <offset>`` when the read returned no data

    drive       -- seekable file object to read from
    start, end  -- offsets bounding the range to recover (end exclusive)
    output      -- file object receiving the log
    extent_size -- largest number of bytes requested in a single read
    """
    # queue of data ranges to try to recover
    queue = WorkQueue(start, end)
    while not queue.is_empty():
        start, end = queue.pop()
        chunk = min(end - start, extent_size)
        try:
            drive.seek(start)
            data = drive.read(chunk)
        except IOError:
            # error reading at this offset: skip one byte and jump ahead
            _write_record(output, "E %s\n" % start)
            queue.push(start + 1, end, True)
            continue
        if not data:
            # nothing came back at this offset; treated like an error
            _write_record(output, "S %s\n" % start)
            queue.push(start + 1, end, True)
            continue
        _write_record(output, "D %s %s\n" % (start, len(data)), data)
        # if we had a short read, probably an error at the end of this, so
        # split the extent
        queue.push(start + len(data), end, len(data) < chunk)


# NOTE(review): the first line of this message (everything before
# " [start [end]]") and the exact argument check in main() were lost from the
# reviewed source and have been reconstructed -- confirm against the
# original wording.
_USAGE = (
    "Usage: salvage_data <device> [start [end]]\n"
    "Reads from the specified input file (device)\n"
    "Optionally starts at the specified start offset and ends at the "
    "specified end offset.\n"
    "If not specified, start defaults to 0, and end defaults to the "
    "end of the device.\n"
)


def main(args):
    """Command line entry point.

    args -- argument list without the program name:
            ``device [start [end]]``

    Prints the usage text to stderr and exits with status 1 when no device
    is given or an offset is not an integer.  Otherwise the recovery log is
    written to standard output.
    """
    if not args:
        sys.stderr.write(_USAGE)
        sys.exit(1)

    device = args[0]

    # see if the user has specified a range to recover
    try:
        start = int(args[1]) if len(args) > 1 else 0
        end = int(args[2]) if len(args) > 2 else None
    except ValueError:
        sys.stderr.write(_USAGE)
        sys.exit(1)

    # Binary mode: the device content is raw data, not text.
    with open(device, 'rb') as drive:
        if end is None:
            # determine device size (whence=2 seeks relative to the end)
            drive.seek(0, 2)
            end = drive.tell()

        sys.stderr.write("Reading %s from %s to %s\n" % (device, start, end))
        sys.stderr.flush()

        # Prefer the byte-oriented stream underneath stdout when there is
        # one, so recovered data is emitted unmodified.
        output = getattr(sys.stdout, "buffer", sys.stdout)
        recover(drive, start, end, output)


if __name__ == '__main__':
    main(sys.argv[1:])