|
|
@@ -0,0 +1,259 @@ |
|
|
#!/usr/bin/python |
|
|
|
|
|
# Copyright 2007 by Tobia Conforto <tobia.conforto@gmail.com> |
|
|
# |
|
|
# This program is free software; you can redistribute it and/or modify it under the terms of the GNU General |
|
|
# Public License as published by the Free Software Foundation; either version 2 of the License, or (at your |
|
|
# option) any later version. |
|
|
# |
|
|
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the |
|
|
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
|
# for more details. |
|
|
# |
|
|
# You should have received a copy of the GNU General Public License along with this program. |
|
|
# If not, see http://www.gnu.org/licenses/ |
|
|
|
|
|
# Versions: 0.1 2007-08-13 Initial release |
|
|
# 0.2 2008-05-12 Small fixes for Zen Xtra models |
|
|
# 0.3 2009-02-23 Zen Vision M compatible version (Leho Kraav <leho@kraav.com>) |
|
|
|
|
|
from __future__ import division |
|
|
import sys, os, codecs, array, time, operator, getopt |
|
|
import LRU |
|
|
|
|
|
class CFS:
    '''Read-only, cluster-addressed view of a CFS filesystem image.

    Indexing an instance (cfs[n] or cfs[a:b]) returns the raw contents of the
    requested cluster(s) as a byte string. Clusters read this way are kept
    in an LRU cache so that repeated inode/pointer lookups stay cheap.'''

    # 32k cluster on visionm30
    clusterSize = 0x8000
    cacheMem = 10 * 2**20 # keep 10MB of recently read clusters in ram

    def __init__(self, filename, offset = 0):
        '''Filename and optional offset where the CFS filesystem begins
        (offset of cluster -1, the one filled with 0xff)'''
        # The image is raw binary data: open it in binary mode so that no
        # newline translation can corrupt it on platforms that distinguish
        # text and binary files.
        self.image = open(filename, 'rb')
        self.offset = offset
        # The cache capacity is expressed in clusters, not bytes.
        self.clusterCache = LRU.LRU(self.cacheMem // self.clusterSize)

    def _seek_cluster(self, cluster):
        '''Position the image at the first byte of the given cluster.'''
        # self.offset points at cluster -1, hence the "+ 1".
        self.image.seek(self.offset + (cluster + 1) * self.clusterSize)

    def __getitem__(self, key):
        '''Get the nth CFS cluster from the image and cache it for later usage.
        Accepts simple slices of clusters (the step is ignored and an omitted
        start means cluster 0), but doesn't process negative indices.
        In any case it returns the requested data as a byte string.'''
        if isinstance(key, slice):
            cstart, cstop = key.start, key.stop
            if cstart is None:
                cstart = 0
        else:
            cstart, cstop = key, key + 1
        # Collect the pieces and join once instead of growing a string.
        chunks = []
        for cluster in range(cstart, cstop):
            if cluster not in self.clusterCache:
                self._seek_cluster(cluster)
                self.clusterCache[cluster] = self.image.read(self.clusterSize)
            chunks.append(self.clusterCache[cluster])
        return ''.join(chunks)

    def get_byteswapped_data(self, cluster):
        '''Get the nth CFS cluster from the image, without caching it.
        The data is returned as an array object of 'H' (unsigned short) items.
        Despite the method name, the bytes are currently returned in image
        order: the byte swap is disabled (see the comment in the body).
        This method is designed for bulk file retrieving.'''
        a = array.array('H')
        self._seek_cluster(cluster)
        # fromfile() raises EOFError if the image ends before a whole cluster
        # could be read.
        a.fromfile(self.image, self.clusterSize // 2)
        # visionm 30 doesnt need byte swapping
        # a.byteswap()
        return a

    def inode(self, cluster):
        '''Parse the inode stored in the given cluster and return it as a CFSInode.'''
        return CFSInode(self, cluster)
|
|
|
|
|
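# Earlier decoder, kept for reference: it reads the four bytes in PDP
# (middle-endian) order, whereas the active pdp_uint32 below reads them in
# plain little-endian order.
#def pdp_uint32(data, offset = 0):
#    o2, o1, o4, o3 = map(ord, data[offset : offset + 4])
#    return (o1 << 24) | (o2 << 16) | (o3 << 8) | o4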
|
|
|
|
|
def pdp_uint32(data, offset = 0):
    '''Decode the unsigned 32-bit integer stored at data[offset : offset + 4].

    Despite the "pdp" in the name, the four bytes are read in plain
    little-endian order (least significant byte first). data may be any
    object that supports slicing and yields one-character strings.
    Raises ValueError if fewer than four bytes are available.'''
    b0, b1, b2, b3 = [ord(ch) for ch in data[offset : offset + 4]]
    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)
|
|
|
|
|
def pdp_uint16(data, offset = 0):
    '''Decode the unsigned 16-bit integer stored at data[offset : offset + 2].

    The low byte comes first (little-endian).
    Raises ValueError if fewer than two bytes are available.'''
    low, high = [ord(ch) for ch in data[offset : offset + 2]]
    return low | (high << 8)
|
|
|
|
|
def ucs2string(data, offset, length): # length in bytes
    '''Decode `length` bytes of data, starting at `offset`, as UTF-16-LE text.

    Returns the decoded unicode string. A trailing incomplete code unit is
    left undecoded rather than reported as an error.'''
    raw = data[offset : offset + length]
    text, _consumed = codecs.utf_16_le_decode(raw)
    return text
|
|
|
|
|
def pdp_getbit(bitmap, bit_no):
    '''Return bit number bit_no (0 or 1) of a bitmap given as a byte string.

    The bitmap is read as consecutive 32-bit words decoded by pdp_uint32;
    within each word, bit 0 is the least significant bit.'''
    word_index, bit_index = divmod(bit_no, 32)
    word = pdp_uint32(bitmap, word_index * 4)
    return (word >> bit_index) & 1
|
|
|
|
|
class CFSInode:
    '''A parsed CFS inode: metadata, list of data clusters and, for
    directories, the directory entries.

    Indexing an instance (inode[n] or inode[a:b]) returns bytes of the
    contents stored in the inode's data clusters.

    Attributes set by the constructor:
      cluster, cfs   where the inode was read from
      serial         32-bit value stored at offset 0x78 of the inode
      metadata       dict mapping 2-character tags to raw value bytes
      filename, filesize, path   taken from metadata when present,
                     otherwise the class-level defaults below
      dataclusters   flat list of cluster numbers holding the contents
      direntries     list of CFSDirEntry; only present when the inode has
                     no metadata, which is how directories are recognized'''

    # Defaults used when the inode carries no corresponding metadata tag.
    filename = '(no filename)'
    filesize = 0
    path = []

    # Value of a cluster pointer slot that is not in use.
    _UNUSED = 0xFFFFFFFF
    # Directory contents are organized in blocks of this many bytes.
    _DIR_BLOCK_SIZE = 0x10000

    def __init__(self, cfs, cluster):
        '''Read and parse the inode stored in the given cluster of cfs.

        Raises AssertionError when the on-disk structure does not look as
        expected (self-reference, metadata record type, entry count).'''
        self.cluster = cluster
        self.cfs = cfs
        # Per-instance list, so the class-level default is never shared.
        self.path = []
        inode = cfs[cluster]
        # reading misc flags and values
        print("pdp_uint: %x" % pdp_uint32(inode[4:8]))
        print("cluster: %x" % cluster)
        assert pdp_uint32(inode[4:8]) == cluster # self-reference
        self.serial = pdp_uint32(inode, 0x78)
        self._read_metadata(inode)
        self._collect_dataclusters(inode)
        # reading directory entries
        if not self.metadata: # any better way of telling dirs and files apart?
            self._read_direntries()

    def _read_metadata(self, inode):
        '''Parse the metadata records that start at offset 0x80 of the inode.'''
        count_metadata = pdp_uint32(inode, 0x7c)
        offset = 0x80
        self.metadata = {}
        for _ in range(count_metadata):
            # Record layout: uint16 type (always 3), uint16 value length,
            # 2-character tag, 2 bytes that are not interpreted here, then
            # the value itself.
            assert pdp_uint16(inode, offset) == 3
            length = pdp_uint16(inode, offset + 2)
            tag = ucs2string(inode, offset + 4, 4)
            value_at = offset + 10
            self.metadata[tag] = inode[value_at : value_at + length]
            # byte reordering issue, 07 -> 70, 0= -> =0, 0> -> >0
            # but we cannot figure out where to get path info, tag '51' doesnt work
            if tag == '70':
                # length - 2 drops the last two bytes of the value
                # (presumably a terminating NUL character)
                self.filename = ucs2string(inode, value_at, length - 2)
            elif tag == '51':
                self.path = ucs2string(inode, value_at, length - 2).strip('\\').split('\\')
            elif tag == '>0':
                self.filesize = pdp_uint32(inode, value_at)
            offset = value_at + length

    def _read_pointers(self, cluster):
        '''Return the cluster numbers listed in a pointer cluster, up to the
        first unused slot.'''
        # Fetch the cluster once instead of once per 4-byte pointer.
        table = self.cfs[cluster]
        pointers = []
        for off in range(0, self.cfs.clusterSize, 4):
            c = pdp_uint32(table, off)
            if c == self._UNUSED:
                break
            pointers.append(c)
        return pointers

    def _collect_dataclusters(self, inode):
        '''Build self.dataclusters, the flat list of data clusters.'''
        # collecting flat list of data clusters
        self.dataclusters = []
        # Direct pointers: twelve slots at 0x20..0x4c, unused ones are skipped.
        for off in range(0x20, 0x4c + 1, 4):
            c = pdp_uint32(inode, off)
            if c != self._UNUSED:
                self.dataclusters.append(c)
        pointerclusters = []
        # Second class chain: one cluster full of data cluster pointers.
        second_class_chain = pdp_uint32(inode, 0x58)
        if second_class_chain != self._UNUSED:
            pointerclusters.append(second_class_chain)
        # Third class chain: one cluster full of pointers to pointer clusters.
        third_class_chain = pdp_uint32(inode, 0x64)
        if third_class_chain != self._UNUSED:
            pointerclusters.extend(self._read_pointers(third_class_chain))
        for pnt in pointerclusters:
            self.dataclusters.extend(self._read_pointers(pnt))

    def _read_direntries(self):
        '''Build self.direntries from the directory blocks in the contents.'''
        block_size = self._DIR_BLOCK_SIZE
        # With 32k clusters a directory block spans two clusters.
        clusters_per_block = block_size // self.cfs.clusterSize
        count_direntries = pdp_uint32(self, 8)
        self.direntries = []
        assert len(self.dataclusters) % clusters_per_block == 0
        for block_no in range(len(self.dataclusters) // clusters_per_block):
            block = self[block_no * block_size : (block_no + 1) * block_size]
            # 204 bytes of allocation bitmap = 1632 slots of 40 bytes each,
            # the slots themselves starting at byte 220 of the block.
            bitmap = block[16 : 16 + 204]
            for n in range(1632):
                if pdp_getbit(bitmap, n):
                    off = 220 + n * 40
                    self.direntries.append(CFSDirEntry(self.cfs, block[off : off + 40]))
        assert len(self.direntries) == count_direntries

    def __getitem__(self, key):
        '''Returns the given byte (or byte slice) from the file contents.

        The slice step is ignored and negative indices are not processed.
        An omitted start means 0 and an omitted stop means the end of the
        last data cluster.'''
        cs = self.cfs.clusterSize
        if isinstance(key, slice):
            bstart, bstop = key.start, key.stop
            if bstart is None:
                bstart = 0
            if bstop is None:
                bstop = len(self.dataclusters) * cs
        else:
            bstart, bstop = key, key + 1
        # Range of data clusters that cover the requested bytes.
        cstart = bstart // cs
        cstop = (bstop - 1) // cs + 1
        data = ''.join([ self.cfs[x] for x in self.dataclusters[cstart : cstop] ])
        base = cs * cstart
        return data[bstart - base : bstop - base]
|
|
|
|
|
class CFSDirEntry:
    '''One 40-byte directory slot: inode cluster, filename length and short name.'''

    def __init__(self, cfs, entrydata):
        '''Decode the raw bytes of a directory entry.

        cfs is accepted but not used here. Sets:
          cluster       cluster no. of the inode the entry points to
          len_filename  length of the full filename, in characters
          shortname     first 15 chars of the filename'''
        self.cluster = pdp_uint32(entrydata, 0)
        self.len_filename = pdp_uint16(entrydata, 4)
        # Two bytes per character; at most 30 bytes of the name are decoded.
        name_bytes = self.len_filename * 2
        if name_bytes > 30:
            name_bytes = 30
        self.shortname = ucs2string(entrydata, 8, name_bytes)
|
|
|
|
|
if __name__ == '__main__':
    # Command-line driver: locate the root inode of the filesystem, look up
    # the requested section in the root directory and copy every file listed
    # in that section into OUTPUT_DIR.

    # commandline arguments
    optlist, args = getopt.gnu_getopt(sys.argv[1:], 'o:')
    opts = dict(optlist)
    offset = int(opts.get('-o', 20 * 2**20))

    if len(args) != 3:
        print('Usage: zenrecover.py [-o OFFSET] DISK_OR_IMAGE SECTION OUTPUT_DIR')
        print('DISK_OR_IMAGE is the disk containing the filesystem, or an image thereof')
        print('OFFSET is the offset at which the filesystem starts (in bytes, default 20M)')
        print('SECTION is the section of the filesystem to recover: "archives" or "songs"')
        print('OUTPUT_DIR is the directory in which to place the recovered files')
        sys.exit(1)

    cfs = CFS(args[0], offset)
    section = args[1]
    outdir = args[2]

    # find the root inode
    rootinode = None
    for c in range(4, 0x10000):
        signature = cfs[c][:4]
        if len(signature) < 4:
            # Ran past the end of the image: nothing more to scan.
            break
        if pdp_uint32(signature) == 0x3bbe0ad9:
            print("Found inode at cluster 0x%x" % c)
            candidate = cfs.inode(c)
            if candidate.serial != 0xFFFFFFFF:
                print("Found inode at cluster 0x%x, but serial number is not -1" % c)
                continue
            rootinode = candidate
            break
    if rootinode is None:
        # String exceptions are rejected by Python 2.6+, raise a real one.
        raise RuntimeError("Could not find the root inode")

    # find the root directories
    root = {}
    for entry in rootinode.direntries:
        root[entry.shortname] = entry.cluster

    print(root)

    if section not in root:
        sys.exit('Section %r not found in the root directory, available sections: %r'
                 % (section, sorted(root)))

    # begin recovery
    dirinode = cfs.inode(root[section])
    # Fails if OUTPUT_DIR already exists.
    os.makedirs(outdir)
    lastfiles = [(1,1)] # timing of latest few files recovered (size in bytes, time in secs)
    t = len(dirinode.direntries)
    for i, entry in enumerate(dirinode.direntries):
        if entry.shortname == '.':
            continue
        t0 = time.time()
        inode = cfs.inode(entry.cluster)

        # dump the metadata of the file
        print("")
        m = inode.metadata
        for tag in m:
            if len(m[tag]) == 4:
                print("%r %s" % (tag, pdp_uint32(m[tag])))
            else:
                # show every other byte (the low byte of each 16-bit unit)
                print("%r %r" % (tag, m[tag][::2]))

        # progress line: percentage, average speed over the latest files,
        # name and size of the current file; written without a newline
        total_bytes, total_secs = map(sum, zip(*lastfiles))
        sys.stdout.write('\r%d%% %.1fMB/s "%s" (%.1fMB)\033[K' % (
            i * 100 // t,
            operator.truediv(total_bytes, total_secs) / 2**20,
            inode.filename[:50],
            inode.filesize / 2**20))
        sys.stdout.flush()

        path = os.path.join(outdir, *inode.path)
        try:
            os.makedirs(path)
        except OSError:
            # An already existing directory is fine, anything else is not.
            if not os.path.isdir(path):
                raise

        # The recovered data is binary: write it in binary mode, and close
        # the file even if reading a cluster fails.
        f = open(os.path.join(path, inode.filename), 'wb')
        try:
            remaining = inode.filesize
            for c in inode.dataclusters:
                if remaining <= 0:
                    break
                if remaining >= cfs.clusterSize:
                    cfs.get_byteswapped_data(c).tofile(f)
                else:
                    # last, partially filled cluster
                    f.write(cfs.get_byteswapped_data(c).tostring()[:remaining])
                remaining -= min(cfs.clusterSize, remaining)
        finally:
            f.close()
        # fewer data clusters than the file size calls for
        assert remaining == 0

        if len(lastfiles) >= 32: #transfer speed is calculated on latest 32 files
            lastfiles.pop(0)
        lastfiles.append((inode.filesize, time.time() - t0))
    print('\rDone.\033[K')