Created
July 16, 2016 12:43
-
-
Save ed-asriyan/8984ecab4bbc4fe73865c995c7a0807a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from sys import argv | |
| from math import log | |
| import fsuniquesearcher | |
# tabulate is the only third-party dependency; fail early with an install
# hint when it is missing.
try:
    from tabulate import tabulate
except ImportError:
    # Catch ImportError only: a bare ``except`` would also hide unrelated
    # failures (e.g. KeyboardInterrupt or errors raised inside tabulate).
    print("Module tabulate not installed.")
    print("Please install it first. To do this run the following command:")
    print("\tpython -m pip install tabulate")
    # Non-zero status tells the calling shell that start-up failed;
    # SystemExit also works when the site-provided ``exit`` is unavailable.
    raise SystemExit(1)
def size_to_str(size):
    """Format a byte count as a human-readable string.

    The value is scaled by powers of 1024 and printed with three decimals
    followed by the unit: ``B``, ``KB``, ``MB``, ``GB`` or ``TB``.

    Args:
        size: Number of bytes; anything accepted by ``int()``.

    Returns:
        A string such as ``"1.500 KB"``. Values of 1024 TB and above are
        still expressed in TB, since that is the largest known unit.
    """
    prefixes = ("", "K", "M", "G", "T")
    value = float(int(size))
    power = 0
    # Repeated division avoids math.log entirely: log is undefined for 0,
    # and the largest prefix caps the loop so no lookup can fail.
    while abs(value) >= 1024 and power < len(prefixes) - 1:
        value /= 1024
        power += 1
    return "{:.3f} {:}B".format(value, prefixes[power])
def print_table(head, body):
    """Print ``body`` rows as a ``fancy_grid`` table with ``head`` as headers.

    A line containing a newline character is printed afterwards to separate
    consecutive tables.
    """
    rendered = tabulate(body, headers=head, tablefmt="fancy_grid")
    print(rendered)
    print('\n')
if __name__ == '__main__':
    # Imported here because only the script entry point needs it
    # (path normalisation and file removal).
    import os

    if len(argv) < 2:
        print("Usage: python main.py [directiories...]")
        print("Example:\tpython main.py ~/Desktop/ ~/Yandex.Disk/")
        exit()

    # Collect every file below the given directories. Entries are
    # de-duplicated by absolute path so overlapping directories do not list
    # the same file twice. Comparing the file objects themselves would
    # discard files with identical content - the duplicates being looked for.
    files = []
    seen_paths = set()
    for path in argv[1:]:
        for item in fsuniquesearcher.get_fs_items(path):
            key = os.path.abspath(item.get_path())
            if key not in seen_paths:
                seen_paths.add(key)
                files.append(item)

    files_size = sum(file.get_size() for file in files)
    files_count = len(files)

    map_ = fsuniquesearcher.FsUniqueItemsMap(files)
    files_groups = map_.get_file_groups()

    # The first file of every group is kept; the rest are removal candidates.
    files_remove = [file for group in files_groups for file in group[1:]]
    files_remove_size = sum(file.get_size() for file in files_remove)
    files_remove_count = len(files_remove)

    # One row per file, with an empty row after each group as a separator.
    table_samples = []
    for group in files_groups:
        for file in group:
            table_samples.append([file.get_path(), size_to_str(file.get_size()), len(group)])
        table_samples.append(["", "", ""])

    table_remove = [[file.get_path(), size_to_str(file.get_size())] for file in files_remove]
    table_total = [
        ["Files total:", files_count, "Size total:", size_to_str(files_size)],
        ["Remaining files:", files_count - files_remove_count,
         "Remaining size:", size_to_str(files_size - files_remove_size)],
        ["Files to remove:", files_remove_count, "Removal size:", size_to_str(files_remove_size)],
    ]

    print("Groups of identical files:")
    print_table(["File group", "File size", "Group size"], table_samples)
    print("Files which can be removed:")
    print_table(["File", "Size"], table_remove)
    print_table([], table_total)

    # Gate on the number of duplicates rather than their total size, so that
    # zero-byte duplicates still reach the menu.
    if files_remove_count:
        print("Choose an action")
        print("- delete dublicates (del)")
        print("- save groups of equal files (saveeq)")
        print("- save dubliates (savedub)")
        print("- quit (q)")
        ch = input(": ")

        if ch == "del":
            for file in files_remove:
                path = file.get_path()
                try:
                    os.remove(path)
                except OSError as err:
                    # Report the failure and continue with the other files.
                    print("Failed to remove", path, "-", err)
                else:
                    print("Removed", path)
        elif ch == "saveeq":
            # One path per line, with a blank line between groups.
            with open("eq_files.txt", "w") as f:
                for group in files_groups:
                    for file in group:
                        f.write(file.get_path() + "\n")
                    f.write("\n")
        elif ch == "savedub":
            with open("dub_files.txt", "w") as f:
                for file in files_remove:
                    f.write(file.get_path() + "\n")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| from hashlib import md5, sha256 | |
| # --- Exception classes -------------------------------------------- | |
class FsItemNotFoundException(Exception):
    """Raised when an expected file-system item does not exist.

    Args:
        path: Path that was looked up.
        item_type: Optional one-letter code for the kind of item expected.
            Callers in this module pass "f" when a file was expected and
            "d" when a directory was expected; the default "n" mirrors the
            default type code of ``FsItem``.
    """

    def __init__(self, path, item_type="n"):
        # Only the path goes to the base class so str(exc) is just the path.
        super().__init__(path)
        self._path = path
        self._item_type = item_type

    def get_path(self):
        """Return the path that could not be found."""
        return self._path

    def get_type(self):
        """Return the type code of the item that was expected."""
        return self._item_type
class InvalidTypeException(Exception):
    """Signals that a value had a different type than the one required."""

    def __init__(self, real_type, expected_type):
        # Keep both types so the handler can report the mismatch.
        self._real_type, self._expected_type = real_type, expected_type

    def get_real_type(self):
        """Return the type that was actually encountered."""
        return self._real_type

    def get_expected_type(self):
        """Return the type that was required."""
        return self._expected_type
| # --- Hash members ------------------------------------------------ | |
def _md5_sum(fname):
    """Return the hexadecimal MD5 digest of the file at ``fname``.

    The file is read in 4096-byte chunks, so it is never loaded into memory
    as a whole.
    """
    digest = md5()
    with open(fname, "rb") as stream:
        while True:
            block = stream.read(4096)
            if not block:  # empty bytes object marks end of file
                break
            digest.update(block)
    return digest.hexdigest()
def _sha256_sum(fname):
    """Return the hexadecimal SHA-256 digest of the file at ``fname``.

    The file is read in 4096-byte chunks, so it is never loaded into memory
    as a whole.
    """
    digest = sha256()
    with open(fname, "rb") as stream:
        block = stream.read(4096)
        while block:  # an empty read means end of file
            digest.update(block)
            block = stream.read(4096)
    return digest.hexdigest()
| # --- Fs Classes --------------------------------------------------- | |
class FsItem:
    """Base class for a file-system entry identified by its path.

    Args:
        path: Path of the entry.
        type: One-letter type code. It is accepted for interface
            compatibility but is not stored or used by this class.
    """

    def __init__(self, path, type="n"):
        self._path = path

    def get_path(self):
        """Return the path this item was created with."""
        return self._path

    def get_name(self):
        """Return the last component of the path.

        The result is an empty string when the path ends with a separator.
        """
        # Equivalent to os.path.split(path)[1]; the module never imported a
        # bare ``split``, so the name is resolved through ``os.path`` here.
        return os.path.basename(self.get_path())

    def __str__(self):
        return self.get_name()
class FsFile(FsItem):
    """A regular file whose size and content hash are computed on demand.

    Args:
        path: Path of an existing regular file; it is converted with
            ``str()``.

    Raises:
        FsItemNotFoundException: If ``path`` is not an existing regular file.
    """

    def __init__(self, path):
        path = str(path)
        if not os.path.isfile(path):
            # Pass only the path: that is the argument list the exception
            # class is declared with.
            raise FsItemNotFoundException(path)
        FsItem.__init__(self, path)
        self._hash = None
        self._size = None

    def get_hash(self):
        """Return the MD5 and SHA-256 hex digests concatenated (cached)."""
        if self._hash is None:
            self._hash = _md5_sum(self._path) + _sha256_sum(self._path)
        return self._hash

    def get_size(self):
        """Return the file size in bytes (cached after the first call)."""
        # ``is None`` rather than truthiness, so a size of 0 is cached too.
        if self._size is None:
            self._size = os.path.getsize(self._path)
        return self._size

    def __str__(self):
        return FsItem.__str__(self)

    def __eq__(self, obj):
        """Return True when both files have the same size and the same hash."""
        if not isinstance(obj, FsFile):
            return NotImplemented
        # Different sizes cannot have equal content; this cheap check avoids
        # hashing the files in that case.
        if obj.get_size() != self.get_size():
            return False
        return obj.get_hash() == self.get_hash()

    # Defining __eq__ makes instances unhashable in Python 3; stated
    # explicitly so the behaviour is visible.
    __hash__ = None
| # --- Local search members ----------------------------------------- | |
def get_fs_items(dir_path, deep=-1):
    """Collect the regular files found under a directory.

    Args:
        dir_path: Directory to scan; it is converted with ``str()``.
        deep: Number of sub-directory levels to descend into. ``0`` scans
            only ``dir_path`` itself. A negative value never reaches zero,
            so it places no limit on the depth.

    Returns:
        A list of ``FsFile`` objects, one per regular file found.

    Raises:
        FsItemNotFoundException: If ``dir_path`` is not an existing directory.
    """
    dir_path = str(dir_path)
    if not os.path.isdir(dir_path):
        # Pass only the path: that is the argument list the exception class
        # is declared with.
        raise FsItemNotFoundException(dir_path)

    result = []
    for name in os.listdir(dir_path):
        item = os.path.join(dir_path, name)
        if os.path.isfile(item):
            result.append(FsFile(item))
        elif deep != 0 and os.path.isdir(item):
            # Descend only into real directories. Dangling symlinks, sockets
            # and other special entries are skipped instead of aborting the
            # scan with a "not found" error.
            result += get_fs_items(item, deep - 1)
    return result
| # --- Public members ----------------------------------------------- | |
| # --- Analyzing Classes -------------------------------------------- | |
class FsUniqueItemsMap:
    """Groups a list of files into sets that share the same content hash."""

    def __init__(self, files_list):
        # Entries may be FsFile objects or path strings (converted lazily).
        self._files = files_list
        self._group_list = None

    def get_file_groups(self):
        """Return the groups of files that have identical hashes.

        Files are first bucketed by size; only buckets with more than one
        file are hashed. Each returned group is a list of at least two
        ``FsFile`` objects. The result is computed once and then cached.

        Raises:
            InvalidTypeException: If an entry is neither a ``str`` nor an
                ``FsFile``.
        """
        if self._group_list is not None:
            return self._group_list

        # Pass 1: bucket by size, so files with a unique size are not hashed.
        by_size = {}
        for entry in self._files:
            if isinstance(entry, str):
                entry = FsFile(entry)
            if not isinstance(entry, FsFile):
                raise InvalidTypeException(type(entry), FsFile)
            by_size.setdefault(entry.get_size(), []).append(entry)

        # Pass 2: hash only the files whose size is shared with another file.
        by_hash = {}
        for same_size in by_size.values():
            if len(same_size) < 2:
                continue
            for entry in same_size:
                by_hash.setdefault(entry.get_hash(), []).append(entry)

        # Keep only the hashes that occur more than once.
        self._group_list = [group for group in by_hash.values() if len(group) > 1]
        return self._group_list
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment