Skip to content

Instantly share code, notes, and snippets.

@HowardOhMyGod
Created September 23, 2018 05:50
Show Gist options
  • Select an option

  • Save HowardOhMyGod/6c8b14b0d977c5258b671723d1beb1e3 to your computer and use it in GitHub Desktop.

Select an option

Save HowardOhMyGod/6c8b14b0d977c5258b671723d1beb1e3 to your computer and use it in GitHub Desktop.
Run RasMMA in multiple processes for a range of families.
import os
import sys
from multiprocessing import Pool, Manager
# Inclusive [low, high] bounds on the numeric prefix of the families to process.
extract_family_range = [1, 15]
# Folder whose entries are expected to be named "<number>.<family name>".
family_folder_path = '/home/master/r07725027/dataset/aries_v2_simplified_15up'
# Root under which each family's output path ("<root>/<tag>/") is placed.
output_path_root = "/home/master/r07725027/dataset/rasMMA-output"
# Pickle directory of the family being extracted; rebound by extract().
pickle_dir = ''
# Names of all entries in the family folder, listed once when the module loads.
families = os.listdir(family_folder_path)
# return family directory within range
def get_family_names(names=None, family_range=None):
    """Return the family directory names whose numeric prefix is in range.

    Entries are expected to look like ``"<number>.<family name>"``. Entries
    that do not match that shape (no dot, or a non-numeric prefix such as a
    hidden file) are skipped instead of aborting the whole run.

    Args:
        names: Directory entries to filter. Defaults to the module-level
            ``families`` list.
        family_range: Two-item sequence ``(low, high)`` of inclusive bounds.
            Defaults to the module-level ``extract_family_range``.

    Returns:
        The matching entries, unchanged and in their original order.
    """
    if names is None:
        names = families
    if family_range is None:
        family_range = extract_family_range
    low, high = family_range[0], family_range[1]

    family_names = []
    for family in names:
        # Split on the first dot only, so a family name that itself contains
        # dots no longer breaks the unpacking.
        prefix, separator, _ = family.partition('.')
        if not separator:
            continue
        try:
            family_num = int(prefix)
        except ValueError:
            # Not a numbered family directory (e.g. ".DS_Store").
            continue
        if low <= family_num <= high:
            family_names.append(family)
    return family_names
# extract function for multiprocessing
def extract(family_name, error_messages):
    """Process a single family directory; intended as a pool worker task.

    Builds the input/output paths for ``family_name``, rebinds the
    module-level ``pickle_dir`` and calls ``main``. Any exception raised by
    ``main`` is printed and pushed onto ``error_messages`` instead of
    propagating, so one failing family does not abort the other tasks.

    NOTE(review): ``main`` and ``manualThresholdNumber`` are not defined in
    this part of the file -- presumably they come from RasMMA; confirm.

    Args:
        family_name: Directory entry of the form ``"<number>.<name>"``.
        error_messages: Queue-like object with a ``put`` method that collects
            one message string per failed family.
    """
    global pickle_dir

    data_directory = f'{family_folder_path}/{family_name}/'
    # Keep everything after the first dot so names containing dots stay whole.
    tag = family_name.split('.', 1)[1] + "_0.8"  # used for naming pickle
    output_path = f'{output_path_root}/{tag}/'
    # NOTE(review): presumably read by main() -- confirm against its source.
    pickle_dir = f'{output_path}pickle/'

    try:
        main(data_directory, tag, output_path, manualThresholdNumber)
    except Exception as e:
        # Deliberately broad: this is the worker boundary, and the failure is
        # reported through the shared queue rather than swallowed.
        error_name = type(e).__name__
        error = str(e)
        print('Error: ' + family_name, error_name, ': ', error)
        error_messages.put(f'{tag} -> {error_name}: {error}')
def main_extract(processes=15):
    """Run ``extract`` for every selected family in a process pool.

    Each family returned by ``get_family_names()`` becomes one task. Errors
    reported by the workers are collected in a manager-backed queue and
    printed once all tasks have finished.

    Args:
        processes: Number of worker processes in the pool (default 15, the
            previously hard-coded value).
    """
    # The manager hosts the queue shared between processes; the ``with``
    # block shuts its server process down once the messages are printed.
    with Manager() as manager:
        error_messages = manager.Queue()
        tasks = [(family, error_messages) for family in get_family_names()]

        # use multiprocess
        with Pool(processes=processes) as pool:
            pool.starmap(extract, tasks)

        print('\n----- Error Messages -----')
        while not error_messages.empty():
            print(error_messages.get())
# Guard the entry point: under the "spawn" start method each worker re-imports
# this module, and an unguarded call would start a new pool in every worker.
if __name__ == '__main__':
    main_extract()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment