Skip to content

Instantly share code, notes, and snippets.

@sinamajidian
Last active April 15, 2026 09:14
Show Gist options
  • Select an option

  • Save sinamajidian/aba4749c1873ad808375050300166c3a to your computer and use it in GitHub Desktop.

Select an option

Save sinamajidian/aba4749c1873ad808375050300166c3a to your computer and use it in GitHub Desktop.

Revisions

  1. sinamajidian renamed this gist Apr 15, 2026. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  2. sinamajidian revised this gist Apr 15, 2026. 1 changed file with 6 additions and 1 deletion.
    7 changes: 6 additions & 1 deletion run_EdgeHOG.md
    Original file line number Diff line number Diff line change
    @@ -1 +1,6 @@
    How to run EdgeHOG https://github.com/DessimozLab/edgehog in a Jupyter notebook!


    ## How to run EdgeHOG in a Jupyter notebook!

    https://github.com/DessimozLab/edgehog

  3. sinamajidian renamed this gist Apr 15, 2026. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  4. sinamajidian revised this gist Apr 15, 2026. 1 changed file with 297 additions and 0 deletions.
    297 changes: 297 additions & 0 deletions run_edgehog.ipynb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,297 @@
    {
    "cells": [
    {
    "cell_type": "code",
    "execution_count": 1,
    "id": "f29813af-81ed-4ed1-bc9c-49c7757714e5",
    "metadata": {},
    "outputs": [
    {
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "Hello!\n"
    ]
    }
    ],
    "source": [
    "print('Hello!')"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 2,
    "id": "fcd8eebc-d467-4006-a6d9-ee78ded8e66c",
    "metadata": {},
    "outputs": [],
    "source": [
    "import edgehog\n",
    "from edgehog.process_hogs import map_hogs_onto_tree, get_hogxml_entries\n",
    "from edgehog.characterize_species_tree import characterize_tree\n",
    "from edgehog.init_extant_synteny_graphs import init_extant_graphs, init_extant_graphs_from_hdf5\n",
    "from edgehog.infer_ancestral_synteny_graphs import leaves_to_root_synteny_propagation, root_to_leaves_edge_trimming, linearize_graphs\n",
    "from edgehog.write_output import write_output, write_as_hdf5\n",
    "from edgehog.check_args import check_args\n",
    "#from .add_ons import date_edges, phylostratify"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 3,
    "id": "28ad008e-b4b4-4804-92c6-143ea44ea48e",
    "metadata": {},
    "outputs": [
    {
    "data": {
    "text/plain": [
    "_StoreAction(option_strings=['--out-format'], dest='out_format', nargs=None, const=None, default='TSV', type=None, choices=('TSV', 'HDF5'), required=False, help='define output format. Can be TSV (tab seperated files) or HDF5 (compatible for integration into oma hdf5)', metavar=None)"
    ]
    },
    "execution_count": 3,
    "metadata": {},
    "output_type": "execute_result"
    }
    ],
    "source": [
    "import argparse\n",
    "arg_parser = argparse.ArgumentParser(description='edgehog is a software tool that infers an ancestral synteny graphat each internal node of an input species phylogenetic tree')\n",
    "#arg_parser.add_argument('--version', action='version', help='print version number and exit', version=version)\n",
    "arg_parser.add_argument('--output_directory', default='./edgehog_output', type=str, help='path to output directory (default is ./edgehog_output)')\n",
    "arg_parser.add_argument('--species_tree', type=str, required=True, help='path to species/genomes phylogenetic tree (newick format)')\n",
    "arg_parser.add_argument('--hogs', type=str, required=True, help='path to the HierarchicalGroups.orthoxml file in which HOGs are stored')\n",
    "arg_parser.add_argument('--gff_directory', type=str, help='path to directory with the gffs of extant genomes '\n",
    " '(each gff file must be named according to the name of an extant genome / leaf on the species tree)')\n",
    "arg_parser.add_argument('--hdf5', type=str, help='path to the hdf5 file (alternative to gff_directory to run edgeHOG on the entire OMA database)')\n",
    "arg_parser.add_argument('--orient_edges', action='store_true', help='whether the transcriptional orientation of edges should be predicted') \n",
    "arg_parser.add_argument('--date_edges', action='store_true', help='whether the age of edges should be predicted')\n",
    "arg_parser.add_argument('--phylostratify', action='store_true', help='whether the number of edge retention, gain and loss should be analyzed for each node of the species tree')\n",
    "arg_parser.add_argument('--max_gaps', type=int, help='max_gaps can be seen as the theoritical maximal number of consecutive novel genes that can emerge between two older genes (default = 3), '\n",
    " 'e.g. if max_gaps = 2: the probabilistic A-b-c-D-E-f-g-h-I-J graph will be turn into A-D-E ; I-J in the ancestor'\n",
    " 'while if max_gaps = 3: the probabilistic A-b-c-D-E-f-g-h-I-J graph will be turn into A-D-E-I-J in the ancestor', default=3)\n",
    "arg_parser.add_argument('--include_extant_genes', action='store_true', help='whether to use a concatenation of all descending extant genes to describe an ancestral gene in the output files')\n",
    "arg_parser.add_argument(\"--out-format\", choices=(\"TSV\", \"HDF5\"), default=\"TSV\",\n",
    " help=\"define output format. Can be TSV (tab seperated files) or HDF5 (compatible for integration into oma hdf5)\")\n",
    "#args = arg_parser.parse_args()\n",
    "\n",
    "\n"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 4,
    "id": "2e18812b-1e62-4089-ab11-f5feadabcee2",
    "metadata": {},
    "outputs": [
    {
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "###################################\n",
    "Running edgehog 0.1.10\n",
    "###################################\n",
    "Checking arguments ...\n",
    "output directory: '/work/FAC/FBM/DBC/cdessim2/default/smajidi1/qfo/proteome2025/run_test_edgehog/edgehog_run/out'\n",
    "species tree: 'species_tree_checked.nwk'\n",
    "HOGs.orthoxml: 'FastOMA_HOGs.orthoxml'\n",
    "gff directory: 'gff_dir'\n",
    "###################################\n",
    "Initializing synteny graphs of extant genomes ...\n",
    "processing extant genome 1/4: MOUSE\n",
    "processing extant genome 2/4: PANTR\n",
    "processing extant genome 3/4: HUMAN\n",
    "processing extant genome 4/4: GORGO\n"
    ]
    },
    {
    "data": {
    "text/plain": [
    "<pyham.ham.Ham at 0x7f9c96e9e970>"
    ]
    },
    "execution_count": 4,
    "metadata": {},
    "output_type": "execute_result"
    }
    ],
    "source": [
    "arguments=\" --species_tree species_tree_checked.nwk --hogs FastOMA_HOGs.orthoxml --output_directory out --gff_directory gff_dir \"\n",
    "args = arg_parser.parse_args(arguments.split())\n",
    "\n",
    "out_dir = check_args(args)\n",
    "ham = map_hogs_onto_tree(args.hogs, args.species_tree, args.hdf5)\n",
    "hogxml_entries, protein_id_to_hogxml_entry = get_hogxml_entries(ham)\n",
    "characterize_tree(ham.taxonomy.tree)\n",
    "init_extant_graphs(1, ham, args.hogs, args.gff_directory, hogxml_entries, protein_id_to_hogxml_entry, args.orient_edges)\n",
    "# if args.hdf5:\n",
    "# init_extant_graphs_from_hdf5(ham, args.hdf5, args.orient_edges)\n"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 5,
    "id": "74602e6e-62fa-4b9e-8854-4626699e5db3",
    "metadata": {},
    "outputs": [
    {
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "###################################\n",
    "Bottom-up_phase: propagate count of adjacencies observed between two genes from children nodes to their parent node on the species tree (as long as the parent node has the two ancestral genes)\n",
    "processing ancestral genome 1/2: Homininae\n",
    "processing ancestral genome 2/2: Theria\n",
    "###################################\n",
    "Top-down phase: prune any adjacency propagated before the last ancestor in which this adjacency is inferred to have emerged \n",
    "processing ancestral genome 1/2: Theria\n",
    "processing ancestral genome 2/2: Homininae\n",
    "###################################\n",
    "Linearization: infer the most likely ancestral genome ...\n",
    "processing ancestral genome 1/2: Theria\n",
    "processing ancestral genome 2/2: Homininae\n",
    "###################################\n",
    "Writing output files ...\n",
    "/work/FAC/FBM/DBC/cdessim2/default/smajidi1/qfo/proteome2025/run_test_edgehog/edgehog_run/out\n"
    ]
    }
    ],
    "source": [
    "leaves_to_root_synteny_propagation(ham, args.max_gaps, args.orient_edges)\n",
    "root_to_leaves_edge_trimming(ham, args.orient_edges)\n",
    "# if args.date_edges:\n",
    "# date_edges(ham)\n",
    "linearize_graphs(ham)\n",
    "# if args.phylostratify:\n",
    "# phylostratify(ham)\n",
    "\n",
    "# if args.out_format == \"HDF5\":\n",
    "# write_as_hdf5(args, ham, out_dir)\n",
    "\n",
    "\n",
    "write_output(args, ham, out_dir)\n",
    "print(out_dir)"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "id": "ebb09a44-4609-48ff-850b-a1fb0608e5ee",
    "metadata": {},
    "outputs": [],
    "source": []
    },
    {
    "cell_type": "code",
    "execution_count": 8,
    "id": "d70f3bc8-3e9d-486f-8d37-dc4ffbad6911",
    "metadata": {},
    "outputs": [],
    "source": [
    "import pyham\n",
    "from edgehog.write_output import label_nodes\n",
    "from edgehog.write_output import graph_to_df\n",
    "\n",
    "# Build the edge table of every ancestral genome in memory.\n",
    "# Each table is stored under the name of its species-tree node, so earlier ancestors\n",
    "# are no longer lost when the loop moves on to the next node.\n",
    "# NOTE(review): assumes internal node names are unique; duplicates would overwrite.\n",
    "ancestral_dfs = {}\n",
    "for tree_node in ham.taxonomy.tree.traverse('preorder'):\n",
    "    genome = tree_node.genome\n",
    "    if not isinstance(genome, pyham.AncestralGenome):\n",
    "        continue  # only ancestral genomes are tabulated\n",
    "    label_dict, annotation_dict = label_nodes(tree_node.bottom_up_synteny)\n",
    "    # `df` still ends up holding the table of the last ancestral genome visited.\n",
    "    df = graph_to_df(tree_node.linear_synteny, genome, args.date_edges, args.orient_edges, label_dict,\n",
    "                     annotation_dict, args.include_extant_genes)\n",
    "    ancestral_dfs[tree_node.name] = df"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 10,
    "id": "381eedc0-c579-45a2-915b-b01e77d3b6a5",
    "metadata": {},
    "outputs": [
    {
    "data": {
    "text/html": [
    "<div>\n",
    "<style scoped>\n",
    " .dataframe tbody tr th:only-of-type {\n",
    " vertical-align: middle;\n",
    " }\n",
    "\n",
    " .dataframe tbody tr th {\n",
    " vertical-align: top;\n",
    " }\n",
    "\n",
    " .dataframe thead th {\n",
    " text-align: right;\n",
    " }\n",
    "</style>\n",
    "<table border=\"1\" class=\"dataframe\">\n",
    " <thead>\n",
    " <tr style=\"text-align: right;\">\n",
    " <th></th>\n",
    " <th>gene1</th>\n",
    " <th>gene2</th>\n",
    " <th>weight</th>\n",
    " <th>contiguous_region</th>\n",
    " <th>nb_internal_nodes_from_ancestor_with_updated_weight</th>\n",
    " <th>supporting_children</th>\n",
    " </tr>\n",
    " </thead>\n",
    " <tbody>\n",
    " <tr>\n",
    " <th>0</th>\n",
    " <td>HOG_HOG:0008382_2</td>\n",
    " <td>HOG_HOG:0004218.1a_2</td>\n",
    " <td>2.0</td>\n",
    " <td>0.0</td>\n",
    " <td>0.0</td>\n",
    " <td>GORGO;HUMAN</td>\n",
    " </tr>\n",
    " </tbody>\n",
    "</table>\n",
    "</div>"
    ],
    "text/plain": [
    " gene1 gene2 weight contiguous_region \\\n",
    "0 HOG_HOG:0008382_2 HOG_HOG:0004218.1a_2 2.0 0.0 \n",
    "\n",
    " nb_internal_nodes_from_ancestor_with_updated_weight supporting_children \n",
    "0 0.0 GORGO;HUMAN "
    ]
    },
    "execution_count": 10,
    "metadata": {},
    "output_type": "execute_result"
    }
    ],
    "source": [
    "df.head(1)"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "id": "18e8f709-728b-4247-b0fb-3fa23ef684ec",
    "metadata": {},
    "outputs": [],
    "source": []
    }
    ],
    "metadata": {
    "kernelspec": {
    "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
    },
    "language_info": {
    "codemirror_mode": {
    "name": "ipython",
    "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.9.23"
    }
    },
    "nbformat": 4,
    "nbformat_minor": 5
    }
  5. sinamajidian created this gist Apr 15, 2026.
    1 change: 1 addition & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1 @@
    How to run EdgeHOG https://github.com/DessimozLab/edgehog in a Jupyter notebook!