Skip to content

Instantly share code, notes, and snippets.

@f6v
Created June 7, 2020 21:06
Show Gist options
  • Select an option

  • Save f6v/71dabc50f397b21e2f34de5f1d19aaf3 to your computer and use it in GitHub Desktop.

Select an option

Save f6v/71dabc50f397b21e2f34de5f1d19aaf3 to your computer and use it in GitHub Desktop.

Revisions

  1. f6v created this gist Jun 7, 2020.
    37 changes: 37 additions & 0 deletions overlap_greedy.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,37 @@
    import itertools
    from Bio import SeqIO

    def overlap(read_a, read_b, min_length):
    start = 0
    while True:
    start = read_a.find(read_b[:min_length], start)
    if start == -1:
    return 0
    if read_b.startswith(read_a[start:]):
    return len(read_a) - start
    start += 1

    def pick_maximal_overlap(reads, min_overlap):
    read_a, read_b = None, None
    best_olen = 0
    for a, b in itertools.permutations(reads, 2):
    olen = overlap(a, b, min_overlap)
    if olen > best_olen:
    read_a, read_b = a, b
    best_olen = olen

    return read_a, read_b, best_olen

    def shortestSuperstring(file_name):
    sequences = SeqIO.parse(file_name, 'fasta')
    reads = list(map(lambda seq_rec: str(seq_rec.seq), sequences))
    min_overlap = len(reads[0]) // 2 + 1

    read_a, read_b, overlap_len = pick_maximal_overlap(reads, min_overlap)
    while overlap_len > 0:
    reads.remove(read_a)
    reads.remove(read_b)
    reads.append(read_a + read_b[overlap_len:])
    read_a, read_b, overlap_len = pick_maximal_overlap(reads, min_overlap)

    return ''.join(reads)