Skip to content

Instantly share code, notes, and snippets.

@vestalisvirginis
Last active September 1, 2019 11:13
Show Gist options
  • Save vestalisvirginis/131201d629fedd5fc20c969bb2d1b210 to your computer and use it in GitHub Desktop.
Save vestalisvirginis/131201d629fedd5fc20c969bb2d1b210 to your computer and use it in GitHub Desktop.
dna manipulation functions
from itertools import product
from functional import seq
def all_kmers(k):
    '''Return every DNA string of length ``k`` over the alphabet ``ACGT``.

    The strings are the Cartesian product of ``'ACGT'`` with itself ``k``
    times, joined into plain ``str`` values and listed in the order
    ``itertools.product`` yields them (``'AA'``, ``'AC'``, ``'AG'``, ...).

    Args:
        k: Length of each k-mer. ``k == 0`` yields ``['']``.

    Returns:
        A list of ``4 ** k`` strings.

    Raises:
        ValueError: If ``k`` is negative (raised by ``itertools.product``).
    '''
    # Join each tuple of letters directly; no intermediate list of tuples and
    # no local name shadowing the builtin ``all``.
    return [''.join(letters) for letters in product('ACGT', repeat=k)]
def kmer_per_segment(dna_segment, k):
    '''Return every substring of length ``k`` of ``dna_segment``, left to right.

    Overlapping windows are included, so a segment of length ``n`` produces
    ``n - k + 1`` substrings (duplicates are kept).

    Args:
        dna_segment: The sequence to slide the window over.
        k: Window length; must be at least 1.

    Returns:
        A list of the length-``k`` substrings in order of their start
        position. The list is empty when ``dna_segment`` is shorter than
        ``k``.

    Raises:
        ValueError: If ``k`` is smaller than 1. Without this check ``k == 0``
            produced a list of empty strings and a negative ``k`` produced
            arbitrarily sliced fragments.
    '''
    if k < 1:
        raise ValueError('k must be a positive integer, got {!r}'.format(k))
    # Last index at which a full window of length k still fits.
    last_start = len(dna_segment) - k
    return [dna_segment[start:start + k] for start in range(last_start + 1)]
def _edit_distance(a, b):
    '''Return the Levenshtein distance (unit-cost insert/delete/substitute) between ``a`` and ``b``.'''
    if a == b:
        return 0
    # Keep the shorter string on the inner axis so each row stays small.
    if len(a) < len(b):
        a, b = b, a
    previous = list(range(len(b) + 1))
    for row, char_a in enumerate(a, start=1):
        current = [row]
        for col, char_b in enumerate(b, start=1):
            current.append(min(
                previous[col] + 1,                      # delete char_a
                current[col - 1] + 1,                   # insert char_b
                previous[col - 1] + (char_a != char_b),  # substitute / match
            ))
        previous = current
    return previous[-1]


def median_distance(dna, k, list_kmers):
    '''Return the k-mer with the lowest total distance to a collection of DNA strings.

    For each candidate from ``all_kmers(k)``, the distance to one DNA string
    is the smallest edit distance between the candidate and any length-``k``
    substring of that string. The candidate's score is the sum of those
    distances over all strings in ``dna``. The first candidate reaching the
    lowest score wins (ties keep the earlier candidate).

    NOTE(review): the original body called ``lv.distance``, but ``lv`` is
    never imported or defined in this file, so it raised ``NameError``.
    ``lv`` was presumably the python-Levenshtein module; the private helper
    ``_edit_distance`` computes the same unit-cost edit distance with the
    standard library only - confirm this is the intended metric.

    Args:
        dna: Iterable of DNA strings.
        k: Length of the candidate k-mers and of the substrings compared.
        list_kmers: Not used; kept so existing callers keep working.
            Candidates always come from ``all_kmers(k)``.

    Returns:
        A tuple ``(distance, median)``: the lowest score and the k-mer that
        achieved it. If no candidate has a finite score (some string in
        ``dna`` is shorter than ``k``), returns ``(float('inf'), None)``;
        the original returned the ``str`` type object as ``median`` here.
    '''
    # The windows of each DNA string do not depend on the candidate, so build
    # them once instead of once per candidate.
    windows_per_string = [kmer_per_segment(segment, k) for segment in dna]

    distance = float('inf')
    median = None
    for kmer in all_kmers(k):
        sum_distance = 0
        for windows in windows_per_string:
            # A string with no window of length k contributes infinity, so
            # the candidate can never be selected.
            sum_distance += min(
                (_edit_distance(mer, kmer) for mer in windows),
                default=float('inf'),
            )
        # Strict comparison keeps the first candidate among equal scores.
        if distance > sum_distance:
            distance = sum_distance
            median = kmer
    return distance, median
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment