Skip to content

Instantly share code, notes, and snippets.

@elsherbini
Last active March 3, 2017 15:34
Show Gist options
  • Save elsherbini/625df7983f2c90c2580b8f44ccb51150 to your computer and use it in GitHub Desktop.
Save elsherbini/625df7983f2c90c2580b8f44ccb51150 to your computer and use it in GitHub Desktop.
calculate parsimonious synonymous and nonsynonymous mutations for any two codons
import networkx as nx
def setup_codon_graph():
    """Build an undirected graph of codons linked by single-base mutations.

    Every codon in the table below becomes a node carrying a
    ``translation`` attribute (its one-letter amino acid).  The three stop
    codons (TAA, TAG, TGA) are not in the table, so they are neither nodes
    nor stepping stones on any path.

    Two codons are joined by an edge when they differ at exactly one
    position.  The edge weight encodes the kind of mutation:

    * ``0.01`` -- synonymous (both codons translate to the same amino acid)
    * ``1.01`` -- non-synonymous

    A weighted shortest path between two codons therefore minimises the
    number of non-synonymous changes first and the total number of changes
    second, i.e. it is the most parsimonious mutation path.

    Returns:
        networkx.Graph: the weighted codon graph.
    """
    codon_table = {
        'AAA': 'K', 'AAC': 'N', 'AAG': 'K', 'AAT': 'N',
        'ACA': 'T', 'ACC': 'T', 'ACG': 'T', 'ACT': 'T',
        'AGA': 'R', 'AGC': 'S', 'AGG': 'R', 'AGT': 'S',
        'ATA': 'I', 'ATC': 'I', 'ATG': 'M', 'ATT': 'I',
        'CAA': 'Q', 'CAC': 'H', 'CAG': 'Q', 'CAT': 'H',
        'CCA': 'P', 'CCC': 'P', 'CCG': 'P', 'CCT': 'P',
        'CGA': 'R', 'CGC': 'R', 'CGG': 'R', 'CGT': 'R',
        'CTA': 'L', 'CTC': 'L', 'CTG': 'L', 'CTT': 'L',
        'GAA': 'E', 'GAC': 'D', 'GAG': 'E', 'GAT': 'D',
        'GCA': 'A', 'GCC': 'A', 'GCG': 'A', 'GCT': 'A',
        'GGA': 'G', 'GGC': 'G', 'GGG': 'G', 'GGT': 'G',
        'GTA': 'V', 'GTC': 'V', 'GTG': 'V', 'GTT': 'V',
        'TAC': 'Y', 'TAT': 'Y',
        'TCA': 'S', 'TCC': 'S', 'TCG': 'S', 'TCT': 'S',
        'TGC': 'C', 'TGG': 'W', 'TGT': 'C',
        'TTA': 'L', 'TTC': 'F', 'TTG': 'L', 'TTT': 'F'}
    bases = ("A", "C", "G", "T")

    def get_one_aways(codon):
        """Return every string that differs from *codon* at one position."""
        return [
            codon[:position] + base + codon[position + 1:]
            for position, current in enumerate(codon)
            for base in bases
            if base != current
        ]

    def codon_edges():
        """Yield ``(codon, neighbour, weight)`` for each single-base step."""
        for codon, amino_acid in codon_table.items():
            for neighbour in get_one_aways(codon):
                # Stop codons are absent from the table; skip them.
                if neighbour not in codon_table:
                    continue
                if codon_table[neighbour] == amino_acid:
                    yield (codon, neighbour, 0.01)
                else:
                    yield (codon, neighbour, 1.01)

    codon_graph = nx.Graph()
    # Attach the translation while adding the nodes.  The former
    # ``codon_graph.node[k][...]`` access relied on ``Graph.node``, which
    # was removed in networkx 2.4; (node, attr_dict) pairs work on both old
    # and new releases.
    codon_graph.add_nodes_from(
        (codon, {'translation': amino_acid})
        for codon, amino_acid in codon_table.items()
    )
    codon_graph.add_weighted_edges_from(codon_edges())
    return codon_graph
def get_ns_and_s(codon1, codon2, codon_graph):
    """Count synonymous and non-synonymous mutations between two codons.

    The counts are read off the weighted shortest-path length in
    *codon_graph*.  This relies on the weighting used by
    ``setup_codon_graph``: every edge contributes 0.01 and a
    non-synonymous edge contributes an extra 1.0.  Hence the integer part
    of the distance is the number of non-synonymous steps and the
    fractional part, times 100, is the total number of steps.

    Args:
        codon1: starting codon (must be a node of *codon_graph*).
        codon2: target codon (must be a node of *codon_graph*).
        codon_graph: graph built by ``setup_codon_graph``.

    Returns:
        tuple: ``(s, ns)`` -- synonymous and non-synonymous mutation counts
        along the most parsimonious path.

    NOTE(review): stop codons are not nodes of the graph, so passing one is
    expected to make networkx raise -- confirm the exact exception type for
    the installed networkx version.
    """
    # Use the caller's codons; the path was previously hard-coded to
    # "CTT" -> "TCC", which made the arguments meaningless.
    distance = nx.shortest_path_length(
        codon_graph, codon1, codon2, weight="weight"
    )
    ns = int(distance)
    # round() absorbs floating-point noise from summing 0.01 increments.
    total = round((distance - ns) * 100)
    # The fractional part counts *all* steps, so remove the non-synonymous
    # ones to obtain the synonymous count.
    s = total - ns
    return (s, ns)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment