Skip to content

Instantly share code, notes, and snippets.

@sahrendt0
Created October 9, 2012 22:01
Show Gist options
  • Save sahrendt0/3861709 to your computer and use it in GitHub Desktop.
Save sahrendt0/3861709 to your computer and use it in GitHub Desktop.
GEN 220 Problem Set 2
#!/usr/bin/perl -w
# Script: ps2a.pl
# Description: GEN220 homework set 2, questions 1-4
# Author: Steven Ahrendt
# email: sahre001@ucr.edu
# Due: 10.9.12
##############################
# [x]: Q1 (length of sequence & num of codons)
# [x]: Q2 (first start codon & length of 5' UTR)
# [x]: Q3 (first stop codon)
# [x]: Q4 (length of: CDS, predicted protein, & 3' UTR)
################################
use strict;
my $infile = shift;
## Read in and process sequence
open(FAS,"<$infile") or die "Can't open file \"$infile\"...\n";
my $seqlen = 0;
my $sequence = "";
foreach my $line (<FAS>)
{
next if ($line =~ m/^>/);
chomp $line;
$sequence = join("",$sequence,$line);
}
close(FAS);
$seqlen = length($sequence);
print "This sequence is of length $seqlen bp and there are ".($seqlen/3)." codons.\n"; # Answer to Q1
## Q2/3/4: CDS and UTRs
my $ind = index($sequence, "ATG");
my $start = $ind; # index of start codon
my $stop = -1; # index of stop codon (-1 for now)
while($ind < length($sequence))
{
my $codon = substr($sequence,$ind,3);
if(($codon eq "TAA") || ($codon eq "TAG") || ($codon eq "TGA"))
{
$stop = ($ind+1); # +1 offset because sequences start at 1, not 0
last;
}
$ind = $ind+3;
}
print "The first start codon is at basepair ".($start+1).", and the in-frame stop codon is at basepair $stop.\n";
my $UTR5 = $start; # In this case, index of start codon also gives length of 5' UTR
my $UTR3 = $seqlen-($stop+3)+1; # +3 offset because 3' UTR does not contain stop codon
print "The 5'UTR is $UTR5 bp long and the 3' UTR is $UTR3 bp long.\n";
my $CDS = $seqlen-$UTR5-$UTR3;
print "The CDS length is $CDS bp and the predicted protein length is ".(($CDS-3)/3).".\n"; # -3 offset because protein does not contain stop codon
#!/usr/bin/perl -w
# Script: ps2b.pl
# Description: GEN220 homework set 2, questions 5-7
# Author: Steven Ahrendt
# email: sahre001@ucr.edu
# Due: 10.9.12
##############################
# [x]: Q5 (alignment hash: name & length of seqs)
# [x]: Q6 (num of gapped columns in alignment)
# [x]: Q7 (identical residues b/w two seqs)
################################
use strict;
## Q5/6/7: Alignment
my %alignment;
$alignment{AAC35278} = "LLIAITYYNEDKVLTARTLHGVMQNPAWQKIVVCLVFDGIDPVLATIGV-VMKKDVDGKE";
$alignment{AnCSMA} = "AMCLVTCYSEGEEGIRTTLDSIALTPN-SHKSIVVICDGIIKVLRMMRD-TGSKRHNMAK";
$alignment{AfCHSF} = "ALCLVTCYSEGEEGIRTTLDSIAMTPN-SHKTIIVICDGIIKVLRMMRD-TGSKRHNMAK";
$alignment{AAF19527} = "AILLVTAYSEGELGIRTTLDSIATTPN-SHKTILVICDGIIKVLGMMKD-RGSKRHNMAK";
$alignment{"P30573-1"} = "TINLVTCYSEDEEGIRITLDSIATTPN-SHKLILVICDGIIKVLDMMSDAQGSKRHNMAK";
## Loop through the sequences in the hash
# For each sequence, get the number of residues and the number of gaps
my $num_gaps = 0; # Total number of gapped columns
my @gap_columns; # Store the indices of the columns with gaps
foreach my $key (keys %alignment)
{
my @cols = split("",$alignment{$key});
my $gaps = 0; # Number of gaps in this sequence
for(my $i =0; $i < scalar @cols; $i++)
{
if($cols[$i] eq "-")
{
$gaps++;
my $present = 0;
foreach my $index (@gap_columns)
{
if($index == $i){$present = 1;} # If the index exists in the array
}
if(!$present){push(@gap_columns,$i);}
}
}
print $key," ",length($alignment{$key})-$gaps,"\n"; # Answer to Q5
}
print "The number of gapped columns in this alignment is ".scalar(@gap_columns).".\n"; # Answer to Q6
## Nested loops through the hash
# Compare each sequence to the one after it (pairwise)
my @a_keys = keys %alignment;
for(my $fc = 0; $fc < scalar(@a_keys); $fc++) # fc = first counter
{
for(my $sc = $fc+1; $sc < scalar(@a_keys); $sc++) # sc = second counter
{
my @first = split("",$alignment{$a_keys[$fc]}); # First sequence
my @second = split("",$alignment{$a_keys[$sc]}); # Second sequence
my $pos = 0; # Position in the sequence
my $ident = 0; # Num of identical residues
while ($pos < scalar @first)
{
if($first[$pos] eq $second[$pos]){$ident++;}
$pos++;
}
print "Identical residues between $a_keys[$fc] and $a_keys[$sc] is $ident.\n"; # Answer to Q7
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment