From f5609b5577496ef75d6634713c938a36d1327052 Mon Sep 17 00:00:00 2001 From: coolneng Date: Mon, 21 Oct 2019 17:35:30 +0200 Subject: [PATCH] Add ReverseComplement function --- Code/ReverseComplement.py | 17 +++++++++++++++++ Notebook.org | 22 +++++++++++++--------- 2 files changed, 30 insertions(+), 9 deletions(-) create mode 100644 Code/ReverseComplement.py diff --git a/Code/ReverseComplement.py b/Code/ReverseComplement.py new file mode 100644 index 0000000..de6b4ef --- /dev/null +++ b/Code/ReverseComplement.py @@ -0,0 +1,17 @@ +def ReverseComplement(Pattern): + Pattern = Reverse(Pattern) + Pattern = Complement(Pattern) + return Pattern + + +def Reverse(Pattern): + reversed = Pattern[::-1] + return reversed + + +def Complement(Pattern): + compl = "" + complement_letters = {"A": "T", "T": "A", "C": "G", "G": "C"} + for char in Pattern: + compl += complement_letters[char] + return compl diff --git a/Notebook.org b/Notebook.org index 81ffd9b..272959e 100644 --- a/Notebook.org +++ b/Notebook.org @@ -8,17 +8,21 @@ Locating an ori is key for gene therapy (e.g. viral vectors), to introduce a theraupetic gene. -**** Exercise: computational approach to find ori in bacteria +**** Exercises: computational approaches to find ori in Vibrio Cholerae - We'll look for the *DnaA box* sequence, using a sliding window, in that case our code would be the following: +***** Exercise: find Pattern + + We'll look for the *DnaA box* sequence, using a sliding window, in that case we will use the function [[./Code/Replication.py][Replication]] to find out how many times + does a sequence appear in the genome. + + For the second part, we're going to calculate the frequency map of the sequences of length /k/, for that purpose we'll use [[./Code/FrequentWords.py][FrequentWords]] + +***** Exercise: Find the reverse complement of a sequence + + We're going to generate the reverse complement of a sequence, which is the complement of a sequence, read in the same direction (5' -> 3'). 
+ In this case, we're going to use [[./Code/ReverseComplement.py][ReverseComplement]] + After using our function on the Vibrio cholerae genome, we realize that some of the frequent k-mers are reverse complements of other frequent ones. - #+begin_src python - count = 0 - 
for i in range(len(Text)-len(Pattern)+1): - if Text[i:i+len(Pattern)] == Pattern: - count = count+1 - print(Pattern + ": " + count) - #+end_src *** Vocabulary - k-mer: subsquences of length /k/ in a biological sequence