From dc740e1c54634d5bb4a6711ed1ea11b7e4cb8a18 Mon Sep 17 00:00:00 2001 From: coolneng Date: Mon, 21 Oct 2019 17:35:44 +0200 Subject: [PATCH] Add PatternMatching function --- Code/PatternMatching.py | 6 ++++++ Notebook.org | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 Code/PatternMatching.py diff --git a/Code/PatternMatching.py b/Code/PatternMatching.py new file mode 100644 index 0000000..c4e8225 --- /dev/null +++ b/Code/PatternMatching.py @@ -0,0 +1,6 @@ +def PatternMatching(Pattern, Genome): + positions = [] + for i in range(len(Genome)-len(Pattern)+1): + if Genome[i:i+len(Pattern)] == Pattern: + positions.append(i) + return positions diff --git a/Notebook.org b/Notebook.org index 272959e..6896000 100644 --- a/Notebook.org +++ b/Notebook.org @@ -21,8 +21,14 @@ We're going to generate the reverse complement of a sequence, which is the complement of a sequence, read in the same direction (5' -> 3'). In this case, we're going to use [[./Code/ReverseComplement.py][ReverseComplement]] - After using our function on the Vibrio's Cholerae genome, we realize that some of the frequent k-mers are reverse complements of other frequent ones. + After using our function on the Vibrio Cholerae's genome, we realize that some of the frequent k-mers are reverse complements of other frequent ones. +***** Exercise: Find a subsequence within a sequence + + We're going to find the occurrences of a subsequence inside a sequence, and save the index of the first letter in the sequence. + This time, we'll use [[./Code/PatternMatching.py][PatternMatching]] + After using our function on the Vibrio Cholerae's genome, we find out that the /9-mers/ with the highest frequency appear in clusters. + This is strong statistical evidence that our subsequences are /DnaA boxes/. *** Vocabulary - k-mer: subsquences of length /k/ in a biological sequence