From dc740e1c54634d5bb4a6711ed1ea11b7e4cb8a18 Mon Sep 17 00:00:00 2001
From: coolneng <akasroua@gmail.com>
Date: Mon, 21 Oct 2019 17:35:44 +0200
Subject: [PATCH] Add PatternMatching function

---
 Code/PatternMatching.py | 6 ++++++
 Notebook.org            | 8 +++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 Code/PatternMatching.py

diff --git a/Code/PatternMatching.py b/Code/PatternMatching.py
new file mode 100644
index 0000000..c4e8225
--- /dev/null
+++ b/Code/PatternMatching.py
@@ -0,0 +1,6 @@
+def PatternMatching(Pattern, Genome):  # return every start index at which Pattern occurs in Genome
+    positions = []  # 0-based start indices of matches, collected in ascending order
+    for i in range(len(Genome)-len(Pattern)+1):  # only offsets where a full-length window still fits
+        if Genome[i:i+len(Pattern)] == Pattern:  # every offset is tested, so overlapping matches are kept
+            positions.append(i)
+    return positions  # empty list when nothing matches or Pattern is longer than Genome
diff --git a/Notebook.org b/Notebook.org
index 272959e..6896000 100644
--- a/Notebook.org
+++ b/Notebook.org
@@ -21,8 +21,14 @@
      
       We're going to generate the reverse complement of a sequence, which is the complement of a sequence, read in the same direction (5' -> 3').
       In this case, we're going to use [[./Code/ReverseComplement.py][ReverseComplement]] 
-      After using our function on the Vibrio's Cholerae genome, we realize that some of the frequent k-mers are reverse complements of other frequent ones.
+      After using our function on the Vibrio Cholerae's genome, we realize that some of the frequent k-mers are reverse complements of other frequent ones.
      
+***** Exercise: Find a subsequence within a sequence
+      
+      We're going to find the occurrences of a subsequence inside a sequence, and save the index at which each occurrence starts in the sequence.
+      This time, we'll use [[./Code/PatternMatching.py][PatternMatching]] 
+      After using our function on the Vibrio Cholerae's genome, we find out that the /9-mers/ with the highest frequency appear in clusters.
+      This is strong statistical evidence that our subsequences are /DnaA boxes/.
 
 *** Vocabulary
       - k-mer: subsquences of length /k/ in a biological sequence