def FrequentWords(Text, k):
    """Return the most frequent k-mers of Text.

    A k-mer is a subsequence of length ``k`` in a (biological) sequence.

    Args:
        Text: The sequence to scan; it is sliced into windows of length ``k``.
        k: Length of the k-mers to count. Expected to be a positive integer.

    Returns:
        A list of every k-mer whose count equals the maximum count, in order
        of first occurrence in ``Text``. The list is empty when no window of
        length ``k`` fits in ``Text`` (empty ``Text`` or ``k > len(Text)``).
    """
    freq = FrequencyMap(Text, k)
    if not freq:
        # max() raises ValueError on an empty iterable; with no k-mers at all
        # there is simply nothing to report.
        return []
    highest = max(freq.values())
    return [pattern for pattern, count in freq.items() if count == highest]


def FrequencyMap(Text, k):
    """Build the frequency map of Text: k-mer --> number of occurrences.

    Overlapping occurrences are counted, because the window advances one
    position at a time.

    Args:
        Text: The sequence to scan.
        k: Length of the k-mers to count. Expected to be a positive integer.

    Returns:
        A dict mapping each k-mer found in ``Text`` to its count, with keys in
        order of first occurrence. Empty when ``k > len(Text)``.
    """
    freq = {}
    # Single pass: dict.get supplies the initial 0, so no separate
    # zero-initialisation loop over the same windows is needed.
    for start in range(len(Text) - k + 1):
        pattern = Text[start:start + k]
        freq[pattern] = freq.get(pattern, 0) + 1
    return freq