|
|
|
|
@@ -1,4 +1,5 @@
|
|
|
|
|
library(immuneSIM)
|
|
|
|
|
library(Biostrings)
|
|
|
|
|
|
|
|
|
|
generate_repertoires <- function(number_of_sequences) {
|
|
|
|
|
a_chain <- immuneSIM(
|
|
|
|
|
@@ -18,6 +19,27 @@ generate_repertoires <- function(number_of_sequences) {
|
|
|
|
|
return(list("a_chain" = a_chain, "b_chain" = b_chain))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#' Expand a repertoire into reverse-complemented reads
#'
#' Every entry of `repertoire$sequence` is repeated as many times as the
#' matching entry of `repertoire$counts`. The copies are wrapped in a
#' `DNAStringSet`, named with consecutive integers starting at 1, and then
#' reverse-complemented.
#'
#' @param repertoire Object exposing `sequence` and `counts` elements
#'   (presumably the data frame returned by immuneSIM -- TODO confirm
#'   against the caller).
#' @return The value of `Biostrings::reverseComplement()` applied to the
#'   expanded, named read set.
process_chain <- function(repertoire) {
  copies_per_clone <- as.integer(repertoire$counts)

  # One read per observed copy: rep() pairs each sequence with its count.
  expanded <- rep(as.character(repertoire$sequence), copies_per_clone)
  read_set <- Biostrings::DNAStringSet(expanded)

  # Give every read a running index as its name.
  names(read_set) <- seq_along(read_set)

  Biostrings::reverseComplement(read_set)
}
|
|
|
|
|
|
|
|
|
|
#' Run `process_chain()` over every repertoire in a list
#'
#' @param repertoires List of repertoires; each element is handed to
#'   `process_chain()` unchanged.
#' @return A list with one processed element per input element, carrying
#'   the same names as `repertoires`.
preprocess_data <- function(repertoires) {
  stats::setNames(
    lapply(repertoires, process_chain),
    names(repertoires)
  )
}
|
|
|
|
|
|
|
|
|
|
#' Write each chain of a repertoire list to its own FASTQ file
#'
#' For every name in `repertoires`, the matching element is written with
#' `Biostrings::writeXStringSet()` to `data/<name>.fastq`. The `data`
#' directory is created first when it does not exist yet, so the script no
#' longer fails on a fresh checkout.
#'
#' @param repertoires Named list whose elements can be written by
#'   `Biostrings::writeXStringSet()`. Unnamed input results in no files
#'   being written.
#' @return `NULL`, invisibly; the function is called for its side effect.
save_data <- function(repertoires) {
  chains <- names(repertoires)
  if (length(chains) == 0L) {
    return(invisible(NULL))
  }

  output_dir <- "data"
  # The output file cannot be opened inside a directory that does not
  # exist, so make sure the target directory is there before writing.
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }

  for (chain in chains) {
    file_name <- file.path(output_dir, paste0(chain, ".fastq"))
    Biostrings::writeXStringSet(
      repertoires[[chain]],
      file_name,
      format = "fastq"
    )
  }

  invisible(NULL)
}
|
|
|
|
|
|
|
|
|
|
parse_cli_arguments <- function(args) {
|
|
|
|
|
if (length(args) != 1) {
|
|
|
|
|
@@ -29,3 +51,5 @@ parse_cli_arguments <- function(args) {
|
|
|
|
|
# Script driver: the statements below run top to bottom when the file is
# executed and chain the functions defined above.

# Keep only the arguments supplied after the script name/options.
args <- commandArgs(trailingOnly = TRUE)
|
|
|
|
|
# The value returned by parse_cli_arguments() is used as the number of
# sequences to generate.
number_of_sequences <- parse_cli_arguments(args)
|
|
|
|
|
# generate_repertoires() returns a list with "a_chain" and "b_chain".
sim_repertoire <- generate_repertoires(number_of_sequences)
|
|
|
|
|
# Apply process_chain() to every chain in the list.
processed_data <- preprocess_data(sim_repertoire)
|
|
|
|
|
# Write one FASTQ file per chain under data/.
save_data(processed_data)
|