Add sequencing runs CLI argument

This commit is contained in:
2021-02-28 02:23:58 +01:00
parent ae5014fd74
commit 42aadb1e28
2 changed files with 21 additions and 28 deletions

View File

@@ -12,37 +12,32 @@ generate_repertoires <- function(number_of_sequences) {
return(b_chain)
}
process_chain <- function(repertoire) {
# TODO: also save v_call and j_call
preprocess_data <- function(repertoire, sequencing_runs) {
sequences <- as.character(repertoire$sequence)
counts <- as.integer(repertoire$counts)
reads <- Biostrings::DNAStringSet(rep(sequences, counts))
reads <- Biostrings::DNAStringSet(rep(sequences, sequencing_runs))
names(reads) <- seq_len(length(reads))
reverse_complement <- Biostrings::reverseComplement(reads)
return(reverse_complement)
}
# Apply process_chain to every element of `repertoires` and return the
# results as a list carrying the same names as the input.
# NOTE(review): presumably `repertoires` is a named list with one entry per
# chain -- confirm against generate_repertoires.
preprocess_data <- function(repertoires) {
# lapply calls process_chain once per element and collects a list.
filtered_repertoires <- lapply(repertoires, process_chain)
# Copy the input's names onto the result.
names(filtered_repertoires) <- names(repertoires)
return(filtered_repertoires)
}
save_data <- function(repertoires) {
for (chain in names(repertoires)) {
file_name <- paste("data/", chain, ".fastq", sep = "")
Biostrings::writeXStringSet(repertoires[[chain]], file_name, format = "fastq")
}
# Write `repertoire` to the fixed path "data/sequence.fastq" via
# Biostrings::writeXStringSet with format = "fastq".
# NOTE(review): presumably `repertoire` is the DNAStringSet returned by
# preprocess_data -- confirm against the caller. The "data/" directory is
# not created here.
save_data <- function(repertoire) {
file_name <- "data/sequence.fastq"
# TODO: change the output format to FASTA
Biostrings::writeXStringSet(repertoire, file_name, format = "fastq")
}
parse_cli_arguments <- function(args) {
if (length(args) != 1) {
stop("usage: repertoire.r <number of sequences>")
if (length(args) != 2) {
stop("usage: repertoire.r <number of sequences> <sequencing_runs>")
}
return(as.integer(args[1]))
return(c(args[1], args[2]))
}
args <- commandArgs(trailingOnly = TRUE)
number_of_sequences <- parse_cli_arguments(args)
sim_repertoire <- generate_repertoires(number_of_sequences)
processed_data <- preprocess_data(sim_repertoire)
parameters <- parse_cli_arguments(args)
number_of_sequences <- as.integer(parameters[1])
sequencing_runs <- as.integer(parameters[2])
repertoire <- generate_repertoires(number_of_sequences)
processed_data <- preprocess_data(repertoire, sequencing_runs)
save_data(processed_data)