39 lines
1.0 KiB
R
39 lines
1.0 KiB
R
library(immuneSIM)
|
|
library(Biostrings)
|
|
|
|
#' Simulate an immune receptor repertoire with immuneSIM.
#'
#' Calls `immuneSIM()` with `species = "hs"`, `receptor = "tr"` and
#' `chain = "b"` (per the immuneSIM documentation these select human,
#' T-cell receptor, beta chain -- confirm against the installed version).
#'
#' @param number_of_sequences Number of sequences to simulate. May be a number
#'   or a string holding a number (e.g. a raw command-line argument); it must
#'   resolve to a single positive whole number.
#' @return Whatever `immuneSIM()` returns for the requested repertoire.
generate_repertoire <- function(number_of_sequences) {
  # Command-line values arrive as character strings, so coerce before use.
  # The coercion warning is suppressed because the NA it produces is
  # reported by the explicit check below.
  count <- suppressWarnings(as.numeric(number_of_sequences))
  is_valid <- length(count) == 1L &&
    !is.na(count) &&
    count >= 1 &&
    count == trunc(count)
  if (!is_valid) {
    stop(
      "number_of_sequences must be a single positive whole number",
      call. = FALSE
    )
  }

  immuneSIM(
    number_of_seqs = count,
    species = "hs",
    receptor = "tr",
    chain = "b"
  )
}
|
|
|
|
#' Write the processed repertoire to disk.
#'
#' Produces two files inside `output_dir`:
#' `sequence.fasta`, holding the `sequence` column written with
#' `Biostrings::writeXStringSet()`, and `vdj_alignment.csv`, holding every
#' other column written with `write.csv()` without row names.
#'
#' @param data A data frame with a `sequence` column that
#'   `Biostrings::writeXStringSet()` accepts, plus the alignment columns.
#' @param output_dir Directory that receives both files. Defaults to
#'   `"data"`; it is created when it does not exist yet.
#' @return Called for its side effects (the two files written).
save_data <- function(data, output_dir = "data") {
  # Writing into a missing directory fails, so make sure it is there first.
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }

  Biostrings::writeXStringSet(
    data$sequence,
    file.path(output_dir, "sequence.fasta")
  )

  # Drop the sequence column by name rather than by position so the CSV is
  # correct regardless of column order.
  vdj_sequences <- data[names(data) != "sequence"]
  write.csv(
    vdj_sequences,
    file.path(output_dir, "vdj_alignment.csv"),
    row.names = FALSE
  )
}
|
|
|
|
#' Extract the sequence and V/D/J alignment columns, reverse-complement the
#' sequences, and save the result via `save_data()`.
#'
#' @param repertoire A data frame containing at least the columns `sequence`,
#'   `v_sequence_alignment`, `d_sequence_alignment` and
#'   `j_sequence_alignment`.
#' @param sequencing_runs Currently unused; kept so existing callers that
#'   pass it keep working.
#' @return The value returned by `save_data()`.
process_data <- function(repertoire, sequencing_runs) {
  columns <- c(
    "sequence", "v_sequence_alignment",
    "d_sequence_alignment", "j_sequence_alignment"
  )

  missing_columns <- setdiff(columns, colnames(repertoire))
  if (length(missing_columns) > 0) {
    stop(
      "repertoire is missing required column(s): ",
      paste(missing_columns, collapse = ", "),
      call. = FALSE
    )
  }

  data <- repertoire[, columns]

  # reverseComplement() is defined for DNA/RNA string objects, not for plain
  # character vectors, so convert the column to a DNAStringSet first. This
  # is also the type writeXStringSet() needs inside save_data().
  sequences <- Biostrings::DNAStringSet(as.character(data$sequence))
  data$sequence <- Biostrings::reverseComplement(sequences)

  save_data(data)
}
|
|
|
|
#' Read the script's single command-line argument.
#'
#' Looks only at the trailing arguments (those after the script name).
#'
#' @return The one trailing argument, as a length-one character vector.
#'   Stops with a usage message when the number of trailing arguments is
#'   anything other than one.
parse_cli_arguments <- function() {
  cli_values <- commandArgs(trailingOnly = TRUE)

  # Happy path first: exactly one value was supplied.
  if (length(cli_values) == 1) {
    return(cli_values[1])
  }

  stop("usage: repertoire.r <number of sequences>")
}
|
|
|
|
# Script entry point: read the requested repertoire size from the command
# line, simulate the repertoire, then write the processed output files.

# parse_cli_arguments() takes no parameters; it reads commandArgs() itself.
arguments <- parse_cli_arguments()

# Command-line values are character strings; convert before simulating.
repertoire <- generate_repertoire(
  number_of_sequences = as.integer(arguments[1])
)

# No `sequencing_runs` value is defined anywhere in this script and
# process_data() does not read the parameter, so pass an explicit NULL
# instead of an undefined name.
process_data(repertoire, sequencing_runs = NULL)