Amplify VDJ sequences to simplify parsing
This commit is contained in:
@@ -10,13 +10,14 @@ generate_repertoire <- function(number_of_sequences) {
|
||||
))
|
||||
}
|
||||
|
||||
save_data <- function(data) {
|
||||
save_data <- function(data, reads) {
|
||||
Biostrings::writeXStringSet(data$sequence, "data/sequence.fasta")
|
||||
vdj_sequences <- data[-1]
|
||||
write.csv(vdj_sequences, "data/vdj_alignment.csv", row.names = FALSE)
|
||||
amplified_vdj <- vdj_sequences[rep(seq_len(nrow(vdj_sequences)), reads), ]
|
||||
write.csv(amplified_vdj, "data/vdj_alignment.csv", row.names = FALSE)
|
||||
}
|
||||
|
||||
process_data <- function(repertoire) {
|
||||
process_data <- function(repertoire, reads) {
|
||||
columns <- c(
|
||||
"sequence", "v_sequence_alignment",
|
||||
"d_sequence_alignment", "j_sequence_alignment"
|
||||
@@ -24,17 +25,17 @@ process_data <- function(repertoire) {
|
||||
data <- repertoire[, columns]
|
||||
dna_sequence <- Biostrings::DNAStringSet(data$sequence)
|
||||
data$sequence <- Biostrings::reverseComplement(dna_sequence)
|
||||
save_data(data)
|
||||
save_data(data, reads)
|
||||
}
|
||||
|
||||
parse_cli_arguments <- function() {
|
||||
args <- commandArgs(trailingOnly = TRUE)
|
||||
if (length(args) != 1) {
|
||||
stop("usage: repertoire.r <number of sequences>")
|
||||
if (length(args) != 2) {
|
||||
stop("usage: repertoire.r <number of sequences> <sequencing runs>")
|
||||
}
|
||||
return(args[1])
|
||||
return(c(args[1], args[2]))
|
||||
}
|
||||
|
||||
args <- parse_cli_arguments()
|
||||
repertoire <- generate_repertoire(number_of_sequences = as.integer(args[1]))
|
||||
process_data(repertoire)
|
||||
process_data(repertoire = repertoire, reads = as.integer(args[2]))
|
||||
Reference in New Issue
Block a user