Compare commits

..

2 Commits

Author SHA1 Message Date
53dda36c66
Export the data to FASTQ files 2021-02-26 02:20:11 +01:00
b035c496f7
Filter the relevant columns from immuneSIM output 2021-02-26 02:19:40 +01:00
2 changed files with 25 additions and 0 deletions

View File

@ -6,6 +6,7 @@ mkShell {
buildInputs = [ buildInputs = [
R R
rPackages.immuneSIM rPackages.immuneSIM
rPackages.Biostrings
jdk jdk
# Development tools # Development tools
rPackages.languageserver rPackages.languageserver

View File

@ -1,4 +1,5 @@
library(immuneSIM) library(immuneSIM)
library(Biostrings)
generate_repertoires <- function(number_of_sequences) { generate_repertoires <- function(number_of_sequences) {
a_chain <- immuneSIM( a_chain <- immuneSIM(
@ -18,6 +19,27 @@ generate_repertoires <- function(number_of_sequences) {
return(list("a_chain" = a_chain, "b_chain" = b_chain)) return(list("a_chain" = a_chain, "b_chain" = b_chain))
} }
# Turn one simulated chain into a set of named, reverse-complemented reads.
#
# `repertoire` is accessed with `$sequence` and `$counts`. Every sequence is
# repeated as many times as its (integer-coerced) count, the resulting reads
# are named 1..N in order, and the value returned is whatever
# Biostrings::reverseComplement() produces for that DNAStringSet.
process_chain <- function(repertoire) {
  nucleotides <- as.character(repertoire$sequence)
  multiplicity <- as.integer(repertoire$counts)

  # One entry per simulated read: sequence i appears multiplicity[i] times.
  expanded <- rep(nucleotides, multiplicity)
  read_set <- Biostrings::DNAStringSet(expanded)

  # Sequential identifiers become the record names of the reads.
  read_total <- length(read_set)
  names(read_set) <- seq_len(read_total)

  Biostrings::reverseComplement(read_set)
}
# Apply process_chain() to every element of `repertoires`.
#
# Returns a list with one processed element per input element, carrying the
# same names as `repertoires`.
preprocess_data <- function(repertoires) {
  chain_labels <- names(repertoires)
  per_chain_reads <- lapply(repertoires, process_chain)
  # Re-apply the input names explicitly so the output is keyed like the input.
  stats::setNames(per_chain_reads, chain_labels)
}
# Write each chain's reads to "data/<chain>.fastq".
#
# `repertoires` is a named list; every element is handed to
# Biostrings::writeXStringSet() with format = "fastq", using the element's
# name as the file stem. The "data" directory is created when it is missing
# so that writing does not fail on a non-existent output directory.
# Called for its side effect; returns NULL invisibly.
save_data <- function(repertoires) {
  output_dir <- "data"
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }

  for (chain in names(repertoires)) {
    # file.path() joins with "/", giving the same "data/<chain>.fastq" path.
    file_name <- file.path(output_dir, paste0(chain, ".fastq"))
    Biostrings::writeXStringSet(
      repertoires[[chain]],
      file_name,
      format = "fastq"
    )
  }

  invisible(NULL)
}
parse_cli_arguments <- function(args) { parse_cli_arguments <- function(args) {
if (length(args) != 1) { if (length(args) != 1) {
@ -29,3 +51,5 @@ parse_cli_arguments <- function(args) {
args <- commandArgs(trailingOnly = TRUE) args <- commandArgs(trailingOnly = TRUE)
number_of_sequences <- parse_cli_arguments(args) number_of_sequences <- parse_cli_arguments(args)
sim_repertoire <- generate_repertoires(number_of_sequences) sim_repertoire <- generate_repertoires(number_of_sequences)
# Expand every simulated chain into reverse-complemented reads.
processed_data <- preprocess_data(sim_repertoire)
# Write one FASTQ file per chain under data/.
save_data(processed_data)