Compare commits

..

No commits in common. "edc4a7242508af92fe07b57ffbde1ec57d7f36b4" and "34722f3ebf53365140a22e1c5a60d8f1a299d6fa" have entirely different histories.

4 changed files with 37 additions and 24 deletions

1
.gitignore vendored
View File

@@ -1,2 +1 @@
*.txt *.txt
*.fastq

View File

@@ -1,19 +1,21 @@
#!/bin/sh #!/bin/sh
usage() { usage() {
echo "usage: generation.sh <number of sequences> <sequencing runs>" echo "usage: generation.sh <number of sequences>"
exit 1 exit 1
} }
if [ $# != 2 ]; then if [ $# != 1 ]; then
usage usage
fi fi
sequences=$1 sequences=$1
sequencing_runs=$2
data_directory="data/" data_directory="data/"
file="sequence.fastq"
prefix="curesim_" prefix="curesim_"
Rscript src/repertoire.r "$sequences" "$sequencing_runs" Rscript src/repertoire.r "$sequences"
java -jar tools/CuReSim.jar -f "$data_directory$file" -o "$data_directory$prefix$file"
for file in "$data_directory"*.fastq; do
file_name=$(echo "$file" | cut -d / -f 2)
java -jar tools/CuReSim.jar -f "$file" -o "$data_directory$prefix$file_name"
done

View File

@@ -8,7 +8,7 @@ mkShell {
rPackages.immuneSIM rPackages.immuneSIM
rPackages.Biostrings rPackages.Biostrings
jdk jdk
# Development tools # Develoment tools
rPackages.languageserver rPackages.languageserver
rPackages.lintr rPackages.lintr
]; ];

View File

@@ -2,6 +2,13 @@ library(immuneSIM)
library(Biostrings) library(Biostrings)
generate_repertoires <- function(number_of_sequences) { generate_repertoires <- function(number_of_sequences) {
a_chain <- immuneSIM(
number_of_seqs = number_of_sequences,
species = "hs",
receptor = "tr",
chain = "a",
verbose = TRUE
)
b_chain <- immuneSIM( b_chain <- immuneSIM(
number_of_seqs = number_of_sequences, number_of_seqs = number_of_sequences,
species = "hs", species = "hs",
@@ -9,35 +16,40 @@ generate_repertoires <- function(number_of_sequences) {
chain = "b", chain = "b",
verbose = TRUE verbose = TRUE
) )
return(b_chain) return(list("a_chain" = a_chain, "b_chain" = b_chain))
} }
# TODO save also v_call and j_call process_chain <- function(repertoire) {
preprocess_data <- function(repertoire, sequencing_runs) {
sequences <- as.character(repertoire$sequence) sequences <- as.character(repertoire$sequence)
reads <- Biostrings::DNAStringSet(rep(sequences, sequencing_runs)) counts <- as.integer(repertoire$counts)
reads <- Biostrings::DNAStringSet(rep(sequences, counts))
names(reads) <- seq_len(length(reads)) names(reads) <- seq_len(length(reads))
reverse_complement <- Biostrings::reverseComplement(reads) reverse_complement <- Biostrings::reverseComplement(reads)
return(reverse_complement) return(reverse_complement)
} }
save_data <- function(repertoire) { preprocess_data <- function(repertoires) {
file_name <- "data/sequence.fastq" filtered_repertoires <- lapply(repertoires, process_chain)
# TODO Change format to fasta names(filtered_repertoires) <- names(repertoires)
Biostrings::writeXStringSet(repertoire, file_name, format = "fastq") return(filtered_repertoires)
}
save_data <- function(repertoires) {
for (chain in names(repertoires)) {
file_name <- paste("data/", chain, ".fastq", sep = "")
Biostrings::writeXStringSet(repertoires[[chain]], file_name, format = "fastq")
}
} }
parse_cli_arguments <- function(args) { parse_cli_arguments <- function(args) {
if (length(args) != 2) { if (length(args) != 1) {
stop("usage: repertoire.r <number of sequences> <sequencing_runs>") stop("usage: repertoire.r <number of sequences>")
} }
return(c(args[1], args[2])) return(as.integer(args[1]))
} }
args <- commandArgs(trailingOnly = TRUE) args <- commandArgs(trailingOnly = TRUE)
parameters <- parse_cli_arguments(args) number_of_sequences <- parse_cli_arguments(args)
number_of_sequences <- as.integer(parameters[1]) sim_repertoire <- generate_repertoires(number_of_sequences)
sequencing_runs <- as.integer(parameters[2]) processed_data <- preprocess_data(sim_repertoire)
repertoire <- generate_repertoires(number_of_sequences)
processed_data <- preprocess_data(repertoire, sequencing_runs)
save_data(processed_data) save_data(processed_data)