Compare commits
3 Commits
34722f3ebf
...
edc4a72425
Author | SHA1 | Date | |
---|---|---|---|
edc4a72425 | |||
42aadb1e28 | |||
ae5014fd74 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1 +1,2 @@
|
|||||||
*.txt
|
*.txt
|
||||||
|
*.fastq
|
||||||
|
@@ -1,21 +1,19 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
echo "usage: generation.sh <number of sequences>"
|
echo "usage: generation.sh <number of sequences> <sequencing runs>"
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
if [ $# != 1 ]; then
|
if [ $# != 2 ]; then
|
||||||
usage
|
usage
|
||||||
fi
|
fi
|
||||||
|
|
||||||
sequences=$1
|
sequences=$1
|
||||||
|
sequencing_runs=$2
|
||||||
data_directory="data/"
|
data_directory="data/"
|
||||||
|
file="sequence.fastq"
|
||||||
prefix="curesim_"
|
prefix="curesim_"
|
||||||
|
|
||||||
Rscript src/repertoire.r "$sequences"
|
Rscript src/repertoire.r "$sequences" "$sequencing_runs"
|
||||||
|
java -jar tools/CuReSim.jar -f "$data_directory$file" -o "$data_directory$prefix$file"
|
||||||
for file in "$data_directory"*.fastq; do
|
|
||||||
file_name=$(echo "$file" | cut -d / -f 2)
|
|
||||||
java -jar tools/CuReSim.jar -f "$file" -o "$data_directory$prefix$file_name"
|
|
||||||
done
|
|
||||||
|
@@ -8,7 +8,7 @@ mkShell {
|
|||||||
rPackages.immuneSIM
|
rPackages.immuneSIM
|
||||||
rPackages.Biostrings
|
rPackages.Biostrings
|
||||||
jdk
|
jdk
|
||||||
# Develoment tools
|
# Development tools
|
||||||
rPackages.languageserver
|
rPackages.languageserver
|
||||||
rPackages.lintr
|
rPackages.lintr
|
||||||
];
|
];
|
||||||
|
@@ -2,13 +2,6 @@ library(immuneSIM)
|
|||||||
library(Biostrings)
|
library(Biostrings)
|
||||||
|
|
||||||
generate_repertoires <- function(number_of_sequences) {
|
generate_repertoires <- function(number_of_sequences) {
|
||||||
a_chain <- immuneSIM(
|
|
||||||
number_of_seqs = number_of_sequences,
|
|
||||||
species = "hs",
|
|
||||||
receptor = "tr",
|
|
||||||
chain = "a",
|
|
||||||
verbose = TRUE
|
|
||||||
)
|
|
||||||
b_chain <- immuneSIM(
|
b_chain <- immuneSIM(
|
||||||
number_of_seqs = number_of_sequences,
|
number_of_seqs = number_of_sequences,
|
||||||
species = "hs",
|
species = "hs",
|
||||||
@@ -16,40 +9,35 @@ generate_repertoires <- function(number_of_sequences) {
|
|||||||
chain = "b",
|
chain = "b",
|
||||||
verbose = TRUE
|
verbose = TRUE
|
||||||
)
|
)
|
||||||
return(list("a_chain" = a_chain, "b_chain" = b_chain))
|
return(b_chain)
|
||||||
}
|
}
|
||||||
|
|
||||||
process_chain <- function(repertoire) {
|
# TODO save also v_call and j_call
|
||||||
|
preprocess_data <- function(repertoire, sequencing_runs) {
|
||||||
sequences <- as.character(repertoire$sequence)
|
sequences <- as.character(repertoire$sequence)
|
||||||
counts <- as.integer(repertoire$counts)
|
reads <- Biostrings::DNAStringSet(rep(sequences, sequencing_runs))
|
||||||
reads <- Biostrings::DNAStringSet(rep(sequences, counts))
|
|
||||||
names(reads) <- seq_len(length(reads))
|
names(reads) <- seq_len(length(reads))
|
||||||
reverse_complement <- Biostrings::reverseComplement(reads)
|
reverse_complement <- Biostrings::reverseComplement(reads)
|
||||||
return(reverse_complement)
|
return(reverse_complement)
|
||||||
}
|
}
|
||||||
|
|
||||||
preprocess_data <- function(repertoires) {
|
save_data <- function(repertoire) {
|
||||||
filtered_repertoires <- lapply(repertoires, process_chain)
|
file_name <- "data/sequence.fastq"
|
||||||
names(filtered_repertoires) <- names(repertoires)
|
# TODO Change format to fasta
|
||||||
return(filtered_repertoires)
|
Biostrings::writeXStringSet(repertoire, file_name, format = "fastq")
|
||||||
}
|
|
||||||
|
|
||||||
save_data <- function(repertoires) {
|
|
||||||
for (chain in names(repertoires)) {
|
|
||||||
file_name <- paste("data/", chain, ".fastq", sep = "")
|
|
||||||
Biostrings::writeXStringSet(repertoires[[chain]], file_name, format = "fastq")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
parse_cli_arguments <- function(args) {
|
parse_cli_arguments <- function(args) {
|
||||||
if (length(args) != 1) {
|
if (length(args) != 2) {
|
||||||
stop("usage: repertoire.r <number of sequences>")
|
stop("usage: repertoire.r <number of sequences> <sequencing_runs>")
|
||||||
}
|
}
|
||||||
return(as.integer(args[1]))
|
return(c(args[1], args[2]))
|
||||||
}
|
}
|
||||||
|
|
||||||
args <- commandArgs(trailingOnly = TRUE)
|
args <- commandArgs(trailingOnly = TRUE)
|
||||||
number_of_sequences <- parse_cli_arguments(args)
|
parameters <- parse_cli_arguments(args)
|
||||||
sim_repertoire <- generate_repertoires(number_of_sequences)
|
number_of_sequences <- as.integer(parameters[1])
|
||||||
processed_data <- preprocess_data(sim_repertoire)
|
sequencing_runs <- as.integer(parameters[2])
|
||||||
|
repertoire <- generate_repertoires(number_of_sequences)
|
||||||
|
processed_data <- preprocess_data(repertoire, sequencing_runs)
|
||||||
save_data(processed_data)
|
save_data(processed_data)
|
Loading…
Reference in New Issue
Block a user