Amplify VDJ sequences to simplify parsing
This commit is contained in:
		
							parent
							
								
									576597cb04
								
							
						
					
					
						commit
						f81e4af94e
					
				@ -19,5 +19,5 @@ fastq=".fastq"
 | 
				
			|||||||
filename="sequence"
 | 
					filename="sequence"
 | 
				
			||||||
prefix="curesim_"
 | 
					prefix="curesim_"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Rscript src/repertoire.r "$sequences" && java -jar tools/CuReSim.jar -n "$sequencing_runs" -m "$read_mean_size" -sd "$read_variance_size" -f "$data_directory$filename$fasta" -o "$data_directory$prefix$filename$fastq"
 | 
					Rscript src/repertoire.r "$sequences" "$sequencing_runs" && java -jar tools/CuReSim.jar -n "$sequencing_runs" -m "$read_mean_size" -sd "$read_variance_size" -f "$data_directory$filename$fasta" -o "$data_directory$prefix$filename$fastq"
 | 
				
			||||||
rm "$data_directory/log.txt"
 | 
					rm "$data_directory/log.txt"
 | 
				
			||||||
 | 
				
			|||||||
@ -10,13 +10,14 @@ generate_repertoire <- function(number_of_sequences) {
 | 
				
			|||||||
  ))
 | 
					  ))
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
save_data <- function(data) {
 | 
					save_data <- function(data, reads) {
 | 
				
			||||||
  Biostrings::writeXStringSet(data$sequence, "data/sequence.fasta")
 | 
					  Biostrings::writeXStringSet(data$sequence, "data/sequence.fasta")
 | 
				
			||||||
  vdj_sequences <- data[-1]
 | 
					  vdj_sequences <- data[-1]
 | 
				
			||||||
  write.csv(vdj_sequences, "data/vdj_alignment.csv", row.names = FALSE)
 | 
					  amplified_vdj <- vdj_sequences[rep(seq_len(nrow(vdj_sequences)), reads), ]
 | 
				
			||||||
 | 
					  write.csv(amplified_vdj, "data/vdj_alignment.csv", row.names = FALSE)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
process_data <- function(repertoire) {
 | 
					process_data <- function(repertoire, reads) {
 | 
				
			||||||
  columns <- c(
 | 
					  columns <- c(
 | 
				
			||||||
    "sequence", "v_sequence_alignment",
 | 
					    "sequence", "v_sequence_alignment",
 | 
				
			||||||
    "d_sequence_alignment", "j_sequence_alignment"
 | 
					    "d_sequence_alignment", "j_sequence_alignment"
 | 
				
			||||||
@ -24,17 +25,17 @@ process_data <- function(repertoire) {
 | 
				
			|||||||
  data <- repertoire[, columns]
 | 
					  data <- repertoire[, columns]
 | 
				
			||||||
  dna_sequence <- Biostrings::DNAStringSet(data$sequence)
 | 
					  dna_sequence <- Biostrings::DNAStringSet(data$sequence)
 | 
				
			||||||
  data$sequence <- Biostrings::reverseComplement(dna_sequence)
 | 
					  data$sequence <- Biostrings::reverseComplement(dna_sequence)
 | 
				
			||||||
  save_data(data)
 | 
					  save_data(data, reads)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
parse_cli_arguments <- function() {
 | 
					parse_cli_arguments <- function() {
 | 
				
			||||||
  args <- commandArgs(trailingOnly = TRUE)
 | 
					  args <- commandArgs(trailingOnly = TRUE)
 | 
				
			||||||
  if (length(args) != 1) {
 | 
					  if (length(args) != 2) {
 | 
				
			||||||
    stop("usage: repertoire.r <number of sequences>")
 | 
					    stop("usage: repertoire.r <number of sequences> <sequencing runs>")
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  return(args[1])
 | 
					  return(c(args[1], args[2]))
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
args <- parse_cli_arguments()
 | 
					args <- parse_cli_arguments()
 | 
				
			||||||
repertoire <- generate_repertoire(number_of_sequences = as.integer(args[1]))
 | 
					repertoire <- generate_repertoire(number_of_sequences = as.integer(args[1]))
 | 
				
			||||||
process_data(repertoire)
 | 
					process_data(repertoire = repertoire, reads = as.integer(args[2]))
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user