library(Biostrings)
library(parallel)

#' Build a V/D/J data frame with a combined `hvr_region` column.
#'
#' Each element (column) of `data` is passed through
#' `Biostrings::DNAStringSet()`, the result is converted back to a data
#' frame, and the `v_sequence`, `d_sequence` and `j_sequence` columns are
#' concatenated, in that order, into a new `hvr_region` column.
#'
#' @param data A list or data frame whose elements can all be converted by
#'   `Biostrings::DNAStringSet()`. The columns `v_sequence`, `d_sequence`
#'   and `j_sequence` are read by name.
#' @return A data frame with the converted columns plus `hvr_region`.
construct_dataframe <- function(data) {
  vdj_string_set <- lapply(data, FUN = Biostrings::DNAStringSet)
  vdj_dataframe <- as.data.frame(vdj_string_set)
  vdj_dataframe$hvr_region <- paste0(
    vdj_dataframe$v_sequence,
    vdj_dataframe$d_sequence,
    vdj_dataframe$j_sequence
  )
  vdj_dataframe
}

#' Load the sequence file and the V/D/J table.
#'
#' @param files Character vector. `files[1]` is read with
#'   `Biostrings::readQualityScaledDNAStringSet()`; `files[2]` is read with
#'   `read.csv()`.
#' @return An unnamed list of length two: the reverse complement of the
#'   sequences read from `files[1]`, and the data frame produced by
#'   `construct_dataframe()` from `files[2]`.
parse_data <- function(files) {
  reversed_sequences <- Biostrings::readQualityScaledDNAStringSet(files[1])
  sequences <- Biostrings::reverseComplement(reversed_sequences)
  # Explicit so the columns are read as character on every R version.
  vdj_alignment <- read.csv(files[2], stringsAsFactors = FALSE)
  vdj_dataframe <- construct_dataframe(vdj_alignment)
  list(sequences, vdj_dataframe)
}

#' Align one sequence against one V/D/J segment.
#'
#' Thin wrapper around `Biostrings::pairwiseAlignment()` using
#' `type = "global-local"` and a gap-opening penalty of 1.
#'
#' @param sequence The pattern to align.
#' @param vdj_segment The subject to align against.
#' @return The value returned by `Biostrings::pairwiseAlignment()`.
align_sequence <- function(sequence, vdj_segment) {
  Biostrings::pairwiseAlignment(
    pattern = sequence,
    subject = vdj_segment,
    type = "global-local",
    gapOpening = 1
  )
}

#' Align every sequence against its matching HVR region in parallel.
#'
#' Elements of `sequences` and `vdj_segments$hvr_region` are paired
#' position by position and each pair is passed to `align_sequence()`.
#'
#' @param sequences The sequences to align (used as patterns).
#' @param vdj_segments A data frame with an `hvr_region` column (used as
#'   subjects).
#' @param cores Number of worker processes handed to
#'   `parallel::mcmapply()` as `mc.cores`. Defaults to 4, the value that
#'   was previously hard-coded. Forced to 1 on Windows.
#' @return The result of `parallel::mcmapply()` over the paired inputs.
perform_alignment <- function(sequences, vdj_segments, cores = 4L) {
  stopifnot(is.numeric(cores), length(cores) == 1L, cores >= 1)
  # Forking is unavailable on Windows, where mc.cores > 1 is rejected.
  if (.Platform$OS.type == "windows") {
    cores <- 1L
  }
  parallel::mcmapply(
    FUN = align_sequence,
    sequences,
    vdj_segments$hvr_region,
    mc.cores = cores
  )
}

# Script entry point ----
input_files <- c("data/curesim_sequence.fastq", "data/vdj_alignment.csv")
data <- parse_data(input_files)
alignment <- perform_alignment(sequences = data[[1]], vdj_segments = data[[2]])
print(alignment)