# locigenesis/src/alignment.r
#
# 44 lines
# 1.2 KiB
# R

library(Biostrings)
library(parallel)
#' Build the VDJ data frame and derive the hypervariable region column
#'
#' Every column of `data` is passed through `Biostrings::DNAStringSet()`, the
#' resulting list is converted back into a data frame, and the V, D and J
#' segment sequences are concatenated row by row into a new `hvr_region`
#' column.
#'
#' @param data A data frame (or named list of equal-length columns) that
#'   contains at least the columns `v_sequence`, `d_sequence` and
#'   `j_sequence`. NOTE(review): since every column goes through
#'   `DNAStringSet()`, all columns presumably have to hold DNA strings;
#'   confirm against the CSV producer.
#' @return The converted data frame with an additional `hvr_region` column.
construct_dataframe <- function(data) {
  required_columns <- c("v_sequence", "d_sequence", "j_sequence")
  missing_columns <- setdiff(required_columns, names(data))
  # `$` returns NULL for an absent column and paste would then silently drop
  # that segment, yielding a truncated hvr_region; fail loudly instead.
  if (length(missing_columns) > 0) {
    stop(
      "Missing required column(s): ",
      paste(missing_columns, collapse = ", "),
      call. = FALSE
    )
  }
  vdj_string_set <- lapply(data, FUN = Biostrings::DNAStringSet)
  vdj_dataframe <- as.data.frame(vdj_string_set)
  vdj_dataframe$hvr_region <- paste0(
    vdj_dataframe$v_sequence,
    vdj_dataframe$d_sequence,
    vdj_dataframe$j_sequence
  )
  return(vdj_dataframe)
}
#' Load the sequencing reads and the VDJ alignment table
#'
#' The first path is read with
#' `Biostrings::readQualityScaledDNAStringSet()` and the reads are then
#' reverse-complemented; the second path is read with `read.csv()` and handed
#' to `construct_dataframe()`.
#'
#' @param files A vector whose first element is the path of the reads file
#'   and whose second element is the path of the VDJ CSV file.
#' @return An unnamed list of length two: the reverse-complemented reads,
#'   followed by the data frame returned by `construct_dataframe()`.
parse_data <- function(files) {
  reads_path <- files[1]
  vdj_path <- files[2]
  raw_reads <- Biostrings::readQualityScaledDNAStringSet(reads_path)
  reads <- Biostrings::reverseComplement(raw_reads)
  vdj_table <- construct_dataframe(read.csv(vdj_path))
  return(list(reads, vdj_table))
}
#' Align a single sequence against a single VDJ segment
#'
#' Thin wrapper around `Biostrings::pairwiseAlignment()` that fixes the
#' alignment type to "global-local" and the gap opening penalty to 1.
#' NOTE(review): recent Bioconductor releases moved `pairwiseAlignment()` to
#' the pwalign package; confirm the installed Biostrings still provides it.
#'
#' @param sequence Value forwarded as the `pattern` argument.
#' @param vdj_segment Value forwarded as the `subject` argument.
#' @return Whatever `Biostrings::pairwiseAlignment()` returns for the pair.
align_sequence <- function(sequence, vdj_segment) {
  alignment <- Biostrings::pairwiseAlignment(
    pattern = sequence,
    subject = vdj_segment,
    gapOpening = 1,
    type = "global-local"
  )
  return(alignment)
}
#' Align every sequence against its corresponding hypervariable region
#'
#' Pairs the elements of `sequences` with the elements of
#' `vdj_segments$hvr_region` and calls `align_sequence()` on each pair via
#' `parallel::mcmapply()`.
#'
#' @param sequences The sequences to align, iterated element by element.
#' @param vdj_segments An object with an `hvr_region` element (for example
#'   the data frame built by `construct_dataframe()`).
#' @param mc_cores Number of cores handed to `mcmapply()` as `mc.cores`.
#'   Defaults to 4, the value that used to be hard-coded. Values above 1 rely
#'   on forking, which the parallel package does not support on Windows.
#' @return The result of `mcmapply()`, i.e. one `align_sequence()` result per
#'   pair, subject to `mcmapply()`'s default simplification.
perform_alignment <- function(sequences, vdj_segments, mc_cores = 4) {
  sequence_alignment <- parallel::mcmapply(
    sequences,
    vdj_segments$hvr_region,
    FUN = align_sequence,
    mc.cores = mc_cores
  )
  return(sequence_alignment)
}
# Script entry point ----
# Paths are given in the order parse_data() indexes them: the reads file
# first, the VDJ alignment CSV second.
input_files <- c(
  "data/curesim_sequence.fastq",
  "data/vdj_alignment.csv"
)
# parse_data() returns an unnamed list: reads at [[1]], VDJ table at [[2]].
data <- parse_data(input_files)
alignment <- perform_alignment(data[[1]], data[[2]])
print(alignment)