44 lines
1.2 KiB
R
44 lines
1.2 KiB
R
library(Biostrings)
|
|
library(parallel)
|
|
|
|
#' Build the VDJ data frame and derive its `hvr_region` column
#'
#' Every element (column) of `data` is passed through
#' `Biostrings::DNAStringSet()`, the resulting list is converted back to a
#' data frame, and a new `hvr_region` column is added holding the
#' concatenation of the `v_sequence`, `d_sequence` and `j_sequence` columns,
#' in that order and without a separator.
#'
#' @param data A list-like object (the caller in this file passes the result
#'   of `read.csv()`) whose elements can each be converted with
#'   `Biostrings::DNAStringSet()`.
#' @return The data frame produced by `as.data.frame()` with the additional
#'   `hvr_region` column.
construct_dataframe <- function(data) {
  string_sets <- lapply(data, Biostrings::DNAStringSet)
  vdj <- as.data.frame(string_sets)

  # V, D and J segments are joined end to end, with nothing in between.
  vdj$hvr_region <- paste0(vdj$v_sequence, vdj$d_sequence, vdj$j_sequence)

  vdj
}
|
|
|
|
#' Load the sequencing reads and the VDJ table from disk
#'
#' @param files Character vector of paths. `files[1]` is read with
#'   `Biostrings::readQualityScaledDNAStringSet()`; `files[2]` is read with
#'   `read.csv()`.
#' @return An unnamed list of length two: first the reverse complement of the
#'   sequences read from `files[1]`, second the data frame returned by
#'   `construct_dataframe()` for the CSV contents.
parse_data <- function(files) {
  fastq_path <- files[1]
  csv_path <- files[2]

  # The reads are reverse-complemented before anything else is done with them.
  raw_reads <- Biostrings::readQualityScaledDNAStringSet(fastq_path)
  sequences <- Biostrings::reverseComplement(raw_reads)

  vdj_dataframe <- construct_dataframe(read.csv(csv_path))

  list(sequences, vdj_dataframe)
}
|
|
|
|
#' Align one sequence against one VDJ segment
#'
#' Thin wrapper around `Biostrings::pairwiseAlignment()` that fixes the
#' alignment type to `"global-local"` and the gap opening value to 1.
#'
#' @param sequence Passed as the `pattern` argument of
#'   `Biostrings::pairwiseAlignment()`.
#' @param vdj_segment Passed as the `subject` argument of
#'   `Biostrings::pairwiseAlignment()`.
#' @return Whatever `Biostrings::pairwiseAlignment()` returns for this pair.
align_sequence <- function(sequence, vdj_segment) {
  aligned <- Biostrings::pairwiseAlignment(
    pattern = sequence,
    subject = vdj_segment,
    type = "global-local",
    gapOpening = 1
  )
  aligned
}
|
|
|
|
|
|
#' Align every sequence against its VDJ hypervariable region in parallel
#'
#' Calls `align_sequence()` element-wise over `sequences` and
#' `vdj_segments$hvr_region` through `parallel::mcmapply()`. As with
#' `mapply()`, a shorter input is recycled to the length of the longer one.
#'
#' @param sequences Sequences to align; element `i` is handed to
#'   `align_sequence()` as its first argument.
#' @param vdj_segments Object with an `hvr_region` element (the caller in this
#'   file passes the data frame built by `construct_dataframe()`); element `i`
#'   of that column is handed to `align_sequence()` as its second argument.
#' @param mc_cores Number of worker processes to fork. Defaults to 4, the
#'   value that used to be hard-coded. Forced to 1 on Windows, where
#'   `mcmapply()` refuses any larger value because forking is unavailable.
#' @return The result of `parallel::mcmapply()` with its default
#'   simplification and naming settings.
perform_alignment <- function(sequences, vdj_segments, mc_cores = 4L) {
  stopifnot(
    is.numeric(mc_cores),
    length(mc_cores) == 1L,
    !is.na(mc_cores),
    mc_cores >= 1
  )

  # mcmapply() relies on fork(); on Windows it errors for mc.cores > 1, so
  # degrade to a serial run there instead of failing outright.
  if (.Platform$OS.type == "windows") {
    mc_cores <- 1L
  }

  parallel::mcmapply(
    FUN = align_sequence,
    sequences,
    vdj_segments$hvr_region,
    mc.cores = mc_cores
  )
}
|
|
|
|
# Script entry point ----
# Reads the FASTQ reads and the VDJ CSV, aligns each read against the
# matching hvr_region entry, and prints the alignment result.
input_files <- c("data/curesim_sequence.fastq", "data/vdj_alignment.csv")
data <- parse_data(input_files)
alignment <- perform_alignment(
  sequences = data[[1]],
  vdj_segments = data[[2]]
)
print(alignment)