#' Parse the HGVS-like syntax of a pathogenic mutation
#'
#' Splits a mutation string of the form
#' \code{"sequence_identifier:(g.)position(s)change"} into its components.
#' Supported changes are substitution (\code{100A>G}), deletion
#' (\code{100del}, \code{100_102del}, \code{100_102delATC}, \code{100_102del3}),
#' insertion (\code{100_101insAT}), deletion-insertion (\code{100_102delinsGG})
#' and duplication (\code{100dup}, \code{100_102dupATC}, \code{100_102dup3}).
#' Only upper-case \code{A}, \code{T}, \code{C}, \code{G} are accepted as bases.
#'
#' @param pathog.mut A single character string specifying the pathogenic mutation following the HGVS-like syntax for sub, del, ins, delins, or dup.
#' 
#' @return A list with the following elements:
#' \describe{
#'   \item{type}{Mutation type: \code{"snv"}, \code{"del"}, \code{"ins"}, \code{"delins"} or \code{"dup"}.}
#'   \item{seq.id}{Reference sequence id (the text before the colon).}
#'   \item{start}{Mutation start position.}
#'   \item{end}{Mutation end position. For \code{"delins"} this is the last
#'     position written in the mutation plus one.}
#'   \item{ref}{Reference allele: the base for \code{"snv"}; the deleted sequence
#'     for \code{"del"} when it is spelled out (otherwise \code{NA});
#'     \code{"-"} for \code{"ins"} and \code{"dup"}; \code{NA} for \code{"delins"}.}
#'   \item{alt}{Alternative allele: the base for \code{"snv"}; \code{"-"} for
#'     \code{"del"}; the inserted sequence for \code{"ins"} and \code{"delins"};
#'     the duplicated sequence for \code{"dup"} when it is spelled out
#'     (otherwise \code{NA}).}
#' }
#' An error is raised when the input is not a single string, does not match
#' the syntax of any supported mutation type, has a start position larger
#' than its end position, or carries a sequence/length suffix that disagrees
#' with the coordinates.
#' 
#' @examples
#' \dontrun{
#'     parseHGVS("NC_000001.11:g.100A>G")
#'     parseHGVS("chr1:g.100_102delATC")
#'     parseHGVS("chr1:g.100_101insAT")
#' }
#'
#' @noRd
parseHGVS <- function( pathog.mut ){
    
    # Reject vectors, NA and non-character input up front; otherwise e.g.
    # c("chr1", "g.100A>G") would pass the two-field check below by accident.
    if( !is.character(pathog.mut) || length(pathog.mut)!=1 || is.na(pathog.mut) ){
        stop("The pathogenic mutation must be a single non-missing character string.")
    }
    
    split.mut <- unlist( strsplit(pathog.mut,":") )
    if(length(split.mut)!=2){
        stop("The format of pathogenic mutation does not follow the general HGVS syntax as \"sequence_identifier : (g.) position(s) change\".")
    }
    
    seq.id <- split.mut[1]
    # Drop an optional prefix such as "g." (everything up to and including the last dot).
    mut <- sub("^(.*\\.)?", "", split.mut[2])
    
    # One pattern per mutation type, used both for validation and for extraction.
    snv.pattern    <- "^([0-9]+)([ATCG])>([ATCG])$"
    del.pattern    <- "^([0-9]+(_[0-9]+)?)del([ATCG]*|[0-9]*)$"
    ins.pattern    <- "^([0-9]+_[0-9]+)ins([ATCG]+)$"
    delins.pattern <- "^([0-9]+(_[0-9]+)?)delins([ATCG]+)$"
    dup.pattern    <- "^([0-9]+(_[0-9]+)?)dup([ATCG]*|[0-9]*)$"
    
    range.msg <- "The start position of pathogenic mutation is larger than the end position."
    
    # Turn "start" or "start_end" into an integer vector c(start, end);
    # a single position is used as both start and end.
    parseRegion <- function( region ){
        pos <- as.integer( unlist(strsplit(region, "_")) )
        if( length(pos)==1 ){
            pos[2] <- pos[1]
        }
        pos
    }
    
    mut.type <- NA
    mut.start <- NA
    mut.end <- NA
    mut.ref <- NA
    mut.alt <- NA
    
    has.del <- grepl("del",mut)
    has.ins <- grepl("ins",mut)
    
    if( grepl(">",mut) ){
        
        if( !grepl(snv.pattern, mut) ){
            stop("The format of pathogenic mutation does not follow the HGVS syntax for substitution.")
        }
        mut.type <- "snv"
        mut.start <- as.integer( sub(snv.pattern, "\\1", mut) )
        mut.end <- mut.start
        mut.ref <- sub(snv.pattern, "\\2", mut)
        mut.alt <- sub(snv.pattern, "\\3", mut)
        
    }else if( has.del && !has.ins ){
        
        if( !grepl(del.pattern, mut) ){
            stop("The format of pathogenic mutation does not follow the HGVS syntax for deletion.")
        }
        mut.type <- "del"
        mut.pos <- parseRegion( sub(del.pattern, "\\1", mut) )
        # isTRUE() keeps an NA position (integer overflow) from raising a cryptic error here.
        if( isTRUE(mut.pos[1] > mut.pos[2]) ){
            stop(range.msg)
        }
        mut.start <- mut.pos[1]
        mut.end <- mut.pos[2]
        mut.alt <- "-"
        # The optional suffix is either the deleted sequence or the deleted length.
        mut.suffix <- sub(del.pattern, "\\3", mut)
        if( grepl("[ATCG]+", mut.suffix) ){
            if( mut.end-mut.start+1 != nchar(mut.suffix) ){
                stop("The length of deletion sequence in pathogenic mutation does not match the coordinates.")
            }
            mut.ref <- mut.suffix
        }else if( grepl("[0-9]+", mut.suffix) ){
            if( mut.end-mut.start+1 != as.integer(mut.suffix) ){
                stop("The length of deletion in pathogenic mutation does not match the coordinates.")
            }
        }
        
    }else if( !has.del && has.ins ){
        
        if( !grepl(ins.pattern, mut) ){
            stop("The format of pathogenic mutation does not follow the HGVS syntax for insertion.")
        }
        mut.type <- "ins"
        mut.pos <- parseRegion( sub(ins.pattern, "\\1", mut) )
        mut.start <- mut.pos[1]
        mut.end <- mut.pos[2]
        mut.ref <- "-"
        mut.alt <- sub(ins.pattern, "\\2", mut)
        # An insertion is described by its two flanking positions, which must be adjacent.
        if(mut.end-mut.start!=1){
            stop("The coordinates of insertion in pathogenic mutation are not consecutive.")
        }
        
    }else if( grepl("delins",mut) ){
        
        if( !grepl(delins.pattern, mut) ){
            stop("The format of pathogenic mutation does not follow the HGVS syntax for deletion-insertion.")
        }
        mut.type <- "delins"
        mut.pos <- parseRegion( sub(delins.pattern, "\\1", mut) )
        if( isTRUE(mut.pos[1] > mut.pos[2]) ){
            stop(range.msg)
        }
        mut.start <- mut.pos[1]
        # NOTE(review): the end is reported one past the last written position,
        # unlike del/dup; kept as-is because callers may rely on it -- confirm.
        mut.end <- mut.pos[2] + 1
        mut.alt <- sub(delins.pattern, "\\3", mut)
        
    }else if( grepl("dup",mut) ){
        
        if( !grepl(dup.pattern, mut) ){
            stop("The format of pathogenic mutation does not follow the HGVS syntax for duplication.")
        }
        mut.type <- "dup"
        mut.pos <- parseRegion( sub(dup.pattern, "\\1", mut) )
        if( isTRUE(mut.pos[1] > mut.pos[2]) ){
            stop(range.msg)
        }
        mut.start <- mut.pos[1]
        mut.end <- mut.pos[2]
        mut.ref <- "-"
        # The optional suffix is either the duplicated sequence or the duplicated length.
        mut.suffix <- sub(dup.pattern, "\\3", mut)
        if( grepl("[ATCG]+", mut.suffix) ){
            if( mut.end-mut.start+1 != nchar(mut.suffix) ){
                stop("The length of duplication sequence in pathogenic mutation does not match the coordinates.")
            }
            mut.alt <- mut.suffix
        }else if( grepl("[0-9]+", mut.suffix) ){
            if( mut.end-mut.start+1 != as.integer(mut.suffix) ){
                stop("The length of duplication in pathogenic mutation does not match the coordinates.")
            }
        }
        
    }else{
        
        stop("The format of pathogenic mutation does not follow the HGVS syntax for substitution, deletion, insertion, deletion-insertion (delins), or duplication.")
        
    }
    
    parsed.mut <- list(
        type = mut.type,
        seq.id = seq.id, 
        start = mut.start,
        end = mut.end,
        ref = mut.ref,
        alt = mut.alt
    )
    
    return(parsed.mut)
}

