################################################################################# defineCutoff
defineCutoff <- function(df, percent){

    # looks up the p-value sitting at the requested percentile
    #
    # df: a dataframe returned by GOstats (only its Pvalue column is read)
    # percent: a number in (0,1), handed to quantile() as `probs`
    #
    # value: the quantile of df$Pvalue at `percent`, exactly as quantile()
    #        returns it

    quantile(x = df$Pvalue, probs = percent)
}

################################################################################# cvCutoff
cvCutoff <- function(df, percentile = 0.10, stp = 0.01){

    # find the cutoff, must hold one condition =>
    #     the rows of df with Pvalue strictly below the cutoff must cover
    #     at least 2 distinct clusters
    #
    # Starting at `percentile`, the percentile is increased by `stp` (capped
    # at 1) until the condition holds.
    #
    # df: a dataframe with the columns Pvalue and clusterNum
    # percentile: a number in (0,1)
    #          starting percentile of interest
    # stp: positive step by which the percentile is increased
    #
    # cutoff: the quantile of df$Pvalue (as returned by defineCutoff) at the
    #         first percentile that satisfies the condition
    #
    # stops with an error when stp is not positive and a step is needed, or
    # when even percentile 1 does not satisfy the condition

    repeat{

        cutoff <- defineCutoff(df = df, percent = percentile)
        numClus <- length(unique(df$clusterNum[df$Pvalue < cutoff]))

        if(numClus >= 2){
            return(cutoff)
        }

        # percentile 1 is the widest possible selection; stepping further
        # would only make quantile() fail with "'probs' outside [0,1]"
        if(percentile >= 1){
            stop("no cutoff leaves genes from at least 2 clusters",
                 call. = FALSE)
        }

        # a zero, negative or missing step can never widen the selection
        if(!isTRUE(stp > 0)){
            stop("stp must be a positive number", call. = FALSE)
        }

        # clamp so that percentile 1 is still tried when the step overshoots
        percentile <- min(percentile + stp, 1)
    }
}

################################################################################# semiLabeling
semiLabeling <-
    function(geneID, df_GO, GOgenes, cutoff = NULL,
    percent = 0.10, stp = 0.01){

    # labels genes with the cluster whose significant GO terms contain them
    #
    # geneID: vector of gene identifiers to label
    # df_GO: a dataframe with the columns Pvalue, clusterNum and GOID
    # GOgenes: a list holding, under the name "Cluster<clusterNum>_GOTermGenes",
    #          the genes of each GO term of that cluster (looked up by GO ID)
    # cutoff: only rows of df_GO with Pvalue < cutoff are used;
    #         when NULL the cutoff is chosen by cvCutoff
    # percent: a number in (0,1), starting percentile handed to cvCutoff
    # stp: a number in (0,1), percentile step handed to cvCutoff
    #
    # value: a list with
    #     cutoff: the cutoff that was used
    #     geneLabel: a dataframe with one row per element of geneID and the
    #                columns geneID and label; label is NA for genes found in
    #                no significant GO term. When a gene is found in several
    #                clusters, the cluster processed last wins.

    if(is.null(geneID)){ stop("geneID is NULL")}

    if(!is.data.frame(df_GO)){ stop("df_GO must be a dataframe")}

    if(!is.list(GOgenes)){ stop("GOgenes must be a list")}

    if(percent >= 1 || percent <= 0){
        warning("percent must be in (0,1) \n making percent to default",
                call. = FALSE)
        percent <- 0.10 }
    if(stp >= 1 || stp <= 0){
        warning("stp must be in (0,1) \n making stp to default",
                call. = FALSE)
        stp <- 0.01 }


    geneLabel <- data.frame(geneID = geneID, label = NA)
    # drop row names inherited from a named geneID vector
    rownames(geneLabel) <- NULL

    if(is.null(cutoff)){
        cutoff <- cvCutoff(df_GO, percentile = percent, stp = stp) }


    message("cutoff value is ", cutoff)
    df_GO <- df_GO[df_GO$Pvalue < cutoff, ]

    ## perform the semilabeling
    for(lab in unique(df_GO$clusterNum)){

        GOIDs <- df_GO$GOID[df_GO$clusterNum == lab]
        # a factor would index GOTerms by its integer codes instead of by name
        if(is.factor(GOIDs)){ GOIDs <- as.character(GOIDs) }

        GOTerms <- GOgenes[[paste0("Cluster", lab, "_GOTermGenes")]]

        # genes of all significant GO terms of this cluster; no placeholder
        # value is mixed in, so no real gene ID can match one by accident
        sigGenes <- unique(unlist(lapply(GOIDs, function(go) GOTerms[[go]]),
                                  use.names = FALSE))

        geneLabel$label[geneLabel$geneID %in% sigGenes] <- lab

    }

    # geneLabel already has exactly one row per element of geneID, in input
    # order; joining it with geneID again would only multiply the rows of
    # duplicated IDs

    newList <- list("cutoff" = cutoff, "geneLabel" = geneLabel)

    message("semiLabeling done!..\n")
    return(newList)

}