#' Perform Hierarchical Clustering and tree pruning on a distance matrix
#'
#' Performs Hierarchical Clustering on a distance matrix
#' (i.e. calculated with \code{\link[fastreeR]{vcf2dist}}
#' or \code{\link[fastreeR]{fasta2dist}})
#' and generates a phylogenetic tree with
#' agglomerative Neighbor Joining method (complete linkage)
#' (as in \code{\link[fastreeR]{dist2tree}}).
#' The phylogenetic tree is then pruned with
#' \code{\link[dynamicTreeCut]{cutreeDynamic}} to get clusters
#' (as in \code{\link[fastreeR]{tree2clusters}}).
#'
#' @param inputDist Input distances file location
#' (generated with \code{\link[fastreeR]{vcf2dist}}
#' or \code{\link[fastreeR]{fasta2dist}}).
#' File can be gzip compressed.
#' Or a \code{\link[stats]{dist}} distances object.
#' @param cutHeight Define at which height to cut tree.
#' Default automatically defined.
#' @param minClusterSize Minimum size of clusters. Default 1.
#' @param extra Boolean whether to use extra parameters
#' for the \code{\link[dynamicTreeCut]{cutreeDynamic}}.
#' @param verbose Logical. If TRUE, enables verbose output from the Java backend.
#'
#' @return A list of :
#' \itemize{
#'     \item \code{\link[base]{character} vector} of the generated
#'     phylogenetic tree in Newick format
#'     \item \code{\link[base]{character} vector} of the clusters.
#'     Each row contains data for a cluster, separated by space.
#'     The id of the cluster,
#'     the size of the cluster (number of elements)
#'     and the names of its elements.
#'     Cluster id 0 contains all the objects not assigned
#'     to a cluster (singletons).
#'     Example clusters output :
#'     \tabular{lllll}{
#'         0 \tab 3 \tab Sample1 \tab Sample2 \tab Sample3 \cr
#'         1 \tab 3 \tab Sample4 \tab Sample5 \tab Sample6 \cr
#'         2 \tab 2 \tab Sample7 \tab Sample8 \tab \cr
#'         3 \tab 2 \tab Sample9 \tab Sample0\tab \cr
#'     }
#' }
#' @export
#'
#' @examples
#' my.clust <- dist2clusters(
#'     inputDist =
#'         system.file("extdata", "samples.vcf.dist.gz", package = "fastreeR"),
#'     verbose = TRUE
#' )
#' @author Anestis Gkanogiannis, \email{anestis@@gkanogiannis.com}
#' @references Java implementation:
#' \url{https://github.com/gkanogiannis/BioInfoJava-Utils}

dist2clusters <- function(inputDist, cutHeight = NULL,
                                            minClusterSize = 1, extra = TRUE,
                                            verbose = FALSE) {
    # Validate every argument up front, before any temp file is created
    # or any Java object is instantiated.
    dist2clusters_checkParams(inputDist = inputDist, cutHeight = cutHeight,
                                minClusterSize = minClusterSize, extra = extra,
                                verbose = verbose)

    # Path that is eventually handed to the Java reader. A non-gzipped file
    # location is used as is; the other two input kinds go through a temp file.
    inputfile <- inputDist

    if (methods::is(inputDist, "character") &&
            R.utils::isGzipped(inputDist)) {
        # Decompress into a temp file and leave the user's file in place.
        temp.in <- tempfile(fileext = ".dist")
        # add = TRUE so this cleanup never replaces another exit handler.
        on.exit(unlink(temp.in), add = TRUE)
        R.utils::gunzip(filename = inputDist, destname = temp.in,
                        remove = FALSE)
        inputfile <- temp.in
    } else if (methods::is(inputDist, "dist")) {
        # Serialise the dist object as a text file: a header line
        # "<Size> 0" followed by the full square matrix, one row per
        # sample, each row prefixed by its row name.
        # NOTE(review): the meaning of the trailing "0" in the header is
        # defined by the Java reader and is not visible here.
        temp.in <- tempfile(fileext = ".dist")
        on.exit(unlink(temp.in), add = TRUE)
        write(paste0(attr(inputDist, "Size"), " 0"), file = temp.in)
        utils::write.table(as.matrix(inputDist), file = temp.in, sep = " ",
            quote = FALSE, row.names = TRUE, col.names = FALSE, append = TRUE)
        inputfile <- temp.in
    }

    hierarchicalcluster <- rJava::.jnew(
        class = "com/gkano/bioinfo/tree/HierarchicalCluster",
        verbose,
        class.loader = .rJava.class.loader
    )
    generaltools <- rJava::J(class = "com/gkano/bioinfo/var/GeneralTools",
        class.loader = .rJava.class.loader)$getInstance()

    # data[[1]] distances, data[[2]] labels
    data <- generaltools$readDistancesSamples(inputfile)
    treeStr <- hierarchicalcluster$hclusteringTree(data[[2]], data[[1]])

    # Reorder labels and distances with the Java helpers, using the tree
    # string, before they are passed on to tree2clusters().
    labelsReordered <- generaltools$reorderLabels(data[[2]], treeStr)
    distancesReordered <- rJava::.jevalArray(
        generaltools$reorderDistances(data[[1]], data[[2]], labelsReordered),
        simplify = TRUE
    )

    # Unnamed two-element list: [[1]] the tree string, [[2]] the clusters.
    list(treeStr, fastreeR::tree2clusters(
                        treeStr = treeStr, treeDistances = distancesReordered,
                        treeLabels = labelsReordered, cutHeight = cutHeight,
                        minClusterSize = minClusterSize, extra = extra))
}

# Validate the arguments of dist2clusters().
#
# Stops with a descriptive error on the first invalid argument and returns
# NULL invisibly when every argument is acceptable.
#
# inputDist      a "dist" object, or a single non-empty path of an existing file
# cutHeight      NULL, or a single non-NA numeric >= 0
# minClusterSize a single non-NA numeric >= 1
# extra          a single non-NA logical
# verbose        a single non-NA logical
dist2clusters_checkParams <- function(inputDist, cutHeight,
                                      minClusterSize, extra,
                                      verbose) {
    # TRUE only for a length-one, non-missing value. The length test comes
    # first so is.na() is never evaluated on a longer vector inside `&&`.
    isScalar <- function(x) length(x) == 1L && !is.na(x)

    isDist <- methods::is(inputDist, "dist")
    isPath <- methods::is(inputDist, "character")
    # A usable path is a single, non-NA, non-empty string naming an
    # existing file. The scalar test guards nchar()/file.exists() from
    # returning a vector to `&&`.
    validPath <- isPath && isScalar(inputDist) &&
        nchar(inputDist) > 0 && file.exists(inputDist)
    if ((!isDist && !isPath) || (isPath && !validPath)) {
        stop("inputDist parameter must be a valid file location ",
                                                            "or a dist object.")
    }

    # NULL means "let the cut height be chosen automatically"; zero is
    # accepted, only negative values are rejected.
    if (!is.null(cutHeight) &&
        (!is.numeric(cutHeight) || !isScalar(cutHeight) || cutHeight < 0)) {
        stop("cutHeight parameter must be positive numeric.")
    }

    if (!is.numeric(minClusterSize) || !isScalar(minClusterSize) ||
        minClusterSize < 1) {
        stop("minClusterSize parameter must be positive integer.")
    }

    if (!is.logical(extra) || !isScalar(extra)) {
        stop("extra parameter must be logical.")
    }

    if (!is.logical(verbose) || !isScalar(verbose)) {
        stop("verbose parameter must be logical.")
    }

    invisible(NULL)
}