#' Spotting
#'
#' This function spots missing values within the given data.frame.
#'
#' @param main_data This is the input data.frame read in by the HarmonizR.
#' @param batch_list An overview of the batch groupings in list form (comes
#' from the user).
#' @param block_list An overview of the batch groupings in list form (comes
#' from the blocking function). If blocking is FALSE, this list will be the
#' same as 'batch_list'.
#' @param needed_values The number of values needed to be present in a batch
#' in order to be valid.
#' @param verbosity Toggles the amount of stuff printed out by the HarmonizR 
#' algorithm during execution.
#' @return A list of vectors to pass to the upcoming splitting() function.


# ----- spotting_missing_values(main_data, batch_list, needed_values) ---------
# This function builds the "affiliation_list"
# "affiliation list" is a list of vectors and is the return of this function
spotting_missing_values <- function(main_data, 
                                    batch_list, 
                                    block_list, 
                                    needed_values,
                                    verbosity) {
    
    # The initialization
    missing_value_found <- FALSE
    value_existence_counter <- 0
    temp_vec <- vector()
    affiliation_list <- list()
    
    original_batches_existence_counter <- 0
    # "sum" exists only for control purposes 
    sum <- 0
    sum_counter <- 0
    
    # Iterate through the entire dataset "main_data"
    for (i in seq_len(nrow(main_data))) {
        for (j in seq_len(ncol(main_data))) {
            # If a value is found, increase "value_existence_counter"
            if (is.na(main_data[i, j]) == FALSE) {
                value_existence_counter <- value_existence_counter + 1
                
                original_batches_existence_counter <-
                    original_batches_existence_counter + 1
                sum_counter <- sum_counter + 1
                
            }
            
            # Checks if we are in a new batch based on the original description
            if (is.na(batch_list[j + 1]) || 
                batch_list[j + 1] != batch_list[j]) {
                if (original_batches_existence_counter < needed_values) {
                    missing_value_found <- TRUE
                    
                } 
                original_batches_existence_counter <- 0
            }
            
            # The following if-statement means:
            # The next iteration will look into a new batch
            if (is.na(block_list[j + 1]) || 
                block_list[j + 1] != block_list[j]) {
                # Needed_values is either 2 or 1 based on the chosen ComBat_mode
                if (value_existence_counter < needed_values) {
                    missing_value_found <- TRUE
                }
                # We found a batch with enough valid values
                if (missing_value_found != TRUE) {
                    # Since we can use the protein data for the batch, we add it
                    # to the corresponding vector, which later ends up in the
                    # "affiliation_list"
                    temp_vec[block_list[j]] <- block_list[j]
                    # The else means that we DIDN'T find enough values and the
                    # batch is discarded. Hence, we save the amount of lost
                    # values in "sum"
                } else {
                    sum <- sum + sum_counter
                }
                # Reset "sum_counter" to 0,
                # "missing_value_found" to FALSE
                # and "value_existence_counter" to 0
                sum_counter <- 0
                missing_value_found <- FALSE
                value_existence_counter <- 0
            }
        }
        # Append the vector to "affiliation_list"
        temp_vec <- temp_vec[!is.na(temp_vec)]
        affiliation_list[[i]] <- temp_vec
        # Reset "temp_vec"
        temp_vec <- vector()
    }
    
    if (verbosity >= 2) {
        message(
            "Amount of numerical values lost due to insufficient presence ",
            "in a batch:"
            )
        message(sum)
    }
    
    # Returns a list of vectors ("affiliation_list").
    # Each element/vector in the list is correspondent to a row in "main_data"
    return(affiliation_list)
}