# TrAnSys
# Elaborado por Itamar José G. Nunes et al.

######################################################################################
# Algoritmo auxiliar derivado do original do limma, modificado para passar sinais de
# progresso

library('Biobase')
library('limma')
library('GEOquery')


read.maimagesTrAnSys <- function (files = NULL, source = "generic", path = NULL, ext = NULL, 
                                  names = NULL, columns = NULL, other.columns = NULL, annotation = NULL, 
                                  green.only = FALSE, wt.fun = NULL, verbose = TRUE, sep = "\t", 
                                  quote = NULL, ...) 
{
  source <- match.arg(source, c("generic", "agilent", "agilent.mean", 
                                "agilent.median", "arrayvision", "arrayvision.ARM", "arrayvision.MTM", 
                                "bluefuse", "genepix", "genepix.mean", "genepix.median", 
                                "genepix.custom", "imagene", "imagene9", "quantarray", 
                                "scanarrayexpress", "smd.old", "smd", "spot", "spot.close.open"))
  source2 <- strsplit(source, split = ".", fixed = TRUE)[[1]][1]
  if (source2 == "imagene") 
    return(read.imagene(files = files, path = path, ext = ext, 
                        names = names, columns = columns, other.columns = other.columns, 
                        wt.fun = wt.fun, verbose = verbose, sep = sep, quote = quote, 
                        ...))
  if (is.null(files)) {
    if (is.null(ext)) 
      stop("Must specify input files")
    else {
      extregex <- paste("\\.", ext, "$", sep = "")
      files <- dir(path = ifelse(is.null(path), ".", path), 
                   pattern = extregex)
      files <- sub(extregex, "", files)
    }
  }
  else if (is.data.frame(files)) {
    targets <- files
    files <- files$FileName
    if (is.null(files)) 
      stop("targets frame doesn't contain FileName column")
    if (is.null(names)) 
      names <- targets$Label
  }
  else {
    targets <- NULL
  }
  verbose <- verbose && exists("giveStatus")
  slides <- as.vector(as.character(files))
  if (!is.null(ext)) 
    slides <- paste(slides, ext, sep = ".")
  nslides <- length(slides)
  if (is.null(names)) 
    names <- removeExt(files)
  if (is.null(quote)) 
    if (source2 == "agilent") 
      quote <- ""
  else quote <- "\""
  if (is.null(columns)) {
    if (source2 == "generic") 
      stop("must specify columns for generic input")
    columns <- switch(source, agilent.mean = list(G = "gMeanSignal", 
                                                  Gb = "gBGMedianSignal", R = "rMeanSignal", Rb = "rBGMedianSignal"), 
                      agilent = , agilent.median = list(G = "gMedianSignal", 
                                                        Gb = "gBGMedianSignal", R = "rMedianSignal", 
                                                        Rb = "rBGMedianSignal"), arrayvision = , arrayvision.ARM = list(G = "ARM Dens - Levels", 
                                                                                                                        Gb = "Bkgd", R = "ARM Dens - Levels", Rb = "Bkgd"), 
                      arrayvision.MTM = list(G = "MTM Dens - Levels", Gb = "Bkgd", 
                                             R = "MTM Dens - Levels", Rb = "Bkgd"), bluefuse = list(G = "AMPCH1", 
                                                                                                    R = "AMPCH2"), genepix = , genepix.mean = list(R = "F635 Mean", 
                                                                                                                                                   G = "F532 Mean", Rb = "B635 Median", Gb = "B532 Median"), 
                      genepix.median = list(R = "F635 Median", G = "F532 Median", 
                                            Rb = "B635 Median", Gb = "B532 Median"), genepix.custom = list(R = "F635 Mean", 
                                                                                                           G = "F532 Mean", Rb = "B635", Gb = "B532"), quantarray = list(R = "ch2 Intensity", 
                                                                                                                                                                         G = "ch1 Intensity", Rb = "ch2 Background", Gb = "ch1 Background"), 
                      imagene9 = list(R = "Signal Mean 2", G = "Signal Mean 1", 
                                      Rb = "Background Median 2", Gb = "Background Median 1"), 
                      scanarrayexpress = list(G = "Ch1 Mean", Gb = "Ch1 B Median", 
                                              R = "Ch2 Mean", Rb = "Ch2 B Median"), smd.old = list(G = "CH1I_MEAN", 
                                                                                                   Gb = "CH1B_MEDIAN", R = "CH2I_MEAN", Rb = "CH2B_MEDIAN"), 
                      smd = list(G = "Ch1 Intensity (Mean)", Gb = "Ch1 Background (Median)", 
                                 R = "Ch2 Intensity (Mean)", Rb = "Ch2 Background (Median)"), 
                      spot = list(R = "Rmean", G = "Gmean", Rb = "morphR", 
                                  Gb = "morphG"), spot.close.open = list(R = "Rmean", 
                                                                         G = "Gmean", Rb = "morphR.close.open", Gb = "morphG.close.open"), 
                      NULL)
    if (green.only) {
      columns$R <- columns$Rb <- NULL
      nRG <- 1
      E <- FALSE
    }
    else {
      nRG <- 2
      E <- FALSE
    }
    cnames <- names(columns)
  }
  else {
    columns <- as.list(columns)
    cnames <- names(columns)
    if (is.null(cnames)) {
      if (length(columns) == 1) {
        names(columns) <- "E"
        E <- TRUE
        nRG <- 0
      }
      else {
        stop("columns needs to be a named list")
      }
    }
    else {
      names(columns)[cnames == "Gf"] <- "G"
      names(columns)[cnames == "Rf"] <- "R"
      cnames <- names(columns)
      nRG <- sum(c("R", "G") %in% cnames)
      E <- ("E" %in% cnames)
      if (E && nRG > 0) 
        stop("columns can be R,G for two color data, or E for single channel, but not both")
      if (!E && nRG == 0) 
        stop("columns must specify foreground G or R or E")
      if (!all(cnames %in% c("G", "R", "Gb", "Rb", "E", 
                             "Eb"))) 
        warning("non-standard columns specified")
    }
  }
  if (is.null(annotation)) 
    annotation <- switch(source2, agilent = c("Row", "Col", 
                                              "Start", "Sequence", "SwissProt", "GenBank", "Primate", 
                                              "GenPept", "ProbeUID", "ControlType", "ProbeName", 
                                              "GeneName", "SystematicName", "Description"), arrayvision = c("Spot labels", 
                                                                                                            "ID"), bluefuse = c("ROW", "COL", "SUBGRIDROW", "SUBGRIDCOL", 
                                                                                                                                "BLOCK", "NAME", "ID"), genepix = c("Block", "Row", 
                                                                                                                                                                    "Column", "ID", "Name"), imagene9 = c("Meta Row", 
                                                                                                                                                                                                          "Meta Column", "Row", "Column", "Gene ID"), quantarray = c("Array Row", 
                                                                                                                                                                                                                                                                     "Array Column", "Row", "Column", "Name"), scanarrayexpress = c("Array Row", 
                                                                                                                                                                                                                                                                                                                                    "Array Column", "Spot Row", "Spot Column"), smd = c("Spot", 
                                                                                                                                                                                                                                                                                                                                                                                        "Clone ID", "Gene Symbol", "Gene Name", "Cluster ID", 
                                                                                                                                                                                                                                                                                                                                                                                        "Accession", "Preferred name", "Locuslink ID", "Name", 
                                                                                                                                                                                                                                                                                                                                                                                        "Sequence Type", "X Grid Coordinate (within sector)", 
                                                                                                                                                                                                                                                                                                                                                                                        "Y Grid Coordinate (within sector)", "Sector", "Failed", 
                                                                                                                                                                                                                                                                                                                                                                                        "Plate Number", "Plate Row", "Plate Column", "Clone Source", 
                                                                                                                                                                                                                                                                                                                                                                                        "Is Verified", "Is Contaminated", "Luid"), NULL)
  if (source == "smd.old") 
    annotation <- c("SPOT", "NAME", "Clone ID", "Gene Symbol", 
                    "Gene Name", "Cluster ID", "Accession", "Preferred name", 
                    "SUID")
  fullname <- slides[1]
  if (!is.null(path)) 
    fullname <- file.path(path, fullname)
  required.col <- unique(c(annotation, unlist(columns), other.columns))
  text.to.search <- if (is.null(wt.fun)) 
    ""
  else deparse(wt.fun)
  switch(source2, quantarray = {
    firstfield <- scan(fullname, what = "", sep = "\t", flush = TRUE, 
                       quiet = TRUE, blank.lines.skip = FALSE, multi.line = FALSE, 
                       allowEscapes = FALSE)
    skip <- grep("Begin Data", firstfield)
    if (length(skip) == 0) stop("Cannot find \"Begin Data\" in image output file")
    nspots <- grep("End Data", firstfield) - skip - 2
    obj <- read.columns(fullname, required.col, text.to.search, 
                        skip = skip, sep = sep, quote = quote, stringsAsFactors = FALSE, 
                        fill = TRUE, nrows = nspots, flush = TRUE, ...)
  }, arrayvision = {
    skip <- 1
    cn <- scan(fullname, what = "", sep = sep, quote = quote, 
               skip = 1, nlines = 1, quiet = TRUE, allowEscapes = FALSE)
    fg <- grep(" Dens - ", cn)
    if (length(fg) != 2) stop(paste("Cannot find foreground columns in", 
                                    fullname))
    bg <- grep("^Bkgd$", cn)
    if (length(bg) != 2) stop(paste("Cannot find background columns in", 
                                    fullname))
    columns <- list(R = fg[1], Rb = bg[1], G = fg[2], Gb = bg[2])
    obj <- read.columns(fullname, required.col, text.to.search, 
                        skip = skip, sep = sep, quote = quote, stringsAsFactors = FALSE, 
                        fill = TRUE, flush = TRUE, ...)
    fg <- grep(" Dens - ", names(obj))
    bg <- grep("^Bkgd$", names(obj))
    columns <- list(R = fg[1], Rb = bg[1], G = fg[2], Gb = bg[2])
    nspots <- nrow(obj)
  }, bluefuse = {
    skip <- readGenericHeader(fullname, columns = c(columns$G, 
                                                    columns$R))$NHeaderRecords
    obj <- read.columns(fullname, required.col, text.to.search, 
                        skip = skip, sep = sep, quote = quote, stringsAsFactors = FALSE, 
                        fill = TRUE, flush = TRUE, ...)
    nspots <- nrow(obj)
  }, genepix = {
    h <- readGPRHeader(fullname)
    if (verbose && source == "genepix.custom") cat("Custom background:", 
                                                   h$Background, "\n")
    skip <- h$NHeaderRecords
    obj <- read.columns(fullname, required.col, text.to.search, 
                        skip = skip, sep = sep, quote = quote, stringsAsFactors = FALSE, 
                        fill = TRUE, flush = TRUE, ...)
    nspots <- nrow(obj)
  }, imagene9 = {
    h <- readImaGeneHeader(fullname)
    skip <- h$NHeaderRecords
    FD <- h$"Field Dimensions"
    if (is.null(FD)) stop("Can't find Field Dimensions in ImaGene header")
    nspots <- sum(apply(FD, 1, prod))
    obj <- read.columns(fullname, required.col, text.to.search, 
                        skip = skip, sep = sep, quote = quote, stringsAsFactors = FALSE, 
                        fill = TRUE, flush = TRUE, nrows = nspots, ...)
  }, smd = {
    skip <- readSMDHeader(fullname)$NHeaderRecords
    obj <- read.columns(fullname, required.col, text.to.search, 
                        skip = skip, sep = sep, quote = quote, stringsAsFactors = FALSE, 
                        fill = TRUE, flush = TRUE, ...)
    nspots <- nrow(obj)
  }, {
    skip <- readGenericHeader(fullname, columns = columns, 
                              sep = sep)$NHeaderRecords
    obj <- read.columns(fullname, required.col, text.to.search, 
                        skip = skip, sep = sep, quote = quote, stringsAsFactors = FALSE, 
                        fill = TRUE, flush = TRUE, ...)
    nspots <- nrow(obj)
  })
  Y <- matrix(NA, nspots, nslides)
  colnames(Y) <- names
  RG <- columns
  for (a in cnames) RG[[a]] <- Y
  if (!is.null(wt.fun)) 
    RG$weights <- Y
  if (is.data.frame(targets)) {
    rownames(targets) <- names
    RG$targets <- targets
  }
  else {
    RG$targets <- data.frame(FileName = files, row.names = names, 
                             stringsAsFactors = FALSE)
  }
  if (!is.null(annotation)) {
    j <- match(annotation, colnames(obj), 0)
    if (any(j > 0)) 
      RG$genes <- data.frame(obj[, j, drop = FALSE], check.names = FALSE)
  }
  RG$source <- source
  if (source2 == "agilent") {
    if (!is.null(RG$genes$Row) && !is.null(RG$genes$Col)) {
      nr <- length(unique(RG$genes$Row))
      nc <- length(unique(RG$genes$Col))
      if (nspots == nr * nc) 
        RG$printer <- list(ngrid.r = 1, ngrid.c = 1, 
                           nspot.r = nr, nspot.c = nc)
    }
  }
  if (source2 == "genepix") {
    if (!is.null(RG$genes$Block) && !is.null(RG$genes$Row) && 
        !is.null(RG$genes$Column)) {
      RG$printer <- getLayout(RG$genes, guessdups = FALSE)
      nblocks <- RG$printer$ngrid.r * RG$printer$ngrid.c
      if (!is.na(nblocks) && (nblocks > 1) && !is.null(obj$X)) {
        blocksize <- RG$printer$nspot.r * RG$printer$nspot.c
        i <- (1:(nblocks - 1)) * blocksize
        ngrid.r <- sum(obj$X[i] > obj$X[i + 1]) + 1
        if (!is.na(ngrid.r) && nblocks%%ngrid.r == 0) {
          RG$printer$ngrid.r <- ngrid.r
          RG$printer$ngrid.c <- nblocks/ngrid.r
        }
        else {
          warning("Can't determine number of grid rows")
          RG$printer$ngrid.r <- RG$printer$ngrid.c <- NA
        }
      }
    }
  }
  if (source2 == "imagene9") {
    printer <- list(ngrid.r = FD[1, "Metarows"], ngrid.c = FD[1, 
                                                              "Metacols"], nspot.r = FD[1, "Rows"], nspot.c = FD[1, 
                                                                                                                 "Cols"])
    if (nrow(FD) == 1) {
      RG$printer <- printer
    }
    else {
      printer$ngrid.r <- sum(FD[, "Metarows"])
      if (all(printer$ngrid.c == FD[, "Metacols"]) && all(printer$nspot.r == 
                                                          FD[, "Rows"]) && all(printer$nspot.c == FD[, 
                                                                                                     "Cols"])) 
        RG$printer <- printer
    }
  }
  if (!is.null(other.columns)) {
    other.columns <- as.character(other.columns)
    j <- match(other.columns, colnames(obj), 0)
    if (any(j > 0)) {
      other.columns <- colnames(obj)[j]
      RG$other <- list()
      for (j in other.columns) RG$other[[j]] <- Y
    }
    else {
      other.columns <- NULL
    }
  }
  for (i in 1:nslides) {
    if (i > 1) {
      fullname <- slides[i]
      if (!is.null(path)) 
        fullname <- file.path(path, fullname)
      if (verbose) {
        giveStatus(percent = 100 * i / nslides,
                   message = paste0("Lendo amostra ", i, " de ", nslides, " (", sub(pattern = ".*[/\\]", replacement = "", x = slides[i]), ")"),
                   engMsg = paste0("Reading sample ", i, " of ", nslides, " (", sub(pattern = ".*[/\\]", replacement = "", x = slides[i]), ")"))
      }
      switch(source2, quantarray = {
        firstfield <- scan(fullname, what = "", sep = "\t", 
                           flush = TRUE, quiet = TRUE, blank.lines.skip = FALSE, 
                           multi.line = FALSE, allowEscapes = FALSE)
        skip <- grep("Begin Data", firstfield)
      }, arrayvision = {
        skip <- 1
      }, genepix = {
        skip <- readGPRHeader(fullname)$NHeaderRecords
      }, smd = {
        skip <- readSMDHeader(fullname)$NHeaderRecords
      }, {
        skip <- readGenericHeader(fullname, columns = columns)$NHeaderRecords
      })
      if (verbose && source == "genepix.custom") 
        cat("Custom background:", h$Background, "\n")
      obj <- read.columns(fullname, required.col, text.to.search, 
                          skip = skip, sep = sep, stringsAsFactors = FALSE, 
                          quote = quote, fill = TRUE, nrows = nspots, flush = TRUE, 
                          ...)
    }
    for (a in cnames) RG[[a]][, i] <- obj[, columns[[a]]]
    if (!is.null(wt.fun)) 
      RG$weights[, i] <- wt.fun(obj)
    if (!is.null(other.columns)) 
      for (j in other.columns) {
        RG$other[[j]][, i] <- obj[, j]
      }
  }
  if (nRG == 1) {
    n <- names(RG)
    n[n == "G"] <- "E"
    n[n == "Gb"] <- "Eb"
    n[n == "R"] <- "E"
    n[n == "Rb"] <- "Eb"
    names(RG) <- n
  }
  if (E || nRG == 1) 
    new("EListRaw", RG)
  else new("RGList", RG)
}


# ==================================================================================



read.idatTrAnSys <- function (idatfiles, bgxfile, dateinfo = FALSE, annotation = "Symbol", 
          tolerance = 0L, verbose = TRUE) 
{
  OK <- requireNamespace("illuminaio", quietly = TRUE)
  if (!OK) 
    stop("illuminaio package required but is not installed (or can't be loaded)")
  idatafiles <- as.character(idatfiles)
  nsamples <- length(idatfiles)
  elist <- new("EListRaw")
  elist$source <- "illumina"
  elist$targets <- data.frame(IDATfile = idatfiles, stringsAsFactors = FALSE)
  verbose <- verbose && exists("giveStatus")
  if (dateinfo) 
    elist$targets$DecodeInfo <- elist$targets$ScanInfo <- rep_len("", 
                                                                  nsamples)
  if (verbose){ # Verbose alterado aqui
    giveStatus(percent = 0,
               message = paste0("Lendo anotação: ", sub(pattern = ".*/", replacement = "", x = bgxfile)),
               engMsg = paste0("Reading annotation: ", sub(pattern = ".*/", replacement = "", x = bgxfile)))
  }
    cat("Reading manifest file", bgxfile, "... ")
  bgx <- illuminaio::readBGX(bgxfile)
  if (verbose) 
    cat("Done\n")
  nregprobes <- nrow(bgx$probes)
  nctrlprobes <- nrow(bgx$control)
  nprobes <- nregprobes + nctrlprobes
  commonCols <- intersect(colnames(bgx$probes), colnames(bgx$controls))
  uncommonCols <- colnames(bgx$probes[, -which(colnames(bgx$probes) %in% commonCols)])
  ctrldf <- bgx$controls
  probedf <- bgx$probes
  if (length(uncommonCols) > 0){
    ctrltmp <- data.frame(matrix(replicate(length(uncommonCols), ""), nrow = nrow(ctrldf), ncol = length(uncommonCols)))
    colnames(ctrltmp) <- uncommonCols
    ctrldf <- cbind(ctrldf, ctrltmp)
    ctrldf <- ctrldf[, names(probedf[, intersect(names(probedf), names(ctrldf))])]
  }
  
  elist$genes <- rbind(probedf, ctrldf)
  elist$genes$Status <- "regular"
  elist$genes$Status[(nregprobes + 1):nprobes] <- bgx$controls[, 
                                                               "Reporter_Group_Name"]
  if (!is.null(annotation) && !is.na(annotation)) {
    annotation <- as.character(annotation)
    annotation <- intersect(names(bgx$probes), annotation)
  }
  if (length(annotation)) {
    ac <- annotation %in% names(bgx$controls)
    for (i in 1:length(annotation)) {
      elist$genes[[annotation[i]]] <- NA_character_
      elist$genes[[annotation[i]]][1:nregprobes] <- bgx$probes[[annotation[i]]]
      if (ac[i]) 
        elist$genes[[annotation[i]]][(nregprobes + 1L):nprobes] <- bgx$controls[[annotation[i]]]
    }
  }
  elist$E <- matrix(0, nprobes, nsamples)
  elist$E[] <- NA
  colnames(elist$E) <- removeExt(idatfiles)
  rownames(elist$E) <- elist$genes[, "Array_Address_Id"]
  elist$other$STDEV <- elist$other$NumBeads <- elist$E
  for (j in 1:nsamples) {
    if (verbose) {
      giveStatus(percent = 100 * i / nsamples,
                 message = paste0("Lendo amostra ", j, " de ", nsamples, " (", sub(pattern = ".*[/\\]", replacement = "", x = idatfiles[j]), ")"))
      cat("\t", idatfiles[j], "... ")
    }
    tmp <- illuminaio::readIDAT(idatfiles[j])
    if (verbose) 
      cat("Done\n")
    ind <- match(elist$genes$Array_Address_Id, tmp$Quants$IllumicodeBinData)
    if (anyNA(ind)) {
      nna <- sum(is.na(ind))
      if (nna > tolerance) # Coloca aqui a mensagem de erro do BGX
        stop("Can't match all ids in manifest with those in idat file ", 
             idatfiles[i], "\n", sum(is.na(ind)), " missing - please check that you have the right files, or consider setting 'tolerance'=", 
             sum(is.na(ind)))
      i <- which(!is.na(ind))
      ind <- ind[i]
      elist$E[i, j] <- tmp$Quants$MeanBinData[ind]
      elist$other$STDEV[i, j] <- tmp$Quants$DevBinData[ind]
      elist$other$NumBeads[i, j] <- tmp$Quants$NumGoodBeadsBinData[ind]
    }
    else {
      elist$E[, j] <- tmp$Quants$MeanBinData[ind]
      elist$other$STDEV[, j] <- tmp$Quants$DevBinData[ind]
      elist$other$NumBeads[, j] <- tmp$Quants$NumGoodBeadsBinData[ind]
    }
    if (dateinfo) {
      elist$targets$DecodeInfo[j] = paste(tmp$RunInfo[1, 
                                                      ], collapse = " ")
      elist$targets$ScanInfo[j] = paste(tmp$RunInfo[2, 
                                                    ], collapse = " ")
    }
  }
  if (verbose) 
    cat("Finished reading data.\n")
  return(elist)
}


# ==================================================================================


treatLimma <- function(limmaList, bgMethod = "auto", normExpMethod = "rma", normMethod = "quantile", cyclicMethod = "fast", optionMA = "A")
{
  if(!(optionMA %in% c("M", "A")))
    stop("'optionMA' must be 'M' or 'A'")
  if (inherits(limmaList, "ExpressionSet"))
  {
    limmaList = exprs(limmaList)
  }
  if (class(limmaList)[1] == "matrix") {
    
    limmaList = list("E" = limmaList, "targets" = list(colnames(limmaList)))
    limmaList = new("EListRaw", limmaList)
  }
  bgLimmaList = limma::backgroundCorrect(RG = limmaList, method = bgMethod, normexp.method = normExpMethod)
  normLimmaList = normalizeBetweenArrays(object = bgLimmaList, method = normMethod, cyclic.method = cyclicMethod)
  probeIDNames = intersect(c("ProbeName", "Probe_Id", "ID"), colnames(normLimmaList$genes))[1]
  finalLimmaList = avereps(normLimmaList, ID=normLimmaList$genes[, probeIDNames])
  finalLimmaList$targets[[1]] = sub(pattern = ".*/", x = finalLimmaList$targets[[1]], replacement = "")
  finalLimmaList$targets[[1]] = sub(pattern = "[.].*", x = finalLimmaList$targets[[1]], replacement = "")
  #finalLimmaList$targets[[1]] = sub(pattern = "(GSM[0-9]+)_.+", x = finalLimmaList$targets[[1]], replacement = "")
  if (class(finalLimmaList)[[1]]  == "EList") {
    optionMA = "E"
    colnames(finalLimmaList[["E"]]) = finalLimmaList$targets[[1]]
    
  } else {
    colnames(finalLimmaList[["M"]]) = finalLimmaList$targets[[1]]
    colnames(finalLimmaList[["A"]]) = finalLimmaList$targets[[1]]
  }
  limmaMatrix = finalLimmaList[[optionMA]]
  limmaMatrix = asIntegral(limmaMatrix)
  eset = new("ExpressionSet", exprs = limmaMatrix)
  if (length(names(finalLimmaList$genes)) > 0)
  {
    platfMatrix = finalLimmaList$genes
    platfMatrix = platfMatrix[!is.na(platfMatrix[, probeIDNames]),]
    rownames(platfMatrix) = platfMatrix[,probeIDNames]
    eset = insertGPL(gplMatrix = platfMatrix, exSet = eset)
  }
  return(eset)
}


