# TrAnSys
# Elaborado por Itamar José G. Nunes et al.

######################################################################################
# Algoritmo de tratar as amostras do tipo Raw para arranjos derivados do Limma

######################
# Argumentos iniciais e sub-argumentos:
# 1) SeriesRawMatrix : Arranjo em formato RAW
# 2) optionMA : Se for dual-channel, pode ser "M" ou "A"; do contrário (single-channel) será "E"
# 3) bgMethod : Método de correção de fundo
# 4) normExpMethod : Método quando bgMethod = "normexp"
# 5) normMethod : Método de normalização
# 6) cyclicMethod : Método quando normMethod = "cyclicloess"
######################


# --> Possibilidades iniciais:
# - backgroundCorrect:
# method = "auto", "none", "subtract", "half", "minimum", "movingmin", "edwards" ou "normexp", onde dual channel só pode ser "none" ou "normexp"
# normexp.method = "saddle", "mle", "rma" ou "rma75", só funciona quando method for igual a "normexp"
# offset = valor a ser adicionado às intensidades (útil para prevenir log de valores em zero)
# - normalizeBetweenArrays:
# method = "none", "scale", "quantile" ou "cyclicloess" para single-channel. Para dual-channel, além destas do single, ainda pode "Aquantile", "Gquantile", "Rquantile" ou "Tquantile"
# targets = <vetor ou matriz indicando os grupos-alvo>, quando method="Tquantile"
# cyclic.method = "fast", "affy" ou "pairs", válido quando method = cyclicloess
#  - normalizeVSN: (Implementar como alternativa)
#  
######################

# Variáveis temporárias (excluir quando tudo tiver pronto):
#samplesDir <- "C:/Users/Tojara/Desktop/lol/AgilentExamples/ComTIFF/"
#sampleFiles <- paste0(samplesDir, list.files(path=samplesDir,
#                                 pattern = "*.txt"))
#bgMethod <- "auto"
#normMethod <- "scale"
#optionMA <- "M" # Pode ser M ou A

######################
# Variáveis obtidas:
# 1) SeriesRawMatrix (EListRaw com os dados Raw)
######################


library("Biobase")

# %%% Sinal: 1%
# Message stored in IException while the treatment steps below are running.
IException <- "Falha durante o tratamento."

# Once everything is finished, the code below is meant to be moved into a
# dedicated treatment R script.
# Possible `method` values: "auto", "none", "subtract", "half", "minimum",
# "movingmin", "edwards" or "normexp".
# NOTE: every `method` value except "normexp" only works when the Eb slot
# (background values) exists.
# NOTE 2: `normexp.method` only applies when method = "normexp".
# TODO: explore the possibilities with cyclicloess.

# Step 1: background correction.
# Inputs read from the enclosing environment: SeriesRawMatrix, bgMethod,
# normExpMethod. Output: SeriesMatrixLimmaList.
giveStatus(percent = 1, message = "Aplicando correção de fundo...")

# A bare expression matrix is wrapped into an EListRaw (slot "E") so the limma
# functions below receive the same kind of object in both cases.
# is.matrix() is used instead of `class(x) == "matrix"`: since R 4.0 a matrix
# has class c("matrix", "array"), so the comparison yields a length-2 logical,
# which `if` rejects with an error on R >= 4.2.
if (is.matrix(SeriesRawMatrix)) {
  SeriesRawMatrix <- list("E" = SeriesRawMatrix)
  SeriesRawMatrix <- new("EListRaw", SeriesRawMatrix)
}
SeriesMatrixLimmaList <- limma::backgroundCorrect(SeriesRawMatrix, method = bgMethod, normexp.method = normExpMethod)

# Step 2: between-array normalization.
# Reads normMethod from the enclosing environment and overwrites
# SeriesMatrixLimmaList with the normalized object.
# limma functions are called with an explicit `limma::` prefix, matching the
# backgroundCorrect() call earlier in this script; this file itself only
# attaches Biobase.
# NOTE(review): the file header lists a `cyclicMethod` argument, but it is not
# forwarded as `cyclic.method` here -- confirm whether that is intended.
giveStatus(percent = 20, message = "Aplicando normalização...")
SeriesMatrixLimmaList <- limma::normalizeBetweenArrays(object = SeriesMatrixLimmaList, method = normMethod)

# Name of the annotation column holding the probe identifiers: "ProbeName"
# when that column is present in `genes`, otherwise "Probe_Id".
if ("ProbeName" %in% names(SeriesMatrixLimmaList$genes)) {
  probeIDNames <- "ProbeName"
} else {
  probeIDNames <- "Probe_Id"
}

# Step 3: average replicated probes so each probe ID appears once.
giveStatus(percent = 50, message = "Verificando e corrigindo possíveis replicatas de sonda...")
if (probeIDNames %in% names(SeriesMatrixLimmaList$genes)) {
  SeriesMatrixLimmaList <- limma::avereps(SeriesMatrixLimmaList, ID = SeriesMatrixLimmaList$genes[, probeIDNames])
} else {
  # Neither ID column exists (or there is no `genes` annotation at all, e.g.
  # when the input was a bare matrix). Indexing a missing data-frame column
  # would fail, so let avereps() pick its own default probe IDs instead.
  SeriesMatrixLimmaList <- limma::avereps(SeriesMatrixLimmaList)
}

# Step 4: derive clean sample names from the first `targets` column and use
# them as column names of the expression matrices.
# The cleanup only runs when `targets` is present: an object built from a bare
# matrix carries no `targets`, and assigning the empty result back (or using it
# as column names) would fail or drop the existing column names.
if (length(SeriesMatrixLimmaList$targets) > 0) {
  # Inner sub(): drop everything up to the last "/" (directory part).
  # Outer sub(): drop everything from the first "." onwards (extension part).
  SeriesMatrixLimmaList$targets[[1]] <- sub("[.].*", "", sub(".*/", "", SeriesMatrixLimmaList$targets[[1]]))
}

# inherits() replaces `class(x)[[1]] == "EList"` as the idiomatic class test.
if (inherits(SeriesMatrixLimmaList, "EList")) {
  # EList objects keep their values in the "E" component, so optionMA is
  # forced to "E" regardless of what the caller supplied.
  optionMA <- "E"
  if (length(SeriesMatrixLimmaList$targets) > 0) {
    colnames(SeriesMatrixLimmaList[["E"]]) <- SeriesMatrixLimmaList$targets[[1]]
  }
} else if (length(SeriesMatrixLimmaList$targets) > 0) {
  # Any other class is treated as carrying "M" and "A" components; both get
  # the same sample names and the caller-supplied optionMA is left untouched.
  colnames(SeriesMatrixLimmaList[["M"]]) <- SeriesMatrixLimmaList$targets[[1]]
  colnames(SeriesMatrixLimmaList[["A"]]) <- SeriesMatrixLimmaList$targets[[1]]
}


# Completar isso abaixo, modificando para SeriesMatrix, é claro. Precisamos que seja um ExpressionSet, não um EList
# Adaptar para os dois tipos de channels

# Step 5: assemble the per-sample annotation (phenoData) for the ExpressionSet.
giveStatus(percent = 90, message = "Preparando e finalizando os dados de expressão gênica...")
IException <- "Falha ao montar o Expression Set."

# Column names are split at underscores: the part before the first "_"
# becomes the column name of the expression matrix, and the part after the
# first "_" is kept as the sample title.
sampleTitles <- colnames(SeriesMatrixLimmaList[[optionMA]])
colnames(SeriesMatrixLimmaList[[optionMA]]) <- sub("[_].*", "", sampleTitles)
sampleTitles <- sub(".*?[_]", "", sampleTitles)

# Field names used as row names of the sample table.
sampleData <- c(
  "!Sample_title", "!Sample_geo_accession", "!Sample_status",
  "!Sample_submission_date", "!Sample_last_update_date",
  "!Sample_series_id", "!Sample_pubmed_id", "!Sample_summary",
  "!Sample_overall_design", "!Sample_type", "!Sample_channel_count",
  "!Sample_source_name_ch1", "!Sample_organism_ch1",
  "!Sample_characteristics_ch1", "!Sample_treatment_protocol_ch1",
  "!Sample_growth_protocol_ch1", "!Sample_molecule_ch1",
  "!Sample_extract_protocol_ch1", "!Sample_label_ch1",
  "!Sample_label_protocol_ch1", "!Sample_taxid_ch1",
  "!Sample_hyb_protocol", "!Sample_scan_protocol", "!Sample_description",
  "!Sample_data_processing", "!Sample_contributor", "!Sample_sample_id",
  "!Sample_contact_name", "!Sample_contact_email", "!Sample_contact_phone",
  "!Sample_contact_fax", "!Sample_contact_laboratory",
  "!Sample_contact_department", "!Sample_contact_institute",
  "!Sample_contact_address", "!Sample_contact_city", "!Sample_contact_state",
  "!Sample_contact_zip/postal_code", "!Sample_contact_country",
  "!Sample_supplementary_file", "!Sample_platform_id",
  "!Sample_platform_taxid", "!Sample_sample_taxid",
  "!Sample_data_row_count", "!Sample_relation"
)

# One row per field, one column per sample. The cells start out as running
# integers that act only as filler; just the two rows set below receive
# meaningful values.
sampleData <- data.frame(
  matrix(
    1:(length(sampleData) * ncol(SeriesMatrixLimmaList[[optionMA]])),
    nrow = length(sampleData)
  ),
  row.names = sampleData
)

# Columns must carry the same names as the expression matrix columns.
names(sampleData) <- colnames(SeriesMatrixLimmaList[[optionMA]])
sampleData["!Sample_geo_accession", ] <- names(sampleData)
sampleData["!Sample_title", ] <- sampleTitles

# Transposed so that samples become rows; row/column names have to line up
# with the expression matrix when the ExpressionSet is built.
phenoData <- new(
  "AnnotatedDataFrame",
  data = as.data.frame(t(sampleData))
)

# Step 6: wrap the selected expression matrix into an ExpressionSet.
# featureData starts as an empty data frame whose row names are the row names
# of the expression matrix.
SeriesExpressionSet <- new(
  "ExpressionSet",
  exprs = SeriesMatrixLimmaList[[optionMA]],
  phenoData = phenoData,
  featureData = new(
    "AnnotatedDataFrame",
    data = data.frame(
      row.names = rownames(SeriesMatrixLimmaList[[optionMA]])
    )
  )
)

# When the limma object carries a non-empty `genes` annotation, use it as the
# feature data and then re-apply the expression matrix row names to it.
if ("genes" %in% names(SeriesMatrixLimmaList)) {
  if (length(SeriesMatrixLimmaList$genes) > 0) {
    fData(SeriesExpressionSet) <- SeriesMatrixLimmaList$genes
    sampleNames(SeriesExpressionSet@featureData) <-
      rownames(SeriesMatrixLimmaList[[optionMA]])
  }
}

# Final result of the script: the plain expression matrix taken back out of
# the ExpressionSet.
SeriesMatrix <- exprs(SeriesExpressionSet)

#remove(list = c("bgMethod", "normMethod", "optionMA", "phenoData", "platformPath", "sampleData", "sampleFiles", "samplesDir", "singleChannel"))