#### Algorithm that opens a GSE and exports it to a separate file (this section reads the series table from a local text file)

# *************************** Sinal ****************

######################
# Initial arguments:
# 1) seriesFile
######################

######################
# Objects produced:
# 1) SeriesMatrix
# 2) SeriesExpressionSet
# 3) GPL (string, platform ID)
######################




# Path of the tab-separated, non-normalized series table.
# NOTE(review): hard-coded to a single machine although the header lists
# seriesFile as an input argument — confirm how callers are meant to set it.
seriesFile <- "C:/Users/Tojara/Desktop/lol/GSE57339_non-normalized.txt"

# Raw copy of the table, skipping the first line and parsing no header.
# Nothing in the visible script reads it afterwards; it is kept so that the
# set of workspace objects stays the same.
tempTable <- read.table(
  file = seriesFile, sep = "\t", dec = ".", header = FALSE, skip = 1
)

library("Biobase")

# Same file again, this time with the first line used as column names.
datamat <- read.table(
  file = seriesFile, sep = "\t", dec = ".", header = TRUE, skip = 0
)

# Specific to this data set: keep column 1 plus every second column starting
# at column 2 (presumably the skipped columns are per-sample detection
# p-values — verify against the file).
datamat <- datamat[, c(1, seq(2, ncol(datamat), by = 2))]

# Move the first column into the row names. drop = FALSE keeps a one-column
# data frame (and its sample name) when only a single sample column remains;
# without it the column collapses to a bare vector and the name is lost.
datamat <- data.frame(datamat[, -1, drop = FALSE], row.names = datamat[, 1])

# Wrap the values in an ExpressionSet and read the expression matrix back.
SeriesExpressionSet <- new("ExpressionSet", exprs = as.matrix(datamat))
SeriesMatrix <- exprs(SeriesExpressionSet)








#### Algorithm that opens the GSMs of a GSE from the internet and exports them to a separate file
### Also converts a set of GSM files into a single Meta-GSE


# *************************** Sinal ****************

# Pick up the input arguments:
# `inputsfile` is not defined in this script; it must already exist when this
# section runs.
txtarrayDir <- inputsfile


#*** End Sinal ***
# NOTE(review): the "Sinal" marker line above is kept verbatim in case an
# external launcher looks for it — confirm.

# Load the required packages:
library("affy")
library("GEOquery")

# `tempdir` is used here as a directory path supplied from outside this
# script (it shadows base::tempdir()).
setwd(tempdir)


# Read the text table; the first line provides the column names and the first
# column provides the row names (default whitespace separator):
arraytxt <- read.table(txtarrayDir, header = TRUE, row.names = 1)

# Standard pData field names, matching the "Series_" and "Sample_" prefixes
# used in the author's series matrix (txt.gz).
# SeriespData and SamplepData are not referenced again in the visible script;
# AllpData supplies the row names of the placeholder phenotype table.
SeriespData <- c(
  "!Series_title",
  "!Series_geo_accession",
  "!Series_status",
  "!Series_submission_date",
  "!Series_last_update_date",
  "!Series_series_id",
  "!Series_pubmed_id",
  "!Series_summary",
  "!Series_overall_design",
  "!Series_type",
  "!Series_contributor",
  "!Series_sample_id",
  "!Series_contact_name",
  "!Series_contact_email",
  "!Series_contact_phone",
  "!Series_contact_fax",
  "!Series_contact_laboratory",
  "!Series_contact_department",
  "!Series_contact_institute",
  "!Series_contact_address",
  "!Series_contact_city",
  "!Series_contact_state",
  "!Series_contact_zip/postal_code",
  "!Series_contact_country",
  "!Series_supplementary_file",
  "!Series_platform_id",
  "!Series_platform_taxid",
  "!Series_sample_taxid",
  "!Series_relation"
)
SamplepData <- c(
  "!Sample_title",
  "!Sample_geo_accession",
  "!Sample_status",
  "!Sample_submission_date",
  "!Sample_last_update_date",
  "!Sample_series_id",
  "!Sample_type",
  "!Sample_channel_count",
  "!Sample_source_name_ch1",
  "!Sample_organism_ch1",
  "!Sample_characteristics_ch1",
  "!Sample_treatment_protocol_ch1",
  "!Sample_growth_protocol_ch1",
  "!Sample_molecule_ch1",
  "!Sample_extract_protocol_ch1",
  "!Sample_label_ch1",
  "!Sample_label_protocol_ch1",
  "!Sample_taxid_ch1",
  "!Sample_hyb_protocol",
  "!Sample_scan_protocol",
  "!Sample_description",
  "!Sample_description.1",
  "!Sample_data_processing",
  "!Sample_platform_id",
  "!Sample_contact_name",
  "!Sample_contact_email",
  "!Sample_contact_phone",
  "!Sample_contact_fax",
  "!Sample_contact_laboratory",
  "!Sample_contact_department",
  "!Sample_contact_institute",
  "!Sample_contact_address",
  "!Sample_contact_city",
  "!Sample_contact_state",
  "!Sample_contact_zip/postal_code",
  "!Sample_contact_country",
  "!Sample_supplementary_file",
  "!Sample_data_row_count",
  "!Sample_relation"
)
AllpData <- c(
  "!Sample_title",
  "!Sample_geo_accession",
  "!Sample_status",
  "!Sample_submission_date",
  "!Sample_last_update_date",
  "!Sample_series_id",
  "!Sample_pubmed_id",
  "!Sample_summary",
  "!Sample_overall_design",
  "!Sample_type",
  "!Sample_channel_count",
  "!Sample_source_name_ch1",
  "!Sample_organism_ch1",
  "!Sample_characteristics_ch1",
  "!Sample_treatment_protocol_ch1",
  "!Sample_growth_protocol_ch1",
  "!Sample_molecule_ch1",
  "!Sample_extract_protocol_ch1",
  "!Sample_label_ch1",
  "!Sample_label_protocol_ch1",
  "!Sample_taxid_ch1",
  "!Sample_hyb_protocol",
  "!Sample_scan_protocol",
  "!Sample_description",
  "!Sample_data_processing",
  "!Sample_contributor",
  "!Sample_sample_id",
  "!Sample_contact_name",
  "!Sample_contact_email",
  "!Sample_contact_phone",
  "!Sample_contact_fax",
  "!Sample_contact_laboratory",
  "!Sample_contact_department",
  "!Sample_contact_institute",
  "!Sample_contact_address",
  "!Sample_contact_city",
  "!Sample_contact_state",
  "!Sample_contact_zip/postal_code",
  "!Sample_contact_country",
  "!Sample_supplementary_file",
  "!Sample_platform_id",
  "!Sample_platform_taxid",
  "!Sample_sample_taxid",
  "!Sample_data_row_count",
  "!Sample_relation"
)

# Placeholder phenotype table: one row per AllpData field, one column per
# sample, filled with running integers.
assay <- data.frame(
  matrix(
    1:(length(AllpData) * ncol(arraytxt)),
    nrow = length(AllpData)
  ),
  row.names = AllpData
)

# Expression values as a numeric matrix that keeps the table's row and
# column names.
arraytxt2 <- as.matrix(arraytxt)
datamat <- matrix(
  data = as.numeric(arraytxt2),
  ncol = ncol(arraytxt2),
  dimnames = dimnames(arraytxt2)
)

# Only the accession and title rows get real values (the sample column
# names); every other field keeps its placeholder.
sampledat <- assay
colnames(sampledat) <- colnames(datamat)
sampledat["!Sample_geo_accession", ] <- colnames(datamat)
sampledat["!Sample_title", ] <- colnames(sampledat)

# Transpose so that samples become rows and fields become columns.
sampledat <- t(sampledat)

# Platform annotation taken from the first sample's "platform_id" column.
# NOTE(review): that row is never filled in above, so this is the placeholder
# number rendered as text rather than a real GPL accession — confirm that
# this is acceptable downstream.
GPL <- as.character(
  sampledat[1, grep("platform_id", colnames(sampledat), ignore.case = TRUE)]
)

# Feature annotation with no columns, carrying only the probe/row names.
fd <- new(
  "AnnotatedDataFrame",
  data = data.frame(row.names = rownames(datamat))
)

# Assemble the ExpressionSet from the expression matrix, the phenotype
# table, the feature table and the annotation string.
ArrayGSE <- new(
  "ExpressionSet",
  exprs = datamat,
  phenoData = as(as.data.frame(sampledat), "AnnotatedDataFrame"),
  featureData = fd,
  annotation = GPL
)



# Read the expression matrix back out of the ExpressionSet.
MatrixArrayGSE <- assayDataElement(ArrayGSE, "exprs")


# Build a summary "phenoData" table for the custom matrix. Each GSEInfo.*
# vector is one label/value row; the variable names do not describe the
# values they hold here.
# Kind of data set
GSEInfo.Title <- c("Tipo", "Matriz customizada")
# Number of samples (matrix columns)
GSEInfo.Accession <- c("Amostras", ncol(MatrixArrayGSE))
# Number of rows/probes (matrix rows)
GSEInfo.Type <- c("Linhas/Sondas", nrow(MatrixArrayGSE))
# Date of this analysis run, formatted as dd/mm/yyyy
GSEInfo.Date <- c("Data da análise", format(Sys.time(), "%d/%m/%Y"))


# Stack the four rows into one table and write it, tab-separated, with no
# row names, column names or quoting.
pDataTable <- rbind(GSEInfo.Title, GSEInfo.Accession, GSEInfo.Type, GSEInfo.Date)
# tempdir is concatenated as-is, so it has to end with a path separator.
geoPhenoDataDir <- paste0(tempdir, "gsephenodata.txt")
write.table(
  x = pDataTable,
  file = geoPhenoDataDir,
  append = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE
)







# No GEO series file and no GPL file are produced in this section; "None"
# is written to the results manifest in their place.
geoFileDir <- "None"
gplFileDir <- "None"

# Progress report (95%) through giveSignal(), which is defined outside this
# script.
# %%% Sinal: 95%
giveSignal(directory = tempdir, percent = 95, message = "Salvando dados e concluindo...")


# Per-sample rows: sample id (matrix column name), a numbered title and a
# descriptive name.
pseudosamps <- colnames(MatrixArrayGSE)
pseudpDataTitles <- paste0("Amostra ", seq_along(pseudosamps))
pseudpDataNames <- paste0("Amostra customizada '", pseudosamps, "'")

# One row per sample, three columns (id, title, name).
GSMspData <- matrix(
  c(pseudosamps, pseudpDataTitles, pseudpDataNames),
  nrow = length(pseudosamps)
)

# Export the sample information, tab-separated, with no row names, column
# names or quoting. tempdir is concatenated as-is, so it has to end with a
# path separator.
samplesFileDir <- paste0(tempdir, "samplesfile.cfg")
write.table(
  GSMspData,
  samplesFileDir,
  append = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE
)

#***** Output attachment - results *****
# Manifest of the files produced above, one path per line, in this order:
# GEO file, samples file, phenoData file, GPL file.
output.results <- c(geoFileDir, samplesFileDir, geoPhenoDataDir, gplFileDir)
# Write the manifest with no row names, column names or quoting:
write.table(
  output.results,
  paste0(tempdir, "results.cfg"),
  append = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE
)


#***** Output attachment - workspace *****
# Save every object of the current workspace so it can be reloaded later.
save.image(file = paste0(tempdir, "tempdataset.cfg"))

# END