2020
Gustavo Bassil Heimovski, Rogério Correa Turchetti, Juliano Araujo Wickboldt, Lisandro Zambenedetti Granville, Elias Procópio Duarte Jr
FT-Aurora: A highly available IaaS cloud manager based on replication Journal Article
In: Elsevier Computer Networks, 168, pp. 107041, 2020, ISSN: 1389-1286.
Abstract Links BibTeX Tags: Cloud Computing, Fault Tolerance, High Availability, Infrastructure as a Service (IaaS)
@article{journal/cn/Heimovski20,
  title = {{FT-Aurora}: A highly available {IaaS} cloud manager based on replication},
  author = {Heimovski, Gustavo Bassil and Turchetti, Rogério Correa and Wickboldt, Juliano Araujo and Granville, Lisandro Zambenedetti and Duarte, Jr, Elias Procópio},
  url = {http://www.sciencedirect.com/science/article/pii/S138912861831346X},
  doi = {10.1016/j.comnet.2019.107041},
  issn = {1389-1286},
  year = {2020},
  date = {2020-02-26},
  journal = {Elsevier Computer Networks},
  volume = {168},
  pages = {107041},
  abstract = {In this work we describe FT-Aurora, a highly available IaaS (Infrastructure as a Service) cloud manager that allows cloud resources to be accessed even if the manager itself crashes. FT-Aurora provides flexible and efficient resource management by supporting network programmability. FT-Aurora is based on clusters of cloud managers running on multiple datacenters. After a manager crashes, the corresponding resources remain accessible from any other manager in the cluster. A cluster consists of a group of managers that use fine-grained multi-master replication to share information. Replicated data and resources include both management information stored at the Aurora database, and information used to keep virtual machine images and processes. Replication and monitoring the multiple Aurora instances are available as services that can be easily activated through a GUI button. Experimental results are presented for both the performance and robustness of FT-Aurora.},
  keywords = {Cloud Computing, Fault Tolerance, High Availability, Infrastructure as a Service (IaaS)},
  pubstate = {published},
  tppubtype = {article}
}
2017
Gustavo Bassil Heimovski, Rogério Correa Turchetti, Juliano Araujo Wickboldt, Lisandro Zambenedetti Granville, Elias Procópio Duarte Jr
Alta Disponibilidade de um Gerenciador de Nuvem IaaS Baseada em Replicação: Experiência & Resultados Inproceedings
In: 35º Simpósio Brasileiro de Redes de Computadores e Sistemas Distribuídos, SBRC 2017, Belém, Brazil, May 15-19, 2017, pp. 30–43, SBC, 2017, ISSN: 2177-496X, (In Portuguese).
Abstract Links BibTeX Tags: Cloud Computing, Fault Tolerance, High Availability, Infrastructure as a Service (IaaS)
@inproceedings{conf/sbrc/Heimovski17,
  title = {Alta Disponibilidade de um Gerenciador de Nuvem {IaaS} Baseada em Replicação: Experiência \& Resultados},
  author = {Heimovski, Gustavo Bassil and Turchetti, Rogério Correa and Wickboldt, Juliano Araujo and Granville, Lisandro Zambenedetti and Duarte, Jr, Elias Procópio},
  url = {https://sol.sbc.org.br/index.php/sbrc/article/view/2631},
  issn = {2177-496X},
  year = {2017},
  date = {2017-01-01},
  booktitle = {35º Simpósio Brasileiro de Redes de Computadores e Sistemas Distribuídos, SBRC 2017, Belém, Brazil, May 15-19, 2017},
  pages = {30--43},
  publisher = {SBC},
  abstract = {In this work we report the experience of adding a high availability solution to an IaaS (Infrastructure as a Service) cloud platform called Aurora. The proposed solution is based on the multi-master replication of the cloud manager, replicas form a cluster of cloud managers running on multiple datacenters. The implementation also includes a monitoring service for the replicas. The high availability solution is fully integrated to the cloud platform so that activation is done directly by pushing a button of the Graphical User Interface. The performance and robustness of the proposed solution were evaluated experimentally. The time to (1) incorporate a new manager instance to a cluster; (2) recover an instance after a failure and (3) replicate data in different scenarios were measured and are reported in this paper. The impact of the proposed solution cost is evaluated by measuring CPU and network usage. A stress test in which the link delay between two datacenters grows up to the operating limit of the replication solution is also reported. Finally, we present a table for the system availability as a function of the MTBF (Mean Time Between Failures).},
  note = {In Portuguese},
  keywords = {Cloud Computing, Fault Tolerance, High Availability, Infrastructure as a Service (IaaS)},
  pubstate = {published},
  tppubtype = {inproceedings}
}