2024
1.
B. I. Adeika, J. Aina, T. Ibirinde, T. Adeyemi, M. M. Rahman, S. Pramanik
Ensemble and Transformer Models for Infectious Disease Prediction Conference
2023 IEEE 23rd International Conference on Bioinformatics and Bioengineering (BIBE), Dayton, OH, USA, 2023, 2024.
Abstract | Links | BibTeX | Tags: Infectious diseases;Tuberculosis;Genomics;Predictive models;Transformers;Bioinformatics;Monitoring;Infectious diseases;BERT;XLNET;RoBERTa;Disease Prediction;Ensemble techniques;Genomic sequence
% Conference paper in the IEEE BIBE 2023 proceedings.
% The citation key is left as "nokey" so existing \cite commands keep resolving.
% tppubtype is a non-standard field carried over from the original export
% (presumably read by the tool that generated this entry -- confirm before removing).
@inproceedings{nokey,
  author    = {Adeika, B. I. and Aina, J. and Ibirinde, T. and Adeyemi, T. and Rahman, M. M. and Pramanik, S.},
  title     = {Ensemble and Transformer Models for Infectious Disease Prediction},
  booktitle = {2023 {IEEE} 23rd International Conference on Bioinformatics and Bioengineering ({BIBE}), Dayton, OH, USA},
  publisher = {IEEE},
  pages     = {377--384},
  year      = {2024},
  date      = {2024-02-19},
  doi       = {10.1109/BIBE60311.2023.00068},
  url       = {https://ieeexplore.ieee.org/document/10431838/authors},
  urldate   = {2024-02-19},
  abstract  = {Infectious diseases persist as an urgent global challenge, necessitating innovative strides in prediction and monitoring. This study delves into the intricate realm of infectious disease prediction, employing three transformer models---BERT, XLNET, and RoBERTa. The central objective of this research was to craft a framework for infectious disease prediction, significantly enhancing capabilities in disease monitoring, detection, and outbreak response. The approach entailed receiving a set of translated protein sequences from various infectious diseases and leveraging these sequences to predict each disease with the models. This methodology advanced infectious disease prediction and monitoring by expediting the analysis of genomic data, enabling the identification of distinctive patterns, mutations, and signatures associated with specific infectious agents. The dataset comprised genomic sequences from diseases such as Zika, Ebola, SARS-CoV-2, Influenza A, Influenza B, Tuberculosis, along with sequences from non-infected individuals. Model evaluation encompassed essential metrics, including accuracy, precision, recall, and the F1 score. In our quest for heightened precision, we also devised ensemble techniques to harness the collective power of all three models, yielding accuracies of 92\% (Majority Voting) and 85\% (Weighted Average). Leveraging DNA sequences translated into protein sequences, this study contributed to advancing our understanding and management of infectious diseases on a global scale.},
  keywords  = {Infectious diseases, Tuberculosis, Genomics, Predictive models, Transformers, Bioinformatics, Monitoring, BERT, XLNET, RoBERTa, Disease Prediction, Ensemble techniques, Genomic sequence},
  pubstate  = {published},
  tppubtype = {conference},
}
Infectious diseases persist as an urgent global challenge, necessitating innovative strides in prediction and monitoring. This study delves into the intricate realm of infectious disease prediction, employing three transformer models—BERT, XLNET, and RoBERTa. The central objective of this research was to craft a framework for infectious disease prediction, significantly enhancing capabilities in disease monitoring, detection, and outbreak response. The approach entailed receiving a set of translated protein sequences from various infectious diseases and leveraging these sequences to predict each disease with the models. This methodology advanced infectious disease prediction and monitoring by expediting the analysis of genomic data, enabling the identification of distinctive patterns, mutations, and signatures associated with specific infectious agents. The dataset comprised genomic sequences from diseases such as Zika, Ebola, SARS-CoV-2, Influenza A, Influenza B, Tuberculosis, along with sequences from non-infected individuals. Model evaluation encompassed essential metrics, including accuracy, precision, recall, and the F1 score. In our quest for heightened precision, we also devised ensemble techniques to harness the collective power of all three models, yielding accuracies of 92% (Majority Voting) and 85% (Weighted Average). Leveraging DNA sequences translated into protein sequences, this study contributed to advancing our understanding and management of infectious diseases on a global scale.