% Journal article (Medical Research Archives, vol. 11, no. 11, 2023); cite key: MRA.
% Author names use "Last, First" form so that the compound surname
% "Gonzalez Bosquet" is parsed as a single last name. Percent signs in the
% abstract are escaped so the field is safe if a style typesets it.
@article{MRA,
  author   = {Linder, Katharine and Watson, Rachel and Ulmer, Keely and Bender, David and Goodheart, Michael and Devor, Eric and Gonzalez Bosquet, Jesus},
  title    = {Prediction of Ovarian Cancer with Deep Machine Learning and Alternative Splicing},
  journal  = {Medical Research Archives},
  volume   = {11},
  number   = {11},
  year     = {2023},
  issn     = {2375-1924},
  doi      = {10.18103/mra.v11i11.4602},
  url      = {https://esmed.org/MRA/mra/article/view/4602},
  abstract = {Objective: Early detection of ovarian cancer could lead to improved survival rates, however no method has reliably been able to predict ovarian cancer. The aim of this study is to determine if processing alternative splicing data from high grade serous ovarian cancer patients using machine learning analytics will discriminate high grade serous ovarian cancer from normal fallopian tube samples. The ultimate goal would be to have a model that can predict high grade serous ovarian cancer with a blood test. Methods: This is a case-control study of patients with confirmed high grade serous ovarian cancer and those undergoing salpingectomy for benign indications. RNA-sequencing was performed on all samples. RNA-sequence data was then put into Deep-learning augmented RNA-seq analysis of transcript splicing software suite. Deep-learning augmented RNA-seq analysis of transcript splicing created a model of differential alternative splicing aimed to discriminate between high grade serous ovarian cancer and normal fallopian tube. DEXSeq analysis was used to determine exon-based expression. Initial results with both analytics were then modelled with multivariate lasso regression to create prediction models (performance determined by area under the curve and 95\% CI). Models created were the validated using The Cancer Genome Atlas data sets. Results: One hundred and twelve high grade serous ovarian cancer and 12 benign samples were successfully sequenced. Deep-learning augmented RNA-sequencing analysis of transcript splicing identified 998 unique differentially expressed exons between high grade serous ovarian cancer and controls. Multivariate lasso regression analysis identified several exons that predicted high grade serous ovarian cancer with high performance. Specifically, ENSG00000182512:E001 from gene GLRX5 was highly predictive of high grade serous ovarian cancer with an area under the curve of 100\%. Conclusions: Application of machine learning analytics to exon differential expression, most likely due to alternative splicing, predicted high grade serous ovarian cancer with high performance. These results were validated in an independent dataset of cases and controls. Differential exon expression from cell-free RNA potentially could be used for early diagnosis of high grade serous ovarian cancer.},
}