% Journal article: American Journal of Human Genetics 108(11), 2099--2111 (2021).
% Names are stored as "Last, First" and proper nouns in the title are brace-protected
% so that any bibliography style can abbreviate and recase them correctly.
% The note field holds the funding statement, competing interests, and copyright line.
@article{c93b15caeaa1412b8670863f66869c09,
  author    = {Belbin, Gillian M. and Rutledge, Stephanie and Dodatko, Tetyana and Cullina, Sinead and Turchin, Michael C. and Kohli, Sumita and Torre, Denis and Yee, Muh Ching and Gignoux, Christopher R. and Abul-Husn, Noura S. and Houten, Sander M. and Kenny, Eimear E.},
  title     = {Leveraging health systems data to characterize a large effect variant conferring risk for liver disease in {Puerto Ricans}},
  journal   = {American Journal of Human Genetics},
  volume    = {108},
  number    = {11},
  pages     = {2099--2111},
  year      = {2021},
  month     = nov,
  day       = {4},
  publisher = {Cell Press},
  issn      = {0002-9297},
  doi       = {10.1016/j.ajhg.2021.09.016},
  language  = {English},
  keywords  = {electronic health records, identity-by-descent, liver disease, liver serum measures, phenome wide association studies, population genetics, statistical genetics},
  abstract  = {The integration of genomic data into health systems offers opportunities to identify genomic factors underlying the continuum of rare and common disease. We applied a population-scale haplotype association approach based on identity-by-descent (IBD) in a large multi-ethnic biobank to a spectrum of disease outcomes derived from electronic health records (EHRs) and uncovered a risk locus for liver disease. We used genome sequencing and in silico approaches to fine-map the signal to a non-coding variant (c.2784-12T>C) in the gene ABCB4. In vitro analysis confirmed the variant disrupted splicing of the ABCB4 pre-mRNA. Four of five homozygotes had evidence of advanced liver disease, and there was a significant association with liver disease among heterozygotes, suggesting the variant is linked to increased risk of liver disease in an allele dose-dependent manner. Population-level screening revealed the variant to be at a carrier rate of 1.95\% in Puerto Rican individuals, likely as the result of a Puerto Rican founder effect. This work demonstrates that integrating EHR and genomic data at a population scale can facilitate strategies for understanding the continuum of genomic risk for common diseases, particularly in populations underrepresented in genomic medicine.},
  note      = {Funding Information: This work was supported in part through the computational resources and staff expertise provided by Scientific Computing at the Icahn School of Medicine at Mount Sinai. Research reported in this paper was supported by the Office of Research Infrastructure of the National Institutes of Health under award numbers R01HG011345, S10OD018522, and S10OD026880. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. We would also like to acknowledge BioMe participants for their contribution to this study. N.S.A.-H. was previously employed by Regeneron Pharmaceuticals, has received an honorarium from Genentech, and serves on the Scientific Advisory Board for Allelica. E.E.K. has received speaker honoraria from Illumina and Regeneron Pharmaceuticals. C.R.G. owns stock in 23andMe, Inc. The remaining authors declare no competing interests. Publisher Copyright: {\textcopyright} 2021},
}