% dennenmoser2020association: abstract-bearing record for a 2020 work on GWAS
% population-structure covariates in winter wheat CCP lines.
% - month uses the standard three-letter macro so each style can format/localise it.
% - {CCPs} is brace-protected so sentence-casing styles keep the acronym's capitals.
% - The abstract's inline math uses \delta / \rho rather than raw Unicode Greek,
%   which is not set up for math mode under pdfLaTeX.
% - interhash, intrahash and uniqueid are kept verbatim; standard styles ignore
%   unknown fields.
@misc{dennenmoser2020association,
  abstract  = {Background Genome-wide association studies (GWAS) attempt to identify links between gene loci and trait expressions. In order to avoid false positives, GWAS methods use information about population structure, which might have disadvantageous effects in association studies. Several methods are used to describe and integrate this additional information in GWAS. However, structures might feature discrete as well as continuous patterns of variation which cannot be identified sufficiently by current (linear) analysis approaches (Diaz-Papkovich et al. 2019). Therefore, GWAS models using non-linear methods (msMDS, NMDS and UMAP) were compared with those using linear methods (PCA, PCoA, iMDS) by calculating the pairwise correlation coefficient of the p-values yielded from GWAS models and the resulting relationships were visualised by UMAP. Material and Methods DNA was isolated from 184 CCP lines derived from two winter wheat CCPs and genotyped using a 20k wheat SNP array (TraitGenetics). The genotyping data, together with the phenotypic data are being used for the GWAS to link allelic changes to trait expressions. GAPIT-related GWAS: general linear model (GLM), mixed linear model (MLM), multi-locus mixed model (MLMM), fixed and random models circulating probability unification (FarmCPU) included K and Q matrix (Wang and Zhang 2019). Covariates were calculated using GAPIT-based PCA on 5822 selected SNPs. Additionally, principal coordinate analysis (PCoA), interval, M-spline, and ordinal (non-metric) multi-dimensional scaling (MDS) using MM algorithm initialised by Torgerson configuration (de Leeuw and Mair 2009), as well as uniform manifold approximation and projection (UMAP) initialised by spectral embedding (McInnes et al. 2018) were calculated using 583 SNPs selected by clumping.
Altogether, 76 combinations were compared by calculating Pearson correlation coefficient of the p-values yielded from the GWAS models, converted to Euclidian distances (\(\mathsf{\delta_r = \sqrt{1 - \rho}}\)). Results The results of GLM-, MLM, and MLMM-based models tend to cluster together, whereas FarmCPU shows different outcomes. UMAP yielded the best results for correcting PS used in GLM for the plant height. PCA outperformed MDS-based PS methods, and little differences were observed between PS configurations for MLM- and MLMM-based models. In contrast, FarmCPU-based models tend to be conservative: the correction for PS with PCA tends to be too strong. Conclusions The preliminary results are promising and show a potential to use additional covariate methods for GWAS when analysing data derived from diverse CCP lines of wheat. Therefore, further tests and comparison with different environments, GWAS methods, and settings are needed, especially for the fine-tuning of UMAP-based methods.},
  author    = {Dennenmoser, Dominic and Baćanović-Šišić, Jelena and Backes, Gunter},
  interhash = {f4a5c9556b991feba5ee54568a101796},
  intrahash = {ff4a628cc6da1518eda45de58c1f6504},
  month     = jan,
  title     = {Association analysis in lines derived from winter wheat {CCPs}—comparing four different populations stratification methods},
  uniqueid  = {edsbas.D7B17E3C|edsbas},
  year      = 2020,
}