## GAMETIC RELATIONSHIPS AND A SINGLE SNP # specify the number of individuals N = 500 # specify the number of markers M = 1 # draw allele frequencies from beta distribution p = 0.8 q = 1 - p # simulate the marker data by taking a single draw from a binomial distribution snps <- matrix(0,N,M) for(i in 1:M){ snps[,i] <- rbinom(N,1,p) } z <- apply(snps, 2, scale, scale = FALSE, center = TRUE) w <- apply(snps, 2, scale, scale = TRUE, center = TRUE) ## SCORING FOR WEIGHTING SCHEME 1 ## G1 <- w %*% t(w) g1 <- G1[lower.tri(G1, diag=FALSE)] # take relationships among individuals hist(g1) ## SCORING FOR WEIGHTING SCHEME 2 ## G2 <- z %*% t(z) g2 <- G2[lower.tri(G2, diag=FALSE)] # take relationships among individuals hist(g2) ## PLOT TO COMPARE THE SCORES plot(g1,g2) plot(g1,g2*(1/(p*q))) lines(seq(-8,8,0.5),seq(-8,8,0.5)) ## DIPLOID RELATIONSHIPS AND MANY MARKERS # specify the number of individuals N = 200 # specify the number of markers M = 2000 # draw allele frequencies from beta distribution p = rbeta(M,1,1) p[p>0.5] <- 1 - p[p>0.5] # make all frequencies minor allele frequencies p[p<0.05] <- 0.05 # make 0.1 the lowest frequency q = 1 - p # draw diploid SNP genotypes by taking two draws from a binomial snps <- matrix(0,N,M) for(i in 1:M){ snps[,i] <- rbinom(N,2,p[i]) } # calculate z (mean centred genotypes) and w (centred and scaled) z <- matrix(0,N,M) for(i in 1:M){ z[,i] <- (snps[,i] - (2*p[i])) } w <- matrix(0,N,M) for(i in 1:M){ w[,i] <- (snps[,i] - (2*p[i])) / sqrt(2*p[i]*q[i]) } ## WEIGHTING SCHEME 1 ## G1 <- w %*% t(w) / M g1 <- G1[lower.tri(G1, diag=FALSE)] # take relationships among individuals hist(g1) ## WEIGHTING SCHEME 2 ## # calculate sum 2pq across loci sv <- 0 for(i in 1:M){ x_var <- 2*p[i]*q[i] sv <- sv + x_var } G2 <- z %*% t(z) / sv g2 <- G2[lower.tri(G2, diag=FALSE)] # take relationships among individuals hist(g2) plot(g1,g2) ## GENERAL FORMULAE ### # specify d d = (0) #change to -1 D1 <- diag(1,M,M) diag(D1) <- (2*p*q)^d D2 <- diag(1,M,M) diag(D2) <- (2*p*q)^(1+d) sum_pq <- sum(diag(D2)) G <- z %*% D1 %*% t(z) / sum_pq G[1:5,1:5] G1[1:5,1:5] G2[1:5,1:5]