## GAMETIC RELATIONSHIPS AND A SINGLE SNP

# specify the number of individuals
N = 500

# specify the number of markers
M = 1

# draw allele frequencies from beta distribution
p = 0.8
q = 1 - p

# simulate the marker data by taking a single draw from a binomial distribution
snps <- matrix(0,N,M)
for(i in 1:M){
	snps[,i] <- rbinom(N,1,p)
}

z  <- apply(snps, 2, scale, scale = FALSE, center = TRUE)
w <- apply(snps, 2, scale, scale = TRUE, center = TRUE) 

## SCORING FOR WEIGHTING SCHEME 1 ##
G1 <- w %*% t(w) 
g1 <- G1[lower.tri(G1, diag=FALSE)] # take relationships among individuals
hist(g1)

## SCORING FOR WEIGHTING SCHEME 2 ##
G2 <- z %*% t(z)
g2 <- G2[lower.tri(G2, diag=FALSE)] # take relationships among individuals
hist(g2)

## PLOT TO COMPARE THE SCORES
plot(g1,g2)
plot(g1,g2*(1/(p*q)))
lines(seq(-8,8,0.5),seq(-8,8,0.5))

## DIPLOID RELATIONSHIPS AND MANY MARKERS

# specify the number of individuals
N = 200

# specify the number of markers
M = 2000

# draw allele frequencies from beta distribution
p = rbeta(M,1,1)
p[p>0.5] <- 1 - p[p>0.5] # make all frequencies minor allele frequencies
p[p<0.05] <- 0.05 # make 0.1 the lowest frequency
q = 1 - p

# draw diploid SNP genotypes by taking two draws from a binomial
snps <- matrix(0,N,M)
for(i in 1:M){
	snps[,i] <- rbinom(N,2,p[i])
}

# calculate z (mean centred genotypes) and w (centred and scaled)
z <- matrix(0,N,M)
for(i in 1:M){
	z[,i] <- (snps[,i] - (2*p[i]))
}
w <- matrix(0,N,M)
for(i in 1:M){
	w[,i] <- (snps[,i] - (2*p[i])) / sqrt(2*p[i]*q[i])
}


## WEIGHTING SCHEME 1 ##
G1 <- w %*% t(w) / M
g1 <- G1[lower.tri(G1, diag=FALSE)] # take relationships among individuals
hist(g1)

## WEIGHTING SCHEME 2 ##
# calculate sum 2pq across loci
sv <- 0
for(i in 1:M){
	x_var <- 2*p[i]*q[i]
	sv <- sv + x_var 
}
G2 <- z %*% t(z) / sv
g2 <- G2[lower.tri(G2, diag=FALSE)] # take relationships among individuals
hist(g2)
plot(g1,g2)


## GENERAL FORMULAE ###

# specify d
d = (0) #change to -1

D1 <- diag(1,M,M)
diag(D1) <- (2*p*q)^d
D2 <- diag(1,M,M)
diag(D2) <- (2*p*q)^(1+d)
sum_pq <- sum(diag(D2))

G <- z %*% D1 %*% t(z) / sum_pq

G[1:5,1:5]
G1[1:5,1:5]
G2[1:5,1:5]