# July 5, 2012
# Hanna Jankowski (hkj@mathstat.yorku.ca)
# simulations appearing in manuscript "Convergence of  linear functionals of the Grenander estimator under misspecification" 
# simulations with sample sizes larger than 1000 can be slow


#-----------------------------------------------------------------------------------------#
#-----------------------------------------------------------------------------------------#
# function to compute MLE (Grenander estimator) and its mean
f		<-	function(x) {
	
				n		<-	length(x)
				x		<-	sort(x)
				FF		<-	(0:n)/n
				pts		<-	cbind(c(0,x,max(x)), c(FF,0))
				hpts	<-	chull(pts)
				hpts	<-	sort(hpts)
				hpts	<-	hpts[-length(hpts)]
				dy		<-	diff(pts[hpts,2])
				dx		<-	diff(pts[hpts,1])
				slopes	<-	dy/dx
			    xx      <-  c(0,cumsum(dx)) 
				mean	<-	sum(slopes*dx*(xx[-1]+xx[-length(xx)]))/2
				res		<-	list(dx=dx, slopes=slopes, mean=mean)		
				return(res)	
				}

# given the output of f and a point t, evaluate the MLE at t				
mle_loc	<-	function(t, dx, slopes){
			xx	<-	c(0,cumsum(dx))
			xx	<-	xx[xx<=t]
			mm	<-	length(xx)
			return(slopes[mm])
			}		

# given the output of f evaluate the entropy of the MLE				
mle_ent	<-	function(dx, slopes){
				T0	<-	sum(log(slopes[slopes>0])*slopes[slopes>0]*dx[slopes>0])	
				return(T0)
			}				

#-----------------------------------------------------------------------------------------#
#-----------------------------------------------------------------------------------------#
# Example (2.1) from paper

# the cdf F0
F1a		<-	function(x) 1.5*x
F1b		<-	function(x) 0.75+0.25*(x-0.5)+0.5*(x-0.5)^2
plot(c(0,0.5), F1a(c(0,0.5)), xlim=c(0,1), ylim=c(0,1), type="l", ylab="F0", xlab="x")
plot(F1b, xlim=c(0.5,1), add=TRUE)

# inverse of F0
F1ainv	<-	function(y) (2*y)/3
F1binv	<-	function(y) 0.25+sqrt(2*(y-0.75+1/32))

#-----------------------------------------------------------------------------------------#
#-----------------------------------------------------------------------------------------#
# Example (2.2) from paper

# the cdf F0
F2a		<-	function(x) 0.5+4*(x-0.5)^3
F2b		<-	function(x) 0.496+0.04*(x-0.4)
xx1		<-	seq(0, 0.4, length.out=101)
xx2		<-	seq(0.6, 1, length.out=101)
xx3		<-	seq(0.4, 0.6, length.out=101)
plot(xx1, F2a(xx1), xlim=c(0,1), ylim=c(0,1), type="l", ylab="F0", xlab="x")
lines(xx2, F2a(xx2))
lines(xx3, F2b(xx3))

# inverse of F0
F2inva	<-	function(y) 0.5-(0.25*(0.5-y))^(1/3)
F2invb	<-	function(y) (y-0.496)/0.04+0.4
F2invc	<-	function(y) 0.5+(0.25*(y-0.5))^(1/3)

				
#-----------------------------------------------------------------------------------------#
#-----------------------------------------------------------------------------------------#
# simulations for Figure 2 (top row) in paper


set.seed(3799492)

B	<-	1000
ens	<-	c(10, 25, 50, 1000, 1e+05)
res	<-	rep(0,B*length(ens))
for(j in 1:length(ens)){
	for(i in 1:B){
			u				<-	runif(ens[j])
			x				<-	c(F1ainv(u[u<=0.75]),F1binv(u[u>0.75]))
			mle				<-	f(x)
			res[i+B*(j-1)]	<-	sqrt(ens[j])*(mle_loc(0.75, mle$dx, mle$slopes)-0.5)
			print(i+B*(j-1))
			}
	}
	
y	<-	rep(qnorm((1:B-0.5)/B, mean=0, sd=sqrt(0.75)), length(ens))

#pdf("miss_point_eg1.pdf", height=2, width=9, pointsize=13)

par(las = 1, oma = c(0, 0, 0, 0), cex = 0.9, mar = c(2, 2, 2, 1))
par(mfrow=c(1,length(ens)))
for(j in 1:length(ens)){
		plot_title= paste("n=", ens[j], sep="")
		qqplot(res[B*(j-1)+1:B],y[B*(j-1)+1:B], xlab="", ylab="", main=plot_title, pch=1, cex=0.75, xlim=c(-3.5,3.5), ylim=c(-3.5,3.5), col=grey(0.7))
		abline(a=0,b=1)	
		}

#dev.off()

#-----------------------------------------------------------------------------------------#
#-----------------------------------------------------------------------------------------#
# simulations for Figure 2 (bottom row) in paper


sigma	<-	(3/4)*(4/3-3/4)
sigma	<-	sqrt(sigma)

set.seed(7219775)

B	<-	1000
ens	<-	c(10, 25, 50, 1000, 1e+05)
res	<-	rep(0,B*length(ens))
for(j in 1:length(ens)){
	for(i in 1:B){
			u				<-	sort(runif(ens[j]))
			u1				<-	u[u<=0.496]
			u2				<-	u[which((u>0.496)*(u<0.504)==1)]
			u3				<-	u[u>=0.504]
			x				<-	c(F2inva(u1),F2invb(u2),F2invc(u3))
			mle				<-	f(x)
			res[i+B*(j-1)]	<-	sqrt(ens[j])*(mle_loc(0.75, mle$dx, mle$slopes)-0.75)
			print(i+B*(j-1))
			}
	}
	
y	<-	rep(qnorm((1:B-0.5)/B, mean=0, sd=sigma), length(ens))

#pdf("miss_point_eg2.pdf", height=2, width=9, pointsize=13)

plot_range	<-	c(-2.5,2.5)
par(las = 1, oma = c(0, 0, 0, 0), cex = 0.9, mar = c(2, 2, 2, 1))
par(mfrow=c(1,length(ens)))
for(j in 1:length(ens)){
		plot_title= paste("n=", ens[j], sep="")
		qqplot(res[B*(j-1)+1:B],y[B*(j-1)+1:B], xlab="", ylab="", main=plot_title, pch=1, cex=0.75, xlim=plot_range, ylim=plot_range, col=grey(0.7))
		abline(a=0,b=1)	
		}

#dev.off()

#-----------------------------------------------------------------------------------------#
#-----------------------------------------------------------------------------------------#
# simulations for Figure 3 in paper

m0		<-	12*((0.25)^4/4-(0.25)^3/3+(0.25)^2/8)+(3/8)*(1-(0.25)^2)
m1		<-	12*((0.25)^4/4-(0.25)^3/3+(0.25)^2/8)+45/128
sigma	<-	12*((0.25)^5/5-(0.25)^4/4+(0.25)^3/12)+225/1024-m1^2
sigma	<-	sqrt(sigma)

set.seed(3890871)

B	<-	1000
ens	<-	c(100, 1000, 1e+04, 1e+05, 1e+06)
res	<-	rep(0,B*length(ens))
for(j in 1:length(ens)){
	for(i in 1:B){
			u				<-	sort(runif(ens[j]))
			u1				<-	u[u<=0.496]
			u2				<-	u[which((u>0.496)*(u<0.504)==1)]
			u3				<-	u[u>=0.504]
			x				<-	c(F2inva(u1),F2invb(u2),F2invc(u3))
			mle				<-	f(x)
			res[i+B*(j-1)]	<-	sqrt(ens[j])*(mle$mean-m0)
			print(i+B*(j-1))
			}
	}
	
y	<-	rep(qnorm((1:B-0.5)/B, mean=0, sd=sigma), length(ens))

#pdf("miss_mean.pdf", height=2, width=9, pointsize=13)

plot_range	<-	c(-0.9,0.9)
par(las = 1, oma = c(0, 0, 0, 0), cex = 0.9, mar = c(2, 2.5, 2, 1))
par(mfrow=c(1,length(ens)))
for(j in 1:length(ens)){
		plot_title= paste("n=", ens[j], sep="")
		qqplot(res[B*(j-1)+1:B],y[B*(j-1)+1:B], xlab="", ylab="", main=plot_title, pch=1, cex=0.75, xlim=plot_range, ylim=plot_range, col=grey(0.7))
		abline(a=0,b=1)	
		}

#dev.off()


#-----------------------------------------------------------------------------------------#
#-----------------------------------------------------------------------------------------#
# simulations for Figure 4 in paper

T0		<-	0.75*log(3)-log(2)
sigma	<-	(log(1.5)^2)*0.75+(log(0.5)^2)*0.25 - T0^2
sigma	<-	sqrt(sigma)


set.seed(8787681)

B	<-	1000
ens	<-	c(100, 1000, 10000, 100000)
res	<-	rep(0,B*length(ens))
for(j in 1:length(ens)){
	for(i in 1:B){
			u				<-	runif(ens[j])
			x				<-	c(F1ainv(u[u<=0.75]),F1binv(u[u>0.75]))
			mle				<-	f(x)
			res[i+B*(j-1)]	<-	sqrt(ens[j])*(mle_ent(mle$dx, mle$slopes)-T0)
			}
	}
	
y	<-	rep(qnorm((1:B-0.5)/B, mean=0, sd=sigma), length(ens))

#pdf("miss_entropy.pdf", height=2.5, width=9, pointsize=13)

par(las = 1, oma = c(0, 0, 0, 0), cex = 0.9, mar = c(2, 2, 2, 1))
par(mfrow=c(1,length(ens)))
for(j in 1:length(ens)){
		plot_title= paste("n=", ens[j], sep="")
		qqplot(res[B*(j-1)+1:B],y[B*(j-1)+1:B], xlab="", ylab="", main=plot_title, pch=1, cex=0.75, xlim=c(-2,2), ylim=c(-2,2), col=grey(0.7))
		abline(a=0,b=1)	
		}

#dev.off()