#OK to post homework
#Austin DeCicco, 4/12/26, Assignment 20

#BEGIN REFORMATTED C20
############################################################################################################################################################

with(combinat):

#CH(): The list of the heights in Math640 (Sp. 2026) (w/o Pablo and Jike)
CH:=proc(): [61,63,71,68,54,73]; end proc:

#Av(L): Given a list L of numbers outputs the average (aka mean, aka expectation)
Av:=proc(L): local n, i: n:=nops(L): evalf(add(L[i],i=1..n)/n); end proc:

#SE(L): The classical textbook formula for the standard-error
SE:=proc(L): local n, mu, i: n:=nops(L): mu:=Av(L):
#add the squares of the deviations from the average
evalf(sqrt(add((L[i]-mu)^2,i=1..n))/n);
end proc:

#BSsamp(L): ONE bootstrap sample from the data list L
BSsamp:=proc(L): local n, ra, L1, i: n:=nops(L): ra:=rand(1..n): L1:=[seq(L[ra()],i=1..n)]; end proc:

#BSse(L,B): The Bootstrap estimate of the standard error of the data set L
BSse:=proc(L,B): local M, i, mu: M:=[seq(Av(BSsamp(L)),i=1..B)]: mu:=Av(M): evalf(sqrt(add((M[i]-mu)^2,i=1..B)/(B-1))); end proc:

#FP(pi): The number of fixed points of the permutation pi
FP:=proc(pi): local n, i, co: n:=nops(pi): co:=0: for i from 1 to n do if pi[i]=i then co:=co+1; end if; end do; co; end proc:

#The weight-enumerator of S_n according to x^FP(pi)
GFfpBF:=proc(n,x): local S, pi: S:=permute(n): add(x^FP(pi), pi in S); end proc:

#Av(FP)= Sum(Av({pi[i]=i},i=1..n)}:
#Pr. 1/n pi[i]=i with prob. (n-1)/n pi[i]<>i
#E[{pi[i]=i]}]= 1*1/n+ 0*(1-1/n)= 1/n
#1/n+1/n+...+1/n=1

GFfp:=proc(n,x): local k: expand(add(binomial(n,k)*(x-1)^k*(n-k)!,k=0..n)); end proc:

#PerS(n,K): a random sample of the number of fixed points of K permutations of length n
PerS:=proc(n,K): local i: [seq(FP(randperm(n)),i=1..K)]; end proc:

############################################################################################################################################################
#END REFORMATTED C20

#1. Lucy Martinez asked a very good question: In one bootstrap sample with n data entries, why do we sample (with replacement) exactly n times.
#   Write a procedure GenBSsamp(L,k) that generates a bootstrap sample of length k*nops(L), and generalize BSse(L,B) to compute GenBSse(L,B,k)
#   With L:=CH(), what are GenBSse(L,1000,1/2), GenBSse(L,1000,2), GenBSse(L,1000,1) (alias BSse(L,1000) which of them is closest to SE(L)?

GenBSsamp:=proc(L,k): local n, ra, L1, i: n:=k*nops(L): ra:=rand(1..nops(L)): L1:=[seq(L[ra()],i=1..n)]; end proc:
GenBSse:=proc(L,B,k): local M, i, mu: M:=[seq(Av(GenBSsamp(L,k)),i=1..B)]: mu:=Av(M): evalf(sqrt(add((M[i]-mu)^2,i=1..B)/(B-1))); end proc:
L:=CH():

#Av([seq(GenBSse(L,1000,1/2),i=1..100)]);
#Av([seq(GenBSse(L,1000,2),i=1..100)]);
#Av([seq(GenBSse(L,1000,1),i=1..100)]);
#SE(L);

#Output:
#3.730167415
#1.863843806
#2.634941701
#2.635231383

#As you can see only the mean of GenBSse(L,1000,1) is quickly approaching the true value of SE(L).

#2. Recall (from a previous class) that the number of inversions of a permutation, pi, denoted by inv(pi), is the number of pairs 1 ≤ i < j ≤ n such that pi[i] > pi[j].
#   Another permutation statistic is the major index, defined as the sum of the places 1 ≤ i ≤ n-1 such that pi[i] > pi[i+1]. By picking 1000 random permutations of length 100,
#   estimate the average of inv(pi)*maj(pi) over all permutations of length 100 (BTW, I have no clue what it is supposed to be, there is no explicit formula (that I know of).
#   All I know is that it is between 0 and binomial(100,2)2) What is the estimated standard error?

P2samp:=proc(): local S, i: S:={}: for i from 1 to 1000 do S:=S union {randperm(100)}: end do; S; end proc:

maj:=proc(pi): local n, i, co: co:=0: n:=nops(pi): for i from 1 to n-1 do if pi[i]>pi[i+1] then co:=co+i; end if; end do; co; end proc:

inv:=proc(pi): local n, i, j, co: n:=nops(pi): co:=0: for i from 1 to n do for j from i+1 to n do if pi[i]>pi[j] then co:=co+1; end if; end do; end do; co; end proc:

InvMaj100Est:=proc(): local S, L, s: S:=P2samp(): L:=[seq(inv(s)*maj(s), s in S)]: Av(L); end proc:
InvMaj100SEEst:=proc(): local S, L, s: S:=P2samp(): L:=[seq(inv(s)*maj(s), s in S)]: SE(L); end proc:

#InvMaj100Est();
#Output: 6103350.713

#InvMaj100SEEst();
#Output: 18502.98966