#####################
#
# Population Means
#
#####################
######################
# Exercise 1
######################
# Without using the aov function, conduct the anova test
# One-way ANOVA computed by hand (no aov()). Each of the first three columns
# of Chemitech.csv is treated as one treatment group.
chemitech <- read.csv("Chemitech.csv")

# Drop NA cells column by column so groups of unequal size are handled
# (empty numeric cells are read as NA); with complete data this is a no-op.
samples <- lapply(chemitech[, 1:3], function(x) x[!is.na(x)])

# Per-group sample size, sample mean and sample variance.
group.n <- lengths(samples)
group.means <- vapply(samples, mean, numeric(1))
group.vars <- vapply(samples, var, numeric(1))

mean.a <- group.means[[1]]
mean.b <- group.means[[2]]
mean.c <- group.means[[3]]
var.a <- group.vars[[1]]
var.b <- group.vars[[2]]
var.c <- group.vars[[3]]

n.groups <- length(samples)
n.total <- sum(group.n)

# Grand mean weighted by group size. A plain average of the group means is
# only correct when every group has the same number of observations.
overall.sample.mean <- sum(group.n * group.means) / n.total

# Degrees of freedom are derived from the data instead of being hard-coded.
df.between <- n.groups - 1
df.within <- n.total - n.groups

# Between-groups estimate of the population variance (mean square treatments).
between <- sum(group.n * (group.means - overall.sample.mean)^2) / df.between
# Within-groups estimate of the population variance (mean square error).
within <- sum((group.n - 1) * group.vars) / df.within
f.stat <- between / within

# p value
pf(f.stat, df1 = df.between, df2 = df.within, lower.tail = FALSE)
# critical value
qf(0.05, df1 = df.between, df2 = df.within, lower.tail = FALSE)
############
# Exercise 2
############
# The Consumer Reports Restaurant Customer Satisfaction Survey
# studies full-service restaurant chains.
# One of the variables in the study is meal price, the average amount paid per
# person for dinner and drinks, minus the tip.
# The GrandStrand.csv data show the meal prices obtained from 24 restaurants
# in the Grand Strand section in a city of the US.
# Use .05 significance level to test if there is a significant
# difference among the mean meal prices for the three types of restaurants.
# Answer the following questions:
# 1. What is the between-groups estimate of population variance?
# 2. What is the within-groups estimate of population variance?
# 3. What is the F statistic?
# 4. What is your conclusion about the difference among the mean meal prices for the three types of restaurants?
# Load the meal prices and inspect the raw table.
grand <- read.csv("GrandStrand.csv")
colnames(grand)
View(grand)

# stack() reshapes the columns into long format: `values` holds the
# observations and `ind` records the column each observation came from.
stacked.grand <- stack(grand)
View(stacked.grand)

# One-way ANOVA of the observations by source column. In the summary table the
# Mean Sq of `ind` is the between-groups estimate, the Mean Sq of Residuals is
# the within-groups estimate, and `F value` is their ratio.
anova.grand <- aov(values ~ ind, data = stacked.grand)
summary(anova.grand)
######################
#
# Population Proportions
#
######################
############
# Exercise 1
############
# Import AutoLoyalty.csv file.
# Test if the owners of Impala, Fusion, and Accord
# have the same degree of loyalty.
# Read the survey responses.
loyalty <- read.csv("AutoLoyalty.csv")

# Cross-tabulate model (rows) against likely-purchase response (columns).
contingency.loyalty <- table(loyalty$Model, loyalty$Likely.Purchase)

# Pearson chi-square test on the contingency table.
chisq.test(contingency.loyalty)
############
# Exercise 2
############
# Write code for conducting the chi-square test without using
# the chisq.test function.
# Pearson chi-square test computed by hand (no chisq.test) on the
# contingency.loyalty table. Row and column positions are never assumed:
# table() orders its levels by sorting, so addressing "row 1" or "column 2"
# by a fixed label can silently mislabel the cells.
row.totals <- rowSums(contingency.loyalty)
col.totals <- colSums(contingency.loyalty)
grand.total <- sum(contingency.loyalty)

# Expected count of each cell when rows and columns are independent:
# (row total * column total) / grand total.
expected.loyalty <- outer(row.totals, col.totals) / grand.total

# Sum over all cells of (observed - expected)^2 / expected.
chisq.stat <- sum(
  (contingency.loyalty - expected.loyalty)^2 / expected.loyalty
)

# Degrees of freedom: (rows - 1) * (columns - 1); equals 2 for a 3 x 2 table.
chisq.df <- (nrow(contingency.loyalty) - 1) * (ncol(contingency.loyalty) - 1)

# p-value
pchisq(chisq.stat, df = chisq.df, lower.tail = FALSE)
# critical value
qchisq(0.05, df = chisq.df, lower.tail = FALSE)
############
# Exercise 3
############
# In a study conducted by Zogby International for the Democrat and Chronicle, more than 700
# New Yorkers were polled to determine whether the New York state government works.
# Respondents surveyed were asked questions involving pay cuts for state legislators, restrictions on lobbyists,
# term limits for legislators, and whether state citizens should be able to put
# matters directly on the state ballot for a vote. The results regarding several
# proposed reforms had broad support, crossing all demographic and political lines.
# Suppose that a follow-up survey of 100 individuals who live in the western region of
# New York was conducted. The party affiliation (Democrat, Independent, Republican) of
# each individual surveyed was recorded, as well as the responses to the following
# three questions:
# 1. Should legislative pay be cut for every day the state budget is late? (Yes / No)
# 2. Should there be more restrictions on lobbyists? (Yes / No)
# 3. Should there be term limits requiring that legislators serve a fixed number of years? (Yes / No)
# Import the NYReform.csv data and answer the following questions.
# Load the follow-up survey and inspect it.
ny <- read.csv("NYReform.csv")
View(ny)

# Question 1 (pay cut): chi-square test of independence between party
# affiliation and the Yes/No response. Compare the p-value with .05.
contingency.ny.q1 <- table(ny$Party, ny$PayCut)
chisq.test(contingency.ny.q1)

# Question 2 (lobbyist restrictions): chi-square test of independence between
# party affiliation and the Yes/No response. Compare the p-value with .05.
contingency.ny.q2 <- table(ny$Party, ny$Lobbyists)
chisq.test(contingency.ny.q2)

# Question 3 (term limits): chi-square test of independence between party
# affiliation and the Yes/No response. Compare the p-value with .05.
contingency.ny.q3 <- table(ny$Party, ny$TermLimits)
chisq.test(contingency.ny.q3)