# Attach MASS, which supplies the cpus data set.
library(MASS)

# Part A: scatterplot matrix of every column except the first (name).
# Most variables probably need to be transformed.
plot(cpus[, -1])

# Part B: log(estperf) against log(perf). The data appear linear.
with(cpus, plot(log(perf), log(estperf)))

# Lowess smooth on the same axes: it shows greater variation towards the left
# of the graph, and truncation.
lines(lowess(log(cpus$perf), log(cpus$estperf)))

# Reference line y = x (intercept 0, slope 1) in red.
abline(a = 0, b = 1, col = "red")
########################################
library(MASS)

# Scatterplot matrix of the full data frame.
# The variables should have their log taken before further analysis.
plot(cpus)

# Two panels side by side: the fitted relationship on the left, the residuals
# on the right.
par(mfrow = c(1, 2))

# Left panel: log(perf) against log(estperf).
# The points scatter about a straight line. The correlation is 0.9221604.
plot(
  log(perf) ~ log(estperf),
  data = cpus,
  xlab = "log(Estimated Performance)",
  ylab = "log(Performance)",
  main = "log(Performance) vs. log(Estimated Performance)"
)

# Least-squares fit on the log-log scale, overlaid on the scatterplot.
lm.performance <- lm(log(perf) ~ log(estperf), data = cpus)
abline(lm.performance)

# Right panel: residuals of that fit against the predictor.
res.performance <- residuals(lm.performance)
with(
  cpus,
  plot(
    log(estperf), res.performance,
    xlab = "log(Estimated Performance)",
    ylab = "Residuals",
    main = "Residuals"
  )
)
# The variability does not seem to increase as performance increases.
###########################################
library(MASS)

# Scatterplot matrix without the first column (name): it is only the computer
# name, so it is removed. It looks like all the remaining variables could be
# log transformed.
plot(cpus[-1])

# Log-transformed copies of perf and estperf.
lperf <- log(cpus[["perf"]])
lest <- log(cpus[["estperf"]])

# Linear model of log(perf) on log(estperf).
mod <- lm(lperf ~ lest)

# Scatterplot of the log-transformed variables with the fitted line on top.
plot(lest, lperf)
abline(mod)
# A straight line is not quite the right fit: it underestimates at the low and
# high performances.

# LOWESS smooth in colour 2. The smooth is not forced to be straight, and it
# curves to accommodate a slight non-linear relationship.
smooth_curve <- lowess(lest, lperf)
lines(smooth_curve, col = 2)

# Model summary, then the standard lm diagnostic plots on a 2 x 2 grid.
par(mfrow = c(2, 2))
summary(mod)
plot(mod)
# In this model the residuals are not random; there is a pattern to them.
# There is greater variation in the residuals at the lower end of the fitted
# values.
# The Q-Q plot is good in the middle, but the fit is not as good in the tails.
# So estimated performance predicts performance well at middle levels of
# performance, but it is not a good predictor at low levels.
# Is the variability in performance the same at each level of performance?
# It appears that there is greater variance at the lower end (but there is
# also more data there).
#############################
# Load MASS here as well so this section runs on its own, like the others.
library(MASS)

# This is the first scatterplot matrix. The formula needs `data = cpus`;
# without it the column names are not found.
pairs(
  ~ name + syct + mmin + mmax + cach + chmin + chmax + perf + estperf,
  data = cpus
)

# Scatterplot of perf (y axis) as a function of estperf (x axis):
with(cpus, plot(estperf, perf))

# Scatterplot of log(perf) against log(estperf):
with(cpus, plot(log(estperf), log(perf)))

# And with a lowess curve added. The smooth is computed with log(estperf) as x
# and log(perf) as y so that it lies on the same axes as the plot above.
log_smooth <- with(cpus, lowess(log(estperf), log(perf)))
lines(log_smooth)

# The raw plot is clustered for low values and then scattered for larger
# values. Once we take the logarithm of the values, they are more linear and
# normalized. The lowess curve is not linear: for lower values the line is
# curved and there are more data under the curve; for greater values the line
# is linear.
###############################
library(MASS)

# Scatterplot matrix of the whole data frame.
plot(cpus)

# log(perf) against log(estperf), with a lowess smooth on the same axes.
plot(log(cpus$perf) ~ log(cpus$estperf))
lines(lowess(log(cpus$estperf), log(cpus$perf)))

# a) Maybe try taking log(mmin), because it looks like a repeated
#    multiplication.
# b) Looks straight to me.
#    No, it is not the same variability.
###############################
library(MASS)

# Scatterplot matrix without the first column. Yes, the name variable would
# need to be transformed, because it is not a numeric value.
plot(cpus[-1])

# log(perf) against log(estperf), with the axis labels given explicitly.
with(cpus, plot(log(estperf), log(perf), xlab = "Estperf", ylab = "Perf"))

# Lowess smooth on the same axes.
# Yes: as shown by the lowess line, the plotted points scatter about a
# straight line. From examining the graph, there seems to be more variability
# as performance increases. However, because we did a log transformation, we
# cannot accurately assess variability.
with(cpus, lines(lowess(log(estperf), log(perf))))
#################################
library(MASS)

# Scatterplot matrix of the whole data frame.
pairs(cpus)

# perf against estperf on the raw scale.
plot(perf ~ estperf, data = cpus)

# The same relationship on the log-log scale, with both axes fixed to [1, 8]
# so the y = x reference line runs corner to corner.
plot(log(perf) ~ log(estperf), xlim = c(1, 8), ylim = c(1, 8), data = cpus)
abline(0, 1)

# Lowess smooth with span f = 0.5. lowess() has no `data` argument, so the
# columns are resolved with with(); the smooth is then drawn on the log-log
# plot above.
smooth_fit <- with(cpus, lowess(log(estperf), log(perf), f = 0.5))
lines(smooth_fit)