## R Ue5.2 international inequality imprisonment Prognose ## country ... Land ## income_inequality ... Average of the 20:20 income inequality, ## UN Human development reports 2003-2006 ## imprisonment_log ... Natural log of prisoners per 100,000, UN ## Variablenliste in international_inequality.xls #setwd("C:/MH/WU/LV/OEKONOMETRIE_BA/Oe1_WS23/Chp5/EXERCISES/") setwd("C:/Users/hoersaal/Downloads/") # Daten einlesen (nicht ganz einfach) dat <- read.table("international_inequality.csv", sep=";", dec=",", header=TRUE, na.strings = "#NV", fill = TRUE, comment.char="") ## Daten anschauen dim(dat) # Es sind Daten: 23 Laender, 30 Variable # 0-te Zeile sind die names head(dat,4) tail(dat) hist(dat$income_inequality) hist(dat$imprisonment_log) # scatterplot plot(imprisonment_log ~ income_inequality, data=dat) plot(exp(imprisonment_log) ~ income_inequality, data=dat) cbind(dat$country,exp(dat$imprisonment_log)) # Regression imprisonment_log auf income_inequality reg <- lm(imprisonment_log ~ income_inequality + 1, data=dat) summary(reg) ## Prognose fuer Ungarn n <- length(dat$imprisonment_log) income_inequality2 <- c(dat$income_inequality, 4.1) # Wert für Ungarn dazu ## Prognose imprisonment_log_hat2 <- reg$coefficients[1] + reg$coefficients[2]*income_inequality2[n+1] as.numeric(imprisonment_log_hat2) as.numeric(exp(imprisonment_log_hat2)) # ... in Haeftlinge / 100,000 Ew ## Prognoseintervall analog zu K5Ue1_R.txt ## Berechnung der Varianz des Prognosefehlers Kap 5 Folie 16 x <- dat$income_inequality Sxx <- as.numeric( t(x) %*% x ) x2 <- income_inequality2[n+1] # sigma der Residuen ist in summary(reg)$sigma abgelegt. V_pf <- summary(reg)$sigma^2 *( 1 + 1/n + x2 / Sxx ) se_pf <- as.numeric(V_pf^(0.5)) ## Prognosintervall mit Ueberdeckung von 95% y_hat2 <- imprisonment_log_hat2 q_vtlg <- q_norm <- 1.96 # 0.975-Quantil der N(0,1): qnorm(0.975) PI_links <- y_hat2 - q_vtlg*se_pf PI_rechts <- y_hat2 + q_vtlg*se_pf as.numeric(c(PI_links,PI_rechts)) ## Vergleichen sie mit den anderen Ländern as.numeric( c(exp(PI_links), exp(PI_rechts)) ) ## Laenge der PIs as.numeric(c(PI_links,PI_rechts, PI_rechts - PI_links)) ## bei Verwendung der t-Vtlg q_vtlg <- qt(0.975, n-2) ... analog zu oben ab 9 Zeilen darueber