## ----------------------------------------------------------------------------
## Greene (2003)
##
## data
##   availability: electronic
##   firms: General Motors, Chrysler, General Electric, Westinghouse, US Steel
##   errors: invest_1940 = 261.6, invest_1952 = 645.2, capital_1946 = 232.6 (US Steel)
##
## analysis
##   result: mostly exact at precision given (except for several known errata)
##   OLS standard errors in Table 13.4 do not agree with Table 14.2 (different adjustments)
##   full ML results (Table 14.3) differ rather clearly
## ----------------------------------------------------------------------------
##
## From the Errata to Greene (2003)
## http://pages.stern.nyu.edu/~wgreene/Text/Errata/ERRATA5.htm
##
## In the last column of Table 14.2, the value 0.01378 should be 0.011378 and the
## value 15857.24 should be 16194.68. (Tamer Kulaksizoglu, Department of Economics,
## Purdue University.)
##
## Numerous additional corrections to the results in Tables 14.1 and 14.2.
## First, in both Tables 14.1 and 14.2, in the row headers, the second b2 should be b3.
## In 14.1, in the second row, (6.2959) should be (6.2588). In the 5th row, 0.3086
## should be 0.3085. In the 6th row, (0.0221) should be (0.0220). In the covariance
## and correlation matrix, in the first row, 0.257 should be 0.157. In the third row,
## 0.238 should be 0.138. In Table 14.2, in the 4th row, (0.0157) should be (0.0156).
## In the 7th row, 660.329 should be 660.829. In the last column of this table, the
## value there of 15827.24 should be 15708.84, which is the sum of squares divided
## by 100 instead of 97. It is noted in the previous erratum that this should be
## 16194.84. For reasons to be explained below, the appropriate value is 15708.84.
## In the matrix in the lower part of Table 14.1, all of the "Pooled" values must
## be replaced. The correct values are as follows:
##
## The covariance matrix for the [Pooled estimates] should be
##              GM           CH           GE           WE           US
## GM   9396.05785   -432.36974  -1322.23642   -865.79114   -897.94965
## CH   -432.36974    167.40311    174.03823     89.33643    260.43069
## GE  -1322.23642    174.03823   3733.26031   1302.23055   -972.14491
## WE   -865.79114     89.33643   1302.23055    564.15649   -180.38462
## US   -897.94965    260.43069   -972.14491   -180.38462  10052.04505
##
## The correlation matrix should be
##           GM        CH        GE        WE        US
## GM   1.00000   -.34475   -.22325   -.37605   -.09240
## CH   -.34475   1.00000    .22015    .29070    .20076
## GE   -.22325    .22015   1.00000    .89731   -.15869
## WE   -.37605    .29070    .89731   1.00000   -.07575
## US   -.09240    .20076   -.15869   -.07575   1.00000
##
## We note how the "Pooled" estimates shown in the rightmost column of Table 14.1
## were computed. The textbook formula would specify using OLS with the pooled
## sample, computing the residuals, then computing S using E'E/T, as in (14-9).
## In order to compute the Pooled estimates in Table 14.1, I used the "Covariance
## Structures" model in Section 13.9 of the text. The estimator is that in (13-55),
## however, the estimator of W was not based on OLS residuals. Rather, it was
## computed with the following steps: (1) OLS to obtain residuals e. (2) Compute
## si as the diagonal elements of E'E/T. (3) Compute weighted least squares using
## a groupwise heteroscedasticity approach. (These would be the estimates reported
## in Table 13.4 labeled "Heteroscedastic Feasible GLS.") (4) Residuals are
## recomputed from this heteroscedasticity model and S = E'E/T is recomputed.
## (5) Full GLS is now computed using this matrix S rather than the OLS residuals
## based matrix. These are the values reported in the last column of Table 14.1.
## The identical values also appear in Table 13.4 labeled "Cross-section
## correlation Feasible GLS." The estimates in the matrices above are based on
## these parameters. The relationship of this estimator to the estimator based
## on OLS residuals throughout has not been shown. Surely it is consistent and
## asymptotically normally distributed. Since the intermediate estimator of b is
## consistent, the final estimator has the same properties as the "textbook"
## formula. The small sample properties would be the only difference, but to our
## knowledge, this has not been explored. (Arne Henningsen, University of Kiel,
## Germany and William Greene, NYU.)
##
## ----------------------------------------------------------------------------
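
## For later comparison (not part of the original script): the corrected
## covariance matrix given in the errata above for the "Pooled" estimates
## (lower part of Table 14.1), entered as an R object; errata_residCov and
## firms5 are helper names introduced here only for this purpose.
firms5 <- c("GM", "CH", "GE", "WE", "US")
errata_residCov <- matrix(c(
   9396.05785,  -432.36974, -1322.23642,  -865.79114,   -897.94965,
   -432.36974,   167.40311,   174.03823,    89.33643,    260.43069,
  -1322.23642,   174.03823,  3733.26031,  1302.23055,   -972.14491,
   -865.79114,    89.33643,  1302.23055,   564.15649,   -180.38462,
   -897.94965,   260.43069,  -972.14491,  -180.38462,  10052.04505),
  nrow = 5, byrow = TRUE, dimnames = list(firms5, firms5))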

## preliminaries
source("start.R")

## data pre-processing
gr <- subset(Grunfeld, firm %in% c("General Motors", "Chrysler",
  "General Electric", "Westinghouse", "US Steel"))
gr$firm <- factor(gr$firm)
gr$invest[c(66, 78)] <- c(261.6, 645.2)
gr$capital[72] <- 232.6
pgr <- plm.data(gr, c("firm", "year"))

## NOTE:
##
## In R, various fitting functions can be used to fit several models of interest.
## For example, the basic pooled OLS regression can be fitted using any one of
##   lm(invest ~ value + capital, data = gr)
##   plm(invest ~ value + capital, data = pgr, model = "pooling")
##   systemfit(invest ~ value + capital, data = pgr, method = "OLS", pooled = TRUE)
## Below we show how these can be used. In some cases, a certain piece of
## information can be obtained more easily with one fitting method than with another.


################
## Using lm() ##
################

## Panel data models (Table 13.4, p. 330)

## Homoskedastic OLS
lm_ols_pool <- lm(invest ~ value + capital, data = gr)
gsummary(lm_ols_pool, logLik = TRUE, digits = 5)
sqrt(diag(vcovHC(lm_ols_pool, type = "HC0")))
## or: sqrt(diag(sandwich(lm_ols_pool)))
## -> Beck and Katz in plm() section
## Remark: different scaling:
summary(lm_ols_pool)$sigma^2 * 97/100
sqrt(diag(vcov(lm_ols_pool)) * 97/100)

## Heteroskedastic FGLS
lm_ols_aux <- lm(I(residuals(lm_ols_pool)^2) ~ 0 + firm, data = gr)
lm_fgls_pool <- lm(invest ~ value + capital, data = gr,
  weights = 1/fitted(lm_ols_aux))
gsummary(lm_fgls_pool, digits = 5)
## coefficients are correct, but different scaling for
## standard errors; the scaling used by Greene can be
## reproduced by systemfit (see below)

## Heteroskedastic ML, computed as iterated FGLS
lm_fgls_aux <- lm(I(residuals(lm_fgls_pool)^2) ~ 0 + firm, data = gr)
sigmas_i <- fitted(lm_fgls_aux)
sigmas <- 0
while(any(abs((sigmas_i - sigmas)/sigmas) > 1e-7)) {
  sigmas <- sigmas_i
  lm_fgls_pool_i <- lm(invest ~ value + capital, data = gr, weights = 1/sigmas)
  sigmas_i <- fitted(lm(I(residuals(lm_fgls_pool_i)^2) ~ 0 + firm, data = gr))
}
lm_ml_pool <- lm(invest ~ value + capital, data = gr, weights = 1/sigmas)
lm_ml_aux <- lm(I(residuals(lm_ml_pool)^2) ~ 0 + firm, data = gr)
gsummary(lm_ml_pool, logLik = TRUE, sigma2 = FALSE, R2 = FALSE, digits = 5)
sqrt(diag(vcov(lm_ml_pool)) * 97/100)
## pooled sigma^2 is mean(residuals(lm_fgls_pool)^2)

## Cross-section correlation FGLS and ML can
## be reproduced by systemfit, see below

## Group-specific variances (Table 13.5, p. 331)
rbind(
  "OLS" = coef(lm_ols_aux),
  "Het FGLS" = coef(lm_fgls_aux),
  "Het FGLS (SE)" = sqrt(diag(sandwich(lm_fgls_aux, adjust = TRUE))),
  "Het ML" = coef(lm_ml_aux))
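
## A minimal sketch (not part of the original script): the iterated FGLS above
## is the ML estimator under groupwise heteroskedasticity, so its Gaussian
## log-likelihood can be evaluated directly from the converged residuals and the
## group-specific variance estimates (res_ml and sig2_ml are helper objects
## introduced only for this check).
res_ml  <- residuals(lm_ml_pool)   ## unweighted residuals y - Xb at the ML estimates
sig2_ml <- fitted(lm_ml_aux)       ## sigma_i^2 of each observation's firm
sum(dnorm(res_ml, mean = 0, sd = sqrt(sig2_ml), log = TRUE))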

## OLS estimation (Table 14.2, p. 351)
lm_ols_GM <- lm(invest ~ value + capital, data = gr, subset = firm == "General Motors")
lm_ols_CH <- lm(invest ~ value + capital, data = gr, subset = firm == "Chrysler")
lm_ols_GE <- lm(invest ~ value + capital, data = gr, subset = firm == "General Electric")
lm_ols_WH <- lm(invest ~ value + capital, data = gr, subset = firm == "Westinghouse")
lm_ols_US <- lm(invest ~ value + capital, data = gr, subset = firm == "US Steel")
gsummary(lm_ols_GM)
gsummary(lm_ols_CH)
gsummary(lm_ols_GE)
gsummary(lm_ols_WH)
gsummary(lm_ols_US)
gsummary(lm_ols_pool)
## standard errors are ok (scaled differently from Table 13.4)
## but sigma^2 are still scaled by n rather than n-k (see Greene errata)
## pooled sigma^2 agrees with Errata after rescaling by 97/100


#######################
## Using systemfit() ##
#######################

## Homoskedastic OLS (Table 13.4 and 14.2)
sf_ols_pool <- systemfit(formula = invest ~ value + capital, data = pgr,
  method = "OLS", pooled = TRUE)
gsummary(sf_ols_pool)
mean(residuals(sf_ols_pool)^2)
## see also lm_ols_pool above

## Heteroskedastic FGLS (Table 13.4)
sf_fgls_pool <- systemfit(invest ~ value + capital, data = pgr,
  method = "WLS", pooled = TRUE,
  control = systemfit.control(methodResidCov = "noDfCor"))
gsummary(sf_fgls_pool)

## Heteroskedastic ML (Table 13.4)
sf_ml_pool <- systemfit(invest ~ value + capital, data = pgr,
  method = "WLS", pooled = TRUE,
  control = systemfit.control(methodResidCov = "noDfCor", maxiter = 1000, tol = 1e-10))
gsummary(sf_ml_pool)
mean(residuals(sf_ml_pool)^2)
## pooled sigma^2 is mean(residuals(sf_fgls_pool)^2)

## Cross-section correlation FGLS (Table 13.4 and 14.2)
sf_sur_pool <- systemfit(invest ~ value + capital, data = pgr,
  method = "SUR", pooled = TRUE,
  control = systemfit.control(methodResidCov = "noDfCor", residCovWeighted = TRUE))
gsummary(sf_sur_pool)

## Cross-section correlation ML (Table 13.4)
sf_sur_ml_pool <- systemfit(invest ~ value + capital, data = pgr,
  method = "SUR", pooled = TRUE,
  control = systemfit.control(methodResidCov = "noDfCor", maxiter = 1000, tol = 1e-10))
gsummary(sf_sur_ml_pool)

## Group-specific variances (Table 13.5)
rbind(
  "OLS" = sapply(sf_ols_pool$eq, function(x) mean(x$residuals^2)),
  "Het FGLS" = sapply(sf_fgls_pool$eq, function(x) mean(x$residuals^2)),
  "Het ML" = sapply(sf_ml_pool$eq, function(x) mean(x$residuals^2)),
  "CC FGLS" = sapply(sf_sur_pool$eq, function(x) mean(x$residuals^2)))[, c(3, 1, 2, 5, 4)]

## Cross-section correlation FGLS (Table 14.2)
sf_sur <- systemfit(invest ~ value + capital, data = pgr, method = "SUR",
  control = systemfit.control(methodResidCov = "noDfCor"))
gsummary(sf_sur, sigma2 = FALSE, R2 = FALSE, digits = 4)
gsummary(sf_sur_pool)
## published residual covariance matrix
o <- c(3, 1, 2, 5, 4)
round(sf_sur_pool$residCov[o, o], digits = 5)
## errata version
round(cov(residuals(sf_sur_pool)[, o]), digits = 5)
round(summary(sf_sur_pool)$residCor[o, o], digits = 5)

## OLS (Table 14.2)
sf_ols <- systemfit(formula = invest ~ value + capital, data = pgr, method = "OLS")
gsummary(sf_ols, R2 = FALSE, digits = 5)
gsummary(sf_ols_pool, digits = 5)
mean(residuals(sf_ols_pool)^2)
## see also lm_ols_* above
## sigma^2 scaled by 20 and 100 instead of 17 and 97, respectively
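
## A minimal sketch (not part of the original script): the "textbook" formula
## (14-9) estimates the cross-firm residual covariance matrix as S = E'E/T from
## the pooled OLS residuals, whereas the errata explain that the published
## "Pooled" results are based on residuals from the groupwise-heteroskedastic
## FGLS step (residCovWeighted = TRUE above). The textbook version can be
## computed by hand and set against sf_sur_pool$residCov and the corrected
## matrix from the errata (errata_residCov, entered near the top of the script);
## E_ols and S_textbook are helper objects introduced only for this comparison.
E_ols <- as.matrix(residuals(sf_ols_pool))   ## 20 x 5 matrix of pooled OLS residuals
S_textbook <- crossprod(E_ols) / nrow(E_ols) ## S = E'E/T with T = 20
o <- c(3, 1, 2, 5, 4)                        ## reorder equations as GM, CH, GE, WE, US
round(S_textbook[o, o], digits = 5)
round(sf_sur_pool$residCov[o, o], digits = 5)
round(errata_residCov, digits = 5)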

#################
## Using plm() ##
#################

## Homoskedastic OLS (Table 13.4, p. 330)
plm_ols_pool <- plm(formula = invest ~ value + capital, data = pgr, model = "pooling")
gsummary(plm_ols_pool)
## different scaling: 97/100 (see above)

## White standard errors
sqrt(diag(vcovHC(plm_ols_pool, method = "white1", type = "HC0")))

## Beck and Katz standard errors
sqrt(diag(vcovBK(plm_ols_pool, cluster = "time")))


#####################################
## Figures (just for completeness) ##
#####################################

## Figure 14.1, p. 345
o <- c(3, 1, 2, 5, 4)
plot(as.vector(as.matrix(residuals(sf_fgls_pool)[, o])), type = "l",
  ylim = c(-400, 400), lwd = 2, axes = FALSE,
  ylab = "Residual", xlab = "Firm", xaxs = "i", yaxs = "i")
box()
axis(1, at = 1:5 * 20 - 9.5, labels = c("General\nMotors", "Chrysler\n",
  "General\nElectric", "Westinghouse\n", "U.S. Steel\n"), tick = FALSE)
axis(2, at = c(-400, -200, 0, 200, 400))
abline(h = 0, lty = 2)
abline(v = 1:4 * 20 + 0.5, lty = 2)

## Figure 14.2, p. 346
plot(as.vector(as.matrix(residuals(sf_sur)[, o])), type = "l",
  ylim = c(-400, 400), lwd = 2, axes = FALSE,
  ylab = "Residual", xlab = "Firm", xaxs = "i", yaxs = "i")
box()
axis(1, at = 1:5 * 20 - 9.5, labels = c("General\nMotors", "Chrysler\n",
  "General\nElectric", "Westinghouse\n", "U.S. Steel\n"), tick = FALSE)
axis(2, at = c(-400, -240, -80, 0, 80, 240, 400))
abline(h = 0, lty = 2)
abline(v = 1:4 * 20 + 0.5, lty = 2)
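
## A minimal cross-check (not part of the original script): as the NOTE near the
## top of the script points out, lm(), plm(), and systemfit() can each be used to
## fit the same pooled OLS regression, so their coefficient estimates should
## coincide up to numerical tolerance.
rbind(
  "lm"        = coef(lm_ols_pool),
  "plm"       = coef(plm_ols_pool),
  "systemfit" = unname(sf_ols_pool$eq[[1]]$coefficients))
all.equal(as.numeric(coef(lm_ols_pool)), as.numeric(coef(plm_ols_pool)))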