library(tidyverse)
library(haven)
library(labelled)
毕业论文实证
1 CFPS数据导入
load("fam-and-person.RData")
2 数据清洗
从现有的 CFPS 数据中提取本文所需的变量,并对缺失、离群等数据进行剔除和调整。
# 整理户主信息
<- person %>%
host_part remove_labels() %>% # 去除CFPS数据中的标签信息
transmute(
pid,Age = age,
Gender = gender,
Health = ifelse(qp201 == 5, 0, 1),
Insurance = ifelse(qp605_a_78 == 1, 0, 1),
Marriage = ifelse(qea0 == 2, 1, 0),
Education = pmax(cfps2018eduy, cfps2018eduy_im, na.rm = TRUE)
%>%
) mutate(across(everything(), ~ replace_na(.x, 0)))
# 整理家庭成员信息
<- person %>%
person_part remove_labels() %>%
transmute(
fid18 = fid18,
= ifelse(between(age, 16, 60) & employ != 1, 1, 0), # 失业为1
就业 = ifelse(coalesce(cfps2018eduy, cfps2018eduy_im) < 9, 1, 0),
教育 = ifelse(qp201 == 5, 1, 0),
健康状况 = ifelse(qp605_a_78 == 1, 1, 0),
医疗保险 = ifelse(qn202 == 0, 1, 0),
信息获取 = ifelse(pd501b < 0, 0, pd501b), # 过去12个月学校教育支出
个人学校教育 = ifelse(pd503r < 0, 0, pd503r), # 过去12个月课外辅导费(元)
个人校外教育 = pmax(pd5total, pd5ckp, pd5total_m, na.rm = TRUE),
个人教育总支出 = ifelse(个人教育总支出 < 0, 0, 个人教育总支出),
个人教育总支出 = ifelse(between(age, 14, 65), 1, 0)
劳动力 %>%
) group_by(fid18) %>%
reframe(
= ifelse(mean(就业, na.rm = TRUE) == 1, 1, 0),
就业 = ifelse(mean(教育, na.rm = TRUE) == 1, 1, 0),
教育 = ifelse(mean(健康状况, na.rm = TRUE) == 1, 1, 0),
健康状况 = ifelse(mean(医疗保险, na.rm = TRUE) == 1, 1, 0),
医疗保险 = ifelse(mean(信息获取, na.rm = TRUE) == 1, 1, 0),
信息获取 = sum(个人学校教育, na.rm = TRUE),
家庭学校教育 = sum(个人校外教育, na.rm = TRUE),
家庭校外教育 = sum(个人教育总支出, na.rm = TRUE),
家庭总教育 Ratio = round(sum(劳动力 == 0) / sum(劳动力 == 1), 2),
Ratio = ifelse(is.infinite(Ratio), 4, Ratio)
%>%
) mutate(across(everything(), ~ replace_na(.x, 0)))
# 整理家庭信息,合并户主及家庭成员信息
# 得到本文数据
<- famecon %>%
data remove_labels() %>%
filter(fa1 == 5 | fa101 == 5) %>% # 农村家庭
filter(familysize18 > 0) %>%
transmute(
fid18 = fid18,
Nonagr = case_when(
== 1 ~ 1,
fo101 == 5 ~ 0,
fo101 .default = NA
),= ifelse(fincome1_per < quantile(fincome1_per, na.rm = TRUE, prob = .4), 1, 0),
收入 = ifelse(houseasset_gross == 0, 1, 0),
资产 = ifelse(fa3 %in% c(1, 2, 5, 6, 7), 1, 0),
饮用水 = ifelse(fa4 %in% c(1, 2), 1, 0),
做饭燃料 = ifelse(fq801 / familysize18 > 12, 0, 1),
居住面积 Size = familysize18,
Allowance = ifelse(fn100 == 1, 1, 0),
resp1pid, Region = case_when(
%in% c(11:13, 21, 31:33, 35, 37, 44, 46) ~ "东部",
provcd18 %in% c(14, 22:23, 34, 36, 41:43) ~ "中部",
provcd18 %in% c(15, 45, 50:54, 61:65) ~ "西部"
provcd18
),Region = factor(Region, levels = c("东部", "中部", "西部")),
provcd18%>%
) drop_na() %>%
left_join(host_part, by = c("resp1pid" = "pid")) %>%
left_join(person_part, by = "fid18") %>%
drop_na() %>%
mutate(
= ifelse(Nonagr == 1, 0, 就业),
就业 Score = round((1/9 * (收入 + 就业 + 资产 + 饮用水 + 做饭燃料 + 居住面积) + 1/12 * (教育 + 健康状况 + 医疗保险 + 信息获取)), 2),
Educationfee = log(家庭总教育 + 1),
Schoolfee = log(家庭学校教育 + 1),
Outfee = log(家庭校外教育 + 1)
%>%
) filter(!(Score == 0 & Schoolfee == 0)) %>%
filter(!(Nonagr == 0 & Schoolfee > quantile(Schoolfee, probs = .95)))
data
# A tibble: 8,216 × 31
fid18 Nonagr 收入 资产 饮用水 做饭燃料 居住面积 Size Allowance resp1pid
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 100724 1 1 0 1 0 1 4 0 130492103
2 100733 1 0 0 0 0 1 1 0 130463104
3 100782 1 0 0 0 0 1 1 0 130517101
4 100810 0 1 0 0 0 1 4 0 100810551
5 100879 1 1 0 0 0 0 3 0 130630103
6 101129 1 1 0 0 0 1 5 1 130896107
7 101130 1 0 0 0 0 1 1 0 130897103
8 101274 1 1 0 0 1 1 3 0 140093103
9 101303 1 0 0 0 0 0 1 0 140122105
10 101711 1 0 0 0 0 1 4 0 140676103
# ℹ 8,206 more rows
# ℹ 21 more variables: Region <fct>, provcd18 <dbl>, Age <dbl>, Gender <dbl>,
# Health <dbl>, Insurance <dbl>, Marriage <dbl>, Education <dbl>, 就业 <dbl>,
# 教育 <dbl>, 健康状况 <dbl>, 医疗保险 <dbl>, 信息获取 <dbl>,
# 家庭学校教育 <dbl>, 家庭校外教育 <dbl>, 家庭总教育 <dbl>, Ratio <dbl>,
# Score <dbl>, Educationfee <dbl>, Schoolfee <dbl>, Outfee <dbl>
3 描述性分析
<- function(df, vars){
calculate_mean_sd %>%
df summarize(across(all_of(vars),
list(mean = mean, sd = sd),
.names = "{col}_{fn}"))
}
%>%
data calculate_mean_sd(c("Educationfee", "Nonagr", "Gender",
"Age", "Health", "Marriage", "Education",
"Insurance", "Allowance", "Size", "Ratio")) %>%
pivot_longer(everything(),
names_sep = "_",
names_to = c("variable", ".value")) %>%
mutate(across(where(is.numeric), ~ round(.x, 2)))
# A tibble: 11 × 3
variable mean sd
<chr> <dbl> <dbl>
1 Educationfee 0.86 2.62
2 Nonagr 0.5 0.5
3 Gender 0.55 0.5
4 Age 52.1 14.5
5 Health 0.79 0.41
6 Marriage 0.83 0.38
7 Education 6.27 4.38
8 Insurance 0.91 0.29
9 Allowance 0.55 0.5
10 Size 3.84 2
11 Ratio 0.69 1.27
4 不同K值变量描述性分析
4.1 k=0.3
<- 0.3
k %>%
data mutate(Repoverty = ifelse(Score >= k, 1, 0)) %>%
group_by(Repoverty) %>%
calculate_mean_sd(c("Educationfee", "Nonagr", "Gender",
"Age", "Health", "Marriage", "Education",
"Insurance", "Allowance", "Size", "Ratio")) %>%
mutate(across(where(is.numeric), ~ round(.x, 2))) %>%
pivot_longer(-1,
names_sep = "_",
names_to = c("variable", ".value")) %>%
mutate(mean_sd = str_c(mean, "\n(", sd, ")")) %>%
pivot_wider(id_cols = variable,
names_from = Repoverty,
values_from = mean_sd)
# A tibble: 11 × 3
variable `0` `1`
<chr> <chr> <chr>
1 Educationfee "0.99\n(2.79)" "0.71\n(2.38)"
2 Nonagr "0.54\n(0.5)" "0.46\n(0.5)"
3 Gender "0.54\n(0.5)" "0.55\n(0.5)"
4 Age "49.39\n(14.1)" "55.44\n(14.39)"
5 Health "0.85\n(0.36)" "0.72\n(0.45)"
6 Marriage "0.86\n(0.35)" "0.79\n(0.41)"
7 Education "7.56\n(4.16)" "4.67\n(4.11)"
8 Insurance "0.93\n(0.26)" "0.88\n(0.32)"
9 Allowance "0.49\n(0.5)" "0.62\n(0.49)"
10 Size "3.9\n(1.91)" "3.77\n(2.1)"
11 Ratio "0.46\n(1.01)" "0.98\n(1.48)"
4.2 k=0.4
<- 0.4
k %>%
data mutate(Repoverty = ifelse(Score >= k, 1, 0)) %>%
group_by(Repoverty) %>%
calculate_mean_sd(c("Educationfee", "Nonagr", "Gender",
"Age", "Health", "Marriage", "Education",
"Insurance", "Allowance", "Size", "Ratio")) %>%
mutate(across(where(is.numeric), ~ round(.x, 2))) %>%
pivot_longer(-1,
names_sep = "_",
names_to = c("variable", ".value")) %>%
mutate(mean_sd = str_c(mean, "\n(", sd, ")")) %>%
pivot_wider(id_cols = variable,
names_from = Repoverty,
values_from = mean_sd)
# A tibble: 11 × 3
variable `0` `1`
<chr> <chr> <chr>
1 Educationfee "0.95\n(2.74)" "0.48\n(1.97)"
2 Nonagr "0.54\n(0.5)" "0.36\n(0.48)"
3 Gender "0.55\n(0.5)" "0.55\n(0.5)"
4 Age "50.74\n(14.26)" "57.61\n(14.4)"
5 Health "0.82\n(0.38)" "0.66\n(0.47)"
6 Marriage "0.85\n(0.36)" "0.74\n(0.44)"
7 Education "6.9\n(4.28)" "3.69\n(3.83)"
8 Insurance "0.92\n(0.27)" "0.86\n(0.35)"
9 Allowance "0.52\n(0.5)" "0.64\n(0.48)"
10 Size "3.91\n(1.96)" "3.55\n(2.14)"
11 Ratio "0.56\n(1.12)" "1.25\n(1.64)"
4.3 k=0.5
<- 0.5
k %>%
data mutate(Repoverty = ifelse(Score >= k, 1, 0)) %>%
group_by(Repoverty) %>%
calculate_mean_sd(c("Educationfee", "Nonagr", "Gender",
"Age", "Health", "Marriage", "Education",
"Insurance", "Allowance", "Size", "Ratio")) %>%
mutate(across(where(is.numeric), ~ round(.x, 2))) %>%
pivot_longer(-1,
names_sep = "_",
names_to = c("variable", ".value")) %>%
mutate(mean_sd = str_c(mean, "\n(", sd, ")")) %>%
pivot_wider(id_cols = variable,
names_from = Repoverty,
values_from = mean_sd)
# A tibble: 11 × 3
variable `0` `1`
<chr> <chr> <chr>
1 Educationfee "0.92\n(2.7)" "0.03\n(0.46)"
2 Nonagr "0.52\n(0.5)" "0.25\n(0.44)"
3 Gender "0.55\n(0.5)" "0.51\n(0.5)"
4 Age "51.4\n(14.36)" "61.55\n(13.73)"
5 Health "0.81\n(0.39)" "0.48\n(0.5)"
6 Marriage "0.84\n(0.37)" "0.68\n(0.47)"
7 Education "6.52\n(4.35)" "2.77\n(3.25)"
8 Insurance "0.92\n(0.28)" "0.81\n(0.39)"
9 Allowance "0.54\n(0.5)" "0.66\n(0.47)"
10 Size "3.91\n(2)" "2.88\n(1.81)"
11 Ratio "0.61\n(1.18)" "1.75\n(1.82)"
最终对上面三个结果进行合并,得到论文中的表 3。
5 回归模型
5.1 主回归模型(k=0.4)
<- 0.4
k <- data %>%
probit_data mutate(Repoverty = ifelse(Score >= k, 1, 0))
<- glm(Repoverty ~ Educationfee +
probit_edu + Age + Health + Marriage + Education +
Gender + Allowance + Size + Ratio + Region,
Insurance family = binomial(link = "probit"),
data = probit_data)
summary(probit_edu)
Call:
glm(formula = Repoverty ~ Educationfee + Gender + Age + Health +
Marriage + Education + Insurance + Allowance + Size + Ratio +
Region, family = binomial(link = "probit"), data = probit_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.6667 -0.6647 -0.4540 -0.2802 2.8737
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.202928 0.112538 -1.803 0.0714 .
Educationfee -0.034142 0.007503 -4.551 5.35e-06 ***
Gender 0.146105 0.035661 4.097 4.19e-05 ***
Age 0.001852 0.001541 1.202 0.2295
Health -0.331897 0.039607 -8.380 < 2e-16 ***
Marriage -0.107123 0.046022 -2.328 0.0199 *
Education -0.086578 0.004335 -19.973 < 2e-16 ***
Insurance -0.266796 0.056794 -4.698 2.63e-06 ***
Allowance 0.220714 0.036228 6.092 1.11e-09 ***
Size -0.003814 0.009312 -0.410 0.6821
Ratio 0.122604 0.015308 8.009 1.16e-15 ***
Region中部 -0.033246 0.044138 -0.753 0.4513
Region西部 0.092031 0.042763 2.152 0.0314 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 8143.8 on 8215 degrees of freedom
Residual deviance: 7048.6 on 8203 degrees of freedom
AIC: 7074.6
Number of Fisher Scoring iterations: 5
library(margins)
summary(margins(probit_edu))
factor AME SE z p lower upper
Age 0.0004 0.0004 1.2018 0.2295 -0.0003 0.0012
Allowance 0.0527 0.0086 6.1198 0.0000 0.0358 0.0695
Education -0.0207 0.0010 -21.0489 0.0000 -0.0226 -0.0187
Educationfee -0.0081 0.0018 -4.5606 0.0000 -0.0116 -0.0046
Gender 0.0349 0.0085 4.1045 0.0000 0.0182 0.0515
Health -0.0792 0.0094 -8.4657 0.0000 -0.0975 -0.0609
Insurance -0.0637 0.0135 -4.7129 0.0000 -0.0901 -0.0372
Marriage -0.0256 0.0110 -2.3298 0.0198 -0.0471 -0.0041
Ratio 0.0293 0.0036 8.0976 0.0000 0.0222 0.0363
Region西部 0.0224 0.0104 2.1484 0.0317 0.0020 0.0428
Region中部 -0.0077 0.0102 -0.7546 0.4505 -0.0277 0.0123
Size -0.0009 0.0022 -0.4096 0.6821 -0.0053 0.0034
可以得到家庭教育支出的边际效应为 -0.0081。
5.2 中介效应模型(k=0.4)
<- glm(Nonagr ~ Educationfee +
probit_nonagr + Age + Health + Marriage + Education +
Gender + Allowance + Size + Ratio + Region,
Insurance family = binomial(link = "probit"),
data = probit_data)
summary(probit_nonagr)
Call:
glm(formula = Nonagr ~ Educationfee + Gender + Age + Health +
Marriage + Education + Insurance + Allowance + Size + Ratio +
Region, family = binomial(link = "probit"), data = probit_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.7584 -1.0810 0.3792 1.0381 2.4863
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.175649 0.097354 -1.804 0.071195 .
Educationfee 0.022311 0.005674 3.932 8.41e-05 ***
Gender -0.076264 0.030430 -2.506 0.012205 *
Age -0.006303 0.001288 -4.895 9.84e-07 ***
Health 0.040190 0.037410 1.074 0.282687
Marriage -0.032308 0.042593 -0.759 0.448128
Education -0.009192 0.003688 -2.492 0.012698 *
Insurance -0.094678 0.052977 -1.787 0.073913 .
Allowance 0.102858 0.030947 3.324 0.000888 ***
Size 0.165113 0.008336 19.807 < 2e-16 ***
Ratio -0.236237 0.015969 -14.793 < 2e-16 ***
Region中部 0.198323 0.037178 5.334 9.58e-08 ***
Region西部 0.195839 0.036656 5.343 9.16e-08 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 11390 on 8215 degrees of freedom
Residual deviance: 10025 on 8203 degrees of freedom
AIC: 10051
Number of Fisher Scoring iterations: 4
<- glm(Repoverty ~ Educationfee + Nonagr +
probit_renonagr + Age + Health + Marriage + Education +
Gender + Allowance + Size + Ratio + Region,
Insurance family = binomial(link = "probit"),
data = probit_data)
summary(probit_renonagr)
Call:
glm(formula = Repoverty ~ Educationfee + Nonagr + Gender + Age +
Health + Marriage + Education + Insurance + Allowance + Size +
Ratio + Region, family = binomial(link = "probit"), data = probit_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.6826 -0.6552 -0.4439 -0.2676 2.8130
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.070138 0.114095 -0.615 0.538733
Educationfee -0.031552 0.007565 -4.170 3.04e-05 ***
Nonagr -0.313968 0.037539 -8.364 < 2e-16 ***
Gender 0.137459 0.035821 3.837 0.000124 ***
Age 0.001070 0.001548 0.691 0.489526
Health -0.330631 0.039777 -8.312 < 2e-16 ***
Marriage -0.110826 0.046260 -2.396 0.016587 *
Education -0.087310 0.004342 -20.108 < 2e-16 ***
Insurance -0.280394 0.057110 -4.910 9.12e-07 ***
Allowance 0.229645 0.036410 6.307 2.84e-10 ***
Size 0.015948 0.009585 1.664 0.096142 .
Ratio 0.103773 0.015514 6.689 2.25e-11 ***
Region中部 -0.017044 0.044389 -0.384 0.700997
Region西部 0.112843 0.043060 2.621 0.008778 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 8143.8 on 8215 degrees of freedom
Residual deviance: 6978.5 on 8202 degrees of freedom
AIC: 7006.5
Number of Fisher Scoring iterations: 5
6 工具变量回归(内生性检验)
library(AER) # 加载AER包
6.1 k=0.3
<- 0.3
k <- data %>%
probit_data mutate(Repoverty = ifelse(Score >= k, 1, 0))
<- probit_data %>%
lvprobit_data mutate(.by = provcd18,
Ivedufee = log((sum(家庭总教育) - 家庭总教育) / (n() - 1) + 1))
# 拟合IV回归模型
<- ivreg(Repoverty ~ Educationfee | Ivedufee + Nonagr +
ivmodel + Age + Health + Marriage + Education +
Gender + Allowance + Size + Ratio + Region,
Insurance data = lvprobit_data)
# 检验是否存在内生性
summary(ivmodel, diagnostics = TRUE)
Call:
ivreg(formula = Repoverty ~ Educationfee | Ivedufee + Nonagr +
Gender + Age + Health + Marriage + Education + Insurance +
Allowance + Size + Ratio + Region, data = lvprobit_data)
Residuals:
Min 1Q Median 3Q Max
-0.5169 -0.5169 0.1125 0.4831 1.4524
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.51693 0.01106 46.740 < 2e-16 ***
Educationfee -0.08131 0.01089 -7.467 9.03e-14 ***
Diagnostic tests:
df1 df2 statistic p-value
Weak instruments 13 8199 27.79 < 2e-16 ***
Wu-Hausman 1 8210 51.18 9.14e-13 ***
Sargan 12 NA 1136.63 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.5303 on 8211 degrees of freedom
Multiple R-Squared: -0.1374, Adjusted R-squared: -0.1375
Wald test: 55.76 on 1 and 8211 DF, p-value: 9.033e-14
6.2 k=0.4
<- 0.4
k <- data %>%
probit_data mutate(Repoverty = ifelse(Score >= k, 1, 0))
<- probit_data %>%
lvprobit_data mutate(.by = provcd18,
Ivedufee = log((sum(家庭总教育) - 家庭总教育) / (n() - 1) + 1))
# 拟合IV回归模型
<- ivreg(Repoverty ~ Educationfee | Ivedufee + Nonagr +
ivmodel + Age + Health + Marriage + Education +
Gender + Allowance + Size + Ratio + Region,
Insurance data = lvprobit_data)
# 检验是否存在内生性
summary(ivmodel, diagnostics = TRUE)
Call:
ivreg(formula = Repoverty ~ Educationfee | Ivedufee + Nonagr +
Gender + Age + Health + Marriage + Education + Insurance +
Allowance + Size + Ratio + Region, data = lvprobit_data)
Residuals:
Min 1Q Median 3Q Max
-0.2887 -0.2887 -0.2887 0.6229 1.8444
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.288679 0.009784 29.50 <2e-16 ***
Educationfee -0.106781 0.009633 -11.09 <2e-16 ***
Diagnostic tests:
df1 df2 statistic p-value
Weak instruments 13 8199 27.79 <2e-16 ***
Wu-Hausman 1 8210 147.31 <2e-16 ***
Sargan 12 NA 710.63 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.4691 on 8211 degrees of freedom
Multiple R-Squared: -0.3928, Adjusted R-squared: -0.393
Wald test: 122.9 on 1 and 8211 DF, p-value: < 2.2e-16
6.3 k=0.5
<- 0.5
k <- data %>%
probit_data mutate(Repoverty = ifelse(Score >= k, 1, 0))
<- probit_data %>%
lvprobit_data mutate(.by = provcd18,
Ivedufee = log((sum(家庭总教育) - 家庭总教育) / (n() - 1) + 1))
# 拟合IV回归模型
<- ivreg(Repoverty ~ Educationfee | Ivedufee + Nonagr +
ivmodel + Age + Health + Marriage + Education +
Gender + Allowance + Size + Ratio + Region,
Insurance data = lvprobit_data)
# 检验是否存在内生性
summary(ivmodel, diagnostics = TRUE)
Call:
ivreg(formula = Repoverty ~ Educationfee | Ivedufee + Nonagr +
Gender + Age + Health + Marriage + Education + Insurance +
Allowance + Size + Ratio + Region, data = lvprobit_data)
Residuals:
Min 1Q Median 3Q Max
-0.1467 -0.1467 -0.1467 -0.1467 1.5488
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.146670 0.006915 21.21 <2e-16 ***
Educationfee -0.090914 0.006808 -13.35 <2e-16 ***
Diagnostic tests:
df1 df2 statistic p-value
Weak instruments 13 8199 27.79 <2e-16 ***
Wu-Hausman 1 8210 276.65 <2e-16 ***
Sargan 12 NA 401.67 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.3316 on 8211 degrees of freedom
Multiple R-Squared: -0.727, Adjusted R-squared: -0.7272
Wald test: 178.3 on 1 and 8211 DF, p-value: < 2.2e-16
7 稳健性检验
7.1 k=0.3
<- 0.3
k <- data %>%
probit_data mutate(Repoverty = ifelse(Score >= k, 1, 0))
<- glm(Repoverty ~ Educationfee +
probit_edu + Age + Health + Marriage + Education +
Gender + Allowance + Size + Ratio + Region,
Insurance family = binomial(link = "probit"),
data = probit_data)
summary(probit_edu)
Call:
glm(formula = Repoverty ~ Educationfee + Gender + Age + Health +
Marriage + Education + Insurance + Allowance + Size + Ratio +
Region, family = binomial(link = "probit"), data = probit_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.3387 -0.9550 -0.6006 1.0478 2.4057
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.285156 0.098412 2.898 0.003761 **
Educationfee -0.021324 0.005765 -3.699 0.000217 ***
Gender 0.114242 0.030726 3.718 0.000201 ***
Age 0.004723 0.001309 3.609 0.000307 ***
Health -0.292155 0.036924 -7.912 2.53e-15 ***
Marriage -0.103635 0.042196 -2.456 0.014047 *
Education -0.081042 0.003739 -21.677 < 2e-16 ***
Insurance -0.331956 0.053200 -6.240 4.38e-10 ***
Allowance 0.230230 0.030963 7.436 1.04e-13 ***
Size 0.012157 0.007930 1.533 0.125283
Ratio 0.114665 0.014458 7.931 2.18e-15 ***
Region中部 0.146900 0.037431 3.925 8.69e-05 ***
Region西部 0.325649 0.036970 8.808 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 11296.6 on 8215 degrees of freedom
Residual deviance: 9885.8 on 8203 degrees of freedom
AIC: 9911.8
Number of Fisher Scoring iterations: 4
<- glm(Nonagr ~ Educationfee +
probit_nonagr + Age + Health + Marriage + Education +
Gender + Allowance + Size + Ratio + Region,
Insurance family = binomial(link = "probit"),
data = probit_data)
summary(probit_nonagr)
Call:
glm(formula = Nonagr ~ Educationfee + Gender + Age + Health +
Marriage + Education + Insurance + Allowance + Size + Ratio +
Region, family = binomial(link = "probit"), data = probit_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.7584 -1.0810 0.3792 1.0381 2.4863
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.175649 0.097354 -1.804 0.071195 .
Educationfee 0.022311 0.005674 3.932 8.41e-05 ***
Gender -0.076264 0.030430 -2.506 0.012205 *
Age -0.006303 0.001288 -4.895 9.84e-07 ***
Health 0.040190 0.037410 1.074 0.282687
Marriage -0.032308 0.042593 -0.759 0.448128
Education -0.009192 0.003688 -2.492 0.012698 *
Insurance -0.094678 0.052977 -1.787 0.073913 .
Allowance 0.102858 0.030947 3.324 0.000888 ***
Size 0.165113 0.008336 19.807 < 2e-16 ***
Ratio -0.236237 0.015969 -14.793 < 2e-16 ***
Region中部 0.198323 0.037178 5.334 9.58e-08 ***
Region西部 0.195839 0.036656 5.343 9.16e-08 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 11390 on 8215 degrees of freedom
Residual deviance: 10025 on 8203 degrees of freedom
AIC: 10051
Number of Fisher Scoring iterations: 4
<- glm(Repoverty ~ Educationfee + Nonagr +
probit_renonagr + Age + Health + Marriage + Education +
Gender + Allowance + Size + Ratio + Region,
Insurance family = binomial(link = "probit"),
data = probit_data)
summary(probit_renonagr)
Call:
glm(formula = Repoverty ~ Educationfee + Nonagr + Gender + Age +
Health + Marriage + Education + Insurance + Allowance + Size +
Ratio + Region, family = binomial(link = "probit"), data = probit_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.3620 -0.9507 -0.6008 1.0490 2.3628
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.351889 0.099556 3.535 0.000408 ***
Educationfee -0.019947 0.005778 -3.452 0.000556 ***
Nonagr -0.147761 0.031785 -4.649 3.34e-06 ***
Gender 0.110441 0.030764 3.590 0.000331 ***
Age 0.004383 0.001312 3.341 0.000834 ***
Health -0.290593 0.036966 -7.861 3.81e-15 ***
Marriage -0.105533 0.042251 -2.498 0.012497 *
Education -0.081554 0.003742 -21.795 < 2e-16 ***
Insurance -0.338226 0.053296 -6.346 2.21e-10 ***
Allowance 0.235217 0.031014 7.584 3.35e-14 ***
Size 0.020905 0.008158 2.562 0.010397 *
Ratio 0.104714 0.014630 7.157 8.23e-13 ***
Region中部 0.156955 0.037529 4.182 2.89e-05 ***
Region西部 0.336589 0.037085 9.076 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 11296.6 on 8215 degrees of freedom
Residual deviance: 9864.2 on 8202 degrees of freedom
AIC: 9892.2
Number of Fisher Scoring iterations: 4
7.2 k=0.5
<- 0.5
k <- data %>%
probit_data mutate(Repoverty = ifelse(Score >= k, 1, 0))
<- glm(Repoverty ~ Educationfee +
probit_edu + Age + Health + Marriage + Education +
Gender + Allowance + Size + Ratio + Region,
Insurance family = binomial(link = "probit"),
data = probit_data)
summary(probit_edu)
Call:
glm(formula = Repoverty ~ Educationfee + Gender + Age + Health +
Marriage + Education + Insurance + Allowance + Size + Ratio +
Region, family = binomial(link = "probit"), data = probit_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.5272 -0.3748 -0.2034 -0.1154 3.8503
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.440e-01 1.620e-01 -2.123 0.0337 *
Educationfee -1.467e-01 2.960e-02 -4.956 7.19e-07 ***
Gender 1.049e-01 5.130e-02 2.044 0.0409 *
Age 4.501e-05 2.340e-03 0.019 0.9847
Health -6.217e-01 5.151e-02 -12.069 < 2e-16 ***
Marriage -7.578e-03 6.236e-02 -0.122 0.9033
Education -8.923e-02 6.509e-03 -13.709 < 2e-16 ***
Insurance -4.324e-01 7.205e-02 -6.001 1.96e-09 ***
Allowance 2.589e-01 5.279e-02 4.904 9.37e-07 ***
Size -7.284e-02 1.509e-02 -4.827 1.39e-06 ***
Ratio 1.353e-01 2.027e-02 6.676 2.46e-11 ***
Region中部 4.283e-02 6.199e-02 0.691 0.4896
Region西部 2.749e-02 6.241e-02 0.440 0.6596
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 4094.4 on 8215 degrees of freedom
Residual deviance: 3184.4 on 8203 degrees of freedom
AIC: 3210.4
Number of Fisher Scoring iterations: 8
<- glm(Nonagr ~ Educationfee +
probit_nonagr + Age + Health + Marriage + Education +
Gender + Allowance + Size + Ratio + Region,
Insurance family = binomial(link = "probit"),
data = probit_data)
summary(probit_nonagr)
Call:
glm(formula = Nonagr ~ Educationfee + Gender + Age + Health +
Marriage + Education + Insurance + Allowance + Size + Ratio +
Region, family = binomial(link = "probit"), data = probit_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.7584 -1.0810 0.3792 1.0381 2.4863
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.175649 0.097354 -1.804 0.071195 .
Educationfee 0.022311 0.005674 3.932 8.41e-05 ***
Gender -0.076264 0.030430 -2.506 0.012205 *
Age -0.006303 0.001288 -4.895 9.84e-07 ***
Health 0.040190 0.037410 1.074 0.282687
Marriage -0.032308 0.042593 -0.759 0.448128
Education -0.009192 0.003688 -2.492 0.012698 *
Insurance -0.094678 0.052977 -1.787 0.073913 .
Allowance 0.102858 0.030947 3.324 0.000888 ***
Size 0.165113 0.008336 19.807 < 2e-16 ***
Ratio -0.236237 0.015969 -14.793 < 2e-16 ***
Region中部 0.198323 0.037178 5.334 9.58e-08 ***
Region西部 0.195839 0.036656 5.343 9.16e-08 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 11390 on 8215 degrees of freedom
Residual deviance: 10025 on 8203 degrees of freedom
AIC: 10051
Number of Fisher Scoring iterations: 4
<- glm(Repoverty ~ Educationfee + Nonagr +
probit_renonagr + Age + Health + Marriage + Education +
Gender + Allowance + Size + Ratio + Region,
Insurance family = binomial(link = "probit"),
data = probit_data)
summary(probit_renonagr)
Call:
glm(formula = Repoverty ~ Educationfee + Nonagr + Gender + Age +
Health + Marriage + Education + Insurance + Allowance + Size +
Ratio + Region, family = binomial(link = "probit"), data = probit_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.5362 -0.3668 -0.2003 -0.1073 3.7236
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.2193492 0.1646580 -1.332 0.18281
Educationfee -0.1482288 0.0301332 -4.919 8.69e-07 ***
Nonagr -0.3255706 0.0571351 -5.698 1.21e-08 ***
Gender 0.0940248 0.0515763 1.823 0.06830 .
Age -0.0009682 0.0023496 -0.412 0.68030
Health -0.6226487 0.0517813 -12.025 < 2e-16 ***
Marriage -0.0112603 0.0628008 -0.179 0.85770
Education -0.0896268 0.0065224 -13.741 < 2e-16 ***
Insurance -0.4458776 0.0725525 -6.146 7.97e-10 ***
Allowance 0.2669269 0.0530878 5.028 4.96e-07 ***
Size -0.0479215 0.0154876 -3.094 0.00197 **
Ratio 0.1193959 0.0204703 5.833 5.46e-09 ***
Region中部 0.0595985 0.0623580 0.956 0.33920
Region西部 0.0484851 0.0628922 0.771 0.44075
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 4094.4 on 8215 degrees of freedom
Residual deviance: 3151.7 on 8202 degrees of freedom
AIC: 3179.7
Number of Fisher Scoring iterations: 8
8 异质性检验
8.1 东部
<- 0.4
k <- data %>%
probit_data mutate(Repoverty = ifelse(Score >= k, 1, 0)) %>%
filter(Region == "东部")
<- glm(Repoverty ~ Educationfee +
probit_edu + Age + Health + Marriage + Education +
Gender + Allowance + Size + Ratio,
Insurance family = binomial(link = "probit"),
data = probit_data)
summary(probit_edu)
Call:
glm(formula = Repoverty ~ Educationfee + Gender + Age + Health +
Marriage + Education + Insurance + Allowance + Size + Ratio,
family = binomial(link = "probit"), data = probit_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.8087 -0.6294 -0.4367 -0.2674 2.6936
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.480900 0.184167 -2.611 0.009022 **
Educationfee -0.028633 0.014493 -1.976 0.048199 *
Gender 0.011474 0.058084 0.198 0.843402
Age 0.006552 0.002616 2.505 0.012248 *
Health -0.350706 0.064469 -5.440 5.33e-08 ***
Marriage -0.164711 0.074952 -2.198 0.027982 *
Education -0.074977 0.007439 -10.079 < 2e-16 ***
Insurance -0.296652 0.081183 -3.654 0.000258 ***
Allowance 0.435848 0.056437 7.723 1.14e-14 ***
Size 0.015103 0.015335 0.985 0.324676
Ratio 0.062306 0.023642 2.635 0.008404 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 3013.4 on 3229 degrees of freedom
Residual deviance: 2602.5 on 3219 degrees of freedom
AIC: 2624.5
Number of Fisher Scoring iterations: 5
8.2 中部
<- 0.4
k <- data %>%
probit_data mutate(Repoverty = ifelse(Score >= k, 1, 0)) %>%
filter(Region == "中部")
<- glm(Repoverty ~ Educationfee +
probit_edu + Age + Health + Marriage + Education +
Gender + Allowance + Size + Ratio,
Insurance family = binomial(link = "probit"),
data = probit_data)
summary(probit_edu)
Call:
glm(formula = Repoverty ~ Educationfee + Gender + Age + Health +
Marriage + Education + Insurance + Allowance + Size + Ratio,
family = binomial(link = "probit"), data = probit_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.8301 -0.6163 -0.4466 -0.2815 2.6645
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.399540 0.222265 -1.798 0.07224 .
Educationfee -0.049391 0.016634 -2.969 0.00298 **
Gender 0.184534 0.069261 2.664 0.00771 **
Age 0.007143 0.003159 2.261 0.02377 *
Health -0.292702 0.074905 -3.908 9.32e-05 ***
Marriage 0.031026 0.092941 0.334 0.73851
Education -0.074323 0.008350 -8.901 < 2e-16 ***
Insurance -0.460124 0.117847 -3.904 9.45e-05 ***
Allowance 0.049653 0.070392 0.705 0.48057
Size -0.022708 0.018598 -1.221 0.22208
Ratio 0.131129 0.029276 4.479 7.50e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2179.8 on 2268 degrees of freedom
Residual deviance: 1864.9 on 2258 degrees of freedom
AIC: 1886.9
Number of Fisher Scoring iterations: 5
8.3 西部
<- 0.4
k <- data %>%
probit_data mutate(Repoverty = ifelse(Score >= k, 1, 0)) %>%
filter(Region == "西部")
<- glm(Repoverty ~ Educationfee +
probit_edu + Age + Health + Marriage + Education +
Gender + Allowance + Size + Ratio,
Insurance family = binomial(link = "probit"),
data = probit_data)
summary(probit_edu)
Call:
glm(formula = Repoverty ~ Educationfee + Gender + Age + Health +
Marriage + Education + Insurance + Allowance + Size + Ratio,
family = binomial(link = "probit"), data = probit_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.8651 -0.7154 -0.4620 -0.2381 2.8157
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.208166 0.185631 1.121 0.26212
Educationfee -0.030610 0.010588 -2.891 0.00384 **
Gender 0.269974 0.061192 4.412 1.02e-05 ***
Age -0.005673 0.002485 -2.283 0.02240 *
Health -0.360215 0.068854 -5.232 1.68e-07 ***
Marriage -0.142701 0.076581 -1.863 0.06241 .
Education -0.107842 0.007204 -14.969 < 2e-16 ***
Insurance -0.054482 0.109007 -0.500 0.61722
Allowance 0.089322 0.062890 1.420 0.15553
Size -0.010170 0.015347 -0.663 0.50753
Ratio 0.200941 0.029450 6.823 8.90e-12 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2923.4 on 2716 degrees of freedom
Residual deviance: 2501.3 on 2706 degrees of freedom
AIC: 2523.3
Number of Fisher Scoring iterations: 5