Import.php 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. <?php
  2. namespace Area\Src;
  3. set_time_limit(0);
  4. use Dever;
  5. class Import
  6. {
  7. //private $url = 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/';
  8. //private $url = 'http://www.stats.gov.cn/sj/tjbz/tjyqhdmhcxhfdm/2020/';
  9. private $url = 'http://www.stats.gov.cn/sj/tjbz/tjyqhdmhcxhfdm/2022/';
  /**
   * Expose the base URL that every scraped page path is appended to.
   *
   * @return string The value currently held in $this->url.
   */
  public function getUrl()
  {
      $base = $this->url;

      return $base;
  }
  /**
   * Fetch the latest region data from the National Bureau of Statistics site.
   *
   * Downloads index.html below the base URL, extracts every province link
   * from it and hands the matches to getProvince().
   *
   * @return mixed Always 1 once getProvince() has returned.
   */
  public function load()
  {
      $html = $this->html($this->url . 'index.html');

      // $result[1] receives the hrefs, $result[2] the link texts.
      $result = array();
      preg_match_all('/<td><a href="(.*?)">(.*?)<br \/><\/a><\/td>/i', $html, $result);

      Dever::config('base')->hook = true;

      # Import the provinces
      $this->getProvince($result);

      return 1;
  }
  /**
   * Store every province found on the index page and descend into its cities.
   *
   * When the request carries a "province" input, all provinces are still
   * stored, but only the one whose name equals that input is descended into.
   *
   * @param array $result preg_match_all() output of load(): index 1 holds the
   *                      link hrefs, index 2 the link texts (province names).
   *
   * @return void
   */
  public function getProvince($result)
  {
      $province = Dever::input('province');

      if (empty($result[1]) || empty($result[2])) {
          return;
      }

      foreach ($result[2] as $k => $v) {
          $href = $result[1][$k];

          // Build a fresh row per province so that nothing written for the
          // previous one (e.g. its pinyin) can leak into this one.
          $update = array();

          // basename() removes the literal ".html" suffix. trim($href, '.html')
          // would instead strip any of the characters ".", "h", "t", "m", "l"
          // from both ends of the string.
          $update['id'] = $this->id(basename($href, '.html'));
          $update['name'] = strip_tags($v);
          $this->pinyin($update);
          $id = $this->upinto('area/province', $update['id'], $update);

          # Fetch the cities: for every province, or only for the requested one
          if (!$province || $update['name'] == $province) {
              $this->getCity($id, $update['name'], $href);
          }
      }
  }
  /**
   * Store the cities listed on one province page and descend into their counties.
   *
   * When the request carries a "city" input, all cities are still stored, but
   * only the one whose name equals that input is descended into.
   *
   * @param mixed  $province      Value returned by upinto() for the owning province.
   * @param string $province_name Province name; used for rows named "市辖区".
   * @param string $link          Province page path, relative to the base URL.
   *
   * @return void
   */
  public function getCity($province, $province_name, $link)
  {
      $city = Dever::input('city');
      $html = $this->html($this->url . $link);

      // Groups: 1 = href of the code cell, 2 = code, 3 = href of the name cell, 4 = name.
      preg_match_all('/<tr class="citytr"><td><a href="(.*?)">(.*?)<\/a><\/td><td><a href="(.*?)">(.*?)<\/a><\/td><\/tr>/is', $html, $result);

      $update = array();
      if (!isset($result[3]) || !isset($result[4]) || !$result[4]) {
          return;
      }

      foreach ($result[4] as $k => $label) {
          $name = strip_tags($label);

          $update['id'] = $this->id($result[2][$k]);
          // A row named "市辖区" is stored under the province's own name.
          $update['name'] = ($name == '市辖区') ? $province_name : $name;
          $update['province_id'] = $province;
          $this->pinyin($update);
          $id = $this->upinto('area/city', $update['id'], $update);

          // No filter: walk every city. With a filter: only the matching one.
          if (!$city || $update['name'] == $city) {
              $this->getCounty($province, $id, $result[3][$k]);
          }
      }
  }
  /**
   * Store the counties listed on one city page and descend into their towns.
   *
   * If the page contains no county rows, a single placeholder county is
   * created from the city record (same id, name suffixed with "辖区") and the
   * same page is handed to getTown() to be parsed for town rows.
   *
   * @param mixed  $province    Value returned by upinto() for the owning province.
   * @param mixed  $city        Value returned by upinto() for the owning city.
   * @param string $source_link City page path, relative to the base URL.
   *
   * @return void
   */
  public function getCounty($province, $city, $source_link)
  {
      $html = $this->html($this->url . $source_link);
      if (!$html) {
          // Nothing was downloaded. Without this guard the "no county rows"
          // fallback below would write a placeholder county for this city.
          return;
      }

      // The first path segment of the city link is prefixed to every county href.
      $temp = explode('/', $source_link);
      $link = $temp[0];

      // Groups: 1 = href of the code cell, 2 = code, 3 = href of the name cell, 4 = name.
      preg_match_all('/<tr class="countytr"><td><a href="(.*?)">(.*?)<\/a><\/td><td><a href="(.*?)">(.*?)<\/a><\/td><\/tr>/i', $html, $result);

      if (!empty($result[3]) && !empty($result[4])) {
          foreach ($result[4] as $k => $v) {
              // Fresh row per county so no field survives from the previous one.
              $update = array();
              $update['id'] = $this->id($result[2][$k]);
              $update['name'] = strip_tags($v);
              $update['city_id'] = $city;
              $update['province_id'] = $province;
              $update['area'] = $province . ',' . $city;
              $this->setLevelCounty($update);
              $this->pinyin($update);
              $id = $this->upinto('area/county', $update['id'], $update);

              # Fetch the towns
              $this->getTown($province, $city, $id, $link . '/' . $result[3][$k]);
          }

          return;
      }

      $city_info = Dever::db('area/city')->find(array('id' => $city, 'clear' => true));
      if (!$city_info) {
          // The city row could not be loaded, so there is nothing to derive
          // the placeholder county from.
          return;
      }

      $update = array();
      $update['id'] = $city_info['id'];
      $update['name'] = $city_info['name'] . '辖区';
      $update['city_id'] = $city;
      $update['province_id'] = $province;
      $update['area'] = $province . ',' . $city;
      $update['type'] = 1;
      $update['level'] = 1;
      $update['pinyin'] = $city_info['pinyin'];
      $update['pinyin_first'] = $city_info['pinyin_first'];
      $id = $this->upinto('area/county', $update['id'], $update);

      # Fetch the towns from the page that was already downloaded
      $this->getTown($province, $city, $id, $source_link, $html);
  }
  /**
   * Store the towns listed on one county page.
   *
   * @param mixed        $province Value returned by upinto() for the owning province.
   * @param mixed        $city     Value returned by upinto() for the owning city.
   * @param mixed        $county   Value returned by upinto() for the owning county.
   * @param string|false $link     Page path relative to the base URL; downloaded
   *                               only when $html is not supplied.
   * @param string|false $html     Already downloaded page content for $link, if
   *                               the caller has it; avoids a second request.
   *
   * @return void
   */
  public function getTown($province, $city, $county, $link = false, $html = false)
  {
      if (!$link && !$html) {
          return;
      }

      if ($link) {
          // Reuse the caller's copy of the page when there is one instead of
          // requesting the same URL again.
          if (!$html) {
              $html = $this->html($this->url . $link);
          }

          // Directory part of the page path; only needed by the disabled
          // getVillage() call below, whose hrefs would be appended to it.
          $link = dirname($link);
      }

      // Groups: 1 = href of the code cell, 2 = code, 3 = href of the name cell, 4 = name.
      preg_match_all('/<tr class="towntr"><td><a href="(.*?)">(.*?)<\/a><\/td><td><a href="(.*?)">(.*?)<\/a><\/td><\/tr>/i', $html, $result);

      if (empty($result[3]) || empty($result[4])) {
          return;
      }

      foreach ($result[4] as $k => $v) {
          // Fresh row per town so no field survives from the previous one.
          $update = array();
          $update['id'] = $this->id($result[2][$k], 9);
          $update['name'] = strip_tags($v);
          $update['county_id'] = $county;
          $update['city_id'] = $city;
          $update['province_id'] = $province;
          $update['area'] = $province . ',' . $city . ',' . $county;
          $this->pinyin($update);
          $id = $this->upinto('area/town', $update['id'], $update);

          # Village import is currently disabled
          //$this->getVillage($province, $city, $county, $id, $link . '/' . $result[3][$k]);
      }
  }
  /**
   * Store the villages listed on one town page.
   *
   * @param mixed  $province Value returned by upinto() for the owning province.
   * @param mixed  $city     Value returned by upinto() for the owning city.
   * @param mixed  $county   Value returned by upinto() for the owning county.
   * @param mixed  $town     Value returned by upinto() for the owning town.
   * @param string $link     Town page path, relative to the base URL.
   *
   * @return void
   */
  public function getVillage($province, $city, $county, $town, $link)
  {
      $html = $this->html($this->url . $link);

      // The three cells of a row are stored as id source, 'code' and name, in that order.
      preg_match_all('/<tr class="villagetr"><td>(.*?)<\/td><td>(.*?)<\/td><td>(.*?)<\/td><\/tr>/i', $html, $result);

      $update = array();
      if (!isset($result[1], $result[2], $result[3])) {
          return;
      }

      // Identical for every row of this page.
      $area = $province . ',' . $city . ',' . $county . ',' . $town;

      foreach ($result[3] as $k => $label) {
          $update['id'] = $this->id($result[1][$k], 12);
          $update['code'] = $result[2][$k];
          $update['name'] = strip_tags($label);
          $update['town_id'] = $town;
          $update['county_id'] = $county;
          $update['city_id'] = $city;
          $update['province_id'] = $province;
          $update['area'] = $area;
          $this->pinyin($update);
          $this->upinto('area/village', $update['id'], $update);
      }
  }
  /**
   * Turn a scraped code into the id used for storage.
   *
   * Pure delegation to code() of the 'area/api' component; the exact
   * transformation is defined there.
   *
   * @param string $id  Code as scraped from the page.
   * @param int    $len Length argument forwarded to code(); 6 by default.
   *
   * @return mixed Whatever code() returns.
   */
  public function id($id, $len = 6)
  {
      $api = Dever::load('area/api');

      return $api->code($id, $len);
  }
  /**
   * Add the 'pinyin' and 'pinyin_first' fields to a row, derived from its name.
   *
   * The row is left untouched when Dever::import('pinyin') is falsy or the
   * row's name is empty.
   *
   * @param array $update Row being built; modified in place.
   *
   * @return void
   */
  public function pinyin(&$update)
  {
      if (!Dever::import('pinyin') || !$update['name']) {
          return;
      }

      $name = $update['name'];
      $update['pinyin'] = Dever::getPinyin($name);
      $update['pinyin_first'] = Dever::getPinyinFirst($name);
  }
  /**
   * Derive a county row's 'type' and 'level' from its id.
   *
   * Everything after the fourth character of the id selects the band.
   * type: 1 = urban district, 2 = suburb, 3 = county town,
   *       4 = economic/technological development zone, 5 = county-level city.
   *
   * @param array $update Row with 'id' and 'name' set; 'type' and 'level' are
   *                      written in place.
   *
   * @return void
   */
  public function setLevelCounty(&$update)
  {
      $suffix = substr($update['id'], 4);

      // Named exception: always classified as a suburb, whatever its id says.
      if ($update['name'] == '门头沟区') {
          $update['type'] = 2;
          $update['level'] = 2;
          return;
      }

      // Ordered bands: array(upper bound (inclusive), type, level).
      $bands = array(
          array(10, 1, 1),
          array(20, 2, 2),
          array(70, 3, 3),
          array(80, 4, 2),
      );

      // Anything above the last bound is a county-level city.
      $type = 5;
      $level = 2;
      foreach ($bands as $band) {
          if ($suffix <= $band[0]) {
              $type = $band[1];
              $level = $band[2];
              break;
          }
      }

      $update['type'] = $type;
      $update['level'] = $level;
  }
  /**
   * Insert a row, or update it when a row with the given id already exists.
   *
   * @param string $table Table identifier passed to Dever::db(), e.g. 'area/city'.
   * @param mixed  $id    Id looked up through one().
   * @param array  $data  Column values to write.
   *
   * @return mixed Result of insert() for a new row, otherwise of update().
   */
  public function upinto($table, $id, $data)
  {
      $existing = Dever::db($table)->one($id);

      if ($existing) {
          // Address the existing row by the id that was actually found.
          $data['where_id'] = $existing['id'];
          return Dever::db($table)->update($data);
      }

      return Dever::db($table)->insert($data);
  }
  /**
   * Download a page and normalise it so the row regexes can match it.
   *
   * HTML comments are removed, every run of whitespace is collapsed to one
   * space, and a single whitespace character between two tags is dropped.
   *
   * @param string $url Absolute URL of the page.
   *
   * @return string Normalised markup.
   */
  private function html($url)
  {
      $html = Dever::curl($url);

      // The GBK -> UTF-8 conversion is currently disabled.
      //$html = Dever::convert($html, "UTF-8", "GBK");

      // Strip HTML comments. The previous pattern was the empty regex '//',
      // which matches the empty string and therefore removed nothing. The "s"
      // modifier lets "." span line breaks so multi-line comments go too.
      $html = preg_replace('/<!--.*?-->/s', '', $html);
      $html = preg_replace('/\s+/', ' ', $html); // collapse runs of whitespace
      $html = preg_replace('/>\s</', '><', $html); // drop whitespace between tags

      return $html;
  }
  218. }