rabin 2 years ago
parent
commit
69345c0674
1 changed file with 18 additions and 15 deletions
  1. 18 15
      src/Import.php

+ 18 - 15
src/Import.php

@@ -24,9 +24,7 @@ class Import
     {
         $url = $this->url . 'index.html';
         
-        $html = Dever::curl($url);
-
-        //$html = Dever::convert($html, "UTF-8", "GBK");
+        $html = $this->html($url);
 
         preg_match_all('/<td><a href="(.*?)">(.*?)<br \/><\/a><\/td>/i', $html, $result);
 
@@ -67,11 +65,11 @@ class Import
 
         $url = $this->url . $link;
         
-        $html = Dever::curl($url);
+        $html = $this->html($url);
 
-        //$html = Dever::convert($html, "UTF-8", "GBK");
+        preg_match_all('/<tr class="citytr"><td><a href="(.*?)">(.*?)<\/a><\/td><td><a href="(.*?)">(.*?)<\/a><\/td><\/tr>/is', $html, $result);
 
-        preg_match_all('/<tr class="citytr"><td><a href="(.*?)">(.*?)<\/a><\/td><td><a href="(.*?)">(.*?)<\/a><\/td><\/tr>/i', $html, $result);
+        print_r($result);die;
 
         $update = array();
         if (isset($result[3]) && isset($result[4]) && $result[4]) {
@@ -105,9 +103,7 @@ class Import
         $temp = explode('/', $source_link);
         $link = $temp[0];
         
-        $html = Dever::curl($url);
-
-        //$html = Dever::convert($html, "UTF-8", "GBK");
+        $html = $this->html($url);
 
         preg_match_all('/<tr class="countytr"><td><a href="(.*?)">(.*?)<\/a><\/td><td><a href="(.*?)">(.*?)<\/a><\/td><\/tr>/i', $html, $result);
 
@@ -153,9 +149,7 @@ class Import
             $temp = explode('/', $link);
             $link = $temp[0] . '/' . $temp[1];
             
-            $html = Dever::curl($url);
-
-            //$html = Dever::convert($html, "UTF-8", "GBK");
+            $html = $this->html($url);
         }
         if (!$link && !$html) {
             return;
@@ -185,9 +179,7 @@ class Import
     {
         $url = $this->url . $link;
         
-        $html = Dever::curl($url);
-
-        //$html = Dever::convert($html, "UTF-8", "GBK");
+        $html = $this->html($url);
 
         preg_match_all('/<tr class="villagetr"><td>(.*?)<\/td><td>(.*?)<\/td><td>(.*?)<\/td><\/tr>/i', $html, $result);
 
@@ -257,4 +249,15 @@ class Import
             return Dever::db($table)->update($data);
         }
     }
+
+    /**
+     * Download a page and normalise its markup before the regex-based
+     * extraction steps run on it.
+     *
+     * The extraction patterns in this class expect tags to follow each other
+     * with no whitespace in between, so the markup is flattened here: HTML
+     * comments are dropped, whitespace runs are collapsed, and a single
+     * whitespace character between adjacent tags is removed.
+     *
+     * @param string $url Address of the page to download.
+     * @return string|null Normalised markup; preg_replace yields null on a PCRE error.
+     *                     NOTE(review): assumes Dever::curl returns the response body as a string - confirm.
+     */
+    private function html($url)
+    {
+        $html = Dever::curl($url);
+
+        // Encoding conversion is currently disabled; kept for reference.
+        //$html = Dever::convert($html, "UTF-8", "GBK");
+        // Remove HTML comments. The previous pattern was empty ('//'), which only
+        // matches the empty string and therefore removed nothing. The "s" modifier
+        // lets "." span newlines so multi-line comments are removed as well.
+        $html = preg_replace('/<!--.*?-->/s', '', $html);
+        $html = preg_replace('/\s+/', ' ', $html); // collapse every whitespace run into one space
+        $html = preg_replace('/>\s</', '><', $html); // drop the whitespace left between adjacent tags
+        return $html;
+    }
 }