ef24fef16c144311f7570b985127fa8d0720f9c4.svn-base 4.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
  2. <html>
  3. <head>
  4. <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  5. <title>PHP Simple HTML DOM Parser: FAQ</title>
  6. <link href="css/default.css" rel="stylesheet" type="text/css">
  7. </head>
  8. <body>
  9. <h1><a name="top"></a>PHP Simple HTML DOM Parser Manual</h1>
  10. <div id="content">
  11. <h2>FAQ</h2>
  12. <ul>
  13. <li><a href="manual.htm">Back</a></li>
  14. <li><a href="#find">Problem with finding</a></li>
  15. <li><a href="#hosting">Problem with hosting</a></li>
  16. <li><a href="#proxy">Behind a proxy</a></li>
  17. <li><a href="#memory_leak">Memory leak!</a></li>
  18. </ul>
  19. <div>
  20. <a name="find"></a>
  21. <h2>Problem with finding</h2>
  22. <a class="top" href="#top">Top</a>
  23. <div class="code"> <span class="var">Q:</span> Element not found in such case:<br>
  24. $html-&gt;find('div[style=<span class="var">padding: 0px 2px;</span>] span[class=rf]');<br>
  25. <br>
  26. <span class="var">A:</span> If there is a blank (space) in the selector, quote it!  <br>
  27. $html-&gt;find('div[style=<strong class="var">&quot;padding: 0px 2px;&quot;</strong>] span[class=rf]');</div>
  28. <a name="hosting"></a>
  29. <h2>Problem with hosting</h2>
  30. <a class="top" href="#top">Top</a>
  31. <div class="code"> <span class="var">Q:</span> On my local server everything works fine, but when I put it on my external server it doesn't work. <br>
  32. <br>
  33. <span class="var">A:</span> The "file_get_dom" function is a wrapper of the "file_get_contents" function, so you must set "<strong>allow_url_fopen</strong>" to <strong>TRUE</strong> in "php.ini" to allow accessing files via HTTP or FTP. However, some hosting vendors disable PHP's "allow_url_fopen" flag for security reasons. PHP provides excellent support for the "curl" library to do the same job: use curl to get the page, then call "str_get_dom" to create the DOM object. <br>
  34. <br>
  35. Example: <br>
  36.  <br>
  37. $curl = curl_init(); <br>
  38. curl_setopt(<span class="var">$curl, CURLOPT_URL, 'http://????????'</span>);  <br>
  39. curl_setopt(<span class="var">$curl, CURLOPT_RETURNTRANSFER, 1</span>);  <br>
  40. curl_setopt(<span class="var">$curl, CURLOPT_CONNECTTIMEOUT, 10</span>);  <br>
  41. $str = curl_exec(<span class="var">$curl</span>);  <br>
  42. curl_close($curl);  <br>
  43.  <br>
  44. $html= <strong>str_get_html</strong>($str); <br>
  45. ...  </div>
  46. <a name="proxy"></a>
  47. <div>
  48. <h2>Behind a proxy</h2>
  49. <a class="top" href="#top">Top</a>
  50. <div class="code"> <span class="var">Q:</span> My server is behind a proxy and I can't use file_get_contents because it returns an unauthorized error.<br>
  51. <br>
  52. <span class="var">A:</span> Thanks to Shaggy for providing the solution: <br>
  53.  <br>
  54. <span class="comment">// Define a context for HTTP. </span><br>
  55. $context = array<br>
  56. ( <br>
  57. &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <span class="var">'http'</span> =&gt; array<br>
  58. &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; ( <br>
  59. &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <span class="var">'proxy'</span> =&gt; <span class="var">'addresseproxy:portproxy'</span>, <span class="comment">// This needs to be the server and the port of the NTLM Authentication Proxy Server. </span><br>
  60. &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="var"> 'request_fulluri'</span> =&gt; true, <br>
  61. &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; ), <br>
  62. ); <br>
  63. <br>
  64. $context = <strong>stream_context_create</strong>($context); <br>
  65.  <br>
  66. $html= <strong>file_get_html</strong>(<span class="var">'http://www.php.net'</span>, <span class="var">false</span>, <span class="var">$context</span>); <br>
  67. ...<br>
  68. </div>
  69. </div>
  70. <a name="memory_leak"></a>
  71. <h2>Memory leak!</h2>
  72. <a class="top" href="#top">Top</a>
  73. <div class="code"> <span class="var">Q:</span> This script is leaking memory seriously... After it finished running, it's not cleaning up dom object properly from memory.. <br>
  74. <br>
  75. <span class="var">A:</span> Due to the php5 circular references memory leak, after creating a DOM object you must call $dom-&gt;clear() to free memory if you call file_get_dom() more than once. <br>
  76. <br>
  77. Example: <br>
  78. <br>
  79. $html = file_get_html(...); <br>
  80. <span class="comment">// do something... </span><br>
  81. $html-&gt;clear(); <br>
  82. unset($html);</div>
  83. <br>
  84. Author: S.C. Chen (me578022@gmail.com)<br>
  85. Original idea is from Jose Solorzano's <a href="http://php-html.sourceforge.net/">HTML Parser for PHP 4</a>. <br>
  86. Contributions by: Yousuke Kumakura, Vadim Voituk, Antcs<br>
  87. </div>
  88. </div>
  89. </body>
  90. </html>
  91. <!--$Rev: 165 $-->