12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091 |
- <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
- <html>
- <head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <title>PHP Simple HTML DOM Parser: FAQ</title>
- <link href="css/default.css" rel="stylesheet" type="text/css">
- </head>
- <body>
- <h1><a name="top"></a>PHP Simple HTML DOM Parser Manual</h1>
- <div id="content">
- <h2>FAQ</h2>
- <ul>
- <li><a href="manual.htm">Back</a></li>
- <li><a href="#find">Problem with finding</a></li>
- <li><a href="#hosting">Problem with hosting</a></li>
- <li><a href="#proxy">Behind a proxy</a></li>
- <li><a href="#memory_leak">Memory leak!</a></li>
- </ul>
- <div>
- <a name="find"></a>
- <h2>Problem with finding</h2>
- <a class="top" href="#top">Top</a>
- <div class="code"> <span class="var">Q:</span> Element not found in such case:<br>
- $html->find('div[style=<span class="var">padding: 0px 2px;</span>] span[class=rf]');<br>
- <br>
- <span class="var">A:</span> If an attribute value in a selector contains a blank (space), put the value in quotes! <br>
- $html->find('div[style=<strong class="var">"padding: 0px 2px;"</strong>] span[class=rf]');</div>
- <a name="hosting"></a>
- <h2>Problem with hosting</h2>
- <a class="top" href="#top">Top</a>
- <div class="code"> <span class="var">Q:</span> On my local server everything works fine, but when I put it on my external server it doesn't work. <br>
- <br>
- <span class="var">A:</span> The "file_get_html" function is a wrapper around the "file_get_contents" function, so you must set "<strong>allow_url_fopen</strong>" to <strong>TRUE</strong> in "php.ini" to allow accessing files via HTTP or FTP. However, some hosting vendors disable PHP's "allow_url_fopen" flag for security reasons. PHP provides excellent support for the "curl" library to do the same job: use curl to get the page, then call "str_get_html" to create the DOM object. <br>
- <br>
- Example: <br>
- <br>
- $curl = curl_init(); <br>
- curl_setopt(<span class="var">$curl, CURLOPT_URL, 'http://????????'</span>); <br>
- curl_setopt(<span class="var">$curl, CURLOPT_RETURNTRANSFER, 1</span>); <br>
- curl_setopt(<span class="var">$curl, CURLOPT_CONNECTTIMEOUT, 10</span>); <br>
- $str = curl_exec(<span class="var">$curl</span>); <br>
- curl_close($curl); <br>
- <br>
- $html= <strong>str_get_html</strong>($str); <br>
- ... </div>
- <a name="proxy"></a>
- <div>
- <h2>Behind a proxy</h2>
- <a class="top" href="#top">Top</a>
- <div class="code"> <span class="var">Q:</span> My server is behind a proxy and I can't use file_get_contents because it returns an unauthorized error.<br>
- <br>
- <span class="var">A:</span> Thanks to Shaggy for providing the solution: <br>
- <br>
- <span class="comment">// Define a context for HTTP. </span><br>
- $context = array<br>
- ( <br>
- <span class="var">'http'</span> => array<br>
- ( <br>
- <span class="var">'proxy'</span> => <span class="var">'addresseproxy:portproxy'</span>, <span class="comment">// This needs to be the server and the port of the NTLM Authentication Proxy Server. </span><br>
- <span class="var"> 'request_fulluri'</span> => true, <br>
- ), <br>
- ); <br>
- <br>
- $context = <strong>stream_context_create</strong>($context); <br>
- <br>
- $html= <strong>file_get_html</strong>(<span class="var">'http://www.php.net'</span>, <span class="var">false</span>, <span class="var">$context</span>); <br>
- ...<br>
- </div>
- </div>
- <a name="memory_leak"></a>
- <h2>Memory leak!</h2>
- <a class="top" href="#top">Top</a>
- <div class="code"> <span class="var">Q:</span> This script is leaking memory seriously... After it finished running, it's not cleaning up dom object properly from memory.. <br>
- <br>
- <span class="var">A:</span> Due to the PHP 5 circular-reference memory leak, after creating a DOM object you must call $html->clear() to free its memory if you call file_get_html() more than once. <br>
- <br>
- Example: <br>
- <br>
- $html = file_get_html(...); <br>
- <span class="comment">// do something... </span><br>
- $html->clear(); <br>
- unset($html);</div>
- <br>
- Author: S.C. Chen (me578022@gmail.com)<br>
- Original idea is from Jose Solorzano's <a href="http://php-html.sourceforge.net/">HTML Parser for PHP 4</a>. <br>
- Contributions by: Yousuke Kumakura, Vadim Voituk, Antcs<br>
- </div>
- </div>
- </body>
- </html>
- <!--$Rev: 165 $-->
|