c52ec765735d90f575de5f7c1a1c9eac83859968.svn-base 1.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. <?php
  2. include_once('../../simple_html_dom.php');
  3. function scraping_IMDB($url) {
  4. // create HTML DOM
  5. $html = file_get_html($url);
  6. // get title
  7. $ret['Title'] = $html->find('title', 0)->innertext;
  8. // get rating
  9. $ret['Rating'] = $html->find('div[class="general rating"] b', 0)->innertext;
  10. // get overview
  11. foreach($html->find('div[class="info"]') as $div) {
  12. // skip user comments
  13. if($div->find('h5', 0)->innertext=='User Comments:')
  14. return $ret;
  15. $key = '';
  16. $val = '';
  17. foreach($div->find('*') as $node) {
  18. if ($node->tag=='h5')
  19. $key = $node->plaintext;
  20. if ($node->tag=='a' && $node->plaintext!='more')
  21. $val .= trim(str_replace("\n", '', $node->plaintext));
  22. if ($node->tag=='text')
  23. $val .= trim(str_replace("\n", '', $node->plaintext));
  24. }
  25. $ret[$key] = $val;
  26. }
  27. // clean up memory
  28. $html->clear();
  29. unset($html);
  30. return $ret;
  31. }
  32. // -----------------------------------------------------------------------------
  33. // test it!
  34. $ret = scraping_IMDB('http://imdb.com/title/tt0335266/');
  35. foreach($ret as $k=>$v)
  36. echo '<strong>'.$k.' </strong>'.$v.'<br>';
  37. ?>