54a6bd2084d45213741d0d920a618a28e100e119.svn-base 1016 B

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. <?php
  2. include_once('../../simple_html_dom.php');
  /**
   * Scrape the news summaries from the Digg front page.
   *
   * Loads http://digg.com/ through file_get_html() and, for every
   * `div.news-summary` element found, collects the trimmed plain text of its
   * first `h3` (title), first `p` (details) and first `li a strong` (diggs).
   *
   * @return array List of entries, each an array with the string keys 'title',
   *               'details' and 'diggs'. The list is empty when the page could
   *               not be loaded or holds no matching blocks; a field whose
   *               node is missing is returned as ''.
   */
  function scraping_digg() {
      // Always hand back an array so callers can iterate the result safely.
      $ret = array();

      // create HTML DOM
      $html = file_get_html('http://digg.com/');
      if (!is_object($html)) {
          // NOTE(review): presumably file_get_html() yields false when the
          // fetch fails -- confirm against the bundled simple_html_dom version.
          return $ret;
      }

      // result key => selector of the node (relative to the news block)
      $selectors = array(
          'title'   => 'h3',
          'details' => 'p',
          'diggs'   => 'li a strong',
      );

      // get news block
      foreach ($html->find('div.news-summary') as $article) {
          // Start from a fresh entry so nothing leaks over from the previous block.
          $item = array();

          foreach ($selectors as $key => $selector) {
              // find(..., 0) gives no node when the selector does not match;
              // fall back to '' instead of reading a property of a non-object.
              $node = $article->find($selector, 0);
              $item[$key] = is_object($node) ? trim($node->plaintext) : '';
          }

          $ret[] = $item;
      }

      // clean up memory
      $html->clear();
      unset($html);

      return $ret;
  }
  // -----------------------------------------------------------------------------
  // test it!
  // "http://digg.com" will check user_agent header...
  ini_set('user_agent', 'My-Application/2.5');

  $ret = scraping_digg();
  if (!is_array($ret)) {
      // Nothing usable came back; iterate over an empty list instead of
      // passing a non-array to foreach.
      $ret = array();
  }

  foreach ($ret as $v) {
      // The scraped text is untrusted remote content, so escape it before it is
      // written into the HTML output. double_encode=false keeps entities that
      // are already present in the text from being encoded a second time;
      // ENT_SUBSTITUTE avoids an empty result on invalid byte sequences.
      $flags   = ENT_QUOTES | ENT_SUBSTITUTE;
      $title   = htmlspecialchars($v['title'], $flags, 'UTF-8', false);
      $details = htmlspecialchars($v['details'], $flags, 'UTF-8', false);
      $diggs   = htmlspecialchars($v['diggs'], $flags, 'UTF-8', false);

      echo $title.'<br>';
      echo '<ul>';
      echo '<li>'.$details.'</li>';
      echo '<li>Diggs: '.$diggs.'</li>';
      echo '</ul>';
  }
  33. ?>