| 
<?
/*
 Author: Alexey G. Piyanin (e-mail: drdrzlo at mail dot ru)
 Date:   Jun 7 2006
 Title:  Get page part
 */
 include('SAXParser.php');
 
 /**
  * Start-tag callback registered with the parser via initFunc().
  *
  * Pushes $tag onto the front of the global $stack unless it is listed in the
  * global $t. Once $isBeginNews is set, an <a> whose element path matches the
  * news-link path stores its href under the current news index, and a <table>
  * at the matching path, seen after at least one news item was counted,
  * returns -1.
  *
  * @param string $tag        Tag name as reported by the parser.
  * @param array  $attributes Attribute map of the tag; 'href' may be absent.
  * @param mixed  $readSize   Not used by this callback.
  * @return int|null -1 at the table that follows the news list (presumably a
  *                  "stop parsing" signal for HTML_SAXParser - confirm against
  *                  SAXParser.php); null otherwise.
  */
 function begin($tag,$attributes,$readSize){
  global $stack,$t,$isBeginNews,$news,$currentNewsIndex;
  // Tags listed in $t are never pushed; strict comparison avoids loose string juggling.
  if (!in_array($tag,$t,true)) array_unshift($stack,$tag);
  if (!$isBeginNews) return;
  // Innermost-first element path, e.g. "a/font/td/.../body/html".
  $path = join('/',$stack);
  if ($tag=='a' && $path=='a/font/td/tr/table/td/tr/table/td/tr/table/font/center/body/html'){
   // An anchor may carry no href (e.g. a named anchor); store null instead of
   // reading an undefined index.
   $news[$currentNewsIndex]['href'] = isset($attributes['href']) ? $attributes['href'] : null;
  }elseif($currentNewsIndex>0 && $tag=='table' && $path=='table/font/td/tr/table/td/tr/table/td/tr/table/font/center/body/html'){
   return -1;
  }
 }
 
 /**
  * End-tag callback registered with the parser via initFunc().
  *
  * When a news link (<a> at the news-link path) closes while $isBeginNews is
  * set, advances $currentNewsIndex to the next item. Then unwinds the global
  * $stack up to and including the nearest open $tag.
  *
  * A closing tag that is not on the stack at all (a stray close tag, or a tag
  * that is never pushed) is ignored; previously it emptied the whole stack,
  * which made every later path comparison fail.
  *
  * @param string $tag      Tag name as reported by the parser.
  * @param mixed  $readSize Not used by this callback.
  * @return void
  */
 function endTag($tag,$readSize){
  global $stack,$isBeginNews,$currentNewsIndex;
  if ($isBeginNews && $tag=='a' && join('/',$stack)=='a/font/td/tr/table/td/tr/table/td/tr/table/font/center/body/html'){
   $currentNewsIndex++;
  }
  // Index 0 is the innermost open element, so the first hit is the nearest one.
  $depth = array_search($tag,$stack,true);
  if ($depth===false) return; // unmatched close tag: leave the stack untouched
  // Drop implicitly closed children together with the tag itself.
  array_splice($stack,0,$depth+1);
 }
 
 /**
  * Character-data callback registered with the parser via initFunc().
  *
  * Before the news section has started, watches for the text "in the news"
  * (compared case-insensitively) at the section-header path and sets
  * $isBeginNews. Afterwards, text found at the news-link path is stored as the
  * 'text' of the current news item.
  *
  * @param string $str Text reported by the parser.
  * @return void
  */
 function character($str){
  global $stack,$isBeginNews,$news,$currentNewsIndex;
  // Innermost-first element path of the text node's parent chain.
  $path = implode('/',$stack);
  if ($isBeginNews){
   // Inside the news section: only text directly inside a news link is kept.
   if ($path=='a/font/td/tr/table/td/tr/table/td/tr/table/font/center/body/html'){
    $news[$currentNewsIndex]['text'] = $str;
   }
   return;
  }
  // Still looking for the header that begins the "In the News" part.
  if ($path=='font/a/b/td/tr/table/td/tr/table/td/tr/table/font/center/body/html' && strtolower($str)=='in the news'){
   $isBeginNews = true;
  }
 }
 
 // Tags that begin() never pushes onto the element stack.
 $t = array('br','meta','img','spacer','input','base','hr','link',);
 // Open-element stack, innermost element first; maintained by begin()/endTag().
 $stack = array();
 // Page that is both shown in the iframe and parsed for the news list.
 $URL = 'http://yahoo.com';
 
 // Set by character() once the "In the News" header text has been seen.
 $isBeginNews = false;
 
 // Index of the news item currently being filled, and the collected items
 // (each may hold 'href' and/or 'text').
 $currentNewsIndex = 0;
 $news = array();
 $parser = new HTML_SAXParser();
 $parser->initFunc('begin','endTag','character');?>
 <html>
 <body>
 <center>Source page:<br><iframe src="<?=$URL?>" width="600" height="400" ></iframe><br><br></center>
 News list (part "In the News"):<br>
 <?php $parser->parse($URL);
 foreach($news as $row){
  // An item may have been captured with only one of its parts.
  $href = isset($row['href']) ? $row['href'] : '';
  $text = isset($row['text']) ? $row['text'] : '';
  // Both values come from a remote page, so escape them before echoing.
  // double_encode=false keeps entities already present in the scraped markup
  // from being encoded a second time.
  $safeHref = htmlspecialchars($URL.'/'.$href,ENT_QUOTES|ENT_SUBSTITUTE,'UTF-8',false);
  $safeText = htmlspecialchars($text,ENT_QUOTES|ENT_SUBSTITUTE,'UTF-8',false);?>
 <a href="<?=$safeHref?>" target="_blank"><?=$safeText?></a><br>
 <?php }?>
 </body></html>
 |