|
Участник форума
Регистрация: 01.03.2006
Сообщений: 169
Провел на форуме: 519496
Репутация:
78
|
|
Такая проблема. Скрипт для граббинга новостей с newsland. Но почему то при нажатии на кнопку, он производит граббинг 2 раза. Не пойму почему(((
PHP код:
<html>
<form method="post">
<input type="submit" name="sub" value="Грабить новости">
</form>
</html>
<?php
if (isset($_POST['sub']))
{
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://newsland.ru/index/index/ord/22/cat/64/");
curl_setopt($ch, CURLOPT_HEADER, FALSE);
curl_setopt($ch, CURLOPT_NOBODY, FALSE);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
curl_setopt($ch, CURLOPT_USERAGENT, "MSIE 7.0");
curl_setopt($ch, CURLOPT_TIMEOUT, 20);
$text = curl_exec($ch);
curl_close($ch);
preg_match_all("#<a href=\"\/News\/Detail\/id\/(.+)\">#",$text,$results);
for($i=0;$i<count($results[0]);$i++)
{
$urlsis="http://newsland.ru".str_replace(array('<a href="','">'),null,$results[0][$i]);
if ($urlsis!="http://newsland.ru/News/Detail/id/284971/cat/78")$urls[]=$urlsis;
}
for($j=0;$j<count($urls);$j++)
{
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $urls[$j]);
curl_setopt($ch, CURLOPT_HEADER, FALSE);
curl_setopt($ch, CURLOPT_NOBODY, FALSE);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
curl_setopt($ch, CURLOPT_USERAGENT, "MSIE 7.0");
curl_setopt($ch, CURLOPT_TIMEOUT, 20);
$text_news = curl_exec($ch);
curl_close($ch);
preg_match('#<h1 class="mt10 grey">(.*?)</h1>(.*?)<div class="bodytext mt5">(.*?)</div></div>#s',$text_news,$result_news);
$result_news[0]=iconv("UTF-8", "WINDOWS-1251",$result_news[0]);
preg_match('#<img src="(.+)" title="(.+)" alt="(.+)" class="floatr mt10">#',$result_news[0],$result_image);
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://newsland.ru".$result_image[1]);
curl_setopt($ch, CURLOPT_HEADER, FALSE);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
curl_setopt($ch, CURLOPT_USERAGENT, "MSIE 7.0");
curl_setopt($ch, CURLOPT_TIMEOUT, 20);
$result_load_image = curl_exec($ch);
curl_close($ch);
$name_image=substr($result_image[1],strrpos($result_image[1],"/")+1);
$good_image = fopen("./rssnews_image/".$name_image, "w");
fputs($good_image, $result_load_image);
$src="<div style=\"float: left;\"><img src=\"./rssnews_image/".$name_image."\" title=\"".$result_image[2]."\" alt=\"".$result_image[2]."\" class=\"imagestyle\"></div>";
$time=date(d."-".m."-".y)."\r\n";
$name=date(d."-".m."-".y."-".H."-".i."-".s);
$title=$result_image[2]."\r\n";
$body=str_replace(array('<div class="bodytext mt5">',$result_image[0],'</div>','<div class="newsctrl f12 mt10"><div>','</div></div>'),null,$result_news[0]);
$body=preg_replace('#<h1 class="mt10 grey">(.+)</h1>#',null,$body);
$body=str_replace('<a','<br><br><a',$body);
$good = fopen("./rssnews_/".$name.".txt", "a");
fwrite($good,$time.$title.$src."\r\n".trim($body));
fclose($good);
unset($body);
unset($name);
unset($title);
unset($src);
unset($time);
unset($text_news);
unset($result_news);
unset($result_load_image);
unset($good);
unset($good_image);
sleep(3);
}
}
?>
|