Показать сообщение отдельно

  #4  
Старый 08.09.2011, 00:18
xek
Guest
Сообщений: n/a
Провел на форуме:
8246

Репутация: 0
По умолчанию

Google URL Grabber

Граббинг по дорку:

php grabber.php index.php?id=

Продолжить граббинг со страницы 5:

php grabber.php index.php?id= 5

Граббинг по стране UK (доменная зона .uk) со страницы 10:

php grabber.php index.php?id= .uk 10

Собранные ссылки будут в links.txt

Если надо, могу выложить класс по работе с socks, прикрутить его к грабберу будет просто.

Код:
Code:
showInfo();
		if (count($this->argv) argv[0] . ' ?id=' . " .com 15\r\n";
			die;
		}
		
		$site = '';
		if (count($this->argv) == 3) {
			if (is_numeric($this->argv[2])) {
				$this->page = $this->argv[2];
			} else {
				$site = 'site:' . $this->argv[2];
			}
		} elseif (count($this->argv) == 4) {
			$site = 'site:' . $this->argv[2];
			$this->page = $this->argv[3];
		}
		$this->google_dork = 'allinurl:' . $this->argv[1] . ' filetype:php ' . $site;
	}
	
	/**
	 * Load links saved by a previous run into $this->grabbed_links.
	 *
	 * Does nothing when the links file does not exist. Otherwise every
	 * "\r\n"-terminated line of the file is appended to $this->grabbed_links
	 * (a trailing fragment without "\r\n" is ignored) and the resulting total
	 * is printed.
	 */
	public function getLinks()
	{
		if (!file_exists($this->links_filename)) {
			return;
		}

		// file_get_contents() copes with an empty file; the former
		// fread($handle, filesize(...)) was called with length 0 in that case,
		// which is an error on PHP 8, and fopen() failure was never checked.
		$content = file_get_contents($this->links_filename);
		if ($content === false) {
			// PHP has already raised a warning; carry on with nothing loaded.
			$content = '';
		}

		if (preg_match_all('/(.*?)\r\n/s', $content, $match)) {
			foreach ($match[1] as $link) {
				$this->grabbed_links[] = $link;
			}
		}
		echo 'Grabbed links: ' . count($this->grabbed_links) . "\r\n\r\n";
	}

	/**
	 * Write a string to the links file ($this->links_filename).
	 *
	 * @param string $str    Text to write.
	 * @param string $option fopen() mode, e.g. 'a' to append or 'w' to
	 *                       truncate. Defaults to 'a': an optional parameter
	 *                       followed by a required one is deprecated since
	 *                       PHP 8.0 and made the $str default unusable.
	 */
	public function writeToFile($str = '', $option = 'a')
	{
		$handle = fopen($this->links_filename, $option);
		if ($handle === false) {
			// fopen() has already raised a warning; passing false on to
			// fwrite()/fclose() would be a TypeError on PHP 8.
			return;
		}

		fwrite($handle, $str);
		fclose($handle);
	}
	
	/**
	 * Send a plain HTTP/1.0 GET for $query to the host in $this->google
	 * (port 80) and return everything read from the socket.
	 *
	 * The returned string is the raw response: nothing here strips the status
	 * line or the response headers. Reading stops at end of stream, on a read
	 * failure, or once $this->timeout seconds have elapsed since the request
	 * was sent.
	 *
	 * @param string $query Request target (path plus query string).
	 *
	 * @return string|false Raw response text, or false when the connection
	 *                      could not be opened.
	 */
	public function getContent($query) 
	{
		$fp = fsockopen($this->google, 80, $errno, $errstr, 30);
		if ($fp === false) {
			return false;
		}

		$header  = "GET " . $query . " HTTP/1.0\r\n";
		$header .= "User-Agent: Opera/9.80 (Windows NT 5.1; U; ru) Presto/2.9.168 Version/11.50\r\n";
		$header .= "Host: " . $this->google . "\r\n";
		$header .= "Connection: Close\r\n\r\n";

		fwrite($fp, $header);

		$content = '';
		$start = microtime(true);

		// The comparison against $this->timeout was truncated in the pasted
		// source ("... - $start) timeout)"); restored here as "<".
		while (!feof($fp) && (microtime(true) - $start) < $this->timeout) {
			$line = fgets($fp);
			if ($line === false) {
				// Read error or nothing more to read: stop instead of spinning
				// until the timeout expires.
				break;
			}
			$content .= $line;
		}
		fclose($fp);

		return $content;
	}
	
	/**
	 * Page through search results for $this->google_dork, starting at
	 * $this->page, and append every newly seen link to the links file.
	 *
	 * For each page: fetch it with getContent(), extract URLs with
	 * $url_regexp, skip URLs without '=' and URLs whose domain part already
	 * occurs in a known link, then echo and save the rest. The loop stops when
	 * the response contains "<google host>/sorry/" or when $next_page_regexp
	 * no longer matches.
	 *
	 * NOTE(review): this method is damaged in the pasted source (see the notes
	 * on the regex literals below) and does not parse as written.
	 */
	public function grabbing()
	{
		$nextPageExist = true;
		// NOTE(review): both patterns below appear truncated — text between
		// '<' and '>' seems to have been stripped when the code was posted.
		// '//s' is an empty pattern, and the $url_regexp literal runs into the
		// remains of another statement, leaving unbalanced quotes. Restore
		// both from the original source before use.
		$next_page_regexp = '//s';
		$url_regexp = '/google_dork) . '&btnG=Search&num=100&start=';
		$query = '/search?hl=en&as_q=' . urlencode($this->google_dork) . '&as_qdr=all&num=100&start=';
		$page = $this->page;
		// Local working copy; additions made below are not written back to
		// $this->grabbed_links.
		$grabbed_links = $this->grabbed_links;
		
		while ($nextPageExist) {
			// Appending '00' turns page N into the result offset N*100
			// (the query asks for num=100 results per page).
			$_query = $query . $page . '00';
			$content = $this->getContent($_query);
			$page++;
			
			if (empty($content)) {
				echo "\r\n" . 'Stopped by socket error!' . "\r\n";
				echo 'On page: ' . --$page . "\r\n";
				// There is no break here: execution falls through to the
				// checks below with the empty $content.
			}
			
			if (preg_match_all($url_regexp, $content, $match)) {
				foreach ($match[1] as $_url) {
					$linkExist = false;
					$url = html_entity_decode(urldecode($_url)) . "\r\n";
					// Only URLs carrying a '=' are kept.
					if (!strstr($url, '=')) continue;

					// NOTE(review): '(?P.*?...)' is not a valid named group,
					// yet $match['domain'] is read below — presumably this was
					// '(?P<domain>...)'; confirm. Note also that $match is
					// reused here while the outer loop iterates $match[1].
					preg_match('/(?P.*?\..*?)\//s', $url, $match);
					// A link counts as a duplicate when its domain part occurs
					// anywhere inside an already known link.
					foreach ($grabbed_links as $links) {
						if (strstr($links, $match['domain']) !== false) {
							$linkExist = true;
							break;
						}
					}
					if (!$linkExist) {
						$grabbed_links[] = $url;
					} else { continue; }
					
					echo $url;
					$this->writeTofile($url, 'a');
				}
			}

			// A "/sorry/" URL on the search host in the response is treated as
			// the anti-bot page; report the page number and stop.
			if (preg_match('/' . $this->google . '\/sorry\//s', $content)) {
				echo "\r\n" . 'Stopped by AntiBot System!' . "\r\n";
				echo 'On page: ' . --$page . "\r\n";
				break;
			}
			

			if (!preg_match($next_page_regexp, $content)) $nextPageExist = false;
		}
	}
	
	/**
	 * Keep the command-line arguments and immediately run the whole job:
	 * parse the parameters, load previously saved links, then grab.
	 *
	 * @param array $argv Command-line arguments as passed to the script.
	 */
	public function __construct($argv)
	{
		$this->argv = $argv;

		// Order matters: grabbing() reads the page, dork and known links that
		// the two earlier steps set up.
		foreach (array('getParameters', 'getLinks', 'grabbing') as $step) {
			$this->$step();
		}
	}
}

// Script entry point: constructing the object runs the whole grab, because the
// constructor itself calls getParameters(), getLinks() and grabbing().
$googleURLGrabber = new googleURLGrabber($argv);
 
Ответить с цитированием