PDA

Просмотр полной версии : граббер для devki.ws =))


sn0w
03.10.2009, 18:38
как сказать-то...)) ценителям женской красоты посвящается)

скачивает картинки) сразу говорю - НЕ ПОРНОГРАФИЯ
фотки хорошего качества, так что не особо быстро льются.

exe тут - http://www.rapidshare.ru/1196291


/*
* devki.ws image grabber by sn0w (c) 2009, for educational purposes only =)
* antichat.ru
* getitshot v 1.0
*/


#include <stdio.h>
#include <string.h>
#include <conio.h>
#include <windows.h>
#include <wininet.h>
#pragma comment(lib,"wininet")


// Set once in main() from the user's Y/N answer; when TRUE,
// process_single_entry() creates a numbered sub-directory per gallery
// and stores that gallery's images inside it.
BOOL g_bCreateFolders;
//////////////////////////////////////////////////////////////////////////
/*
 * halloc - allocate `size` bytes from the default process heap.
 *
 * The block is requested with HEAP_ZERO_MEMORY, so it comes back zero-filled.
 * Returns the pointer produced by HeapAlloc (release it with hfree).
 */
void* halloc(size_t size)
{
    void *block = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);

    return block;
}

/*
 * hrealloc - resize a block previously obtained from halloc/hrealloc.
 *
 * oldmem: block to resize; NULL is accepted and behaves like a fresh
 *         zero-filled allocation (HeapReAlloc itself does not accept NULL).
 * size:   new size in bytes.
 *
 * HEAP_ZERO_MEMORY is passed so that, when the block grows, the added bytes
 * are zeroed - matching what halloc() guarantees for new blocks.
 *
 * Returns the (possibly moved) block, or NULL on failure; on failure the
 * original block is left untouched and still owned by the caller.
 */
void* hrealloc(void* oldmem, size_t size)
{
    if(!oldmem)
        return HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);

    return HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, oldmem, size);
}

/*
 * hsize - query the size of a block that lives on the default process heap.
 *
 * mem: block to inspect.
 * Returns the value reported by HeapSize for that block.
 */
size_t hsize(void *mem)
{
    size_t bytes = HeapSize(GetProcessHeap(), 0, mem);

    return bytes;
}

/*
 * hfree - hand a block back to the default process heap.
 *
 * mem: block to release. The result of HeapFree is not reported to the caller.
 */
void hfree(void* mem)
{
    (void)HeapFree(GetProcessHeap(), 0, mem);
}

/////////////////////////////////////////////////////////////////////////////
/*
 * Inet_GET - issue an HTTP GET to host:80 via WinINet and return the body.
 *
 * host:           server name, e.g. "devki.ws".
 * request:        object to request (path and query string).
 * retlen:         receives the number of body bytes read; 0 when the
 *                 connection, the request or the transfer failed.
 * KeepConnection: non-zero adds INTERNET_FLAG_KEEP_CONNECTION to the request.
 *
 * Returns a heap buffer that the caller must release with hfree(). The
 * buffer always holds *retlen bytes followed by a terminating NUL, so callers
 * may run string functions (strstr etc.) over it. On a network failure an
 * empty, NUL-terminated buffer is returned (same as before: callers test
 * *retlen, not the pointer). NULL is returned only if the initial allocation
 * itself fails.
 */
LPVOID Inet_GET(IN LPSTR host, IN LPSTR request, OUT LPDWORD retlen, IN BOOL KeepConnection)
{
    enum { CHUNK = 4096 };   /* bytes requested per InternetReadFile call */

    LPCSTR szAccept[] = {"*/*", NULL};
    LPCSTR szUserAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; ru; rv:1.9.1.3) Gecko/20090824 Firefox/3.5.3";

    HINTERNET hInternet = NULL;
    HINTERNET hConnect = NULL;
    HINTERNET hRequest = NULL;
    DWORD flags = INTERNET_FLAG_RELOAD | INTERNET_FLAG_PRAGMA_NOCACHE;
    DWORD size = 0, curr = 0;
    char *outbuff;
    char *grown;

    *retlen = 0;

    /* One spare byte so the terminator always fits after a full chunk. */
    outbuff = (char*)halloc(CHUNK + 1);
    if(!outbuff)
        return NULL;

    if(KeepConnection)
        flags |= INTERNET_FLAG_KEEP_CONNECTION;

    hInternet = InternetOpen(szUserAgent, INTERNET_OPEN_TYPE_DIRECT, NULL, NULL, 0);
    if(!hInternet)
        goto done;

    hConnect = InternetConnect(hInternet, host, 80, NULL, NULL, INTERNET_SERVICE_HTTP, 0, 0);
    if(!hConnect)
        goto done;

    hRequest = HttpOpenRequest(hConnect, "GET", request, NULL, NULL, szAccept, flags, 0);
    if(!hRequest)
        goto done;

    if(!HttpSendRequest(hRequest, NULL, 0, NULL, 0))
        goto done;

    /* Read chunk by chunk; after each chunk make room for the next one. */
    while(InternetReadFile(hRequest, outbuff + size, CHUNK, &curr) && curr != 0){
        size += curr;

        grown = (char*)hrealloc(outbuff, (size_t)size + CHUNK + 1);
        if(!grown){
            /* Out of memory: report failure instead of a truncated body.
               outbuff is still valid because the old pointer was kept. */
            size = 0;
            break;
        }
        outbuff = grown;
    }

done:
    if(hRequest)  InternetCloseHandle(hRequest);
    if(hConnect)  InternetCloseHandle(hConnect);
    if(hInternet) InternetCloseHandle(hInternet);

    outbuff[size] = 0;
    *retlen = size;
    return outbuff;
}
//////////////////////////////////////////////////////////////////////////
/*
 * Get_Image - download one object from the image host "pix01.devki.ws".
 *
 * url: object to request on that host.
 * len: receives the byte count reported by Inet_GET.
 *
 * Returns the buffer produced by Inet_GET (keep-alive is not requested).
 */
LPVOID Get_Image(char *url, DWORD *len)
{
    LPVOID image = Inet_GET("pix01.devki.ws", url, len, 0);

    return image;
}

//////////////////////////////////////////////////////////////////////////
/*
 * parse_page - extract slideshow links from a gallery listing page.
 *
 * page: NUL-terminated HTML text.
 * len:  length reported for the page; 0 means "nothing to parse".
 *
 * Every occurrence of  "XXXXXXXX_slideshow.html"  (text between an opening
 * double quote and the marker's closing double quote) is appended as one line
 * to "pagelinks.txt". A link identical to the one found immediately before it
 * is written only once. Occurrences that have no opening quote in the page,
 * or that would not fit the 256-byte link buffer, are skipped.
 *
 * Nothing is written when the page contains no marker or the file cannot be
 * opened.
 */
void parse_page(char *page, int len)
{
    static const char marker[] = "_slideshow.html\"";
    enum { MARKER_LINK_LEN = 15 };   /* strlen("_slideshow.html"), quote excluded */

    char fulllink[256];
    char lastlink[256];
    char *hit;
    FILE *pf;

    if(!page || !len)
        return;

    hit = strstr(page, marker);
    if(!hit)
        return;

    pf = fopen("pagelinks.txt", "a+");
    if(!pf)
        return;

    lastlink[0] = 0;

    do{
        char *start = hit;
        size_t linklen;

        /* Walk back to the opening quote, but never before the buffer start. */
        while(start > page && start[-1] != '\"')
            start--;

        /* start == page means no opening quote was found: malformed, skip. */
        if(start > page){
            linklen = (size_t)(hit - start) + MARKER_LINK_LEN;

            if(linklen < sizeof(fulllink)){
                memcpy(fulllink, start, linklen);
                fulllink[linklen] = 0;

                if(strcmp(lastlink, fulllink) != 0)
                    fprintf(pf, "%s\n", fulllink);

                strcpy(lastlink, fulllink);
            }
        }

        /* Resume the search at the marker's closing quote. */
        hit = strstr(hit + MARKER_LINK_LEN, marker);
    }while(hit);

    fclose(pf);
}

//////////////////////////////////////////////////////////////////////////

/*
 * string_replace - replace every occurrence of `substr` in `str` with `with`.
 *
 * str:    NUL-terminated string, modified in place.
 * substr: text to look for; an empty string means "nothing to replace".
 * with:   replacement text; must not point into `str`.
 *
 * Matches are found left to right and do not overlap, e.g.
 *   str = "a%2Fb%2Fc", substr = "%2F", with = "/"   ->   "a/b/c"
 *
 * When `with` is longer than `substr` the result grows; the caller must make
 * sure the buffer behind `str` can hold
 *   strlen(str) + matches * (strlen(with) - strlen(substr)) + 1  bytes.
 * No heap memory is used.
 */
void string_replace(char *str, char *substr, char *with)
{
    size_t sublen, withlen, len;
    size_t matches = 0;
    char *src, *dst;

    if(!str || !substr || !with)
        return;

    sublen = strlen(substr);
    if(sublen == 0)
        return;

    withlen = strlen(with);
    len = strlen(str);

    /* Count matches first so the final length is known up front. */
    for(src = str; (src = strstr(src, substr)) != NULL; src += sublen)
        matches++;

    if(matches == 0)
        return;

    src = str;
    if(withlen > sublen){
        /* Growing: slide the text to the end of its final extent, then
           rewrite from the front. The write position can never overtake the
           read position, because the gap equals the growth still to come. */
        src = str + matches * (withlen - sublen);
        memmove(src, str, len + 1);
    }

    dst = str;
    while(*src){
        if(strncmp(src, substr, sublen) == 0){
            memcpy(dst, with, withlen);
            dst += withlen;
            src += sublen;
        }else{
            *dst++ = *src++;
        }
    }
    *dst = 0;
}


//////////////////////////////////////////////////////////////////////////
/*
 * process_single_entry - download every image of one gallery slideshow.
 *
 * s_entry: one line from pagelinks.txt (a slideshow page name). A trailing
 *          CR/LF is cut off in place. Empty or over-long entries are ignored.
 *
 * Starting at "/<s_entry>" on devki.ws, each slideshow page is expected to
 * contain
 *
 *   <div class="sandbox"><a href="NEXT_PAGE">
 *   ... decodeURIComponent("...http%3A%2F%2Fpix01.devki.ws%2F...%2F137.jpg...")
 *
 * The image path after "pix01.devki.ws" (up to and including ".jpg", with
 * "%2F" turned into "/") is fetched and saved, then NEXT_PAGE is followed.
 * The walk stops when a page lacks the sandbox link or an image URL, or when
 * NEXT_PAGE belongs to another gallery. The gallery test happens AFTER the
 * current image has been saved, so the last image of a gallery is kept.
 *
 * Files are named "<gallery>_<image>.jpg"; when g_bCreateFolders is set they
 * go into a per-gallery directory under the current directory.
 *
 * The gallery counter persists across calls; the image counter restarts at
 * zero for every gallery.
 */
void process_single_entry(char *s_entry)
{
    /* The original author compares entries while ignoring their last 20
       characters (presumably the per-page tail of the URL - TODO confirm). */
    enum { ENTRY_TAIL_LEN = 20 };

    static int nNumberImage = 0;
    static int nNumberGallery = 0;

    LPVOID page;
    DWORD page_size;
    DWORD dirlen;
    char req[256];
    char file_path[MAX_PATH];
    char directory[MAX_PATH + 16];
    size_t entry_len;
    char *p;

    if(!s_entry)
        return;

    /* Cut the entry at the first CR or LF. */
    for(p = s_entry; *p; p++){
        if(*p == '\x0d' || *p == '\x0a'){
            *p = 0;
            break;
        }
    }

    entry_len = strlen(s_entry);
    if(entry_len == 0 || entry_len + 2 > sizeof(req))
        return;   /* nothing to fetch, or "/<entry>" would not fit req */

    nNumberGallery++;

    printf("downloading is in progress, gallery #%d (ETA unavailable)...\n", nNumberGallery);

    sprintf_s(req, sizeof(req), "/%s", s_entry);
    page = Inet_GET("devki.ws", req, &page_size, 0);

    directory[0] = 0;
    if(g_bCreateFolders){
        dirlen = GetCurrentDirectory(sizeof(file_path), file_path);
        if(dirlen == 0 || dirlen >= sizeof(file_path)){
            /* Current directory unavailable: fall back to a relative path. */
            sprintf_s(directory, sizeof(directory), "%.5d", nNumberGallery);
        }else{
            sprintf_s(directory, sizeof(directory), "%s\\%.5d", file_path, nNumberGallery);
        }
        CreateDirectory(directory, 0);
    }

    while(page){
        char next_page_url[256];
        char current_image_url[256];
        char file_name[MAX_PATH + 64];
        char *sandbox_str, *href, *href_end;
        char *image_url, *url_end;
        size_t n;
        int same_gallery;
        DWORD jpeglen = 0, written = 0;
        LPVOID jpegdata;
        HANDLE hFile;

        /* --- locate the link to the next slideshow page ------------------ */
        sandbox_str = strstr((char*)page, "sandbox\"><a href=\"");
        if(!sandbox_str)
            break;                       /* no sandbox tag: not a slideshow page */

        href = sandbox_str + 18;         /* 18 == strlen of the tag text searched above */
        href_end = strchr(href, '\"');
        if(!href_end)
            break;

        n = (size_t)(href_end - href);
        if(n >= sizeof(next_page_url))
            break;
        memcpy(next_page_url, href, n);
        next_page_url[n] = 0;

        /* --- locate the image of the current page ------------------------ */
        image_url = strstr((char*)page, "pix01.devki.ws");
        if(!image_url)
            break;
        image_url += 14;                 /* skip the host name itself */

        url_end = strstr(image_url, ".jpg");
        if(!url_end)
            break;
        url_end += 4;                    /* keep the ".jpg" extension */

        n = (size_t)(url_end - image_url);
        if(n >= sizeof(current_image_url))
            break;
        memcpy(current_image_url, image_url, n);
        current_image_url[n] = 0;

        /* The URL sits inside decodeURIComponent("..."), so slashes are encoded. */
        string_replace(current_image_url, "%2F", "/");

        hfree(page);
        page = NULL;

        /* --- download and store the image -------------------------------- */
        nNumberImage++;

        if(g_bCreateFolders){
            sprintf_s(file_name, sizeof(file_name), "%s\\%.8d_%.4d.jpg", directory, nNumberGallery, nNumberImage);
        }else{
            sprintf_s(file_name, sizeof(file_name), "%.8d_%.4d.jpg", nNumberGallery, nNumberImage);
        }

        printf("\tdownloading image #%d...\r", nNumberImage);
        jpegdata = Get_Image(current_image_url, &jpeglen);

        /* A failed download yields no data; do not leave an empty file behind. */
        if(jpegdata && jpeglen){
            hFile = CreateFile(file_name, GENERIC_WRITE, FILE_SHARE_WRITE, 0, CREATE_ALWAYS, 0, 0);
            if(hFile != INVALID_HANDLE_VALUE){
                WriteFile(hFile, jpegdata, jpeglen, &written, 0);
                CloseHandle(hFile);
            }else{
                printf("\tcannot create %s\n", file_name);
            }
        }
        hfree(jpegdata);

        /* --- follow the link only while it stays inside this gallery ----- */
        if(entry_len > ENTRY_TAIL_LEN)
            same_gallery = (strncmp(s_entry, next_page_url, entry_len - ENTRY_TAIL_LEN) == 0);
        else
            same_gallery = (strcmp(s_entry, next_page_url) == 0);

        if(!same_gallery)
            break;

        page = Inet_GET("devki.ws", next_page_url, &page_size, 0);
    }

    hfree(page);          /* NULL here when the page was already released */
    nNumberImage = 0;
}


//////////////////////////////////////////////////////////////////////////
/*
 * main - interactive driver for the grabber.
 *
 * 1. Requests "/" from devki.ws once to verify that the site is reachable.
 * 2. Asks whether each gallery should get its own directory
 *    (sets g_bCreateFolders).
 * 3. Fetches listing pages 1..25 of category 53 and lets parse_page() collect
 *    the slideshow links into a fresh pagelinks.txt.
 * 4. Feeds every line of pagelinks.txt to process_single_entry().
 *
 * Waits for a key press before exiting. Always returns 0.
 */
int main(int argc, char **argv)
{
    DWORD len = 0;
    LPVOID buff;
    FILE *pf;
    char s_page[256];
    char page_str[256];
    int retch;
    int i;

    (void)argc;
    (void)argv;

    printf(" -= devki.ws grabber by sn0w ;) =-\n\n");

    // 1. Accept cookies etc
    printf("logging on devki.ws:80...\n");
    buff = Inet_GET("devki.ws", "/", &len, 0);
    hfree(buff);
    if(!len){
        printf("connection failed. press any key to exit\n");
        _getch();
        return 0;
    }

    printf("press Y if you want to create directory for each gallery\n");
    retch = _getch();
    g_bCreateFolders = (retch == 'Y' || retch == 'y') ? TRUE : FALSE;

    // Start from a clean list: parse_page() appends to this file.
    DeleteFile("pagelinks.txt");

    // 2. Get page 1-25 and parse links for the slide shows
    //    (it is easier to walk a gallery step by step through its slideshow)
    for(i = 1; i <= 25; i++){
        printf("\tparsing page %d...\r", i);
        sprintf_s(s_page, sizeof(s_page),
                  "/engine.php?mod=galleries&act=list&category=53&page=%d", i);
        buff = Inet_GET("devki.ws", s_page, &len, 0);
        parse_page((char*)buff, len);
        hfree(buff);
    }

    printf("\n");

    // 3. Now start deep parsing for each gallery
    pf = fopen("pagelinks.txt", "r");
    if(pf){
        // Loop on fgets itself: testing feof() first would hand the last
        // line to process_single_entry() a second time.
        while(fgets(page_str, sizeof(page_str), pf))
            process_single_entry(page_str);
        fclose(pf);
    }else{
        // parse_page() creates the file only when it finds a link.
        printf("no gallery links were found.\n");
    }

    // All done.
    printf("\nok.");
    _getch();

    return 0;
}



а, блин, баг нашёл — последнюю фотку из каждой галереи не скачивает.)

superboy4
03.10.2009, 18:43
спасибо, хвалю за знания в си

Ins3t
03.10.2009, 19:02
Снег, ты бы лучше порнокачалку подновил, а то народ жалуется на неработоспособность :D

sn0w
03.10.2009, 19:02
так аналог) тока более эстетичный)))))

Dark_Scorpicore
07.10.2009, 09:21
Благодарю за хороший пример :)
Очень помогло (не в скачивании прона, а в изучении си)
xD

АлексDevil
30.04.2010, 17:46
сайт больше не работает

АлексDevil
28.05.2010, 22:09
кто знает, откуда они брали сеты?