/*
 * Minimal HTTP/1.1 GET client.
 *
 *  - The server's status code is not interpreted.
 *  - Only the response body (the resource data) is returned.
 *  - At most MAX_RCVBUF_LEN - 1 bytes of body are kept; a longer body is
 *    truncated to that size.
 */
#include <arpa/inet.h>
#include <ctype.h>
#include <errno.h>
#include <netdb.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>

#define DEFAULT_PORT 80
#define DEFAULT_PROXY "110.4.12.170:80"
#define CONTENT_LEN_STR "Content-Length"
#define TRANSFER_ENC_STR "Transfer-Encoding"
#define CHUNKED_TOKEN "chunked"
#define MAX_URL_LEN 2048
#define MAX_BUF_LEN 2048
#define MAX_IP_LEN 16
#define MAX_RCVBUF_LEN (800*1024)
#define CHUNK_LINE_LEN 64
#define REQUEST_FORMAT \
    "GET %s HTTP/1.1\r\n" \
    "Host:%s\r\n" \
    "Connection: close\r\n" \
    "User-Agent: Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.34 (KHTML, like Gecko) rekonq/1.1 Safari/534.34\r\n" \
    "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" \
    "Accept-Encoding: \r\n" \
    "Accept-Charset:utf-8,*;q=0.5\r\n" \
    "Accept-Language: zh-CN, en-US; q=0.8, en; q=0.6\r\n" \
    "\r\n"

/* Components of an "http://host[:port][/resource]" URL. */
struct url_parse_t {
    char host[MAX_URL_LEN];     /* host name only, without ":port" */
    unsigned short port;        /* TCP port, DEFAULT_PORT when absent */
    char resource[MAX_URL_LEN]; /* request path, always starts with '/' */
};

/*
 * Socket reader with a small look-ahead buffer, so that bytes received
 * together with the headers are not lost when the body is decoded.
 */
struct http_reader {
    int fd;
    char buf[MAX_BUF_LEN];
    size_t pos; /* index of the next unread byte in buf */
    size_t len; /* number of valid bytes in buf */
};

/*
 * Split url into host, port and resource.
 *
 * An optional leading "http://" is skipped. The port defaults to
 * DEFAULT_PORT and must be in 1..65535 when given. The resource defaults
 * to "/".
 *
 * Returns 0 on success, -1 when the URL is malformed or a component does
 * not fit into url_parse.
 */
static int parse_url(const char *url, struct url_parse_t *url_parse)
{
    static const char scheme[] = "http://";
    const char *p = url;
    const char *path;
    const char *colon;
    size_t authority_len;
    size_t host_len;
    size_t resource_len;
    long port = DEFAULT_PORT;

    if (url == NULL || url_parse == NULL) {
        return -1;
    }

    /* Only a scheme at the very start of the URL counts. */
    if (strncasecmp(p, scheme, sizeof scheme - 1) == 0) {
        p += sizeof scheme - 1;
    }

    path = strchr(p, '/');
    authority_len = path ? (size_t)(path - p) : strlen(p);

    colon = memchr(p, ':', authority_len);
    host_len = colon ? (size_t)(colon - p) : authority_len;
    if (host_len == 0 || host_len >= sizeof url_parse->host) {
        return -1;
    }

    if (colon) {
        const char *port_str = colon + 1;
        char *end;

        errno = 0;
        port = strtol(port_str, &end, 10);
        if (end == port_str || end != p + authority_len || errno == ERANGE ||
            port <= 0 || port > 65535) {
            return -1;
        }
    }

    resource_len = path ? strlen(path) : 1;
    if (resource_len >= sizeof url_parse->resource) {
        return -1;
    }

    memcpy(url_parse->host, p, host_len);
    url_parse->host[host_len] = '\0';
    memcpy(url_parse->resource, path ? path : "/", resource_len + 1);
    url_parse->port = (unsigned short)port;
    return 0;
}

/*
 * Split a "ip:port" proxy specification.
 *
 * ip must point to a buffer of at least MAX_IP_LEN bytes.
 * Returns 0 on success, -1 when the text is malformed, the address part
 * does not fit, or the port is outside 1..65535.
 */
static int parse_proxy(const char *proxy, char *ip, unsigned short *port)
{
    const char *colon;
    char *end;
    size_t ip_len;
    long value;

    if (proxy == NULL || ip == NULL || port == NULL) {
        return -1;
    }

    colon = strchr(proxy, ':');
    if (colon == NULL) {
        return -1;
    }

    ip_len = (size_t)(colon - proxy);
    if (ip_len == 0 || ip_len >= MAX_IP_LEN) {
        return -1;
    }

    errno = 0;
    value = strtol(colon + 1, &end, 10);
    if (end == colon + 1 || *end != '\0' || errno == ERANGE ||
        value <= 0 || value > 65535) {
        return -1;
    }

    memcpy(ip, proxy, ip_len);
    ip[ip_len] = '\0';
    *port = (unsigned short)value;
    return 0;
}

/* read() that is restarted when interrupted by a signal. */
static ssize_t read_retry(int fd, void *buf, size_t len)
{
    ssize_t n;

    do {
        n = read(fd, buf, len);
    } while (n < 0 && errno == EINTR);
    return n;
}

/*
 * Read until len bytes have arrived or the peer closes the connection.
 *
 * Returns the number of bytes actually stored (less than len only at end
 * of stream), or -1 on a read error.
 */
static ssize_t read_full(int fd, char *buf, size_t len)
{
    size_t got = 0;

    while (got < len) {
        ssize_t n = read_retry(fd, buf + got, len - got);

        if (n < 0) {
            return -1;
        }
        if (n == 0) {
            break; /* end of stream: report the short count */
        }
        got += (size_t)n;
    }
    return (ssize_t)got;
}

/* Write all len bytes. Returns 0 on success, -1 on a write error. */
static int write_full(int fd, const char *buf, size_t len)
{
    while (len > 0) {
        ssize_t n = write(fd, buf, len);

        if (n < 0) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }
        buf += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Refill the look-ahead buffer; only called when it is empty.
 * Returns 1 when data arrived, 0 at end of stream, -1 on error.
 */
static int reader_fill(struct http_reader *r)
{
    ssize_t n = read_retry(r->fd, r->buf, sizeof r->buf);

    if (n <= 0) {
        return (int)n;
    }
    r->pos = 0;
    r->len = (size_t)n;
    return 1;
}

/*
 * Read one line terminated by '\n'. A trailing "\r" is stripped and the
 * result is NUL-terminated. A line longer than cap - 1 bytes is consumed
 * completely but only its first cap - 1 bytes are stored.
 *
 * Returns the stored length, or -1 on error or when the stream ends
 * before a '\n' is seen.
 */
static int reader_read_line(struct http_reader *r, char *line, size_t cap)
{
    size_t used = 0;

    for (;;) {
        char c;

        if (r->pos == r->len && reader_fill(r) <= 0) {
            return -1;
        }
        c = r->buf[r->pos++];
        if (c == '\n') {
            break;
        }
        if (used + 1 < cap) {
            line[used++] = c;
        }
    }
    if (used > 0 && line[used - 1] == '\r') {
        used--;
    }
    line[used] = '\0';
    return (int)used;
}

/*
 * Read want bytes of payload, draining the look-ahead buffer first.
 * Returns the number of bytes stored (short only at end of stream), or
 * -1 on a read error.
 */
static long reader_read(struct http_reader *r, char *dst, size_t want)
{
    size_t got = r->len - r->pos;
    ssize_t n;

    if (got > want) {
        got = want;
    }
    memcpy(dst, r->buf + r->pos, got);
    r->pos += got;

    n = read_full(r->fd, dst + got, want - got);
    if (n < 0) {
        return -1;
    }
    return (long)(got + (size_t)n);
}

/*
 * If line is the header field called name (case-insensitive), return a
 * pointer to its value with leading blanks skipped; otherwise NULL.
 */
static const char *header_value(const char *line, const char *name)
{
    size_t n = strlen(name);

    if (strncasecmp(line, name, n) != 0 || line[n] != ':') {
        return NULL;
    }
    line += n + 1;
    while (*line == ' ' || *line == '\t') {
        line++;
    }
    return line;
}

/* Case-insensitive substring test. */
static int contains_ci(const char *haystack, const char *needle)
{
    size_t n = strlen(needle);

    for (; *haystack != '\0'; haystack++) {
        if (strncasecmp(haystack, needle, n) == 0) {
            return 1;
        }
    }
    return 0;
}

/*
 * Decode one chunk of a "Transfer-Encoding: chunked" body into buf, which
 * has room for cap bytes.
 *
 * Return value:
 *    0  the terminating zero-size chunk was read, or buf is full
 *       (cap == 0): the transfer is finished
 *   >0  number of bytes stored; the next chunk may be read, unless the
 *       chunk had to be cut short because it did not fit
 *   -1  malformed chunk or receive error
 */
static long read_chunk(struct http_reader *r, char *buf, size_t cap)
{
    char line[CHUNK_LINE_LEN];
    unsigned long chunk_len;
    size_t want;
    long got;

    if (cap == 0) {
        return 0;
    }

    /* "<hex size>[;extensions]\r\n" */
    if (reader_read_line(r, line, sizeof line) < 0) {
        return -1;
    }
    if (!isxdigit((unsigned char)line[0])) {
        return -1;
    }
    errno = 0;
    chunk_len = strtoul(line, NULL, 16);
    if (errno == ERANGE) {
        return -1;
    }
    if (chunk_len == 0) {
        return 0;
    }

    want = chunk_len < cap ? (size_t)chunk_len : cap;
    got = reader_read(r, buf, want);
    if (got < 0 || (size_t)got != want) {
        return -1;
    }

    /* Every chunk is followed by "\r\n", which is not part of the data. */
    if (want == chunk_len && reader_read_line(r, line, sizeof line) != 0) {
        return -1;
    }
    return got;
}

/*
 * Send a GET request for parsed_url on fd and store the response body,
 * NUL-terminated, in buf (buf_size bytes).
 *
 * With proxy_flag set the request line carries the absolute URL, as a
 * proxy expects. The body is delimited by chunked encoding if announced,
 * else by Content-Length if present, else by the server closing the
 * connection (the request asks for "Connection: close").
 *
 * Returns 0 on success, -1 on a send/receive error or malformed response.
 */
static int recv_page(int fd, char *buf, size_t buf_size, int proxy_flag,
                     const struct url_parse_t *parsed_url)
{
    struct http_reader reader = { .fd = fd };
    char hostport[MAX_URL_LEN + 8];
    char target[2 * MAX_URL_LEN + 16];
    char request[sizeof REQUEST_FORMAT + sizeof target + sizeof hostport];
    char line[MAX_BUF_LEN];
    long content_len = 0;
    int have_length = 0;
    int chunked = 0;
    size_t cap;
    size_t total = 0;
    int n;

    if (buf == NULL || buf_size == 0 || parsed_url == NULL) {
        return -1;
    }
    cap = buf_size - 1; /* keep one byte for the terminating NUL */

    /* The port is part of the Host value only when it is not the default. */
    if (parsed_url->port == DEFAULT_PORT) {
        n = snprintf(hostport, sizeof hostport, "%s", parsed_url->host);
    } else {
        n = snprintf(hostport, sizeof hostport, "%s:%u", parsed_url->host,
                     (unsigned)parsed_url->port);
    }
    if (n < 0 || (size_t)n >= sizeof hostport) {
        return -1;
    }

    /* resource already starts with '/', so no separator is added. */
    if (proxy_flag) {
        n = snprintf(target, sizeof target, "http://%s%s", hostport,
                     parsed_url->resource);
    } else {
        n = snprintf(target, sizeof target, "%s", parsed_url->resource);
    }
    if (n < 0 || (size_t)n >= sizeof target) {
        return -1;
    }

    n = snprintf(request, sizeof request, REQUEST_FORMAT, target, hostport);
    if (n < 0 || (size_t)n >= sizeof request) {
        return -1;
    }
    if (write_full(fd, request, (size_t)n) < 0) {
        return -1;
    }

    /* Status line: only checked for plausibility, the code is ignored. */
    if (reader_read_line(&reader, line, sizeof line) < 0 ||
        strncmp(line, "HTTP/", 5) != 0) {
        return -1;
    }

    /* Header fields, up to the empty line that ends the header block. */
    for (;;) {
        const char *value;

        n = reader_read_line(&reader, line, sizeof line);
        if (n < 0) {
            return -1;
        }
        if (n == 0) {
            break;
        }

        if ((value = header_value(line, CONTENT_LEN_STR)) != NULL) {
            char *end;

            errno = 0;
            content_len = strtol(value, &end, 10);
            if (end == value || errno == ERANGE || content_len < 0) {
                return -1;
            }
            have_length = 1;
        } else if ((value = header_value(line, TRANSFER_ENC_STR)) != NULL) {
            if (contains_ci(value, CHUNKED_TOKEN)) {
                chunked = 1;
            }
        }
    }

    if (chunked) {
        /* Chunked encoding takes precedence over Content-Length. */
        for (;;) {
            long stored = read_chunk(&reader, buf + total, cap - total);

            if (stored < 0) {
                return -1;
            }
            if (stored == 0) {
                break;
            }
            total += (size_t)stored;
        }
    } else {
        size_t want = cap;
        long got;

        if (have_length && (unsigned long)content_len < cap) {
            want = (size_t)content_len;
        }
        got = reader_read(&reader, buf, want);
        if (got < 0) {
            return -1;
        }
        /* A declared length that is not delivered is a broken response. */
        if (have_length && (size_t)got < want) {
            return -1;
        }
        total = (size_t)got;
    }

    buf[total] = '\0';
    return 0;
}

/*
 * Fetch url with an HTTP GET and return the response body.
 *
 * proxy is either NULL (connect to the host directly) or "ip:port" of an
 * HTTP proxy. If proxy cannot be parsed, an error is printed and the host
 * is contacted directly.
 *
 * Returns a pointer to a static, NUL-terminated buffer that is overwritten
 * by the next call (so the function is not reentrant), or NULL on failure.
 */
char *geturl(char *url, char *proxy)
{
    static char rcvbuf[MAX_RCVBUF_LEN];
    struct sockaddr_in saddr;
    struct url_parse_t parsed_url;
    char ipbuf[MAX_IP_LEN];
    unsigned short port;
    int proxy_flag = 0;
    int fd;
    int rc;

    memset(rcvbuf, 0, sizeof rcvbuf);
    memset(&saddr, 0, sizeof saddr);
    saddr.sin_family = AF_INET;

    if (proxy) {
        if (parse_proxy(proxy, ipbuf, &port) < 0 ||
            inet_pton(AF_INET, ipbuf, &saddr.sin_addr) != 1) {
            fprintf(stderr, "parse proxy error\n");
        } else {
            saddr.sin_port = htons(port);
            proxy_flag = 1;
        }
    }

    if (parse_url(url, &parsed_url) < 0) {
        fprintf(stderr, "parse url error\n");
        return NULL;
    }

    if (!proxy_flag) {
        struct hostent *hostinfo_p = gethostbyname(parsed_url.host);

        if (hostinfo_p == NULL || hostinfo_p->h_addrtype != AF_INET ||
            hostinfo_p->h_addr_list[0] == NULL) {
            fprintf(stderr, "resolve host error: %s\n", parsed_url.host);
            return NULL;
        }
        memcpy(&saddr.sin_addr, hostinfo_p->h_addr_list[0],
               sizeof saddr.sin_addr);
        saddr.sin_port = htons(parsed_url.port);
    }

    if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
        perror("socket error");
        return NULL;
    }

    if (connect(fd, (struct sockaddr *)&saddr, sizeof saddr) < 0) {
        perror("connect error");
        close(fd);
        return NULL;
    }

    rc = recv_page(fd, rcvbuf, sizeof rcvbuf, proxy_flag, &parsed_url);
    close(fd);

    return rc < 0 ? NULL : rcvbuf;
}

#ifndef GETURL_NO_MAIN
/*
 * usage: prog url [proxy_ip:port]
 *
 * Without a second argument DEFAULT_PROXY is used; an empty second
 * argument ("") disables the proxy.
 */
int main(int argc, char **argv)
{
    char *proxy = DEFAULT_PROXY;
    char *temp;

    if (argc < 2 || argc > 3) {
        fprintf(stderr, "usage: %s url [proxy_ip:port]\n", argv[0]);
        exit(1);
    }
    if (argc == 3) {
        proxy = argv[2][0] != '\0' ? argv[2] : NULL;
    }

    temp = geturl(argv[1], proxy);
    if (temp) {
        printf("%s\n", temp);
        return 0;
    }

    printf("temp NULL\n");
    return 1;
}
#endif /* GETURL_NO_MAIN */