最近在拉萨比较无聊,就去阅读开源社区(oschina.net)上的 Python 代码,发现好多人都在爬妹子图片。于是我选取了两个网站,把上面的妹子图爬了下来,这两个网站分别是 http://meizitu.com/ 和 http://sexy.faceks.com/ ,图片挺不错的。我非常喜爱的煎蛋网上,妹子图质量参差不齐,就没怎么考虑。
还是偷了个懒,直接使用了 BeautifulSoup:用别人的轮子,轧自己的马路!
妹子图网站下载源码如下:
# Crawler for meizitu.com: walks a range of gallery pages and downloads every
# picture found on each page with wget (Python 2 script).
import contextlib
import logging
import os
import subprocess
import urllib2

from bs4 import BeautifulSoup

log_file = "/home/fish/log/beauty.log"

# Seconds to wait for a page before giving up; without a timeout a stalled
# connection would hang the whole crawl.
FETCH_TIMEOUT = 30

# logging.FileHandler raises if the target directory is missing, so create it
# before attaching the handler.
log_dir = os.path.dirname(log_file)
if log_dir and not os.path.isdir(log_dir):
    os.makedirs(log_dir)

logger = logging.getLogger()
logger.addHandler(logging.FileHandler(log_file))
logger.setLevel(logging.DEBUG)


def _download(url, filename):
    """Fetch *url* into *filename* with ``wget -c``; failures are logged, not raised."""
    # Pass an argument list (no shell) so that characters in a scraped URL
    # can never be interpreted as shell syntax.
    cmd = ['wget', '-c', url, '-O', filename]
    logger.info(' '.join(cmd))
    try:
        status = subprocess.call(cmd)
    except OSError:
        # wget is not installed or cannot be executed.
        logger.exception("could not run wget")
        return
    if status != 0:
        logger.warning("wget exited with status %d for %s", status, url)


def get_img_url(n):
    """Return the ``src`` of every <img> in the picture area of gallery page *n*.

    The page fetched is ``http://www.meizitu.com/a/<n>.html``. Any error while
    fetching or parsing is logged and whatever was collected so far (possibly
    an empty list) is returned, so one bad page never stops the crawl.
    """
    url = "http://www.meizitu.com/a/%d.html" % n
    logger.info(url)
    img_url = []
    try:
        with contextlib.closing(
                urllib2.urlopen(url, timeout=FETCH_TIMEOUT)) as response:
            # Name the parser explicitly so results do not depend on which
            # optional parsers happen to be installed.
            soup = BeautifulSoup(response.read(), "html.parser")
        picture = soup.find('div', id='picture')
        if picture is None or picture.p is None:
            logger.warning("no picture block found in %s", url)
            return img_url
        # Search the <p> element directly instead of re-parsing its markup.
        for img_tag in picture.p.find_all('img'):
            img_url.append(img_tag['src'])
    except Exception:
        # Best-effort crawler: record the traceback and keep going.
        logger.exception("failed to collect image URLs from %s", url)
    return img_url


def save_to_disk(start=4768, stop=3524):
    """Download all pictures of gallery pages *start* down to *stop* + 1.

    Pages are visited in descending order. Each picture is written to the
    current directory as ``<page>--<index>.jpg`` with the index starting at 1.
    """
    for page_id in range(start, stop, -1):
        for index, src in enumerate(get_img_url(page_id), 1):
            _download(src, '%d--%d.jpg' % (page_id, index))


# Manual smoke test:
#   print(get_img_url(4751))

if __name__ == '__main__':
    # Guarded so that importing this module does not start the crawl.
    save_to_disk()
性感美女图片下载源码如下:
# Crawler for sexy.faceks.com: walks the paginated index, opens every post
# linked from it and downloads the full-size pictures with wget
# (Python 2 script).
import contextlib
import logging
import os
import subprocess
import urllib2

from bs4 import BeautifulSoup

log_file = "/home/fish/log/sex.log"

# Seconds to wait for a page before giving up; without a timeout a stalled
# connection would hang the whole crawl.
FETCH_TIMEOUT = 30

# logging.FileHandler raises if the target directory is missing, so create it
# before attaching the handler.
log_dir = os.path.dirname(log_file)
if log_dir and not os.path.isdir(log_dir):
    os.makedirs(log_dir)

logger = logging.getLogger()
logger.addHandler(logging.FileHandler(log_file))
logger.setLevel(logging.DEBUG)


def _collect_link_attr(url, attr):
    """Return attribute *attr* of the first <a> inside every ``div.pic`` on *url*.

    Any error while fetching or parsing is logged and whatever was collected
    so far (possibly an empty list) is returned.
    """
    logger.info(url)
    values = []
    try:
        with contextlib.closing(
                urllib2.urlopen(url, timeout=FETCH_TIMEOUT)) as response:
            # Name the parser explicitly so results do not depend on which
            # optional parsers happen to be installed.
            soup = BeautifulSoup(response.read(), "html.parser")
        for pic_div in soup.find_all('div', 'pic'):
            values.append(pic_div.a[attr])
    except Exception:
        # Best-effort crawler: record the traceback and keep going.
        logger.exception("failed to read '%s' links from %s", attr, url)
    return values


def _download(url, filename):
    """Fetch *url* into *filename* with ``wget -c``; failures are logged, not raised."""
    # Pass an argument list (no shell) so that characters in a scraped URL
    # can never be interpreted as shell syntax.
    cmd = ['wget', '-c', url, '-O', filename]
    try:
        status = subprocess.call(cmd)
    except OSError:
        # wget is not installed or cannot be executed.
        logger.exception("could not run wget")
        return
    logger.info(' '.join(cmd))
    if status != 0:
        logger.warning("wget exited with status %d for %s", status, url)


def get_img_url(url):
    """Return the ``bigimgsrc`` (full-size image) URLs found on post page *url*."""
    return _collect_link_attr(url, 'bigimgsrc')


def locate_page_url(n):
    """Return the post URLs (``href``) listed on index page number *n*."""
    return _collect_link_attr("http://sexy.faceks.com/?page=%d" % n, 'href')


def save_to_disk(start=3, stop=26):
    """Download every picture of every post on index pages *start* .. *stop* - 1.

    Each picture is stored in the current directory under the last 20
    characters of its URL.
    """
    for page_no in range(start, stop):
        for post_url in locate_page_url(page_no):
            for img in get_img_url(post_url):
                # A URL tail may still contain '/', which wget would treat as
                # a directory separator; flatten it into a plain file name.
                _download(img, str(img[-20:]).replace('/', '_'))


# Manual smoke tests:
#   print locate_page_url(1)
#   print get_img_url("http://sexy.faceks.com/post/2c9c66_54a3875")

if __name__ == '__main__':
    save_to_disk()