最近在拉萨无聊,阅读开源社区(oschina.net)的 Python 代码,发现好多人都在爬妹子图片,于是选取了两个地址,将妹子图爬了下来。两个网站分别是 http://meizitu.com/ 和 http://sexy.faceks.com/ ,图片挺不错的。非常喜爱的煎蛋网站上,妹子参差不齐,就没怎么考虑。

还是偷懒,使用了 BeautifulSoup,用别人的轮子,轧自己的马路!

妹子图网站下载源码如下:

import urllib2
from bs4 import BeautifulSoup
import logging
import os

# Root-logger setup: every record (DEBUG and above) is appended to log_file.
# The directory must already exist; FileHandler does not create it.
log_file = "/home/fish/log/beauty.log"
logger = logging.getLogger()
logger.addHandler(logging.FileHandler(filename=log_file))
logger.setLevel(level=logging.DEBUG)

def get_img_url(n):
    """Collect the image URLs found on one meizitu.com album page.

    The page ``http://www.meizitu.com/a/<n>.html`` is fetched and every
    ``<img>`` inside the first ``<p>`` of the ``<div id="picture">`` element
    contributes its ``src`` attribute.

    Args:
        n: Integer album number substituted into the page URL.

    Returns:
        A list of ``src`` values in document order. The list is empty when
        the page cannot be fetched or lacks the expected markup; failures
        are logged rather than raised so that one bad page does not stop
        the crawl.
    """
    img_url = []
    url = "http://www.meizitu.com/a/%d.html" % n
    logger.info(url)
    try:
        response = urllib2.urlopen(url)
        try:
            soup = BeautifulSoup(response.read())
        finally:
            # Release the connection even if reading or parsing fails.
            response.close()

        picture = soup.find('div', id='picture')
        paragraph = picture.p if picture is not None else None
        if paragraph is None:
            logger.warning("no picture block found in %s", url)
            return img_url

        # Search the tag directly; re-parsing str(tag) into a second
        # document is unnecessary.
        for img_tag in paragraph.find_all('img'):
            src = img_tag.get('src')
            if src:  # skip <img> tags without a usable src
                img_url.append(src)
    except Exception as e:  # best effort: log and carry on with the next page
        logger.error("failed to collect images from %s: %s", url, e)
    return img_url


def save_to_disk(start=4768, stop=3524):
    """Download all images of a descending range of album pages with wget.

    For each album number ``i`` in ``range(start, stop, -1)`` the image URLs
    returned by ``get_img_url(i)`` are fetched with ``wget -c`` and stored in
    the current working directory as ``<i>--<k>.jpg``, where ``k`` counts the
    images of that album starting at 1.

    Args:
        start: First (highest) album number to download.
        stop: Album number at which to stop; it is not downloaded itself.
    """
    # Local import so the file's top-level import block stays untouched.
    import subprocess

    for i in range(start, stop, -1):
        for flag, t in enumerate(get_img_url(i), 1):
            filename = str(i) + '--' + str(flag) + '.jpg'
            # Pass an argument list instead of a shell string: scraped URLs
            # may contain '&', '?', ';' or spaces that a shell would interpret.
            cmd = ['wget', '-c', t, '-O', filename]
            logger.info(' '.join(cmd))
            try:
                status = subprocess.call(cmd)
            except OSError as e:  # e.g. wget is not installed
                logger.error("could not run wget: %s", e)
                continue
            if status != 0:
                logger.warning("wget exited with status %d for %s", status, t)
            
#------------test--------------#
# Manual spot check of a single album page, left disabled:
#print(get_img_url(4751))
# Start the bulk download only when this file is executed as a script,
# so that importing it has no side effects.
if __name__ == '__main__':
    save_to_disk()

性感美女图片下载源码如下:

import urllib2
from bs4 import BeautifulSoup
import logging
import os

# Root-logger setup: every record (DEBUG and above) is appended to log_file.
# The directory must already exist; FileHandler does not create it.
log_file = "/home/fish/log/sex.log"
logger = logging.getLogger()
logger.addHandler(logging.FileHandler(filename=log_file))
logger.setLevel(level=logging.DEBUG)

def get_img_url(url):
    """Collect the full-size image URLs referenced on one post page.

    Every ``<div class="pic">`` on the page is inspected and the
    ``bigimgsrc`` attribute of its first ``<a>`` element is collected.

    Args:
        url: Address of the post page to fetch.

    Returns:
        A list of ``bigimgsrc`` values in document order. The list is empty
        when the page cannot be fetched or parsed; failures are logged
        rather than raised so that one bad page does not stop the crawl.
    """
    img_url = []
    logger.info(url)
    try:
        response = urllib2.urlopen(url)
        try:
            soup = BeautifulSoup(response.read())
        finally:
            # Release the connection even if reading or parsing fails.
            response.close()

        for pic_div in soup.find_all('div', 'pic'):
            link = pic_div.a
            src = link.get('bigimgsrc') if link is not None else None
            if src:  # skip entries without an anchor or without the attribute
                img_url.append(src)
    except Exception as e:  # best effort: log and carry on with the next page
        logger.error("failed to collect images from %s: %s", url, e)
    return img_url



def locate_page_url(n):
    """Collect the post links listed on one index page of sexy.faceks.com.

    The page ``http://sexy.faceks.com/?page=<n>`` is fetched and the ``href``
    of the first ``<a>`` inside every ``<div class="pic">`` is collected.

    Args:
        n: Integer index-page number substituted into the URL.

    Returns:
        A list of ``href`` values in document order. The list is empty when
        the page cannot be fetched or parsed; failures are logged rather
        than raised so that one bad page does not stop the crawl.
    """
    url = "http://sexy.faceks.com/?page=%d" % n
    logger.info(url)
    page_url = []
    try:
        response = urllib2.urlopen(url)
        try:
            soup = BeautifulSoup(response.read())
        finally:
            # Release the connection even if reading or parsing fails.
            response.close()

        for pic_div in soup.find_all('div', 'pic'):
            link = pic_div.a
            href = link.get('href') if link is not None else None
            if href:  # skip entries without an anchor or without a target
                page_url.append(href)
    except Exception as e:  # best effort: log and carry on with the next page
        logger.error("failed to collect post links from %s: %s", url, e)
    return page_url

def save_to_disk(first_page=3, end_page=26):
    """Download every image reachable from a range of index pages with wget.

    For each index page ``i`` in ``range(first_page, end_page)`` the post
    links from ``locate_page_url(i)`` are visited, and each image URL from
    ``get_img_url(page)`` is fetched with ``wget -c`` into the current
    working directory. The output file name is the last 20 characters of the
    image URL.

    Args:
        first_page: First index-page number to crawl.
        end_page: Index-page number at which to stop; it is not crawled.
    """
    # Local import so the file's top-level import block stays untouched.
    import subprocess

    for i in range(first_page, end_page):
        for page in locate_page_url(i):
            for img in get_img_url(page):
                # A '/' inside the URL tail would make wget write into a
                # (probably missing) sub-directory; keep the name flat.
                filename = str(img[-20:]).replace('/', '_')
                # Pass an argument list instead of a shell string: scraped
                # URLs may contain '&', '?', ';' or spaces that a shell
                # would interpret.
                cmd = ['wget', '-c', img, '-O', filename]
                logger.info(' '.join(cmd))
                try:
                    status = subprocess.call(cmd)
                except OSError as e:  # e.g. wget is not installed
                    logger.error("could not run wget: %s", e)
                    continue
                if status != 0:
                    logger.warning("wget exited with status %d for %s",
                                   status, img)

#------------test---------------#
# Manual spot checks (Python 2 print statements), left disabled:
#print locate_page_url(1)
#print get_img_url("http://sexy.faceks.com/post/2c9c66_54a3875")
# Start the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    save_to_disk()


blog comments powered by Disqus

Published

13 January 2015

Tags