IT虾米网

python图片小爬虫

qq123 2018年06月24日 编程语言 876 0
import re   
import urllib   
import os   
   
def rename(name):
    """Return ``name`` with a ``.jpg`` extension appended."""
    return name + '.jpg'
   
def getHtml(url):
    """Fetch ``url`` and return the raw response body.

    Args:
        url: Address of the page to download.

    Returns:
        Whatever ``read()`` on the opened response yields (the page source).
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Always release the underlying connection, even if read() raises.
        page.close()
   
def getImg(html, save_dir="E:\\pic"):
    """Download every ``.jpg`` referenced in ``html`` into ``save_dir``.

    Image URLs are taken from ``src="....jpg" pic_ext`` attributes. Images are
    saved as ``1.jpg``, ``2.jpg``, ... in the order their URLs appear, and each
    file name is printed as it is written.

    Args:
        html: Page source to scan for image URLs.
        save_dir: Directory to save into; it is created when missing.
            Defaults to the previously hard-coded ``E:\\pic``.

    Side effects:
        Changes the process working directory to ``save_dir``.
    """
    img_urls = re.findall(r'src="(.+?\.jpg)" pic_ext', html)

    # Create the target directory up front so chdir does not fail on a
    # machine where it has not been made yet.
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    os.chdir(save_dir)

    for index, img_url in enumerate(img_urls, 1):
        response = urllib.urlopen(img_url)
        try:
            name = rename(str(index))
            print(name)
            # The with-block closes the file even if the download fails midway.
            with open(name, 'wb') as out:
                out.write(response.read())
        finally:
            # Release the connection for this image before moving on.
            response.close()
    
    
    
       
# Script entry: fetch the thread page, save its images, then report completion.
html = getHtml("http://tieba.baidu.com/p/3553148164")
getImg(html)
# Call form of print, matching print(name) in getImg; for a single string it
# prints the same text on Python 2 as the statement form did.
print('pic save!')

评论关闭
IT虾米网

微信公众号:IT虾米 (左侧二维码扫一扫)欢迎添加!