Re: [範例] 無名小站相簿 grabber

看板Python作者 (keep your faith !)時間19年前 (2006/02/15 13:31), 編輯推噓0(000)
留言0則, 0人參與, 最新討論串2/4 (看更多)
# Author's note from the forum post (translated):
#   I am a beginner; I made a few small modifications. Usage is unchanged.
#   Fixes: the first photo could not be grabbed; also switched to Python's
#   file object so the script can be used on Windows.
#
# NOTE(review): the code was recovered from a paste flattened onto one line;
# the nesting below follows the most plausible reading -- confirm against the
# original post if exact behaviour matters.
"""Download every photo of one wretch.cc album into numbered .jpg files.

Usage: run the script with two arguments, ``<user> <book>``.  Images are
written to the current directory as ``1.jpg``, ``2.jpg``, ...

The code is written to run on both Python 2 (2.6+) and Python 3.
"""
import re
import sys
import os
from contextlib import closing

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3: urllib.request exposes the same Request/urlopen names used here.
    import urllib.request as urllib2

# Base URL of the album site; also sent as the Referer of every request.
addr = "http://www.wretch.cc/album"
# Number used for the next image file name; incremented after each download.
filename = 1

# Patterns are compiled once instead of once per scanned line.
_IMG_LINE_RE = re.compile('.*<img id=\'DisplayImage\'.*?></a>')
_IMG_URL_RE = re.compile('(http.*?)\'')
_SHOW_LINE_RE = re.compile(".*show.php.*")
_SHOW_HREF_RE = re.compile('<a href=".(.*?)".*><.*?</a>')


def _as_text(line):
    """Return *line* as ``str`` so the ``str`` patterns above can match it.

    On Python 3 the HTTP response yields ``bytes``; latin-1 maps every byte
    to exactly one character, so the ASCII patterns match as they would on
    the raw bytes.  On Python 2 ``bytes is str`` and the line is returned
    unchanged.
    """
    if isinstance(line, bytes) and not isinstance(line, str):
        return line.decode("latin-1")
    return line


def _build_request(url, with_user_agent=True):
    """Build a request for *url* carrying the album Referer header."""
    req = urllib2.Request(url)
    req.add_header('Referer', addr)
    if with_user_agent:
        req.add_header('User-Agent', 'Mozilla 5.0')
    return req


def _read_lines(req):
    """Fetch *req* and return its body as a list of text lines.

    The response is always closed, even if reading fails.
    """
    with closing(urllib2.urlopen(req)) as resp:
        return [_as_text(line) for line in resp.readlines()]


def get_pic(path):
    """Fetch the photo page at ``addr + path`` and save the image(s) it shows.

    Every line containing the ``DisplayImage`` tag is searched for the first
    ``http...`` URL terminated by a single quote; that URL is downloaded to
    ``<filename>.jpg`` and the module-level ``filename`` counter is advanced.

    Args:
        path: page path relative to ``addr``; ``&amp;`` entities in the
            combined URL are turned back into ``&`` before the request.
    """
    global filename
    url = (addr + path).replace('&amp;', '&')
    for line in _read_lines(_build_request(url)):
        if not _IMG_LINE_RE.match(line):
            continue
        urls = _IMG_URL_RE.findall(line)
        if not urls:
            continue
        image_url = urls[0]
        print("Request: " + image_url)
        target = str(filename) + '.jpg'
        # The image request carries only the Referer header, as in the
        # original script.
        image_req = _build_request(image_url, with_user_agent=False)
        with closing(urllib2.urlopen(image_req)) as resp:
            # 'wb' keeps the image bytes intact on Windows.
            with open(target, 'wb') as out:
                out.write(resp.read())
                print("Writing: " + target)
        filename = filename + 1


def grab(lines):
    """Download every photo linked from one album index page.

    Args:
        lines: iterable of the page's lines (``str``, or ``bytes`` on
            Python 3).

    Returns:
        True when no line mentions ``show.php`` (the caller uses this as
        the signal to stop paging), False otherwise.
    """
    hits = 0
    for line in lines:
        line = _as_text(line)
        if not _SHOW_LINE_RE.match(line):
            continue
        hits += 1
        hrefs = _SHOW_HREF_RE.findall(line)
        if hrefs:
            get_pic(hrefs[0])
    return hits == 0


def main(argv=None):
    """Walk the album pages for ``argv[1]`` (user) and ``argv[2]`` (book).

    Pages are requested with ``&page=1``, ``&page=2``, ... until ``grab``
    reports a page without any ``show.php`` line.

    Returns:
        0 on completion, 2 when the two required arguments are missing.
    """
    global filename
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        prog = argv[0] if argv else "grabber"
        sys.stderr.write("usage: %s <user> <book>\n" % prog)
        return 2
    user = argv[1]
    book = argv[2]
    url = addr + "/album.php?id=" + user + "&book=" + book
    filename = 1
    page_no = 1
    while True:
        page_lines = _read_lines(_build_request(url + "&page=" + str(page_no)))
        if grab(page_lines):
            break
        page_no += 1
    return 0


if __name__ == "__main__":
    sys.exit(main())

# --
# Posted from: PTT (ptt.cc)  From: 220.132.117.142
# Edited: ellinas, from: 220.132.117.142 (02/15 13:33)
文章代碼(AID): #13yholzn (Python)
文章代碼(AID): #13yholzn (Python)