Re: [範例] 無名小站相簿 grabber
我是初學者啦 小小修改一下
使用方法相同
修正:第一張相片抓不到的問題;另外改用 Python 的 file 物件寫檔,如此可在 Windows 下使用。
import urllib2
import re
import sys
import os
# Module-level state shared by get_pic() and the __main__ script.
# (A `global` statement at module scope has no effect, so none is used here.)

# Base URL of the album service; page paths are appended to it and it is
# also sent as the Referer header.
addr = "http://www.wretch.cc/album"

# Number used to name the next saved image (1.jpg, 2.jpg, ...).
filename = 1
def get_pic(path):
    """Download the photo displayed on one album "show" page.

    Fetches ``addr + path``, looks for the HTML line that contains the
    ``<img id='DisplayImage' ...>`` tag, downloads the first ``http...``
    URL quoted on that line and writes it to ``<filename>.jpg`` (relative
    path, binary mode).  The module-level counter ``filename`` is
    incremented after every image that is written.

    path -- page location relative to ``addr``, as extracted from an
            album page href.  ``&amp;`` entities are decoded to ``&``.

    Returns None.  Exceptions from urllib2 or file I/O are not caught.
    """
    global filename
    # hrefs copied out of HTML separate query parameters with "&amp;";
    # the request URL needs a plain "&".
    url = (addr + path).replace('&amp;', '&')
    page_req = urllib2.Request(url)
    page_req.add_header('Referer', addr)
    page_req.add_header('User-Agent', 'Mozilla 5.0')
    page = urllib2.urlopen(page_req)
    try:
        lines = page.readlines()
    finally:
        page.close()
    for line in lines:
        if not re.match('.*<img id=\'DisplayImage\'.*?></a>', line):
            continue
        # The image URL is the first single-quoted http... value on the line.
        mat = re.findall('(http.*?)\'', line)
        if not mat:
            continue
        img_req = urllib2.Request(mat[0])
        # Same value as the original literal 'http://www.wretch.cc/album'.
        img_req.add_header('Referer', addr)
        print("Request: " + mat[0])
        img = urllib2.urlopen(img_req)
        try:
            data = img.read()
        finally:
            img.close()
        out_name = str(filename) + '.jpg'
        # Binary mode keeps the image bytes intact on Windows.
        out = open(out_name, 'wb')
        try:
            out.write(data)
        finally:
            out.close()
        print("Writing: " + out_name)
        filename = filename + 1
def grab(lines):
    """Download the photos linked from one album index page.

    lines -- iterable of HTML text lines from an album page.

    Every line that mentions ``show.php`` is counted as a photo entry.
    If the line also contains an ``<a href="...">`` link, the href (minus
    its first character) is passed to get_pic(); only the first link
    found on a line is used.

    Returns True when no line mentions ``show.php`` (the page holds no
    photos), False otherwise.
    """
    found = 0
    for line in lines:
        # Escaped dot: match the literal file name, not e.g. "showXphp".
        if not re.search(r'show\.php', line):
            continue
        found += 1
        mat = re.findall('<a href=".(.*?)".*><.*?</a>', line)
        if mat:
            get_pic(mat[0])
    return found == 0
if __name__ == "__main__":
    # Command line: <script> <user id> <book id>
    # Walks the album's index pages (page=1, 2, ...) and hands each page
    # to grab() until grab() reports a page without photo links.
    if len(sys.argv) < 3:
        sys.stderr.write("Usage: %s <user id> <book id>\n" % sys.argv[0])
        sys.exit(1)
    user = sys.argv[1]
    book = sys.argv[2]
    url = addr + "/album.php?id=" + user + "&book=" + book
    filename = 1
    i = 1
    while True:
        url2 = url + "&page=" + str(i)
        req = urllib2.Request(url2)
        req.add_header('Referer', addr)
        req.add_header('User-Agent', 'Mozilla 5.0')
        page = urllib2.urlopen(req)
        try:
            page_lines = page.readlines()
        finally:
            # Release the connection before downloading the photos.
            page.close()
        if grab(page_lines):
            break
        i = i + 1
--
※ 發信站: 批踢踢實業坊(ptt.cc)
◆ From: 220.132.117.142
※ 編輯: ellinas 來自: 220.132.117.142 (02/15 13:33)
討論串 (同標題文章)
以下文章回應了本文:
完整討論串 (本文為第 2 之 4 篇):
Python 近期熱門文章
PTT數位生活區 即時熱門文章