[問題] 請教一個程式作業

看板: Python 作者: (99527) 時間: 11年前 (2014/06/30 16:28), 11年前編輯 推噓: 2(204)
留言6則, 4人參與, 最新討論串1/1
這是期末考的補救機會作業 老師要我完成的是剩下的部份,底下說明以後的部分 import urllib.request from bs4 import BeautifulSoup def getText(url, encoding='utf-8'): #url = 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/lw1939-pale-in-comparison/1825297.html' html = urllib.request.urlopen(urllib.request.Request(url)) soup = BeautifulSoup(html, from_encoding=encoding) # kill all script and style elements for script in soup(["script", "style"]): script.extract() # rip it out # get text text = soup.get_text() # break into lines and remove leading and trailing space on each lines = (line.strip() for line in text.splitlines()) # break multi-headlines into a line each chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) # drop blank lines text = '\n'.join(chunk for chunk in chunks if chunk) return text import re def getVOA(url): res=getText(url) lst = re.split(r'\n', res) text='' first_hint=False #'列印' second_hint=False #'美國之音' start=True for e in lst: if re.match(r'列印', e): if second_hint: second_hint=False else: first_hint=True continue if first_hint and re.match(r'美國之音', e): second_hint=True continue if second_hint and re.match(r'學個詞-\d+-\w+', e): start=True if second_hint and start: text+=e return text urls=['http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base64-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTktc3RpY2tlci1zaG9jaw~~/1943689.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTgtZ3JhY2UtcGVyaW9k/1943688.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTctY2l2aWwtd2Fy/1943687.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTYtZGlzcGFyYWdl/1943685.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTUtcHJvaGliaXQ~/1939100.html', 
'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTQtc3dpdGNo/1939098.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTMtdm9pY2U~/1939094.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTItbWFzY290/1939093.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTEtZXhjaGFuZ2U~/1939092.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNTAtYnJlYWR3aW5uZXI~/1935520.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDktYW5vbnltb3Vz/1935516.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDgtZHJhZnQ~/1935513.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDctaWRlbnRpZnk~/1935511.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDYtbmF0aW9ud2lkZQ~~/1935509.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTMzNy1jaGFyaXR5LQ~~/1933985.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDUtY29udHJpYnV0aW9u/1928911.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDQtY29udGFnaW91cw~~/1928909.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDMtYXNzZXNz/1928907.html', 
'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDItZ3JhZmZpdGk~/1928906.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDEtZnVuZGluZw~~/1928904.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base64-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwNDAtYWNjb21wbGlzaG1lbnQ~/1925331.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base64-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzktcHVibGljLXRyYW5zaXQ~/1925330.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzgtZGF0YWJhc2U~/1925329.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzctaGVhcmluZw~~/1925327.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzYtcmFudA~~/1925325.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/media/video/1936377.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzUtcHJvZm91bmQ~/1919322.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzQtcGxhbi1i/1919321.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzMtdG94aWM~/1919314.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base64-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzItb24tdGhlLWJyaW5rLW9m/1919312.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzEtY29tcGVs/1919311.html', 
'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMzAtbWF4LW91dA~~/1914530.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjktc2NyZWVuLXRpbWU~/1914527.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjgtdW5leHBlY3RlZA~~/1914522.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjctZGl2ZXJzZQ~~/1914519.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjYtd2lkZS1yYW5naW5n/1914515.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjUtYXQtbm8tY2hhcmdl/1914512.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base60-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjQtcmVoYWItY2VudGVy/1914508.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjMtY29tcGxhaW50/1914506.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjItc3VzcGljaW9u/1914504.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjEtb3V0bGF3/1914503.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/media/video/1936263.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMjAtbGV0aGFs/1904640.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base52-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMTktcG9pc2Vk/1904636.html', 
'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMTgtbWFyaXRpbWU~/1904632.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base64-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMTctc3VzdGFpbmFiaWxpdHk~/1904630.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base48-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMTYtYmFy/1904626.html', 'http://www.voafanti.com/gate/big5/www.voachinese.com/content/-ifbase4-base56-JWU1JWFkJWE2JWU0JWI4JWFhJWU4JWFmJThkLTIwMTUtc2tlbGV0b24~/1899500.html'] #撰寫迴圈將urls中的每一個連結的文字內容個別存入一個文字檔 #文字檔檔名以連結的檔名為檔名, 附檔名則將html改成txt. #例如urls[0]的檔名為1943689, 故存成的文字檔必須是1943689.txt #以下示範程式可以顯示 1943689的文字內容, 但你要寫迴圈來批次 #讀取與寫入內容. 完成後請email給我程式檔及所擷取的文字檔.(可以用zip壓縮) 想請教怎麼把文字檔檔名儲存成每個網址後面的數字 迴圈的部分也不太懂到底該怎麼寫.... 各位可以救救我嗎 ------------------------------- 我這樣寫可以嗎 for name in urls: print(name[-12:-5]) 然後要怎麼將每個網址的文字內容都另存一個文字檔 -- ※ 發信站: 批踢踢實業坊(ptt.cc), 來自: 118.232.162.101 ※ 文章網址: http://www.ptt.cc/bbs/Python/M.1404116894.A.FEC.html ※ 編輯: jk808631 (118.232.162.101), 06/30/2014 16:43:21

06/30 16:43, , 1F
已經是補救機會還只能問人, 我看還是明年再來對你比較好
06/30 16:43, 1F

06/30 21:47, , 2F
google "python 另存一個文字檔" 很困難嗎?
06/30 21:47, 2F

06/30 21:52, , 3F
for name in urls: ... 那樣可以,只要你假定網址都那個規格
06/30 21:52, 3F

07/01 17:00, , 4F
你迴圈用open應該就能直接建立新的文件 然後再寫進去
07/01 17:00, 4F

07/01 17:02, , 5F
比如 f = open('hello.txt','w') 會建立一個叫 hello.txt 的檔
07/01 17:02, 5F

07/01 17:03, , 6F
菇狗一下 "文件讀寫 python" 應該會有很多資料
07/01 17:03, 6F
文章代碼(AID): #1JiH-U_i (Python)