[問題] Django Postgresql 寫入資料停滯

看板Python作者 (BLG_Eric)時間9年前 (2016/10/04 23:37), 編輯推噓2(201)
留言3則, 3人參與, 最新討論串1/1
各位大神好 最近小弟在寫一個上傳檔案模組 功能是把上傳上來的csv xls xlsx...等檔案轉成postgresql table 每次測試小檔案的時候都沒甚麼問題 大約幾秒鐘就完成上傳並存入postgresql 但是最近試著上傳了一個100多mb(13萬多筆)的xlsx資料時網頁會卡住 看了之後發現檔案上傳很快 幾秒鐘就好了 可是後續的匯入postgresql卻遲遲沒反應 卡住了 資料庫也沒有任何新增的table(本人不才 沒做timeout設定) 希望各位能幫我看一下程式碼 感謝 以下是upload.py的程式碼(應該是沒什麼問題 還是附上): # -*- coding: utf-8 -*- from django.shortcuts import render_to_response from django.template import RequestContext from django.http import HttpResponseRedirect from django.contrib import messages from django.conf import settings from django.db import connection from django.views.decorators.csrf import csrf_exempt import csv import sys import os import random import psycopg2 from .models import Document,Folder from .forms import DocumentForm from data.write import * def upload(request,fkey): # upload multiple files and create new table into postgresql tct = Document.objects.count() # Table count fct = Folder.objects.count() # Folder count f = DocumentForm() folder = Folder.objects.filter(groupkey=fkey).values_list('grouptitle', flat=True).first() if request.method == 'POST': f = DocumentForm(request.POST, request.FILES) if f.is_valid(): file_count = 0 #Count how many files in this upload tc = 0 # Count title split for u in request.FILES.getlist('file'): file_count = file_count+1 check_title = request.POST['title'] # get titles (one or multiple) info = request.POST['details'] tag = request.POST['tags'] oauth = request.POST['public'] source = request.POST['source'] print(file_count) print(len(check_title.split(","))) if len(check_title.split(",")) != file_count: # check if title amount and file amount are the same messages.warning(request,"Title Format Error!!") else: title = check_title.split(",") for up in request.FILES.getlist('file'): # multiple file uplaod loop samefilercheck = Document.objects.filter(title=title[tc],folder=fkey).count() #set a variable to check if same file exists if samefilercheck > 0: messages.warning(request,"Same File Already Exists!!") return HttpResponseRedirect(reverse('data.views.upload',kwargs={'fkey':fkey})) else: pname = settings.MEDIAPATH+'\%s\\' % folder+up.name.replace(" ","_").replace("(","").replace(")","") #up.name = up.name.replace('','_') filekey = ''.join([random.SystemRandom().choice('abcdefghijklmnopqrstuvwxyz0123456789') for i in range(25)]) n,ext = os.path.splitext(up.name) newdoc = Document(title=title[tc],filekey=filekey,path= u'%s' % pname,filename= u'%s' % up.name,tablename= u'%s' % n,filetype=ext,info=info,tag=tag,oauth=oauth,source=source,folder=fkey,version='1.0') newdoc.file=handle_uploaded_file(up,pname) newdoc.save() tc = tc+1 messages.warning(request,"Upload Success!!!") tc =0 for filename in request.FILES.getlist('file'): # call different function to write file to table documents = Document.objects.get(filename=filename) # judge the filetype by its filetype if documents.filetype =='.csv': csvwritein(documents) if documents.filetype =='.xls': xlswritein(documents) if documents.filetype =='.xlsx': xlsxwritein(documents) if documents.filetype =='.json': jsonwritein(documents) url = '/datasets/%s' % (fkey) return HttpResponseRedirect(url) else: # if fail,return to empty form messages.warning(request,"Upload Fail!!!") f = DocumentForm() return render_to_response('upload.html',RequestContext(request,locals())) def handle_uploaded_file(f,pn): with open(pn, 'wb+') as destination: for chunk in f.chunks(): destination.write(chunk) return pn 以下是寫入postgresql的write.py程式碼(太長所以只擷取xlsx的部分): from django.shortcuts import render_to_response from django.template import RequestContext from django.http import HttpResponseRedirect from django.core.urlresolvers import reverse from django.contrib import messages from django.conf import settings from django.db import connection from django.views.decorators.csrf import csrf_exempt import re import sys import random import psycopg2 import xlrd import openpyxl as pyxl from .models import Document from .forms import DocumentForm def xlsxwritein(doc): # write into database for file type xlsx xlsxt = 0 # check if action is first time conn = psycopg2.connect("dbname='apidb' user='postgres' host='localhost' password='eric40502' port='8000'") maincur = conn.cursor() readcur = conn.cursor() writecur = conn.cursor() readcur.execute("select exists(select * from information_schema.tables where table_name='%s')" % doc.tablename) # check if same file is already in database check = readcur.fetchone()[0] row_id = 1 # used for following id field wb = pyxl.load_workbook(doc.path) sheetnames = wb.get_sheet_names() ws = wb.get_sheet_by_name(sheetnames[0]) print(ws.max_row) for rown in range(ws.max_row): if xlsxt == 0: if check == True: #if table exists,rename tablename in postgresql and change the settings checktrue(doc) else: tablename = '"%s"' % doc.tablename field = [ws.cell(row=1,column=col_index).value for col_index in range(1,ws.max_column+1)] maincur.execute("CREATE TABLE %s (id SERIAL PRIMARY KEY);" % tablename) for coln in range(ws.max_column): field[coln] = '"%s"' % field[coln] # change number to string if field[coln] == 'ID': field[coln] = 'original_id' print(field) print(field[coln]) maincur.execute("ALTER TABLE %s ADD %s CITEXT;" % (tablename,field[coln])) xlsxt = xlsxt+1 elif xlsxt > 0 and check == False: # not first time(insert data) and check no same file exists for coln in range(ws.max_column): if coln == 0: writecur.execute("INSERT INTO %s (%s) VALUES ('%s');" %(tablename,field[coln],str(ws.cell(row=rown,column=coln+1).value))) else: writecur.execute("UPDATE %s SET %s = '%s' WHERE id = '%d';" %(tablename,field[coln],str(ws.cell(row=rown+1,column=coln+1).value),row_id)) xlsxt = xlsxt+1 row_id = row_id+1 else: break conn.commit() maincur.close() readcur.close() writecur.close() conn.close() xlsxt = 0 -- ※ 發信站: 批踢踢實業坊(ptt.cc), 來自: 123.192.211.60 ※ 文章網址: https://www.ptt.cc/bbs/Python/M.1475595478.A.217.html

10/05 05:47, , 1F
正確方式要用 celery 方式丟給背景處理比較不會有問題
10/05 05:47, 1F

10/05 13:09, , 2F
上傳下載 和 寫入 速度差很多吧
10/05 13:09, 2F

10/05 15:59, , 3F
感謝各位回答 但是想問為什麼會卡住? 是因為CPU?
10/05 15:59, 3F
文章代碼(AID): #1NyypM8N (Python)
文章代碼(AID): #1NyypM8N (Python)