
Python Memory Usage

So I have code that takes a series of data files, stitches them together, and plots the result.

for paths,dirs,files in os.walk(start_path): 
  for d in dirs: 
    path = start_path + changedir + d 
    pathpart = d 

    os.chdir(path) 
    for file in glob.glob("*-0.dat"): 
     tempname = file.split("-") 
     fileName1 = str(tempname[0] + "-" + tempname[1]+ "-") 
     gc.collect() 

     Chan2 = [] 
     Chan1 = [] 
     temp_1 = [] 
     temp_2 = [] 
     temp_3 = [] 
     Data_Sets1 = [] 
     Data_Sets2 = [] 
     Headers = [] 


     for fileNumber in range(0,45): 
      fileName = fileName1 + str(fileNumber) + fileName3 
      header, data1, data2 = u.unpackFile(path,fileName) 

      if header is None: 
       logging.warning("corrupted file found at " + fileName) 
       Data_Sets1.append(temp_1) 
       Data_Sets2.append(temp_2) 
       Headers.append(temp_3) 
       temp_1 = [] 
       temp_2 = [] 
       temp_3 = [] 
      else: 
       logging.info(fileName + " is good!") 
       temp_3.append(header) 
       for i in range(0,10000): 
        temp_1.append(data1[i]) 
        temp_2.append(data2[i]) 

     Data_Sets1.append(temp_1) 
     Data_Sets2.append(temp_2) 
     Headers.append(temp_3) 
     temp_1 = [] 
     temp_2 = [] 
     temp_3 = [] 

     del temp_1 
     del temp_2 
     del temp_3 

     lengths = [] 
     for i in range(len(Data_Sets1)): 
      lengths.append(len(Data_Sets1[i])) 
     index = lengths.index(max(lengths)) 

     Chan1 = Data_Sets1[index] 
     Chan2 = Data_Sets2[index] 
     Start_Header = Headers[index] 
     if len(Chan1) == 0 or len(Chan2) == 0: 
      continue 
     try: 
      Date = Start_Header[index][0] 
      Time = Start_Header[index][1] 
     except IndexError: 
      logging.critical("file " + fileName + " is unusable") 
      continue 
     """ 
     Clostest_Power = int(np.log(len(Chan1))/np.log(2)) 
     Length = 2 ** Clostest_Power 
     logging.debug("Length of the file is " + str(Length)) 
     Chan1 = Chan1[0:Length] 
     Chan2 = Chan2[0:Length] 
     """ 
     logging.debug("Length of channels is " + str(len(Chan1))) 

     window = np.hanning(Window_Width) 

     t= s.Time_Array(len(Chan1),Sample_Rate) 


     window2 = np.hanning(len(Chan1)) 

     Noise_Frequincies = [] 
     for i in range(1,125): 
      Noise_Frequincies.append(60.0*float(i)) 
     Noise_Frequincies.append(180.0) 

     filter1 = s.Noise_Reduction(Sample_Rate,Noise_Frequincies,Chan1) 
     filter2 = s.Noise_Reduction(Sample_Rate,Noise_Frequincies,Chan2) 

     logging.info("Starting the plots") 


     fig1, (ax1, ax2) = plt.subplots(nrows=2) 


     spec1, freqs1, time1 = mlab.specgram(filter1, NFFT=Window_Width, Fs=Sample_Rate, window=window, noverlap=Over_Lap) 


     im1 = ax1.imshow(spec1, cmap=cm.get_cmap("rainbow"), norm=colors.LogNorm(), origin='lower', 
      extent=[t[0], t[len(t)-1], freqs1.min(), 8000],aspect='auto',vmin=1e-5,vmax=1e5) 

     ax1.set_title(str(Date) + "-" + str(Time) + " Channel 1") 
     ax1.set_ylabel("Frequency Hz") 



     spec2, freqs2, time2 = mlab.specgram(filter2, NFFT=Window_Width, Fs=Sample_Rate, window=window, noverlap=Over_Lap) 

     im2 = ax2.imshow(spec2, cmap=cm.get_cmap("rainbow"), norm=colors.LogNorm(), origin='lower', 
      extent=[t[0], t[len(t)-1], freqs2.min(), 8000],aspect='auto',vmin=1e-5,vmax=1e5) 

     cax1, kw1 = matplotlib.colorbar.make_axes(ax1) 
     colorbar(im1,cax=cax1,**kw1) 
     cax2, kw2 = matplotlib.colorbar.make_axes(ax2) 
     colorbar(im2,cax=cax2,**kw2) 

     ax2.set_title(str(Date) + "-" + str(Time) + " Channel 2") 
     ax2.set_ylabel("Frequency Hz") 


     save1 = save_path+pathpart + changedir+specgram_path 
     if not os.path.exists(save1): 
      os.makedirs(save1) 
     savefig(os.path.join(save1,str(Date) + "-" + str(Time) + "-Power_Spec1.png")) 

     logging.info("Spectrogram path is " + save1) 




     fig2, (ax4,ax6) = plt.subplots(nrows=2) 
     final_fft = [] 
     fft = s.Full_FFT(filter1,window2) 
     for i in range(0,len(fft)): 
      final_fft.append(np.absolute(fft[i])) 



     freqs = [] 
     for i in range(0,len(final_fft)): 
      freqs.append(i*Sample_Rate/float(len(final_fft))) 

     ax4.plot(freqs, final_fft) 

     new_fft = [] 
     new = s.Full_FFT(filter2,window2) 
     for i in range(0,len(new)): 
      new_fft.append(np.absolute(new[i])) 
     ax6.plot(freqs,new_fft) 


     save2 = save_path+pathpart+ changedir + freq_path 
     logging.info("Frequency path is " + save2) 
     if not os.path.exists(save2): 
      os.makedirs(save2) 

     savefig(os.path.join(save2,str(Date) + "-" + str(Time) + "-Freq.png")) 

     ax4.set_title(str(Date) + "-" + str(Time) +" Channel 1") 
     ax4.set_xlabel("Bins") 
     ax4.set_ylabel("Power") 


     ax6.set_title(str(Date) + "-" + str(Time) + " Channel 2") 
     ax6.set_xlabel("Bins") 
     ax6.set_ylabel("Power") 

     fig3, (ax7, ax9) = plt.subplots(nrows=2) 
     ax7.plot(t,filter1) 
     ax9.plot(t,filter2) 

     save3 = save_path+pathpart + changedir +signal_path 
     if not os.path.exists(save3): 
      os.makedirs(save3) 
     savefig(os.path.join(save3,str(Date) + "-" + str(Time) + "-Signal.png")) 

     logging.info("Signal path is " + save3) 

     fig1.clf() 
     fig2.clf() 
     fig3.clf() 
     matplotlib.pyplot.clf() 
     close('all') 
     gc.collect() 

And here is the unpacking code:

def unpackFile(path,fileName): 
    header = "" 
    startKey = "" 
    dataList = [] 
    chan1 = [] 
    chan2 = [] 
    found = False 
    logging.info("Starting file " + fileName) 
    if not os.path.isfile(os.path.join(path,fileName)): 
        logging.warning("could not find " + fileName) 
        return None, None, None 
    try: 
        contents = open(os.path.join(path,fileName),'rb') 
    except IOError: 
        logging.warning(fileName + " not found") 
        return None, None, None 

    # Looks for the closing bracket in the header of the file 
    filesize = os.path.getsize(os.path.join(path,fileName)) 
    if filesize < 1000: 
        logging.warning(fileName + " is below 1000 bytes") 
        contents.close() 
        contents = None 
        return None, None, None 
    while not found: 
        char = contents.read(1) 
        if char == "": 
            # Hit end of file without finding the closing bracket; startKey stays empty 
            break 
        #print char 
        header = header + char 
        if char == "}": 
            # Once the close bracket is found, the next 10 characters should be the start key 
            startKey = contents.read(10) 
            #header = header + startKey 
            #print("found the }") 
            found = True 
    if startKey == "Data_Start": 
        logging.info("Found start key for file " + fileName) 
    else: 
        logging.warning("No start key found, " + fileName + " is corrupted") 
        contents.close() 
        contents = None 
        return None, None, None 
    # Looks for the end key in the file 
    try: 
        logging.debug("Reading the data") 
        data = contents.read(40000) 
        #endKey = data[len(data)-10:len(data)] 
        endKey = contents.read() 
    except IOError: 
        logging.warning("IO error trying to read the end key") 
        endKey = "" 
        contents.close() 
        contents = None 
        return None, None, None 

    if endKey == "Data_Stop ": 
        logging.debug("Found end key") 
    else: 
        logging.debug("No end key found in " + fileName) 
    # Unpacks the data from binary into signed ints 
    for i in range(0,len(data),2): 
        value = data[i:i+2] 
        if len(value) == 2: 
            number = struct.unpack('>h',data[i:i+2]) 
            #print number 
            dataList.append(number[0]) 
        else: 
            break 
    logging.debug("total points found is " + str(len(dataList))) 
    # Splits the data into two channels 
    for j in range(0,len(dataList)): 
        if j % 2 == 0: 
            chan2.append(dataList[j]) 
            #if dataList[j] != 0: 
            #    print("chan2 has a non 0 " + str(j)) 
        else: 
            chan1.append(dataList[j]) 
    # Checks that both channels contain 10000 data points; if not, the file is corrupted 
    if len(chan2) != 10000: 
        logging.warning("Channel 2 did not contain the right number of data points, " + fileName + " is corrupted") 
        contents.close() 
        contents = None 
        return None, None, None 
    if len(chan1) != 10000: 
        logging.warning("Channel 1 did not contain the right number of data points, " + fileName + " is corrupted") 
        contents.close() 
        contents = None 
        return None, None, None 
    contents.close() 
    contents = None 

    header = header[1:len(header)-1] 
    header_parts = header.split(',') 
    return header_parts,chan1,chan2 

There is a memory leak in here somewhere, and I don't know where it is. I've posted most of my code to keep this readable, and I can add more if needed. The code walks through the directories, picks out the data sets, and plots them. After a few minutes it eats several GB of RAM. Any tips for reducing that?
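One way to narrow this down (a minimal sketch, not part of the script above; log_memory is a hypothetical helper and the resource module is Unix-only) is to log the process's peak memory once per pass through the glob loop and see which step the growth comes from:

import gc 
import resource  # standard library, Unix only; ru_maxrss is in kilobytes on Linux 

def log_memory(tag): 
    # Peak resident set size of this process so far, plus the number of objects 
    # the garbage collector is tracking, printed once per pass through the loop. 
    peak_kb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss 
    print("%s: peak RSS %d kB, %d tracked objects" % (tag, peak_kb, len(gc.get_objects()))) 

# e.g. call log_memory(fileName1) at the end of the glob loop, after close('all') 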


Sometimes the exercise of boiling your code down to a [minimal, complete, tested and readable] example (http://stackoverflow.com/help/mcve) lets you find the answer yourself. How small can you make this code while still demonstrating the problem? – mhlester


If I strip out the directory walking, no problems show up. My issue isn't a fatal error; for some reason the memory just isn't released on each loop. –

Answers


In your case it may be best to leave the cleanup to the operating system: run the memory-leaking code in a separate process.

from multiprocessing import Process, Queue 

def memory_leaking_code(arg1, q): 
    """Your memory leaking code goes here""" 
    print(arg1) 
    q.put('data from memory_leaking_code()') 

def main(): 
    q = Queue() 
    p = Process(target=memory_leaking_code, args=('data to memory_leaking_code()', q)) 
    p.start() 
    print(q.get()) 
    if p.is_alive(): 
     p.terminate() 

if __name__ == '__main__':  # guard so child processes can re-import this module safely (required on Windows) 
    main() 
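The point of the separate process is that when the child exits, the operating system reclaims all of its memory at once, so anything the loop leaks (figures, cached arrays, and so on) only lives for the duration of that one call. The Queue is only needed if the child has to hand results back to the parent; a worker that just writes its PNG files to disk can skip it.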

I'm using a lot of functions that I can't avoid. I tried looking into that in the past and I couldn't find a clear answer on how to pickle them. –


You don't need to pickle anything. Run the 'unpackFile()' function via 'multiprocessing.Process()' and let the operating system free whatever memory that function may be leaking or holding on to. –
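A minimal sketch of what that suggestion looks like (assuming unpackFile is importable from the module u used in the code above; the header list and the two channel lists it returns are plain Python objects, so they pass through the Queue without any custom pickling):

from multiprocessing import Process, Queue 

import u  # assumed: the module that provides unpackFile(path, fileName) 

def unpack_worker(path, fileName, q): 
    # Runs in a child process, so whatever unpackFile leaks is freed by the OS when it exits. 
    q.put(u.unpackFile(path, fileName)) 

def unpack_in_subprocess(path, fileName): 
    q = Queue() 
    p = Process(target=unpack_worker, args=(path, fileName, q)) 
    p.start() 
    header, data1, data2 = q.get()  # read the result before joining to avoid blocking 
    p.join() 
    return header, data1, data2 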


I get a "can't pickle function" error when I try to run the unpacking that way. –