2016-08-18 5 views
0

CSV에서 행을 병합하는 방법. 샘플 데이터:

 
id, Name, mail, data1, data2, data3 
1, Name1, [email protected], abc, 14, de 
1, Name1, [email protected], fgh, 25, kl 
1, Name1, [email protected], mno, 38, pq 
2, Name2, [email protected], abc, 14, d 

첫 번째 필드를 기준으로 고유한 행만 골라 중복을 제거하는 스크립트를 작성했습니다. 그러나 data1~data3 필드의 데이터는 행마다 서로 다르므로, 다음과 같이 하나로 합친 결과를 만들어야 합니다:

1, mail1 com, "abc, 14, de, fgh, 25, kl, mno, 38, pq "

배열의 행을 병합하려면 어떻게 해야 합니까? 제 코드는 작동하지 않습니다:

import sys 
import csv 

# Dialects for the supported delimiters; pass the other name to the
# reader/writer in main() to switch from ',' to ';'.
csv.register_dialect('dlm', delimiter=',')
csv.register_dialect('dmt', delimiter=';')


def merge_rows(rows):
    """Collapse rows that share the same first field into one row each.

    For every distinct value of ``row[0]`` the result holds one row made of
    the leading columns of the first occurrence (``row[:-4]``) followed by a
    single field that joins, with ``', '``, the data columns
    (``row[-4:-1]``) of every occurrence in input order.

    NOTE(review): the slices assume three data columns followed by one
    trailing column (e.g. an empty field after a trailing delimiter) --
    confirm against the real input layout.

    Args:
        rows: iterable of lists of strings, e.g. a ``csv.reader``.

    Returns:
        A list of merged rows, ordered by first appearance of each id.
    """
    # id -> (leading columns, accumulated data values). A dict gives O(1)
    # lookups and keeps first-seen order, and it also merges duplicates
    # that are not adjacent in the input.
    merged = {}
    for row in rows:
        if not row:
            # csv.reader yields [] for blank lines; there is no id to use.
            continue
        key = row[0]
        if key not in merged:
            merged[key] = (row[:-4], [])
        # Accumulate instead of overwriting, so no occurrence is lost.
        merged[key][1].extend(row[-4:-1])
    return [head + [', '.join(data)] for head, data in merged.values()]


def main(in_fln):
    """Merge duplicate ids of the CSV file *in_fln* into ``out<name>``.

    Files whose name does not end in ``csv`` are ignored. The output is
    written next to the input, with ``out`` prefixed to the file name.
    """
    import os

    if in_fln[-3:] != "csv":
        return

    # Prefix only the file name so that paths containing a directory work.
    folder, name = os.path.split(in_fln)
    out_fln = os.path.join(folder, 'out' + name)

    # Read everything first: the output is only created after a fully
    # successful read, and both handles are closed deterministically.
    with open(in_fln, 'r', newline='') as inputf:
        merged = merge_rows(csv.reader(inputf, dialect='dlm'))
    with open(out_fln, 'w', newline='') as outputf:
        csv.writer(outputf, dialect='dlm').writerows(merged)
    print("All done")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: {} INPUT.csv".format(sys.argv[0]))
    main(sys.argv[1])

답변

0

다음 코드로 해결할 수 있을 것입니다.

from collections import defaultdict 
import pandas as pd 


# Rebuild the sample table from the question.
df = pd.DataFrame(
    data=[
        [1, 'Name1', '[email protected]', 'abc', 14, 'de'],
        [1, 'Name1', '[email protected]', 'fgh', 25, 'kl'],
        [1, 'Name1', '[email protected]', 'mno', 38, 'pq'],
        [2, 'Name2', '[email protected]', 'abc', 14, 'd'],
    ],
    columns=['id', 'Name', 'mail', 'data1', 'data2', 'data3'],
)

# Map each (id, Name, mail) identity to the list of its
# (data1, data2, data3) triples, in row order.
res = defaultdict(list)
for record in df.itertuples(index=False, name=None):
    # The first three columns identify the entry; the remaining three are
    # the payload collected for it.
    res[record[:3]].append(record[3:])

for identity in res:
    print(identity, res[identity])

# Output (dicts keep insertion order on Python 3.7+, so first-seen first):
# (1, 'Name1', '[email protected]') [('abc', 14, 'de'), ('fgh', 25, 'kl'), ('mno', 38, 'pq')]
# (2, 'Name2', '[email protected]') [('abc', 14, 'd')]
0

제가 직접 작성한 버전이 더 낫고, 원하는 결과에 매우 가깝습니다. 이제 모두 잘 작동합니다!

#!/usr/bin/env python3 
import csv, re 
import os, sys 
# Field delimiter used for both reading and writing; change it here.
dm = ','


def merge_rows(rows):
    """Collapse rows that share the same first field into one row each.

    For every distinct value of ``row[0]`` the result holds one row made of
    the leading columns of the first occurrence (``row[:-4]``) followed by a
    single merged field. Each occurrence contributes the chunk
    ``row[-4] + ', ' + row[-3] + ', ' + row[-2]``; chunks are joined with
    ``','`` in input order.

    NOTE(review): the indices assume three data columns followed by one
    trailing column -- confirm against the real input layout.

    Args:
        rows: iterable of lists of strings, e.g. a ``csv.reader``.

    Returns:
        A list of merged rows, ordered by first appearance of each id.

    Raises:
        IndexError: if a non-empty row has fewer than four columns.
    """
    # id -> (leading columns, data chunks). Keying by id, rather than
    # appending to "the last output row", keeps the merge correct when
    # rows with the same id are not adjacent in the input.
    groups = {}
    for row in rows:
        if not row:
            # csv.reader yields [] for blank lines; there is no id to use.
            continue
        key = row[0]
        if key not in groups:
            groups[key] = (row[:-4], [])
        groups[key][1].append(row[-4] + ', ' + row[-3] + ', ' + row[-2])
    return [head + [','.join(chunks)] for head, chunks in groups.values()]


def main(in_fln):
    """Merge duplicate ids of the CSV file *in_fln* into ``out<name>``.

    The output is written next to the input, with ``out`` prefixed to the
    file name. Exits with an error message when *in_fln* does not end in
    ``csv``.
    """
    if in_fln[-3:] != "csv":
        # Report the problem instead of failing later on an undefined
        # output path.
        sys.exit("expected a .csv file, got: " + in_fln)

    # Prefix only the file name so that paths containing a directory work.
    folder, name = os.path.split(in_fln)
    out_fln = os.path.join(folder, 'out' + name)

    with open(in_fln, 'r', newline='') as src:
        merged = merge_rows(csv.reader(src, delimiter=dm, quotechar='"'))

    # Each merged row is written as one CSV record, with the merged data as
    # a single quoted field. No textual post-processing of the file is
    # needed, so quotes or delimiters inside fields cannot be corrupted.
    # The platform line ending is used as the row terminator.
    with open(out_fln, 'w', newline='') as dst:
        writer = csv.writer(dst, delimiter=dm, quotechar='"',
                            lineterminator=os.linesep)
        writer.writerows(merged)
    print("All done")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: {} INPUT.csv".format(sys.argv[0]))
    main(sys.argv[1])
관련 문제