2017-11-09 2 views
0

정규식을 사용하여 이 문자열을 분할하고, 분할된 문자열을 새 줄에 쓰려면 어떻게 해야 합니까? (문자열 분할 및 새 줄에 쓰기)

[[["RE 3364",1140509724,714348396,"84/149614/18/19/80","6",8,"","Eberswalde Hbf",[[-35,-27,-1064,"4","82",null,null],[711,639,2823,"5","81",null,null],[1151,1043,5155,"5","83",null,null],[2383,2230,11893,"5","83",null,null],[4019,3731,20530,"5","82",null,null],[5637,5232,29168,"5","83",null,null],[7273,6733,37806,"","0",null,null]],"Berlin-Lichtenberg","8010036","Bernau(b Berlin)","8013470","09.11.17","-1",null,"1:37","1:18",null,null,"4",null,null],["RB 18642",354496333,422441800,"84/147727/18/19/80","14",8,"","Nauen",[[6329,-1007,-4763,"14","66",null,null],[4962,-791,403,"14","66",null,null],[3686,-594,5192,"14","66",null,null],[3227,-522,6914,"14","66",null,null],[1942,-324,11757,"14","66",null,null],[872,-144,15793,"14","66",null,null],[-1932,314,26394,"20","62",null,null],[-2076,224,27147,"13","126",null,null],[-3425,593,30000,"","0",null,null],[-3425,593,31389,"14","121",null,null],[-4099,710,32779,"14","121",null,null],[-6939,1168,38664,"","0",null,null]],"Berlin-Spandau","8010404","Albrechtshof","8080040","09.11.17","-1",null,"1:32","1:29",null,null,"4",null,null],["01:29:30",2,35000,5000,"guiV=4.1.3&","20171109","69869174432dcbb13e038c953c9a7cc9","09.11.17","11:06:30",0]],[]] 

(BeautifulSoup에서 얻은) 위와 같은 문자열을 \[\"+[A-Z] 같은 정규식(완전히 정확하지는 않음)으로 분할하고, 분할된 문자열을 각각 새 줄에 쓰려면 어떻게 해야 합니까?

[[["RE 3364",1140509724,714348396,"84/149614/18/19/80","6",8,"","Eberswalde Hbf",[[-35,-27,-1064,"4","82",null,null],[711,639,2823,"5","81",null,null],[1151,1043,5155,"5","83",null,null],[2383,2230,11893,"5","83",null,null],[4019,3731,20530,"5","82",null,null],[5637,5232,29168,"5","83",null,null],[7273,6733,37806,"","0",null,null]],"Berlin-Lichtenberg","8010036","Bernau(b Berlin)","8013470","09.11.17","-1",null,"1:37","1:18",null,null,"4",null,null] 

["RB 18642",354496333,422441800,"84/147727/18/19/80","14",8,"","Nauen",[[6329,-1007,-4763,"14","66",null,null],[4962,-791,403,"14","66",null,null],[3686,-594,5192,"14","66",null,null],[3227,-522,6914,"14","66",null,null],[1942,-324,11757,"14","66",null,null],[872,-144,15793,"14","66",null,null],[-1932,314,26394,"20","62",null,null],[-2076,224,27147,"13","126",null,null],[-3425,593,30000,"","0",null,null],[-3425,593,31389,"14","121",null,null],[-4099,710,32779,"14","121",null,null],[-6939,1168,38664,"","0",null,null]],"Berlin-Spandau","8010404","Albrechtshof","8080040","09.11.17","-1",null,"1:32","1:29",null,null,"4",null,null],["01:29:30",2,35000,5000,"guiV=4.1.3&","20171109","69869174432dcbb13e038c953c9a7cc9","09.11.17","11:06:30",0]],[]] 

그런 다음 이 정규식과 re.split을 사용하여 각 줄을 새 행에 쓰려고 합니다.

+0

문자열 또는 목록 목록입니까? –

+0

문자열입니다. 문자열 안의 '[]'는 리스트와 관련이 없습니다. –

답변

0

json.loads()를 사용할 수 있습니다:

[u'RE 3364', 1140509724, 714348396, u'84/149614/18/19/80', u'6', 8, u'', u'Eberswalde Hbf', [[-35, -27, -1064, u'4', u'82', None, None], [711, 639, 2823, u'5', u'81', None, None], [1151, 1043, 5155, u'5', u'83', None, None], [2383, 2230, 11893, u'5', u'83', None, None], [4019, 3731, 20530, u'5', u'82', None, None], [5637, 5232, 29168, u'5', u'83', None, None], [7273, 6733, 37806, u'', u'0', None, None]], u'Berlin-Lichtenberg', u'8010036', u'Bernau(b Berlin)', u'8013470', u'09.11.17', u'-1', None, u'1:37', u'1:18', None, None, u'4', None, None] 
:

import json 

# Raw payload copied from the question. This excerpt opens with three
# brackets but closes only the innermost record, so it is not valid JSON
# as a whole.
text = """[[["RE 3364",1140509724,714348396,"84/149614/18/19/80","6",8,"","Eberswalde Hbf",[[-35,-27,-1064,"4","82",null,null],[711,639,2823,"5","81",null,null],[1151,1043,5155,"5","83",null,null],[2383,2230,11893,"5","83",null,null],[4019,3731,20530,"5","82",null,null],[5637,5232,29168,"5","83",null,null],[7273,6733,37806,"","0",null,null]],"Berlin-Lichtenberg","8010036","Bernau(b Berlin)","8013470","09.11.17","-1",null,"1:37","1:18",null,null,"4",null,null]"""
# Drop the two unmatched leading brackets so that what remains is a single
# balanced JSON array, then let the JSON parser do the work.
data = json.loads(text[2:])
# Parenthesised print: same output on Python 2 for a single argument, and
# valid syntax on Python 3 (the bare `print data` statement is not).
print(data)

당신에게 다음과 같은 출력을주기


Mark Amery's 대답에서 도움을

['RE 3364', 1140509724, 714348396, '84/149614/18/19/80', '6', 8, '', 'Eberswalde Hbf', [[-35, -27, -1064, '4', '82', None, None], [711, 639, 2823, '5', '81', None, None], [1151, 1043, 5155, '5', '83', None, None], [2383, 2230, 11893, '5', '83', None, None], [4019, 3731, 20530, '5', '82', None, None], [5637, 5232, 29168, '5', '83', None, None], [7273, 6733, 37806, '', '0', None, None]], 'Berlin-Lichtenberg', '8010036', 'Bernau(b Berlin)', '8013470', '09.11.17', '-1', None, '1:37', '1:18', None, None, '4', None, None] 

: 당신을주기

def to_strings(nested):
    """Recursively convert text inside a nested structure to native ``str``.

    Dictionaries (both keys and values) and lists are walked recursively.
    On Python 2 every ``unicode`` value is encoded to a UTF-8 byte ``str``,
    which removes the ``u'...'`` prefix from the printed structure. On
    Python 3 there is no separate ``unicode`` type, so text is returned
    unchanged. Any other value (numbers, ``None``, ...) is returned as is.

    Args:
        nested: A dict, list, text value or any other object.

    Returns:
        A new dict/list with the same shape as ``nested`` whose text values
        are native ``str`` objects, or ``nested`` itself when it is neither
        a dict, a list nor Python 2 ``unicode``.
    """
    try:
        text_type = unicode  # Python 2 only: the separate text type
    except NameError:
        text_type = None  # Python 3: str is already the text type

    if isinstance(nested, dict):
        # .items() exists on both Python 2 and 3 (.iteritems() does not).
        return {to_strings(key): to_strings(value)
                for key, value in nested.items()}
    if isinstance(nested, list):
        return [to_strings(element) for element in nested]
    if text_type is not None and isinstance(nested, text_type):
        return nested.encode('utf-8')
    return nested

# Show the converted structure. The parenthesised form prints the same on
# Python 2 and is also valid Python 3 syntax.
print(to_strings(data))

: (210)

반환된 구조를 유니코드에서 일반 문자열로 변환하려면 다음과 같은 함수를 사용할 수 있습니다.

+0

데이터에 'u'가 붙지 않게 하는 다른 방법은 없습니까? –

+0

'u'는 문자열이 파이썬에서 유니 코드 형식으로 저장되었음을 나타냅니다. 비 유니 코드 문자열로 변환 될 수 있지만 잠재적으로 특정 문자가 손실 될 수 있습니다. –

0

아무 것도 나누지 말고 정규식을 작성하지 마십시오. 문자열 일 수도 있지만 JSON처럼 보입니다.

그러니 json.loads로 읽으십시오. 처음 두 개의 대괄호를 건너뛰려는 경우에는 다음과 같이 합니다:

>>> import json 
>>> json.loads('[[["RE 3364",1140509724,714348396,"84/149614/18/19/80","6",8,"","Eberswalde Hbf",[[-35,-27,-1064,"4","82",null,null],[711,639,2823,"5","81",null,null],[1151,1043,5155,"5","83",null,null],[2383,2230,11893,"5","83",null,null],[4019,3731,20530,"5","82",null,null],[5637,5232,29168,"5","83",null,null],[7273,6733,37806,"","0",null,null]],"Berlin-Lichtenberg","8010036","Bernau(b Berlin)","8013470","09.11.17","-1",null,"1:37","1:18",null,null,"4",null,null],["RB 18642",354496333,422441800,"84/147727/18/19/80","14",8,"","Nauen",[[6329,-1007,-4763,"14","66",null,null],[4962,-791,403,"14","66",null,null],[3686,-594,5192,"14","66",null,null],[3227,-522,6914,"14","66",null,null],[1942,-324,11757,"14","66",null,null],[872,-144,15793,"14","66",null,null],[-1932,314,26394,"20","62",null,null],[-2076,224,27147,"13","126",null,null],[-3425,593,30000,"","0",null,null],[-3425,593,31389,"14","121",null,null],[-4099,710,32779,"14","121",null,null],[-6939,1168,38664,"","0",null,null]],"Berlin-Spandau","8010404","Albrechtshof","8080040","09.11.17","-1",null,"1:32","1:29",null,null,"4",null,null],["01:29:30",2,35000,5000,"guiV=4.1.3&","20171109","69869174432dcbb13e038c953c9a7cc9","09.11.17","11:06:30",0]],[]]') 
[[['RE 3364', 1140509724, 714348396, '84/149614/18/19/80', '6', 8, '', 'Eberswalde Hbf', [[-35, -27, -1064, '4', '82', None, None], [711, 639, 2823, '5', '81', None, None], [1151, 1043, 5155, '5', '83', None, None], [2383, 2230, 11893, '5', '83', None, None], [4019, 3731, 20530, '5', '82', None, None], [5637, 5232, 29168, '5', '83', None, None], [7273, 6733, 37806, '', '0', None, None]], 'Berlin-Lichtenberg', '8010036', 'Bernau(b Berlin)', '8013470', '09.11.17', '-1', None, '1:37', '1:18', None, None, '4', None, None], ['RB 18642', 354496333, 422441800, '84/147727/18/19/80', '14', 8, '', 'Nauen', [[6329, -1007, -4763, '14', '66', None, None], [4962, -791, 403, '14', '66', None, None], [3686, -594, 5192, '14', '66', None, None], [3227, -522, 6914, '14', '66', None, None], [1942, -324, 11757, '14', '66', None, None], [872, -144, 15793, '14', '66', None, None], [-1932, 314, 26394, '20', '62', None, None], [-2076, 224, 27147, '13', '126', None, None], [-3425, 593, 30000, '', '0', None, None], [-3425, 593, 31389, '14', '121', None, None], [-4099, 710, 32779, '14', '121', None, None], [-6939, 1168, 38664, '', '0', None, None]], 'Berlin-Spandau', '8010404', 'Albrechtshof', '8080040', '09.11.17', '-1', None, '1:32', '1:29', None, None, '4', None, None], ['01:29:30', 2, 35000, 5000, 'guiV=4.1.3&', '20171109', '69869174432dcbb13e038c953c9a7cc9', '09.11.17', '11:06:30', 0]], []]