2013-08-13 4 views
3

다음 JSON을 파싱하려고 하는데, JSON 객체 디코딩 오류가 발생합니다.

>>> string1 
u'{"content":{"search_highlight":{},"message_exchanged":{"messagesOnlyToViewee":true,"messagesOnlyToViewer":true},"Certifications":{"certsMpr":{},"empty":{}},"lix_treasury_callout":"B","network_overview":{"img_overview_locked":"http://s.c.lnkd.licdn.com/scds/common/u/img/pic/pic_network_overview_locked_178x276.png","lix_showDetail":"control"},"Projects":{"empty":{},"projectsMpr":{}},"lix_discovery_order":"control","Volunteering":{"volunteer":{},"empty":{}},"lix_treasury_upload":"B","connections":{},"view_tracking":{},"Badge":{"badges":{},"empty":{}},"Patents":{"patentsMpr":{},"empty":{}},"Publications":{"empty":{},"pubsMpr":{}},"Summary":{"summary":{"deferImg":true,"hasSummaryOrSpecialties":false,"visible":true,"showSummarySection":false,"associatedWith":{}},"empty":{}},"Notes":{},"frontierajaxform__text_plain__there_were":"There were one or more errors in your submission. Please correct the marked fields below.","ContactInfo":{"distance":{"distance":\\u002d1,"numberOfConnections":2},"contact_info":{"deferImg":true,"showTwitter":true,"visible":true}},"Following":{"follow_channels":{},"follow_school":{},"follow_people":{"count":0,"viewee":{"id":29841231},"influencerSeeMore":\\u002d7,"influencers":[]},"follow":{"i18n_following_section_label":"Following","i18n_currently_following":"Following","isCondensed":true,"companyFolloweeCount":1,"industryFolloweeCount":1,"i18n_unfollow":"Unfollow","industryFollowees":[{"link_industry":"/today/insurance?trk=prof\\u002dfollowing\\u002dindustry\\u002dicon","universalName":"insurance","id":42,"i18n_x_followers":"93,628 
followers","ind_follow":"/lite/follow?type=INDUSTRY&id=42&csrfToken=ajax%3A1584468784299534813&goback=%2Enpv_29841231_*1_*1_NAME*4SEARCH_AcEA_*1_en*4US_*1_*1_*1_123452511375704563981_2_7_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1","canonicalName":"Insurance","fmt_following_count":"93,628","ind_unfollow":"/lite/unfollow?type=INDUSTRY&id=42&csrfToken=ajax%3A1584468784299534813&goback=%2Enpv_29841231_*1_*1_NAME*4SEARCH_AcEA_*1_en*4US_*1_*1_*1_123452511375704563981_2_7_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1","isShared":false}],"i18n_follow":"Follow","i18n_see_less":"See less","companyFollowees":[{"link_biz":"/company/metlife?trk=prof\\u002dfollowing\\u002dcompany\\u002dlogo","universalName":"metlife","id":2213,"logo":"http://m.c.lnkd.licdn.com/media/p/2/000/021/14e/05da35a.png","canonicalName":"MetLife","biz_follow":"/company/follow/submit?id=2213&csrfToken=ajax%3A1584468784299534813&goback=%2Enpv_29841231_*1_*1_NAME*4SEARCH_AcEA_*1_en*4US_*1_*1_*1_123452511375704563981_2_7_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1","ind_lookup":"Insurance","isShared":false,"logoId":"/p/2/000/021/14e/05da35a.png"}],"i18n_news":"News","isFollowing":true,"lix_profile_showChannels":"control"}},"BasicInfo":{"empty":{},"upsell":{"deferImg":true,"visible":true},"basic_info":{"showTopCardDetail":true,"visible":true,"phoneticname":"","i18n__Industry":"Industry","industry_pivot":"/search?search=&industry=42&sortCriteria=R&keepFacets=true&trk=prof\\u002d0\\u002dovw\\u002dindustry","find_others_region":"Find other members in Mumbai Area, India","headline_highlight":"Manager at Metlife","i18n__find_others_in_industry":"Find other members in this industry","i18n_Edit":"Edit"}}}}' 

하지만 아래와 같이 로드하려고 하면 오류가 발생합니다.

>>> import re 
>>> import json 
>>> nw = json.loads(string1) 

Traceback (most recent call last): 
    File "<pyshell#33>", line 1, in <module> 
    nw = json.loads(string1) 
    File "C:\Python27\lib\json\__init__.py", line 338, in loads 
    return _default_decoder.decode(s) 
    File "C:\Python27\lib\json\decoder.py", line 365, in decode 
    obj, end = self.raw_decode(s, idx=_w(s, 0).end()) 
    File "C:\Python27\lib\json\decoder.py", line 383, in raw_decode 
    raise ValueError("No JSON object could be decoded") 
ValueError: No JSON object could be decoded 

감사합니다.

+0

\o/ 와, 실제로 JSON이 깨져 있는 진짜 사례를 발견했네요! :-P –

답변

4

입력 문자열에 이스케이프된 - 문자가 있습니다. 대시가 \u002d 이스케이프 시퀀스로 바뀌어 있습니다. 문자열(따옴표) 밖에 있는 \u002d는 유효한 JSON 값이 아니지만, JavaScript에서는 정상적으로 동작합니다. 예를 들어 "distance":\u002d1 은 JavaScript에서는 -1로 해석되지만 JSON 파서는 이를 거부합니다.

먼저 이 부분을 복구해야 합니다. 이 데이터를 생성한 쪽에서는 HTML 주석 안에 있는 - 문자가 주석 종료 시퀀스인 --> 의 일부로 해석되는 것을 막으려고 대시를 이스케이프한 것입니다. 복구는 다음과 같이 할 수 있습니다:

>>> json.loads(string1.replace(r'\u002d', '-')) 
{u'content': {u'message_exchanged': {u'messagesOnlyToViewee': True, u'messagesOnlyToViewer': True}, u'view_tracking': {}, u'Certifications': {u'certsMpr': {}, u'empty': {}}, u'BasicInfo': {u'upsell': {u'deferImg': True, u'visible': True}, u'empty': {}, u'basic_info': {u'i18n__Industry': u'Industry', u'industry_pivot': u'/search?search=&industry=42&sortCriteria=R&keepFacets=true&trk=prof-0-ovw-industry', u'find_others_region': u'Find other members in Mumbai Area, India', u'headline_highlight': u'Manager at Metlife', u'showTopCardDetail': True, u'i18n_Edit': u'Edit', u'visible': True, u'i18n__find_others_in_industry': u'Find other members in this industry', u'phoneticname': u''}}, u'Volunteering': {u'empty': {}, u'volunteer': {}}, u'frontierajaxform__text_plain__there_were': u'There were one or more errors in your submission. Please correct the marked fields below.', u'lix_treasury_callout': u'B', u'Badge': {u'empty': {}, u'badges': {}}, u'Notes': {}, u'lix_treasury_upload': u'B', u'Summary': {u'empty': {}, u'summary': {u'deferImg': True, u'hasSummaryOrSpecialties': False, u'associatedWith': {}, u'showSummarySection': False, u'visible': True}}, u'connections': {}, u'search_highlight': {}, u'Patents': {u'empty': {}, u'patentsMpr': {}}, u'lix_discovery_order': u'control', u'Following': {u'follow': {u'i18n_see_less': u'See less', u'isFollowing': True, u'i18n_following_section_label': u'Following', u'lix_profile_showChannels': u'control', u'companyFolloweeCount': 1, u'industryFollowees': [{u'ind_unfollow': u'/lite/unfollow?type=INDUSTRY&id=42&csrfToken=ajax%3A1584468784299534813&goback=%2Enpv_29841231_*1_*1_NAME*4SEARCH_AcEA_*1_en*4US_*1_*1_*1_123452511375704563981_2_7_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1', u'canonicalName': u'Insurance', u'link_industry': u'/today/insurance?trk=prof-following-industry-icon', u'i18n_x_followers': u'93,628 followers', u'ind_follow': 
u'/lite/follow?type=INDUSTRY&id=42&csrfToken=ajax%3A1584468784299534813&goback=%2Enpv_29841231_*1_*1_NAME*4SEARCH_AcEA_*1_en*4US_*1_*1_*1_123452511375704563981_2_7_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1', u'universalName': u'insurance', u'id': 42, u'fmt_following_count': u'93,628', u'isShared': False}], u'i18n_unfollow': u'Unfollow', u'i18n_follow': u'Follow', u'i18n_news': u'News', u'industryFolloweeCount': 1, u'isCondensed': True, u'companyFollowees': [{u'biz_follow': u'/company/follow/submit?id=2213&csrfToken=ajax%3A1584468784299534813&goback=%2Enpv_29841231_*1_*1_NAME*4SEARCH_AcEA_*1_en*4US_*1_*1_*1_123452511375704563981_2_7_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1_*1', u'canonicalName': u'MetLife', u'ind_lookup': u'Insurance', u'logoId': u'/p/2/000/021/14e/05da35a.png', u'logo': u'http://m.c.lnkd.licdn.com/media/p/2/000/021/14e/05da35a.png', u'universalName': u'metlife', u'id': 2213, u'link_biz': u'/company/metlife?trk=prof-following-company-logo', u'isShared': False}], u'i18n_currently_following': u'Following'}, u'follow_people': {u'count': 0, u'influencerSeeMore': -7, u'influencers': [], u'viewee': {u'id': 29841231}}, u'follow_school': {}, u'follow_channels': {}}, u'ContactInfo': {u'distance': {u'distance': -1, u'numberOfConnections': 2}, u'contact_info': {u'deferImg': True, u'showTwitter': True, u'visible': True}}, u'network_overview': {u'lix_showDetail': u'control', u'img_overview_locked': u'http://s.c.lnkd.licdn.com/scds/common/u/img/pic/pic_network_overview_locked_178x276.png'}, u'Projects': {u'projectsMpr': {}, u'empty': {}}, u'Publications': {u'pubsMpr': {}, u'empty': {}}}} 

대시는 JavaScript 유니코드 이스케이프 시퀀스(\u002d)로 이스케이프되어 있었습니다.

+0

답변 주셔서 감사합니다. 이러한 문자를 보다 동적으로 처리할 수 있는 방법이 있는지 알고 싶습니다. 제 데이터에는 '\u2605' 같은 것도 보이기 때문입니다. – user1946217

+0

이스케이프 코드가 JSON 문자열 *안에*(따옴표 안에) 있으면 유효한 JSON입니다. 문자열의 일부가 *아닌* 경우에만 문제가 됩니다. –

+0

'\u2605'는 '★' 별 문자입니다. 이 문자는 **문자열 밖에서는** 나타나지 않을 것입니다. –