2014-01-13 6 views
2

Hive JSON SerDe를 사용하여 Twitter JSON을 Hive 테이블에 넣으려고 합니다. 먼저 ROW FORMAT SERDE로 정의된 테이블에 JSON을 가져온 다음, RCFile로 저장되는 다른 테이블로 옮깁니다. 문제: Hive JSON SerDe에서 ClassCastException(java.lang.Integer를 java.lang.Double로 캐스팅할 수 없음)이 발생합니다.

-- gh_raw: external staging table over raw Twitter JSON files stored under
-- /user/ahanna/gh_raw, parsed by org.openx.data.jsonserde.JsonSerDe.
-- Column and struct field names must stay identical to the JSON keys, because
-- the SerDe resolves fields by name; do not rename them.
--
-- Twitter numeric ids (user.id, user_mentions[].id) are declared bigint:
-- they are 64-bit values and overflow a 32-bit int column.
-- Counts, pixel sizes and utc_offset remain int.
--
-- NOTE(review): the array<double> coordinate fields (coordinates, geo,
-- place.bounding_box) presumably trigger "Integer cannot be cast to Double"
-- in some SerDe builds when the JSON holds whole-number coordinates.
-- Confirm against the SerDe version in use before changing these types.
CREATE EXTERNAL TABLE gh_raw (
    coordinates struct <
     coordinates: array <double>,
     type: string>,
    created_at string,
    entities struct <
     hashtags: array <struct <text: string>>,
     media: array <struct <
      display_url: string,
      expanded_url: string,
      media_url: string,
      media_url_https: string,
      sizes: struct <
       large: struct <
        h: int,
        resize: string,
        w: int>,
       medium: struct <
        h: int,
        resize: string,
        w: int>,
       small: struct <
        h: int,
        resize: string,
        w: int>,
       thumb: struct <
        h: int,
        resize: string,
        w: int>>,
      type: string,
      url: string>>,
     urls: array <struct <
      display_url: string,
      expanded_url: string,
      url: string>>,
     user_mentions: array <struct <
      id: bigint,
      name: string,
      screen_name: string>>>,
    geo struct <
     coordinates: array <double>,
     type: string>,
    id_str string,
    in_reply_to_screen_name string,
    in_reply_to_status_id_str string,
    in_reply_to_user_id_str string,
    place struct <
     attributes: struct <
     locality: string,
     region: string,
     street_address: string>,
     bounding_box: struct <
     coordinates: array <array <array <double>>>,
     type: string>,
     country: string,
     country_code: string,
     full_name: string,
     name: string,
     place_type: string,
     url: string>,
    possibly_sensitive boolean,
    retweeted_status struct <
     coordinates: struct <
     coordinates: array <double>,
     type: string>,
     created_at: string,
     entities: struct <
     hashtags: array <struct <
       text: string>>,
     media: array <struct <
       display_url: string,
       expanded_url: string,
       media_url: string,
       media_url_https: string,
       sizes: struct <
        large: struct <
        h: int,
        resize: string,
        w: int>,
        medium: struct <
        h: int,
        resize: string,
        w: int>,
        small: struct <
        h: int,
        resize: string,
        w: int>,
        thumb: struct <
        h: int,
        resize: string,
        w: int>>,
       type: string,
       url: string>>,
     urls: array <struct <
       display_url: string,
       expanded_url: string,
       url: string>>,
     user_mentions: array <struct <
       id: bigint,
       name: string,
       screen_name: string>>>,
     favorited: boolean,
     geo: struct <
     coordinates: array <double>,
     type: string>,
     id_str: string,
     in_reply_to_screen_name: string,
     in_reply_to_status_id_str: string,
     in_reply_to_user_id_str: string,
     place: struct <
     attributes: struct <
     locality: string,
     region: string,
     street_address: string
     >,
     bounding_box: struct <
      coordinates: array <array <array <double>>>,
      type: string>,
     country: string,
     country_code: string,
     full_name: string,
     name: string,
     place_type: string,
     url: string>,
     possibly_sensitive: boolean,
     scopes: struct <
     followers: boolean>,
     source: string,
     text: string,
     truncated: boolean,
     user: struct <
     contributors_enabled: boolean,
     created_at: string,
     default_profile: boolean,
     default_profile_image: boolean,
     description: string,
     favourites_count: int,
     followers_count: int,
     friends_count: int,
     geo_enabled: boolean,
     id: bigint,
     id_str: string,
     is_translator: boolean,
     lang: string,
     listed_count: int,
     `location`: string,
     name: string,
     profile_background_color: string,
     profile_background_image_url: string,
     profile_background_image_url_https: string,
     profile_background_tile: boolean,
     profile_banner_url: string,
     profile_image_url: string,
     profile_image_url_https: string,
     profile_link_color: string,
     profile_sidebar_border_color: string,
     profile_sidebar_fill_color: string,
     profile_text_color: string,
     profile_use_background_image: boolean,
     protected: boolean,
     screen_name: string,
     statuses_count: int,
     time_zone: string,
     url: string,
     utc_offset: int,
     verified: boolean>>,
    source string,
    text string,
    truncated boolean,
    user struct <
     contributors_enabled: boolean,
     created_at: string,
     default_profile: boolean,
     default_profile_image: boolean,
     description: string,
     favourites_count: int,
     followers_count: int,
     friends_count: int,
     geo_enabled: boolean,
     id: bigint,
     id_str: string,
     is_translator: boolean,
     lang: string,
     listed_count: int,
     `location`: string,
     name: string,
     profile_background_color: string,
     profile_background_image_url: string,
     profile_background_image_url_https: string,
     profile_background_tile: boolean,
     profile_banner_url: string,
     profile_image_url: string,
     profile_image_url_https: string,
     profile_link_color: string,
     profile_sidebar_border_color: string,
     profile_sidebar_fill_color: string,
     profile_text_color: string,
     profile_use_background_image: boolean,
     protected: boolean,
     screen_name: string,
     statuses_count: int,
     time_zone: string,
     url: string,
     utc_offset: int,
     verified: boolean>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
LOCATION '/user/ahanna/gh_raw';

위 테이블에서 오류가 발생합니다. 어느 시점까지는 정상적으로 동작하지만, 다음과 같은 ClassCastException이 발생합니다:

java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row [Error getting row data with exception java.lang.ClassCastException: java.lang.Integer cannot be cast to java.lang.Double 
    at org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaDoubleObjectInspector.get(JavaDoubleObjectInspector.java:40) 
    at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:259) 
    at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:307) 
    at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:354) 
    at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:354) 
    at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:354) 
    at org.apache.hadoop.hive.serde2.SerDeUtils.getJSONString(SerDeUtils.java:220) 
    at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:667) 
    at org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:141) 
    at org.apache.hadoop 

위가 SerDe 테이블을 정의하는 데 사용한 스키마입니다. 원인은 좌표(coordinates) 배열이나 바운딩 박스(bounding_box)인 것으로 보입니다.

제가 사용하고 있는 JSON SerDe의 버그라고 생각하지만 확실하지는 않습니다. 이 문제를 고쳤다고 한 사람의 버전을 소스에서 직접 컴파일해 사용하고 있지만 달라진 것이 없습니다: https://github.com/brndnmtthws/Hive-JSON-Serde

답변

0

int 대신 BIGINT를 사용해 보십시오. 저는 그렇게 해서 해결되었습니다.

관련 문제