Hive JSON SerDe를 사용하여 Twitter JSON을 Hive 테이블에 넣으려고 합니다. 먼저 ROW FORMAT SERDE로 정의된 테이블에 JSON을 가져온 다음, RCFile로 저장되는 다른 테이블로 옮깁니다. 제목: Hive JSON SerDe - ClassCastException: java.lang.Integer를 java.lang.Double로 캐스팅할 수 없습니다.
-- gh_raw: external table over raw Twitter JSON stored under /user/ahanna/gh_raw.
-- Each row is one tweet, parsed by org.openx.data.jsonserde.JsonSerDe.
--
-- Layout:
--   * Top-level columns describe the tweet itself (coordinates, entities, geo,
--     place, user, text, ...).
--   * retweeted_status repeats the same set of fields for the original tweet,
--     plus favorited and scopes.
--
-- Type and naming choices:
--   * Numeric Twitter ids (user.id, user_mentions.id) are declared bigint.
--     Twitter ids are 64-bit values and do not fit in Hive's 32-bit int.
--     The *_str columns carry the same ids as text.
--   * `user` and `location` are backtick-quoted because they collide with Hive
--     keywords (USER is reserved from Hive 1.2 onward).
--
-- NOTE(review): the array<double> fields (coordinates, geo.coordinates,
-- place.bounding_box.coordinates) receive JSON numbers that may be written
-- without a decimal point. Whether the SerDe hands those back as Double or
-- Integer depends on the SerDe build in use - confirm against the deployed jar.
CREATE EXTERNAL TABLE gh_raw (
    coordinates struct<
        coordinates: array<double>,
        type: string>,
    created_at string,
    entities struct<
        hashtags: array<struct<text: string>>,
        media: array<struct<
            display_url: string,
            expanded_url: string,
            media_url: string,
            media_url_https: string,
            sizes: struct<
                large: struct<
                    h: int,
                    resize: string,
                    w: int>,
                medium: struct<
                    h: int,
                    resize: string,
                    w: int>,
                small: struct<
                    h: int,
                    resize: string,
                    w: int>,
                thumb: struct<
                    h: int,
                    resize: string,
                    w: int>>,
            type: string,
            url: string>>,
        urls: array<struct<
            display_url: string,
            expanded_url: string,
            url: string>>,
        user_mentions: array<struct<
            id: bigint,
            name: string,
            screen_name: string>>>,
    geo struct<
        coordinates: array<double>,
        type: string>,
    id_str string,
    in_reply_to_screen_name string,
    in_reply_to_status_id_str string,
    in_reply_to_user_id_str string,
    place struct<
        attributes: struct<
            locality: string,
            region: string,
            street_address: string>,
        bounding_box: struct<
            coordinates: array<array<array<double>>>,
            type: string>,
        country: string,
        country_code: string,
        full_name: string,
        name: string,
        place_type: string,
        url: string>,
    possibly_sensitive boolean,
    retweeted_status struct<
        coordinates: struct<
            coordinates: array<double>,
            type: string>,
        created_at: string,
        entities: struct<
            hashtags: array<struct<
                text: string>>,
            media: array<struct<
                display_url: string,
                expanded_url: string,
                media_url: string,
                media_url_https: string,
                sizes: struct<
                    large: struct<
                        h: int,
                        resize: string,
                        w: int>,
                    medium: struct<
                        h: int,
                        resize: string,
                        w: int>,
                    small: struct<
                        h: int,
                        resize: string,
                        w: int>,
                    thumb: struct<
                        h: int,
                        resize: string,
                        w: int>>,
                type: string,
                url: string>>,
            urls: array<struct<
                display_url: string,
                expanded_url: string,
                url: string>>,
            user_mentions: array<struct<
                id: bigint,
                name: string,
                screen_name: string>>>,
        favorited: boolean,
        geo: struct<
            coordinates: array<double>,
            type: string>,
        id_str: string,
        in_reply_to_screen_name: string,
        in_reply_to_status_id_str: string,
        in_reply_to_user_id_str: string,
        place: struct<
            attributes: struct<
                locality: string,
                region: string,
                street_address: string>,
            bounding_box: struct<
                coordinates: array<array<array<double>>>,
                type: string>,
            country: string,
            country_code: string,
            full_name: string,
            name: string,
            place_type: string,
            url: string>,
        possibly_sensitive: boolean,
        scopes: struct<
            followers: boolean>,
        source: string,
        text: string,
        truncated: boolean,
        `user`: struct<
            contributors_enabled: boolean,
            created_at: string,
            default_profile: boolean,
            default_profile_image: boolean,
            description: string,
            favourites_count: int,
            followers_count: int,
            friends_count: int,
            geo_enabled: boolean,
            id: bigint,
            id_str: string,
            is_translator: boolean,
            lang: string,
            listed_count: int,
            `location`: string,
            name: string,
            profile_background_color: string,
            profile_background_image_url: string,
            profile_background_image_url_https: string,
            profile_background_tile: boolean,
            profile_banner_url: string,
            profile_image_url: string,
            profile_image_url_https: string,
            profile_link_color: string,
            profile_sidebar_border_color: string,
            profile_sidebar_fill_color: string,
            profile_text_color: string,
            profile_use_background_image: boolean,
            protected: boolean,
            screen_name: string,
            statuses_count: int,
            time_zone: string,
            url: string,
            utc_offset: int,
            verified: boolean>>,
    source string,
    text string,
    truncated boolean,
    `user` struct<
        contributors_enabled: boolean,
        created_at: string,
        default_profile: boolean,
        default_profile_image: boolean,
        description: string,
        favourites_count: int,
        followers_count: int,
        friends_count: int,
        geo_enabled: boolean,
        id: bigint,
        id_str: string,
        is_translator: boolean,
        lang: string,
        listed_count: int,
        `location`: string,
        name: string,
        profile_background_color: string,
        profile_background_image_url: string,
        profile_background_image_url_https: string,
        profile_background_tile: boolean,
        profile_banner_url: string,
        profile_image_url: string,
        profile_image_url_https: string,
        profile_link_color: string,
        profile_sidebar_border_color: string,
        profile_sidebar_fill_color: string,
        profile_text_color: string,
        profile_use_background_image: boolean,
        protected: boolean,
        screen_name: string,
        statuses_count: int,
        time_zone: string,
        url: string,
        utc_offset: int,
        verified: boolean>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
LOCATION '/user/ahanna/gh_raw';
이 테이블에서 충돌이 발생합니다. 어느 시점까지는 정상적으로 작동하지만, 그 뒤에 다음과 같은 ClassCastException이 발생합니다:
java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row [Error getting row data with exception java.lang.ClassCastException: java.lang.Integer cannot be cast to java.lang.Double
at org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaDoubleObjectInspector.get(JavaDoubleObjectInspector.java:40)
at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:259)
at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:307)
at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:354)
at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:354)
at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:354)
at org.apache.hadoop.hive.serde2.SerDeUtils.getJSONString(SerDeUtils.java:220)
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:667)
at org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:141)
at org.apache.hadoop
위의 스키마가 SerDe 테이블을 정의하는 데 사용하고 있는 것입니다. 이 오류는 좌표 집합이나 바운딩 박스를 만났을 때 발생하는 것으로 추측됩니다.
제가 사용하고 있는 JSON SerDe의 버그라고 생각하지만 확실하지는 않습니다. 이 문제를 고쳤다고 말한 사람의 소스를 받아 처음부터 직접 컴파일해서 사용하고 있지만, 아무런 효과가 없습니다: https://github.com/brndnmtthws/Hive-JSON-Serde