-
Notifications
You must be signed in to change notification settings - Fork 0
/
links.hql
44 lines (38 loc) · 1.33 KB
/
links.hql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
-- Step 1 of building the movie-links ORC table: expose the raw CSV files
-- to Hive as an external staging table.
DROP TABLE IF EXISTS nprabhu_movie_links_csv;

-- The table is mapped onto the comma-separated files under LOCATION, and the
-- first line of each file is skipped (skip.header.line.count = 1).
-- NOTE(review): per the Hive CSV SerDe docs, OpenCSVSerde exposes every column
-- as STRING regardless of the types declared below -- confirm before relying
-- on INT semantics when querying this staging table directly.
CREATE EXTERNAL TABLE nprabhu_movie_links_csv (
    MovieId INT,
    ImdbId  INT,
    TmdbId  INT
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
    "separatorChar" = "\,"
)
STORED AS TEXTFILE
LOCATION '/nprabhu/inputs/movie_links'
TBLPROPERTIES ("skip.header.line.count" = "1");
-- ORC-backed table holding the movie links (see the STORED AS ORC clause).
-- It is dropped and recreated each time this script runs.
DROP TABLE IF EXISTS nprabhu_movie_links_hive;

CREATE TABLE nprabhu_movie_links_hive (
    MovieId INT,
    ImdbId  INT,
    TmdbId  INT
)
STORED AS ORC;
-- Copy the CSV table to the ORC table, replacing any previous contents.
-- Columns are named explicitly (instead of SELECT *) so the load does not
-- silently depend on the column order or column count of the staging table.
INSERT OVERWRITE TABLE nprabhu_movie_links_hive
SELECT
    MovieId,
    ImdbId,
    TmdbId
FROM nprabhu_movie_links_csv;
-- Create the mapping table in HBase used by the streaming layer.
-- MovieId is mapped to the HBase row key (:key) and ImdbId to column
-- links:imdbid.
-- NOTE(review): the Hive table is EXTERNAL, so the HBase table named in
-- hbase.table.name (with a "links" column family) presumably has to exist
-- before this script runs -- confirm.
DROP TABLE IF EXISTS nprabhu_movie_links_hbase;

-- The column mapping is written on one line with no whitespace: the Hive
-- HBase integration docs warn that whitespace between mapping entries is
-- interpreted as part of the column name.
CREATE EXTERNAL TABLE nprabhu_movie_links_hbase (
    MovieId INT,
    ImdbId  INT
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ('hbase.columns.mapping' = ':key,links:imdbid')
TBLPROPERTIES ('hbase.table.name' = 'nprabhu_movie_links_hbase');
-- Fill the HBase-backed table with the (MovieId, ImdbId) pairs taken from the
-- Hive links table.
-- NOTE(review): per the Hive HBase integration docs, INSERT OVERWRITE on an
-- HBase-backed table replaces rows with matching keys but does not delete
-- other existing rows -- confirm if stale keys matter.
INSERT OVERWRITE TABLE nprabhu_movie_links_hbase
SELECT
    MovieId,
    ImdbId
FROM nprabhu_movie_links_hive;