0%

Hive SQL Cheatsheet

database

1
2
3
4
5
6
show databases;

show tables in default;

-- 使用默认库
use default ;

Table信息

1
2
3
desc tmp_table;

show create table tmp_namespace.tmp_table;

DDL

删除表

1
drop table if exists tmp_table;

创建表

1
2
3
4
5
6
7
8
9
10
create table tmp_table(
name string,
hobby array<string>,
add map<String,string>
)
partitioned by (dt string comment 'date', hm string comment 'hour and minut')
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
collection items terminated by '-'
map keys terminated by ':'
;

创建分区

1
alter table tmp_table add partition(dt='20190101', hm='0000') partition(dt='20190101', hm='0100');

插入数据

1
2
3
4
5
6
7
8
9
insert into table tmp_table partition(dt='20190101', hm='0000')  
select 'a', array('sing,dance'), str_to_map('a:1,b:2') ;
```

查看
```sql
desc tmp_table;
show create table tmp_table;
show partitions tmp_table;

字符串

替换

1
select regexp_replace('[a,b,c]','\\[|\\]','') -- 把方括号替换

多行拼接

1
2
3
4
-- slow, why?
select concat_ws(',',collect_set(val)) from
dual
lateral view explode(array('a','b','c','a')) tmp_table as val;

日期

1
2
3
4
5
6
7
8
9
select unix_timestamp();  -- 当前时间戳:1572425940
select current_timestamp(); -- 当前日期:2019-10-30 16:59:24.24
select date_add('2019-10-31', 1); -- tomorrow
select date_sub('2019-10-31', 1); -- yesterday

select unix_timestamp('2018-06-29 00:00:00'); -- string to timestamp: 1530201600
select unix_timestamp('2018/06/29 09', 'yyyy/MM/dd HH'); -- string to timestamp: 1530234000

select from_unixtime(1356768454, 'yyyy-MM-dd HH:mm:ss'); -- timestamp to string

内置UDTF

查看Function/UDF

1
2
3
SHOW FUNCTIONS;
DESCRIBE FUNCTION <function_name>;
DESCRIBE FUNCTION EXTENDED <function_name>;

数组转表

1
2
3
4
5
select  explode(array('a','b','c')) as col;

select table_b.col
from dual
lateral view explode(array('a','b','c','a')) table_b as col; -- UDTF cannot be used in select

JSON转表

1
2
3
select get_json_object('{"a":1,"b":2}','$.a') as col;

select get_json_object('{"list":[{"a":1},{"a":11},{"a":111}]}','$.list.a') ;
1
2
3
select * 
from tmp_table
lateral view json_tuple('{"a":1,"b":2}','a','b') tmp_json as col_a, col_b;