Presto 与 Hive 使用差异
最近常用到 Presto,发现与 Hive 语法有一些差异,特此记录,持续更新……
语法差异
rlike
Hive 中使用 rlike
或者 regexp
匹配正则表达式,而 Presto 不支持 rlike
和 regexp
语法,而是用 regexp_like
达到同样效果
-- Hive 语法
SELECT 'Hadoop' RLIKE '^H.*';
-- 结果:true
SELECT 'Hadoop' REGEXP '^H.*';
-- 结果:true
-- Presto 语法
-- regexp_like(string, pattern) → boolean
SELECT REGEXP_LIKE('Hadoop','^H.*')
-- 结果: true
注意:
Hive 的
rlike
和regexp
中,\d
需要转义成\\d
,\d
可以用[0-9]
代替,不用转义
json
Hive 中解析 JSON 有 get_json_object
和 json_tuple
方式,而 Presto 中使用 json_extract
及 json_extract_scalar
解析 JSON
测试数据
{
"store": {
"fruit": [{
"weight": 8,
"type": "apple"
}, {
"weight": 9,
"type": "pear"
}],
"bicycle": {
"price": 19.95,
"color": "red"
}
},
"email": "amy@only_for_json_udf_test.net",
"owner": "amy"
}
在 Hive 中建立测试表
CREATE TABLE json_test(
json_str STRING COMMENT 'JSON 字符串'
) COMMENT 'JSON 测试表';
插入转义后的测试数据
INSERT INTO json_test(json_str)
VALUES("{\"store\":{\"fruit\":[{\"weight\":8,\"type\":\"apple\"},{\"weight\":9,\"type\":\"pear\"}],\"bicycle\":{\"price\":19.95,\"color\":\"red\"}},\"email\":\"amy@only_for_json_udf_test.net\",\"owner\":\"amy\"}");
Hive 语法
-- Hive 查询单个字段
SELECT
GET_JSON_OBJECT(json_str,'$.owner')
FROM
json_test;
-- 结果:amy
-- Hive 查询数组
SELECT
get_json_object(json_str, '$.store.fruit\[0]')
FROM
json_test;
-- 结果:{"weight":8,"type":"apple"}
-- Hive json_tuple 方式解析 JSON
SELECT
j.owner,
j.email
FROM
json_test
LATERAL VIEW JSON_TUPLE(json_str,'owner','email') j AS owner,email;
-- 结果:amy amy@only_for_json_udf_test.net
-- Hive json_tuple 方式解析嵌套数据
SELECT
a.owner,
a.email,
b.fruit,
b.bicycle
FROM
json_test
LATERAL VIEW JSON_TUPLE(json_str,'owner','email','store') a AS owner,email,store
LATERAL VIEW JSON_TUPLE(a.store,'fruit','bicycle') b AS fruit,bicycle;
-- 结果:amy amy@only_for_json_udf_test.net [{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}] {"price":19.95,"color":"red"}
Presto 语法
-- Presto
-- json_extract(json, json_path) → json
-- 查询单个字段
SELECT
json_extract(json_str, '$.owner')
FROM
json_test;
-- 结果:"amy"
-- 查询数组
SELECT
json_extract(json_str, '$.store.fruit[0]')
FROM
json_test;
-- 结果:{"weight":8,"type":"apple"}
-- json_extract_scalar(json, json_path) → varchar
-- json_extract_scalar 方式,同 json_extract,差别是返回值是字符串
SELECT
json_extract_scalar(json_str, '$.owner')
FROM
json_test;
-- 结果:amy
SELECT
json_extract_scalar(json_str, '$.store.fruit[0]')
FROM
json_test;
-- 结果:NULL
SELECT
json_extract_scalar(json_str, '$.store.fruit[0].type')
FROM
json_test;
-- 结果:apple
参数差异
date_add
Hive 中日期加减函数与 Presto 名称一致,但是参数列表不一致
-- Hive
-- date_add(date/timestamp/string startdate,tinyint/smallint/int days)
SELECT DATE_ADD('2021-01-01',1);
-- 结果:2021-01-02
-- Presto
-- date_add(unit, value, timestamp) → [same as input]
SELECT DATE_ADD('day',1,DATE '2021-01-01')
-- 结果:2021-01-02
-- 另一种算法
SELECT DATE '2021-01-01' + INTERVAL '1' DAY
-- 结果:2021-01-02
参考文档:
Hive LanguageManual UDF
Presto JSON Functions and Operators