Hive Sql


1.创建复杂类型sql表

CREATE TABLE `person`(
  `name` string COMMENT 'name',
  `work_locations` array COMMENT 'work_locations',
  `score` map COMMENT 'score',
  `course` struct COMMENT 'course')
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\t'
  COLLECTION ITEMS TERMINATED BY ','
  MAP KEYS TERMINATED BY ':'
STORED AS INPUTFORMAT
  'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  'hdfs://x/user/yy/tmp.db/person'
TBLPROPERTIES (
  'transient_lastDdlTime'='1646984872')
LOAD DATA LOCAL INPATH '/home/hadoop/person.txt' OVERWRITE INTO TABLE person;

2.literal explode

select 
	dt
	,count(distinct id) as uv
	,count(1) as pv
from 
(
	select
		dt
		,id
		,pr
	from t1
	where dt >= '02-31' and dt <= '10-01'
	and eid = 'XXX'
	and split(pr,'#')[0] in ('qqq')
)table_a LATERAL VIEW explode(split(pr, '_')) ad as v
where split(v,'\\$')[1] in ('12','23')
group by dt

3.合并小文件

1.合并csv 小文件:

hadoop fs -getmerge /user//part-* a.txt

2.中文显示乱码

iconv -f UTF-8 -t gb18030 a.txt -o ./res.csv

https://blog.csdn.net/u010670689/article/details/72885944

相关