2014년 8월 12일 화요일

[HIVE] UDF를 이용하여 누적합 구하기



- Hive 누적합 UDF jar 공유 Git 저장소 정보
https://github.com/nexr/hive-udf.git
https://github.com/LEETAEKYUNG/hive-udf

- Hive jar 추가 및 함수 등록 방법
-- Make the UDF jar on the local filesystem available to the current Hive session.
ADD JAR /home/hadoop/snap/nexr-hive-udf-0.2-SNAPSHOT.jar;

-- Session-scoped registration: cum_sum is the name used by the query below.
CREATE TEMPORARY FUNCTION cum_sum
    AS 'com.nexr.platform.hive.udf.GenericUDFSum';

-- Permanent registration of the same class under a different name,
-- with the jar referenced from HDFS instead of the local path.
CREATE FUNCTION cumulativeSum
    AS 'com.nexr.platform.hive.udf.GenericUDFSum'
    USING JAR 'hdfs:///user/hive/warehouse/jar/nexr-hive-udf-0.2-SNAPSHOT.jar';


- 쿼리
-- Load five sample rows, all in department 'dt01', into emp.
-- Values are matched to emp's columns by position; the aliases below only
-- name them for readers.
-- NOTE(review): assumes emp's columns are (empno, deptno, salary) in that
-- order, as the cumulative-sum query suggests -- confirm against the table DDL.
insert into table emp
select
    src.empno,
    src.deptno,
    src.salary
from (
    select '1' as empno, 'dt01' as deptno, '2000' as salary
    union all
    select '2' as empno, 'dt01' as deptno, '3000' as salary
    union all
    select '3' as empno, 'dt01' as deptno, '4000' as salary
    union all
    select '4' as empno, 'dt01' as deptno, '5000' as salary
    union all
    select '5' as empno, 'dt01' as deptno, '3500' as salary
) src
;

-- Cumulative salary (sal_sum) per department, computed with the cum_sum UDF.
-- The UDF sees rows one at a time in the order they arrive, so the inner
-- query must fix both the routing and the ordering of rows:
--   * distribute by sends every row with the same hash(deptno) to the same reducer;
--   * sort by orders the rows inside each reducer by deptno, then empno.
-- NOTE(review): cum_sum presumably restarts its running total whenever its
-- first argument (the deptno hash) changes -- confirm against the nexr
-- hive-udf documentation.
select
    t.empno,
    t.deptno,
    t.salary,
    cum_sum(hash(t.deptno), t.salary) as sal_sum
from (
    select
        a.empno,
        a.deptno,
        a.salary
    from emp a
    distribute by hash(a.deptno)
    sort by a.deptno, a.empno
) t
;

댓글 없음 :

댓글 쓰기