Apache Kylin

贴一篇guide
启动

$KYLIN_HOME/bin/check-env.sh
$KYLIN_HOME/bin/kylin.sh start

运行例子

$KYLIN_HOME/bin/sample.sh

默认运行在 http://localhost:7070/kylin
默认用户名/密码:ADMIN/KYLIN

在启动 Kylin 的时候遇到了很多坑,最坑的莫过于下面这个报错:

Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient

报的是 Java 的 RuntimeException,
然后折腾了好久,
最后发现原因是:
Kylin 在使用 Hive 的时候要自己启动一个 Hive 进程,而这个进程同一时间只能存在一个(很可能是因为 Hive 使用了内嵌 Derby 作为元数据库,同一时间只允许一个连接——待确认)。
所以,如果搭好 Hive 之后已经执行 hive 命令把它跑起来了,那么在使用 Kylin 之前,要先把之前那个 Hive 进程停掉。

查询

select car_dt,max(pm10) as maxpm10,min(pm10) as minpm10,max(pm25) as maxpm25,min(pm25) as minpm25 from SAMPLE_CAR_DATA where speed>100 group by car_dt,position order by car_dt

select position,max(pm10) as maxpm10,sum(pm10) as sumpm10,max(pm25) as maxpm25,sum(pm25) as sumpm25 from SAMPLE_CAR_DATA where speed>100 group by position order by position

select car_dt,max(pm10) as maxpm10,sum(pm10) as sumpm10,max(pm25) as maxpm25,sum(pm25) as sumpm25,max(co) as maxco,sum(co) as sumco from SAMPLE_CAR_DATA where speed>80 group by car_dt order by car_dt

select car_dt,count(car_dt) as dts,sum(pm10) as sumpm10,sum(pm25) as sumpm25,sum(co) as sumco from SAMPLE_CAR_DATA where speed>50  group by car_dt order by car_dt  

加入 country、city 维度:

select car_dt,country,city,count(city) as dts,sum(pm25) as sumpm25,sum(co) as sumco from SAMPLE_CAR_DATA where speed>50  group by car_dt,country,city order by car_dt,country,city

耗时:Kylin 3.7s,Hive 41s

select car_dt,country,count(city) as dts,sum(pm25) as sumpm25,sum(co) as sumco from SAMPLE_CAR_DATA where speed>50  group by car_dt,country order by car_dt,country

耗时:Kylin 0.6s,Hive 34s

select car_dt,country,city,count(city) as dts,sum(pm25) as sumpm25,sum(co) as sumco from car where speed>50 and city like '%i%'  group by car_dt,country,city order by car_dt,country,city
耗时:Kylin 0.5s,Hive 39s

数据量:100w(即约 100 万条记录,待确认)

select car_dt,country,city,count(city) as dts,sum(pm25) as sumpm25,sum(co) as sumco,avg(speed) as avaSpeed  from car where speed>30 and city like '%h%'  group by car_dt,country,city order by car_dt,country,city

耗时:Kylin 0.8s,Hive 41s