select get_json_object(substring(json,2,length(json)-1),'$.userId') from weibo limit 1;
查询需求
微博总量和独立用户数
1 2 3 4 5 6
#总量 select count(*) from weibo; #独立用户数 select count(distinct(get_json_object(a.j,'$.userId'))) from (select substring(json,2,length(json)-1) as j from weibo) a;
用户所有微博被转发的总数,输出前3个用户
使用json_tuple提取多个字段
1 2 3 4 5 6 7 8 9 10
select b.id,sum(b.cnt) as bsum from (select json_tuple(a.j,'userId','reportCount') as (id,cnt) from (select substring(json,2,length(json)-1) as j from weibo) a) b group by b.id order by bsum desc limit 3;
被转发次数最多的前3条微博,输出用户id
1 2 3 4 5 6 7
select get_json_object(a.j,'$.userId') as id, cast(get_json_object(a.j,'$.reportCount') as INT) as cnt from (select substring(json,2,length(json)-1) as j from weibo) a order by cnt desc limit 3;
每个用户发布的微博总数,存储到临时表
1 2 3 4 5 6 7 8 9 10
create table weibo_uid_wbcnt( userid string, wbcnt int ) row format delimited fields terminated by '\t'; insert overwrite table weibo_uid_wbcnt select get_json_object(a.j,'$.userId'),count(1) from (select substring(json,2,length(json)-2) as j from weibo) a group by get_json_object(a.j,'$.userId');
select * from weibo_uid_wbcnt limit 10;
统计带图片的微博数
1 2 3 4
select count(1) from (select substring(json,2,length(json)-2) as j from weibo) a where get_json_object(a.j,'$.pic_list') like '%http%';
统计使用iphone发微博的独立用户数
1 2 3 4
select count(distinct get_json_object(a.j,'$.userId')) from (select substring(json,2,length(json)-2) as j from weibo) a where lower(get_json_object(a.j,'$.source')) like '%iphone%';
微博中评论次数小于1000的用户id和数据来源,放入视图
1 2 3 4 5 6
create view weibo_view as select get_json_object(a.j,'$.userId') as id,get_json_object(a.j,'$.source') as source from (select substring(json,2,length(json)-2) as j from weibo) a where get_json_object(a.j,'$.commentCount')<1000; select * from weibo_view limit 10;
统计上条视图中数据来源“ipad客户端”的用户数目
1
select count(distinct id) as cnt from weibo_view wheresource='iPad客户端';
将微博的点赞数和转发数求和,降序,取前10条。
1 2 3 4 5 6 7 8 9 10 11 12 13 14
public class DemoTest1 extends UDF { public Integer evaluate(Integer num1,Integer num2){ try{ return num1+num2; }catch (Exception e){ return null; } } } create temporary function wb as 'DemoTest1';
select wb(cast(get_json_object(a.j,'$.praiseCount') as int),cast(get_json_object(a.j,'$.reportCount') as int)) as cnt from (select substring(json,2,length(json)-2) as j from weibo) a order by cnt desc limit 10;
public class DemoTest2 extends UDF { public int evaluate(String content,String word){ int count = 0; if(content != null&&content.length()>0){ String[] array = content.split(word); count = array.length-1; } return count; } } create temporary function wcount as 'DemoTest2';
select b.id,max(b.cnt) as cn from (select get_json_object(a.j,'$.userId') as id,wcount(get_json_object(a.j,'$.content'),'iphone') as cnt from (select substring(json,2,length(json)-2) as j from weibo) a) b group by b.id order by cn desc limit 10;