// Overwrite the created_day partition of the target Hive table with every row selected from session_heart.
val loadDataSql = s"insert overwrite table ${MyConfigSession.HIVE_TABLE3} partition(created_day='${scnDate}') select * from session_heart "
sessionProcessHeart.sparkSession.sql(loadDataSql)
println("----------------------------------update task record table---------------------------------------")
// Task succeeded: update the MySQL record configuration table
valupdateSQL:String=
s"""
|update ${MyConfigSession.JDBC_TABLE} set status=?,end_time=?,data_count=? where job_name='pica_dw.dw_fact_log_session_heart' and start_time='${startTime}'
|update ${MyConfigSession.JDBC_TABLE} set status=?,end_time=?,data_count=? where job_name='${MyConfigSession.HIVE_TABLE3}' and start_time='${startTime}'
|update ${MyConfigSession.JDBC_TABLE} set status=?,exception=?,end_time=? where job_name='pica_dw.dw_fact_log_session_heart' and start_time='${startTime}'
|update ${MyConfigSession.JDBC_TABLE} set status=?,exception=?,end_time=? where job_name='${MyConfigSession.HIVE_TABLE3}' and start_time='${startTime}'
// Run the source query restricted to created_day = scnData, spread the result over 120 partitions
// and drop duplicate rows.
// The same name was declared twice back to back (repartition(200), then repartition(120)); a name
// cannot be declared twice in one scope, so only the later declaration is kept.
var sourceDF: DataFrame = sessionProcessPref.sparkSession.sql(MyConfigSession.SOURCE_SQL_PREF + s" and created_day='${scnData}'").repartition(120).distinct()
valfields=List("pseudo_session","user_id","COALESCE(cast(user_id as int),0) user_id_int","mobile","device_token","user_token","view_class","view_path","action","action_type",
s"insert overwrite table ${MyConfigSession.HIVE_TABLE0} partition(created_day='${scnData}') select ${fields.mkString(",")} from fact_log_session_pref distribute by rand()"
// Execute the statement held in loadDataSql.
sessionProcessPref.sparkSession.sql(loadDataSql)
// Number of rows in baseDF (presumably the value written to data_count in the task record; confirm).
val dataCount = baseDF.count()
// val dataCount = 1 // counting is slow; a default of 1 would mean "not counted"
println("----------------------------------update task record table---------------------------------------")
// Task succeeded: update the MySQL record configuration table
valupdateSQL:String=
s"""
|update ${MyConfigSession.JDBC_TABLE} set status=?,end_time=?,data_count=? where job_name='${MyConfigSession.HIVE_TABLE0}' and start_time='${startTime}'
|update ${MyConfigSession.JDBC_TABLE} set status=?,exception=?,end_time=? where job_name='${MyConfigSession.HIVE_TABLE0}' and start_time='${startTime}'
|left join (select id,cast(id as string) id_str from pica_ds.pica_doctor a where a.delete_flag = 1 and to_date(a.creat_time) <= '${created_day}') AS b on ss.user_id = b.id_str
|SELECT t.*,t.user_id_ods user_id_old, COALESCE(cast(b.id as string),'0') AS user_id from ${MyConfigSession.VIEW_SESSION_ODS} as t
|left join (select id,cast(id as string) id_str from pica_ds.pica_doctor a where a.delete_flag = 1 and to_date(a.creat_time) <= '${created_day}') AS b on t.user_id_ods = cast(b.id as string)
// Add the user_id_int field from pica_doctor (value cast to int), restricted to delete_flag = 1 and creat_time no later than yesterday; rows with no match get 0
// val USER_ID_INT_SQL: String =
// s"""
// |SELECT ss.*,COALESCE(b.id,0) user_id_int from ${MyConfigSession.VIEW_DEVICE_TOKEN} AS ss
// |left join (select id,cast(id as string) id_str from pica_ds.pica_doctor a where a.delete_flag = 1 and to_date(a.creat_time) <= '${created_day}') AS b on ss.user_id = b.id_str
// |""".stripMargin
// val userIdDF: DataFrame = sparkSQLSession.sql(USER_ID_INT_SQL)
| where servicename='trace2' and action!='ACTION_EQUIP_INFO'
| and (case when ((view_class like '%YunqueApp%' and action!='ACTION_HEART_BEAT') or LENGTH(view_class)<=3 or view_class='YQGuidePageViewVC') then '2' else '1' end)='1'
| and created_day='${scnData}'
|""".stripMargin// and pseudo_session='3b3cec3b-2305-4e3a-b690-843e2f666c69'
|select concat(regexp_replace( '${scnData}',"-","") ,cast(row_number() over(partition by 1 order by a.created_time) as string)) as id,
|a.session_id,
|a.device_token,
|a.user_id,
|a.mobile,
|a.menu_code,
|a.menu_begin_time,
| case when a.action_type in('ACTION_VIEW','ACTION_HEART') and b.menu_end_time is null then c.session_end_time else b.menu_end_time end menu_end_time,
|(cast((case when a.action_type in('ACTION_VIEW','ACTION_HEART') and b.menu_end_time is null then c.session_end_time else b.menu_end_time end) as bigint)-cast(a.menu_begin_time as bigint))/1000 menu_time_diff,
|a.action_type,
|a.action_code,
|a.position,
|a.label_value,
|lag(a.menu_code) over(partition by a.session_id order by a.created_time) refer_menu_code,
|lag(a.action_code) over(partition by a.session_id order by a.created_time) refer_action_code,
|lag(a.position) over(partition by a.session_id order by a.created_time) refer_position,
|lag(a.label_value) over(partition by a.session_id order by a.created_time) refer_label_value,
|lag(a.action_type) over(partition by a.session_id order by a.created_time) refer_action_type,
|a.action_step,
|a.device_type,
|a.app_version,
|a.created_time,
|a.date_time,
|c.session_begin_time,
|c.session_end_time,
|(cast(c.session_end_time as bigint)-cast(c.session_begin_time as bigint))/1000 session_time_diff,
|a.refer_session_id
|from refer_result_table a
|left join refer_menu_table b on a.session_id=b.session_id and a.device_token=b.device_token and a.user_id=b.user_id and a.menu_code=b.menu_code and a.created_time=b.created_time
|left join session_end_table c on a.session_id = c.session_id
// s"select ${fields.mkString(",")} from ${MyConfigSession.HIVE_TABLE4} where created_day='${scnData}'")
println("----------------------------------update task record table---------------------------------------")
// Task succeeded: update the MySQL record configuration table
valupdateSQL:String=
s"""
|update ${MyConfigSession.JDBC_TABLE} set status=?,end_time=?,data_count=? where job_name='${MyConfigSession.HIVE_TABLE4}' and start_time='${startTime}'
|update ${MyConfigSession.JDBC_TABLE} set status=?,exception=?,end_time=? where job_name='${MyConfigSession.HIVE_TABLE4}' and start_time='${startTime}'
|SELECT t.*,t.user_id_ods user_id_old, COALESCE(cast(b.id as string),'0') AS user_id from ${MyConfigSession.VIEW_SESSION_ODS} as t
|left join (select id,cast(id as string) id_str from pica_ds.pica_doctor a where a.delete_flag = 1 and to_date(a.creat_time) <= '${created_day}') AS b on t.user_id_ods = cast(b.id as string)
| select concat(pseudo_session,'0') session_id,min(created_time) session_begin_time,max(created_time) session_end_time from select_res_table
| where pseudo_session not in (select distinct SUBSTRING(refer_session_id,1,length(refer_session_id)-1) from refer_result_table where refer_session_id like '%0')
| group by concat(pseudo_session,'0')
|""".stripMargin
// 2. Compute the begin and end times of every newly split session id
valsourceSql2=
"""
|select session_id,min(created_time) session_begin_time,max(created_time) session_end_time from refer_result_table
| where SUBSTRING(session_id,1,length(session_id)-1)
| in (select distinct SUBSTRING(refer_session_id,1,length(refer_session_id)-1) from refer_result_table where refer_session_id!='')
| case when t1.menu_end_time is null and t1.action_type='ACTION_VIEW' then t2.session_end_time else t1.menu_end_time end menu_end_time,
| (cast((case when t1.menu_end_time is null and t1.action_type='ACTION_VIEW' then t2.session_end_time else t1.menu_end_time end) as bigint)-cast(t1.menu_begin_time as bigint))/1000 menu_time_diff,