提交 1a0bc349 编写于 作者: wuyunfeng's avatar wuyunfeng

新增一个job,SessionProcessPathNew

上级 7dec4501
......@@ -41,7 +41,7 @@ object MyConfigSession {
final val SOURCE_SQL_PREF: String =
"""
|select device_token,pseudo_session,class_name,action,view_path,component_tag,created,mobile,doctor_id,device_brand,device_model,app_version,
|device_type,web_data,web_data_type,alternate_info,network_type,remark1 login_state,remark2 first_app_version,user_token_tourist,serviceName
|device_type,web_data,web_data_type,alternate_info,network_type,remark1 login_state,remark2 first_app_version,remark3 ,user_token_tourist,serviceName
| from pica_log.picalog_trace_app_part
| where pseudo_session is not null and pseudo_session !=''
| and pseudo_id !='' and extra_info !='com.picahealth.patient' and serviceName != 'trace3'
......@@ -78,6 +78,15 @@ object MyConfigSession {
| and (menu_code != '0' and menu_code !='null' and menu_code !='' and length(menu_code) <= 3 )
""".stripMargin
// Source query that filters rows from the dw_fact_log_session_TERM table
// (the table name is taken from MyConfigSession.HIVE_TABLE4).
// Presumably consumed by the SessionProcessPathNew job — confirm against the caller.
// A row is kept only when all of the following hold:
//   - app_version >= '3.1.7'
//   - action_type is 'ACTION_VIEW', or it is 'ACTION_CLICK' with action_code != 'null'
//   - menu_code is not '0', 'null' or '', is at most 3 characters long,
//     and cast(menu_code as int) does not yield NULL
// The selected `id` column is exposed under the alias log_session_id.
// NOTE(review): app_version is compared against a string literal; if the column is a
// string this is a lexicographic comparison (e.g. '10.0.0' would sort before '3.1.7') —
// confirm that this is acceptable for the version range in use.
final val SOURCE_SQL_PATH_NEW: String =
s"""
|select id log_session_id, session_id, user_id,device_token,action_type,user_token,menu_code,action_code,position,label_value,label_class,action_step,
|app_version,device_type,device_brand,device_model,net_type,created_time,date_time,module_class1,module_class2 from ${MyConfigSession.HIVE_TABLE4}
| where app_version >= '3.1.7'
| AND ((action_type ='ACTION_CLICK' and action_code != 'null' ) OR action_type ='ACTION_VIEW' )
| and (menu_code != '0' and menu_code !='null' and menu_code !='' and length(menu_code) <= 3 and cast(menu_code as int ) is not null)
""".stripMargin
//匹配user_id的条件
......@@ -101,7 +110,7 @@ object MyConfigSession {
| from ${MyConfigSession.VIEW_SESSION_ODS} as t
|left join pica_ds.pica_doctor as b on t.user_id = cast(b.id as string)
""".stripMargin
//1.针对没有匹配到的user_id,先使用 mobile_phone 进行匹配,得到 user_id 匹配,'0'
//1.针对没有匹配到的user_id,先使用 mobile_phone 进行匹配,得到 user_id 匹配,'0',XK0HdMN6dAfOlYPOFHHL0A==表示''加密之后的mobile
final val MOBILE_PHONE_SQL: String =
s"""
|SELECT ss.session_id, COALESCE(cast(b.id as string),'0') AS user_id, ss.mobile, ss.device_token, ss.user_token,
......@@ -109,7 +118,7 @@ object MyConfigSession {
|ss.action_code, ss.position,ss.label_value,ss.label_class,ss.module_class1,ss.module_class2,ss.app_version, ss.device_type,
|ss.device_brand, ss.device_model,ss.device_system, ss.net_type,ss.created_time,
|ss.date_time from ${MyConfigSession.VIEW_SESSION_NO_MATCH} AS ss
|left join (select distinct id,mobile_phone from pica_ds.pica_doctor where pica_doctor.delete_flag = 1 ) AS b on ss.mobile = b.mobile_phone
|left join (select distinct id,mobile_phone from pica_ds.pica_doctor where pica_doctor.delete_flag = 1 and mobile_phone!='' and mobile_phone!='XK0HdMN6dAfOlYPOFHHL0A==' ) AS b on ss.mobile = b.mobile_phone
""".stripMargin
final val MOBILE_PHONE_SQL_PREF: String =
s"""
......@@ -125,8 +134,9 @@ object MyConfigSession {
final val EQUIPMENT_INFO_SQL: String =
"""
|SELECT a.user_id,a.device_token ,ROW_NUMBER() OVER ( PARTITION BY a.device_token ORDER BY a.creat_time DESC ) row_d
|from pica_ds.picams_equipment_info AS a
| where a.user_id IS NOT NULL
|from (select e1.user_id,e1.device_token,e1.creat_time,e1.modify_time,e1.delete_flag from pica_ds.picams_equipment_info e1 union
| select e2.user_id,e2.device_token,e2.creat_time, e2.modify_time,e2.delete_flag from pica_ds.picams_p_equipment_info_shadow e2 ) a
| where a.user_id IS NOT NULL and a.user_id>0 and a.delete_flag=1 and device_token is not null
| AND (to_date(a.creat_time) = date_sub(current_date(),1) OR to_date(a.modify_time) = date_sub(current_date(),1))
""".stripMargin
......
此差异已折叠。
......@@ -15,8 +15,7 @@ import scala.collection.mutable
import scala.collection.mutable.ListBuffer
/**
* 处理埋点数据,进行简单的清洗过滤,导入到DW层pica_dw.dw_fact_log_session_pref
*
* 处理埋点数据,进行简单的清洗过滤,导入到DW层pica_dw.dw_fact_log_session_pref,已废弃
* @Author yunfeng.wu
* @Date 2020/08/07 09:23
* @Version 1.0
......@@ -62,12 +61,12 @@ object SessionProcessPref {
println("---------------------------------------process columns-------------------------------------------")
import sessionProcessPref.sparkSession.implicits._
var baseDF = baseRdd.toDF("pseudo_session", "user_id", "mobile", "device_token", "user_token", "view_class", "view_path", "action", "action_type", "component_tag",
"menu_code", "menu_code_new", "action_code", "position", "label_value", "label_class", "module_class1", "module_class2", "app_version", "device_type", "device_brand", "device_model",
"menu_code", "menu_code_new", "action_code", "position", "label_value", "label_class", "module_class_1", "module_class_2", "app_version", "device_type", "device_brand", "device_model",
"net_type", "created_time", "date_time", "web_data", "web_data_type", "alternate_info", "login_state", "first_app_version", "serviceName", "tag8", "tag9", "tag10")
println("baseDF.show=======>")
baseDF.printSchema()
baseDF.repartition(120).persist(StorageLevel.MEMORY_AND_DISK_SER)
sessionProcessPref.loadData(baseDF,scnData,index)
sessionProcessPref.loadData(baseDF,scnData,index,dataCount)
dataCount += baseDF.count().toInt
}
println("----------------------------------update task record table---------------------------------------")
......@@ -115,12 +114,6 @@ class SessionProcessPref extends java.io.Serializable {
val sparkSession: SparkSession = getSparkSession("SessionProcessPref")
//获取符合要求的actionType广播变量
// val actionTypeBroad = UseUtil.getBroadcast(sparkSession, MyConfigSession.ACTION_TYPE_SQL, "action_type", "is_valid")
//获取className广播变量
// val classNameBroad =
// UseUtil.getBroadcast(sparkSession, MyConfigSession.CLASS_NAME_SQL, "class_name", "is_valid")
//获取menu_code广播变量
val menuCodeBroad = UseUtil.getBroadcast(sparkSession, MyConfigSession.MENU_CODE_SQL, "view_path", "menu_code")
// //获取actionCategory变量
val actionCategory = UseUtil.getBroadcast(sparkSession, MyConfigSession.ACTION_CATEGORY_SQL, "action_type", "action_category")
......@@ -167,7 +160,7 @@ class SessionProcessPref extends java.io.Serializable {
}
//3.拆分 component_tag字段
val component_tag: String = StringUtils.getNotNullString(row.getAs[String]("component_tag"))
val tagArr = Array("menu_code", "action_code", "position", "label_value", "label_class", "module_class1", "module_class2", "tag8", "tag9", "tag10")
val tagArr = Array("menu_code", "action_code", "position", "label_value", "label_class", "module_class_1", "module_class_2", "tag8", "tag9", "tag10")
val tagMap = mutable.Map[String, String]()
tagArr.foreach(r => tagMap.put(r, ""))
//将符合要求的component_tag进行切割,获取 action_code,label_value
......@@ -216,7 +209,7 @@ class SessionProcessPref extends java.io.Serializable {
StringUtils.getNotNullString(row.getAs[String]("class_name")),
StringUtils.getNotNullString(row.getAs[String]("view_path")),
action, action_type, component_tag, tagMap("menu_code"), menu_code_new, tagMap("action_code"),
tagMap("position"), tagMap("label_value"), tagMap("label_class"), tagMap("module_class1"), tagMap("module_class2"),
tagMap("position"), tagMap("label_value"), tagMap("label_class"), tagMap("module_class_1"), tagMap("module_class_2"),
StringUtils.getNotNullString(row.getAs[String]("app_version")),
StringUtils.getNotNullString(row.getAs[String]("device_type")),
StringUtils.getNotNullString(row.getAs[String]("device_brand")),
......@@ -238,7 +231,7 @@ class SessionProcessPref extends java.io.Serializable {
baseRdd
}
def loadData(dataFrame: DataFrame, partitionDay: String,index:Integer): Unit = {
def loadData(dataFrame: DataFrame, partitionDay: String,index:Integer,count:Integer): Unit = {
val tmpTable = "result_view"
var insertSql = "insert overwrite"
if(index!=1){
......@@ -247,10 +240,11 @@ class SessionProcessPref extends java.io.Serializable {
println(s"-----------------create view ${tmpTable} and load to ${MyConfigSession.HIVE_TABLE0} --------------------")
dataFrame.repartition(10).createOrReplaceTempView(tmpTable)
val fields = List("pseudo_session", "user_id", "COALESCE(cast(user_id as int),0) user_id_int", "mobile", "device_token", "user_token", "view_class", "view_path", "action", "action_type",
"component_tag", "menu_code", "menu_code_new", "action_code", "position", "label_value", "label_class", "module_class1", "module_class2", "app_version", "device_type", "device_brand",
"component_tag", "menu_code", "menu_code_new", "action_code", "position", "label_value", "label_class", "module_class_1", "module_class_2", "app_version", "device_type", "device_brand",
"device_model", "net_type", "created_time", "date_time", "web_data", "web_data_type", "alternate_info", "login_state", "first_app_version",
"servicename", "tag8", "tag9", "tag10")
val loadDataSql = s"${insertSql} table ${MyConfigSession.HIVE_TABLE0} partition(created_day='${partitionDay}') select ${fields.mkString(",")} from ${tmpTable} distribute by rand()"
val loadDataSql = s"${insertSql} table ${MyConfigSession.HIVE_TABLE0} partition(created_day='${partitionDay}') select concat(regexp_replace( '${partitionDay}','-','') ,cast( (row_number() over(partition by 1 order by created_time) +${count}) as string)) as id," +
s"${fields.mkString(",")} from ${tmpTable} distribute by rand()"
sparkSession.sql(loadDataSql)
}
}
......@@ -271,8 +265,8 @@ case class SessionPref(pseudo_session: String,
position: String,
label_value: String,
label_class: String,
module_class1: String,
module_class2: String,
module_class_1: String,
module_class_2: String,
app_version: String,
device_type: String,
device_brand: String,
......
package com.utils
import com.session.{SessionMenuCalc, SessionMenuCalcNew, SessionProcess, SessionProcessHeart, SessionProcessPath, SessionProcessPref, SessionProcessTerm}
import com.session.{SessionMenuCalc, SessionMenuCalcNew, SessionProcess, SessionProcessHeart, SessionProcessPath, SessionProcessPathNew, SessionProcessPref, SessionProcessTerm}
import org.apache.hadoop.util.ProgramDriver
/**
......@@ -15,9 +15,10 @@ object Driver {
driver.addClass("SessionProcessPath",classOf[SessionProcessPath],"用户Session数据分析导入到dw_fact_log_session_path表")
driver.addClass("SessionProcessHeart",classOf[SessionProcessHeart],"用户Session数据分析导入到dw_fact_log_session_heart表")
driver.addClass("SessionMenuCalc",classOf[SessionMenuCalc],"传递日期参数--用户Session数据分析导入到dw_fact_log_session_menu_calc表")
driver.addClass("SessionProcessPref",classOf[SessionProcessPref],"传递日期参数--用户Session数据etl导入到dw_fact_log_sesson_pref表")
driver.addClass("SessionProcessTerm",classOf[SessionProcessTerm],"传递日期参数--用户Session数据etl导入到dw_fact_log_sesson_term表")
driver.addClass("SessionMenuCalcNew",classOf[SessionMenuCalcNew],"传递日期参数--用户Session数据分析导入到dw_fact_log_session_menu_calc_new表")
driver.addClass("SessionProcessPathNew",classOf[SessionProcessPathNew],"用户Session数据分析导入到dw_fact_log_session_path_new表")
driver.run(args)
}
}
Markdown 格式
0% or
您添加了 0 到此讨论。请谨慎行事。
先完成此消息的编辑!
想要评论请 注册