Commit 5c56dda7 authored by wuyunfeng

Optimize session_pref and term

Parent 500cc55e
@@ -43,8 +43,12 @@ object SessionProcessTerm {
""".stripMargin """.stripMargin
//设置同步数据的批次号,格式是2019-09-12 //设置同步数据的批次号,格式是2019-09-12
var scnData: String = DateUtils.getYesterdayDate var scnData: String = DateUtils.getYesterdayDate
var condition = " 1=1"
if (args.length >= 1) { if (args.length >= 1) {
scnData = args(0) scnData = args(0)
if(args.length > 1){
condition = args(1)
}
} }
println(s"scnData=${scnData}") println(s"scnData=${scnData}")
//设置任务开始时间,格式是2019-09-12 14:03:30 //设置任务开始时间,格式是2019-09-12 14:03:30
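Note: the new `condition` argument defaults to the no-op predicate `" 1=1"`, so the query below can always append `and ${condition}` to its WHERE clause without special-casing the zero-argument run. A minimal standalone sketch of the pattern (`ArgsSketch` is a hypothetical name, and the date literal stands in for `DateUtils.getYesterdayDate`):

```scala
// Standalone sketch of the optional-argument handling above.
object ArgsSketch {
  def main(args: Array[String]): Unit = {
    var scnData: String = "2019-09-12" // stand-in for DateUtils.getYesterdayDate
    var condition = " 1=1"             // no-op predicate, safe to AND onto any WHERE clause
    if (args.length >= 1) {
      scnData = args(0)                // arg 0 overrides the batch date
      if (args.length > 1) {
        condition = args(1)            // arg 1 injects an extra SQL predicate
      }
    }
    println(s"where created_day='${scnData}' and ${condition}")
  }
}
```

Since the predicate is spliced into the query by string interpolation, callers are trusted to pass well-formed (and safe) SQL.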
@@ -68,8 +72,8 @@ object SessionProcessTerm {
 | from ${MyConfigSession.HIVE_TABLE0}
 | where servicename='trace2' and action!='ACTION_EQUIP_INFO'
 | and (case when ((view_class like '%YunqueApp%' and action!='ACTION_HEART_BEAT') or LENGTH(view_class)<=3 or view_class='YQGuidePageViewVC') then '2' else '1' end)='1'
-| and created_day='${scnData}'
-|""".stripMargin // and pseudo_session='3b3cec3b-2305-4e3a-b690-843e2f666c69'
+| and created_day='${scnData}' and ${condition}
+|""".stripMargin
 val sourceDF: DataFrame = sparkSession.sql(SOURCE_SQL_TERM)
 println("sourceDF.show==================")
 sourceDF.printSchema()
@@ -86,13 +90,15 @@ object SessionProcessTerm {
println("selectDF.show========") println("selectDF.show========")
selectDF.printSchema() selectDF.printSchema()
println("selectDF.count=========",selectDF.count()) println("selectDF.count=========",selectDF.count())
val conditionGroup = List("<='4' ","between '5' and '9'",">'9'") // val conditionGroup = List("<='4' ","between '5' and '9'",">'9'")
val conditionGroup = List("='0'","='1'","='2'","='3'","='4'","='5'","='6'","='7'","='8'","='9'",
"='a'","='b'","='c'","='d'","='e'","='f'")
var dataCount = 0 var dataCount = 0
var index = 0 var index = 0
selectDF.persist(StorageLevel.MEMORY_AND_DISK_SER) selectDF.persist(StorageLevel.MEMORY_AND_DISK_SER)
for(condition <- conditionGroup){ for(condition <- conditionGroup){
index += 1 index += 1
val slideDF = selectDF.where(s" SUBSTRING(pseudo_session,1,1) ${condition}") val slideDF = selectDF.where(s" SUBSTRING(pseudo_session,2,1) ${condition}").repartition(100)
println(s"-----------------------------------compute refer columns,condition=${condition}-----------------------------------------") println(s"-----------------------------------compute refer columns,condition=${condition}-----------------------------------------")
val referResultRdd = sessionProcessTerm.getReferColumns(slideDF) val referResultRdd = sessionProcessTerm.getReferColumns(slideDF)
val referResultDF: DataFrame = sparkSession.createDataFrame(referResultRdd, StructType( val referResultDF: DataFrame = sparkSession.createDataFrame(referResultRdd, StructType(
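Note: replacing the three coarse ranges with the 16 hex digits, keyed on the second character of `pseudo_session`, slices `selectDF` into roughly equal 1/16 passes, assuming `pseudo_session` values are lowercase hex UUID strings. A hedged sketch that builds the same predicates programmatically instead of listing them by hand (`processInHexSlices` is ours, not project code):

```scala
import org.apache.spark.sql.DataFrame

// Sketch only: derive the 16 single-hex-digit equality predicates and process
// one slice of the input per pass to bound the working set of each iteration.
def processInHexSlices(selectDF: DataFrame): Unit = {
  val conditionGroup: List[String] = "0123456789abcdef".map(c => s"='$c'").toList
  for (condition <- conditionGroup) {
    // SUBSTRING is 1-based in Spark SQL, so position 2 is the UUID's second character.
    val slideDF = selectDF
      .where(s"SUBSTRING(pseudo_session,2,1) ${condition}")
      .repartition(100)
    println(s"slice ${condition}: ${slideDF.count()} rows") // stand-in for the real per-slice work
  }
}
```

Single-character equality buckets distribute more evenly than the old `<='4'` / `between '5' and '9'` / `>'9'` ranges, and the added `repartition(100)` spreads each slice across executors before the heavier per-slice joins.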
@@ -118,7 +124,7 @@ object SessionProcessTerm {
println("referResultDF.show()============'") println("referResultDF.show()============'")
referResultDF.printSchema() referResultDF.printSchema()
// referResultDF.where("action_type='ACTION_VIEW'").show(100,true) // referResultDF.where("action_type='ACTION_VIEW'").show(100,true)
referResultDF.repartition(100).persist(StorageLevel.MEMORY_AND_DISK_SER).createOrReplaceTempView("refer_result_table") referResultDF.persist(StorageLevel.MEMORY_AND_DISK_SER).createOrReplaceTempView("refer_result_table")
println("-----------------------------------compute menu_code term-----------------------------------------") println("-----------------------------------compute menu_code term-----------------------------------------")
val getMenuTermSql = val getMenuTermSql =
""" """
@@ -172,7 +178,7 @@ object SessionProcessTerm {
 |(cast(c.session_end_time as bigint)-cast(c.session_begin_time as bigint))/1000 session_time_diff,
 |a.refer_session_id
 |from refer_result_table a
-|left join refer_menu_table b on a.session_id=b.session_id and a.device_token=b.device_token and a.user_id=b.user_id and a.menu_code=b.menu_code and a.created_time=b.created_time
+|left join refer_menu_table b on a.session_id=b.session_id and a.device_token=b.device_token and a.user_id=b.user_id and a.menu_code=b.menu_code and a.created_time=b.created_time and a.action_type in('ACTION_VIEW','ACTION_HEART')
 |left join session_end_table c on a.session_id = c.session_id
 | distribute by rand()
 |""".stripMargin
@@ -191,11 +197,6 @@ object SessionProcessTerm {
println(s"${condition}的结果==${resCount}") println(s"${condition}的结果==${resCount}")
dataCount += resCount dataCount += resCount
} }
val fields = List("id","session_id","device_token","user_id","mobile","menu_code","menu_begin_time","menu_end_time","menu_time_diff","action_type",
"action_code","position","label_value","refer_menu_code","refer_action_code","refer_position","refer_label_value","refer_action_type","action_step",
"device_type","app_version","created_time","date_time","session_begin_time","session_end_time","session_time_diff","refer_session_id")
// sparkSession.sql(s"insert overwrite table ${MyConfigSession.HIVE_TABLE4} partition(created_day='${scnData}') " +
// s"select ${fields.mkString(",")} from ${MyConfigSession.HIVE_TABLE4} where created_day='${scnData}'")
println("----------------------------------update task record table---------------------------------------") println("----------------------------------update task record table---------------------------------------")
//任务执行成功,更新 Mysql record 配置表 //任务执行成功,更新 Mysql record 配置表
val updateSQL: String = val updateSQL: String =
......
@@ -88,6 +88,8 @@ object UseUtil {
conf.set("spark.reducer.maxSizeInFlight", "96m") conf.set("spark.reducer.maxSizeInFlight", "96m")
//设置字符串 //设置字符串
conf.set("spark.debug.maxToStringFields","100") conf.set("spark.debug.maxToStringFields","100")
//启用自动设置 Shuffle Reducer,默认false
conf.set("spark.sql.adaptive.enabled","true")
} }
......
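Note: `spark.sql.adaptive.enabled` turns on adaptive query execution, which lets Spark adjust the number of post-shuffle partitions (the "shuffle reducers" in the comment) at runtime instead of always using the fixed `spark.sql.shuffle.partitions`; it defaults to false in Spark 2.x. A minimal sketch of the conf block (only the `.set` keys and values come from the diff; the surrounding code is an assumption, not UseUtil's actual body):

```scala
import org.apache.spark.SparkConf

// Minimal sketch; keys and values copied from the diff above.
val conf = new SparkConf()
  .set("spark.reducer.maxSizeInFlight", "96m") // larger in-flight fetch buffer per reduce task
  .set("spark.debug.maxToStringFields", "100") // avoid truncated toString output on wide schemas
  // Adaptive execution: tune post-shuffle partition counts at runtime (default false in Spark 2.x).
  .set("spark.sql.adaptive.enabled", "true")
```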