提交 f6990c5a 编写于 作者: wuyunfeng's avatar wuyunfeng

去掉无用的job

上级 7359ba44
package com.session
import java.sql
import java.sql.PreparedStatement
import com.config.MyConfigSession
import com.pica.utils.DateUtils
import com.utils.{JDBCUtil, UseUtil}
import org.apache.spark.SparkConf
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SparkSession}
import scala.collection.mutable.{ArrayBuffer, ListBuffer}
/**
 * Aggregates the rows of pica_dw.dw_fact_log_session_term for one day into
 * "runs" of consecutive identical codes per (user_id, session_id); the caller
 * stores the result in pica_dw.dw_fact_log_session_menu_calc_new.
 *
 * @Author yunfeng.wu
 * @Date 2020/06/12 10:23
 * @Version 1.0
 */
class SessionMenuCalcNew extends Serializable {

  /** Builds (or reuses) a Hive-enabled SparkSession configured through `UseUtil.setConfigure`. */
  def getSparkSession(appName: String): SparkSession = {
    val conf: SparkConf = new SparkConf().setAppName(appName)
    UseUtil.setConfigure(conf)
    SparkSession.builder().config(conf).enableHiveSupport().getOrCreate()
  }

  val sparkSession: SparkSession = getSparkSession("SessionMenuCalcNew")

  /**
   * Handles every row whose menu_code is not '200': consecutive rows with the
   * same menu_code form one run.
   *
   * Emitted row layout: user_id, session_id, "<menu_code>_<series>", summed
   * menu_time_diff, menu_code, "" (no action code), first created_time, last created_time.
   *
   * @param spark      session used to run the query
   * @param createdDay value of the created_day partition to read
   */
  def handleByMcPart1(spark: SparkSession, createdDay: String): org.apache.spark.rdd.RDD[Row] =
    aggregateRuns(spark, createdDay, "menu_code!='200'", "menu_code",
      (userId: String, sessionId: String, code: String, series: Int, total: Int, begin: String, end: String) =>
        Row(userId, sessionId, code + "_" + series, total, code, "", begin, end))

  /**
   * Handles the rows whose menu_code is '200': consecutive rows with the same
   * action_code form one run.
   *
   * Emitted row layout: user_id, session_id, "200_0", summed menu_time_diff,
   * "200", action_code, first created_time, last created_time.
   *
   * @param spark      session used to run the query
   * @param createdDay value of the created_day partition to read
   */
  def handleByMcPart2(spark: SparkSession, createdDay: String): org.apache.spark.rdd.RDD[Row] =
    aggregateRuns(spark, createdDay, "menu_code ='200'", "action_code",
      (userId: String, sessionId: String, code: String, series: Int, total: Int, begin: String, end: String) =>
        Row(userId, sessionId, "200_0", total, "200", code, begin, end))

  /**
   * Shared implementation of both parts: reads the day's rows matching
   * `menuPredicate`, groups them by (user_id, session_id), orders each group by
   * created_time and collapses consecutive rows with the same `codeColumn`
   * value into one run.
   *
   * @param menuPredicate SQL condition on menu_code, spliced into the WHERE clause
   * @param codeColumn    column whose consecutive equal values define a run
   * @param toRow         builds the output row from
   *                      (userId, sessionId, code, series, totalDuration, beginTime, endTime);
   *                      series is the 1-based position of the run inside its session
   */
  private def aggregateRuns(
      spark: SparkSession,
      createdDay: String,
      menuPredicate: String,
      codeColumn: String,
      toRow: (String, String, String, Int, Int, String, String) => Row): org.apache.spark.rdd.RDD[Row] = {
    val df = spark.sql("select cast(user_id as string),session_id,created_time,date_time, menu_code,refer_menu_code,action_code,nvl(menu_time_diff,0) menu_time_diff " +
      s"from pica_dw.dw_fact_log_session_term where created_day='${createdDay}' and ${menuPredicate} and action_type in ('ACTION_CLICK','ACTION_VIEW') ")

    // Group on a tuple rather than on "user_session" text that is split again
    // later: splitting truncated any session id containing '_' and failed on an
    // empty one. String.valueOf keeps the textual "null" that string
    // concatenation used to produce for null ids.
    // Only locals and static calls are used inside the closures so that they do
    // not reference the enclosing instance.
    df.rdd
      .groupBy(row => (String.valueOf(row.getAs[String]("user_id")), String.valueOf(row.getAs[String]("session_id"))))
      .flatMap { case ((userId, sessionId), rows) =>
        // One entry per run: (code, series, summed duration, begin time, end time).
        val runs = new ArrayBuffer[(String, Int, Int, String, String)]()
        rows.toList.sortBy(_.getAs[String]("created_time")).foreach { row =>
          // A null code becomes the text "null", so comparing codes cannot throw.
          val code = String.valueOf(row.getAs[String](codeColumn))
          val duration = row.getAs[Integer]("menu_time_diff").intValue()
          val createdTime = row.getAs[String]("created_time")
          runs.lastOption match {
            case Some((lastCode, series, total, begin, _)) if lastCode == code =>
              // Same code as the previous row: extend the current run. Rows are
              // visited in ascending created_time order, so the first time seen
              // is the minimum and the latest one is the maximum.
              runs(runs.length - 1) = (lastCode, series, total + duration, begin, createdTime)
            case _ =>
              // First row of the session, or the code changed: open a new run.
              runs += ((code, runs.length + 1, duration, createdTime, createdTime))
          }
        }
        runs.map { case (code, series, total, begin, end) =>
          toRow(userId, sessionId, code, series, total, begin, end)
        }.toList
      }
  }
}
object SessionMenuCalcNew {

  /** Creates a new job instance; constructing it also creates its SparkSession. */
  def apply(): SessionMenuCalcNew = new SessionMenuCalcNew()

  /**
   * Entry point. Expects two arguments: the target table and the created_day
   * partition to compute.
   *
   * Steps: insert a "started" row into the job record table, compute both
   * parts, overwrite the target partition, then update the record row with the
   * end time and the row count. On failure the record row is updated with
   * status '2' and the exception message instead.
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 2) {
      System.err.println("Usage: SessionMenuCalcNew <dbTable> <createdDay>")
      System.exit(1)
    }
    // 1. Register the run in the record table before doing any work.
    val insertSQL: String =
      s"""
         |insert into ${MyConfigSession.DATA_BASE}.${MyConfigSession.JDBC_TABLE} (job_id,job_name,job_type,job_scn,status,start_time)
         |values(0,'pica_dw.dw_fact_log_session_menu_calc_new','3',?,'0',?)
      """.stripMargin
    val dbTable = args.apply(0)
    val createdDay = args.apply(1)
    println(s"dbTable:${dbTable},createdDay:${createdDay}")
    // Job start time; it is also used below to locate the record row again.
    val startTime: String = DateUtils.getTodayTime
    // Values for the placeholders of insertSQL.
    val insertArr: Array[String] = Array[String](createdDay, startTime)
    // MySQL connection for the record table.
    val connSql: sql.Connection = JDBCUtil.getConnection()
    JDBCUtil.insertRecord(connSql, insertSQL, insertArr)
    try {
      val sessionMenuCalcNew: SessionMenuCalcNew = SessionMenuCalcNew()
      val spark: SparkSession = sessionMenuCalcNew.sparkSession
      try {
        val resRdd1 = sessionMenuCalcNew.handleByMcPart1(spark, createdDay)
        val resRdd2 = sessionMenuCalcNew.handleByMcPart2(spark, createdDay)
        val resRdd = resRdd1.union(resRdd2)
        val resDf = spark.createDataFrame(resRdd, StructType(
          List(StructField("user_id", StringType, false),
            StructField("session_id", StringType, false),
            StructField("menu_code_term", StringType, false),
            StructField("during_by_refer", IntegerType, false),
            StructField("menu_code", StringType, false),
            StructField("action_code", StringType, false),
            StructField("begin_time", StringType, false),
            StructField("end_time", StringType, false))
        ))
        resDf.printSchema()
        resDf.createOrReplaceTempView("session_menu_view_calc")
        spark.sql(s"insert overwrite table ${dbTable} partition(created_day='${createdDay}') " +
          s"select cast(user_id as int) user_id,session_id,menu_code_term,during_by_refer,menu_code,action_code,begin_time,end_time from session_menu_view_calc distribute by rand()")
        println("----------------------------------update task record table---------------------------------------")
        // The job succeeded: mark the record row as finished.
        val updateSQL: String =s"update ${MyConfigSession.JDBC_TABLE} set status=?,end_time=?,data_count=? where job_name='pica_dw.dw_fact_log_session_menu_calc_new' and start_time='${startTime}'"
        val endTime: String = DateUtils.getTodayTime
        val upreSta: PreparedStatement = connSql.prepareStatement(updateSQL)
        upreSta.setString(1, "1")
        upreSta.setString(2, endTime)
        upreSta.setInt(3, resDf.count().toInt)
        upreSta.executeUpdate()
        JDBCUtil.close(connSql, upreSta)
      } finally {
        // Stop Spark on the failure path as well, not only after a successful run.
        spark.stop()
      }
    } catch {
      case e: Exception => {
        println(s"-----------------------------------任务异常:e=${e}---------------------------------------------------")
        e.printStackTrace()
        val exceptionSQL: String =
          s"""
             |update ${MyConfigSession.JDBC_TABLE} set status=?,exception=?,end_time=? where job_name='pica_dw.dw_fact_log_session_menu_calc_new' and start_time='${startTime}'
          """.stripMargin
        val errorArr = Array[String]("2", e.getMessage, DateUtils.getTodayTime)
        JDBCUtil.insertRecord(connSql, exceptionSQL, errorArr)
        // NOTE(review): the exception is not rethrown, so main returns normally
        // after a failed run - confirm that the scheduler relies on the record
        // table rather than on the exit code.
      }
    } finally {
      // Always release the connection, even if recording the failure itself
      // threw. Closing an already closed JDBC connection is a no-op.
      connSql.close()
    }
  }
}
package com.utils package com.utils
import com.session.{SessionMenuCalc, SessionMenuCalcNew, SessionProcess, SessionProcessHeart, SessionProcessPath, SessionProcessPathNew, SessionProcessPref, SessionProcessTerm} import com.session.{SessionMenuCalc, SessionProcess, SessionProcessPath, SessionProcessPathNew, SessionProcessTerm}
import org.apache.hadoop.util.ProgramDriver import org.apache.hadoop.util.ProgramDriver
/** /**
...@@ -13,10 +13,8 @@ object Driver { ...@@ -13,10 +13,8 @@ object Driver {
val driver: ProgramDriver = new ProgramDriver() val driver: ProgramDriver = new ProgramDriver()
driver.addClass("SessionProcess",classOf[SessionProcess],"用户Session数据分析导入到dw_fact_log_session表") driver.addClass("SessionProcess",classOf[SessionProcess],"用户Session数据分析导入到dw_fact_log_session表")
driver.addClass("SessionProcessPath",classOf[SessionProcessPath],"用户Session数据分析导入到dw_fact_log_session_path表") driver.addClass("SessionProcessPath",classOf[SessionProcessPath],"用户Session数据分析导入到dw_fact_log_session_path表")
driver.addClass("SessionProcessHeart",classOf[SessionProcessHeart],"用户Session数据分析导入到dw_fact_log_session_heart表")
driver.addClass("SessionMenuCalc",classOf[SessionMenuCalc],"传递日期参数--用户Session数据分析导入到dw_fact_log_session_menu_calc表") driver.addClass("SessionMenuCalc",classOf[SessionMenuCalc],"传递日期参数--用户Session数据分析导入到dw_fact_log_session_menu_calc表")
driver.addClass("SessionProcessTerm",classOf[SessionProcessTerm],"传递日期参数--用户Session数据etl导入到dw_fact_log_sesson_term表") driver.addClass("SessionProcessTerm",classOf[SessionProcessTerm],"传递日期参数--用户Session数据etl导入到dw_fact_log_sesson_term表")
driver.addClass("SessionMenuCalcNew",classOf[SessionMenuCalcNew],"传递日期参数--用户Session数据分析导入到dw_fact_log_session_menu_calc_new表")
driver.addClass("SessionProcessPathNew",classOf[SessionProcessPathNew],"用户Session数据分析导入到dw_fact_log_session_path_new表") driver.addClass("SessionProcessPathNew",classOf[SessionProcessPathNew],"用户Session数据分析导入到dw_fact_log_session_path_new表")
driver.run(args) driver.run(args)
......
Markdown 格式
0% or
您添加了 0 到此讨论。请谨慎行事。
先完成此消息的编辑!
想要评论请 注册