package com.session

import java.sql
import java.sql.PreparedStatement

import com.config.MyConfigSession
import com.pica.utils.{DateUtils, StringUtils}
import com.utils.{JDBCUtil, UseUtil}
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.expressions.{Window, WindowSpec}
import org.apache.spark.sql.functions.{lag, row_number}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, Row, SparkSession}

import scala.collection.mutable.ListBuffer
import scala.util.control.Breaks.{break, breakable}


/**
  * Processes yesterday's data and loads it into the pica_dw.dw_fact_log_session_path table.
  * @Author zhenxin.ma
  * @Date 2020/3/27 10:58
  * @Version 1.0
  */
class SessionProcessPath {
    /**
      * Builds (or reuses) a Hive-enabled SparkSession for this job.
      *
      * @param appName application name set on the SparkConf
      * @return the session returned by `SparkSession.builder().getOrCreate()`
      */
    def getSparkSession(appName: String): SparkSession = {
        val sparkConf: SparkConf = new SparkConf().setAppName(appName)
        // Project-wide settings are applied to the conf by UseUtil before the session is built.
        UseUtil.setConfigure(sparkConf)
        SparkSession
            .builder()
            .config(sparkConf)
            .enableHiveSupport()
            .getOrCreate()
    }
}


object SessionProcessPath {
    /** Factory so callers can write `SessionProcessPath()` to obtain a new instance. */
    def apply(): SessionProcessPath = new SessionProcessPath


    /**
      * Job entry point.
      *
      * Steps:
      *  1. insert a "running" row (status '0') into the MySQL job-record table;
      *  2. read the source rows for the batch day and compute the refer_* columns;
      *  3. for menu_code 930 rows, re-derive label_value from the position -> label broadcast map;
      *  4. overwrite the Hive partition for the batch day;
      *  5. mark the job-record row as succeeded ('1', with row count) or failed ('2', with message).
      *
      * The JDBC connection and the Spark session are released in a `finally` block, so they are
      * cleaned up on both the success and the failure path.
      *
      * NOTE(review): a failure is recorded in MySQL and printed but not rethrown, so the process
      * still exits normally — confirm the scheduler relies on the record table, not the exit code.
      *
      * @param args optional; `args(0)` overrides the batch day (e.g. 2019-09-12). Defaults to yesterday.
      */
    def main(args: Array[String]): Unit = {
        // 1. Register the run in the record table before doing any work.
        val insertSQL: String =
            s"""
               |insert into ${MyConfigSession.DATA_BASE}.${MyConfigSession.JDBC_TABLE} (job_id,job_name,job_type,job_scn,status,start_time)
               |values(1968,'pica_dw.dw_fact_log_session_path','3',?,'0',?)
		""".stripMargin
        // Batch day of the data to sync, e.g. 2019-09-12: first CLI argument if present, else yesterday.
        val scnData: String = if (args.length >= 1) args(0) else DateUtils.getYesterdayDate
        println(s"scnData=${scnData}")
        // Job start time, e.g. 2019-09-12 14:03:30; also used to locate the record row for later updates.
        val startTime: String = DateUtils.getTodayTime
        // Values bound to the two placeholders of insertSQL.
        val insertArr: Array[String] = Array[String](scnData, startTime)
        val connSql: sql.Connection = JDBCUtil.getConnection()
        JDBCUtil.insertRecord(connSql, insertSQL, insertArr)

        // Filled in once the session exists so the finally block can stop it on every path.
        var sparkSessionOpt: Option[SparkSession] = None
        try {
            val sparkSession: SparkSession = SessionProcessPath().getSparkSession("SessionProcessPath")
            sparkSessionOpt = Some(sparkSession)
            // Broadcast of the url_content -> label_value mapping used to relabel by position.
            val positionUrlLabelBroad = UseUtil.getBroadcast(sparkSession, MyConfigSession.ACTION_URLLABEL_SQL, "url_content", "label_value")
            println(s"positionUrlLabelBroad=${positionUrlLabelBroad.value}")
            // Source rows restricted to the batch day.
            val sourceDF: DataFrame = sparkSession.sql(MyConfigSession.SOURCE_SQL_PATH+s" and created_day='${scnData}'")
            sourceDF.show()

            println("-----------------------------------compute refer columns-----------------------------------------")
            val referResDF: DataFrame = getReferColumns(sourceDF,sparkSession)
            println("referResDF.printSchema()")
            referResDF.printSchema()
            println("------------------------------------单独计算label_value----------------------------------------------")
            // Only menu_code 930 rows are rebuilt. The iterator is mapped lazily so a partition is
            // never materialised in memory as a whole.
            val newLabelRdd: RDD[Row] = referResDF.where("menu_code = '930'").rdd.mapPartitions(rows => {
                val positionLabelMap: Map[String, String] = positionUrlLabelBroad.value
                rows.map(row => relabelRow(row, positionLabelMap))
            })
            // NOTE(review): rows whose menu_code is NULL match neither "= '930'" nor "!= '930'" and are
            // therefore absent from resultDF — confirm the source query guarantees a non-null menu_code.
            val resultDF = sparkSession.createDataFrame(newLabelRdd,referResDF.schema).union(referResDF.where("menu_code != '930'"))
            // resultDF feeds both the Hive insert and the count below; cache it so the window
            // computation and relabelling are not executed twice.
            resultDF.cache()
            println("-----------------------------------load data to pica_dw.dw_fact_log_session_path-----------------")
            loadData(resultDF,sparkSession,scnData)

            println("----------------------------------update task record table---------------------------------------")
            val rowCount: Int = resultDF.count().toInt
            // Job succeeded: update the MySQL record row.
            // NOTE(review): unlike insertSQL this statement does not qualify the table with DATA_BASE — confirm intended.
            val updateSQL: String =
                s"""
                   |update ${MyConfigSession.JDBC_TABLE} set status=?,end_time=?,data_count=? where job_id=1968 and start_time='${startTime}'
				""".stripMargin
            val upreSta: PreparedStatement = connSql.prepareStatement(updateSQL)
            try {
                upreSta.setString(1, "1")
                upreSta.setString(2, DateUtils.getTodayTime)
                upreSta.setInt(3, rowCount)
                upreSta.executeUpdate()
            } finally {
                upreSta.close()
            }
        } catch {
            case e: Exception =>
                println("-----------------------------------任务异常---------------------------------------------------")
                e.printStackTrace()
                val exceptionSQL: String =
                    s"""
                       |update ${MyConfigSession.JDBC_TABLE} set status=?,exception=?,end_time=? where job_id=1968 and start_time='${startTime}'
					""".stripMargin
                // getMessage may be null (e.g. NullPointerException); fall back to the exception's toString.
                val failureText: String = Option(e.getMessage).getOrElse(e.toString)
                val errorArr = Array[String]("2", failureText, DateUtils.getTodayTime)
                JDBCUtil.insertRecord(connSql, exceptionSQL, errorArr)
        } finally {
            // Release resources on every path; a failing close must not prevent stopping Spark.
            try {
                connSql.close()
            } catch {
                case closeError: Exception => closeError.printStackTrace()
            }
            sparkSessionOpt.foreach(_.stop())
        }
    }

    /** action_code values (within menu_code 930) whose label_value is re-derived from `position`. */
    private val RelabelActionCodes: Set[String] = Set("930000", "930001", "930002")

    /**
      * Chooses the label_value for one row: for an ACTION_CLICK with one of [[RelabelActionCodes]],
      * the label of the first mapping entry whose key is contained in the row's position; otherwise,
      * or when no entry matches or the mapped label is null/empty, the row's original label_value.
      */
    private def resolveLabelValue(row: Row, positionLabelMap: Map[String, String]): String = {
        val actionCode = row.getAs[String]("action_code")
        val actionType = row.getAs[String]("action_type")
        val mappedLabel: Option[String] =
            if (RelabelActionCodes.contains(actionCode) && "ACTION_CLICK".equals(actionType)) {
                val position = StringUtils.getNotNullString(row.getAs[String]("position"))
                positionLabelMap
                    .find { case (urlContent, _) => position.contains(urlContent) }
                    .map { case (_, label) =>
                        println("--------------------menu_code match successfully-----------------------")
                        label
                    }
            } else {
                None
            }
        mappedLabel
            .filter(label => label != null && label.nonEmpty)
            .getOrElse(row.getAs[String]("label_value"))
    }

    /**
      * Rebuilds one row in the column order produced by [[getReferColumns]], passing string columns
      * through `StringUtils.getNotNullString` and replacing label_value via [[resolveLabelValue]].
      */
    private def relabelRow(row: Row, positionLabelMap: Map[String, String]): Row = {
        def text(field: String): String = StringUtils.getNotNullString(row.getAs[String](field))
        Row(
            text("log_session_id"),
            text("session_id"),
            row.getAs[Integer]("user_id"),
            row.getAs[String]("action_type"),
            text("user_token"),
            text("menu_code"),
            text("action_code"),
            text("position"),
            resolveLabelValue(row, positionLabelMap),
            text("label_class"),
            text("refer_menu_code"),
            text("refer_action_code"),
            text("refer_position"),
            text("refer_action_type"),
            text("refer_created"),
            row.getAs[Integer]("step_id"),
            text("app_version"),
            text("device_type"),
            text("created_time"),
            text("date_time"),
            text("module_class1"),
            text("module_class2"),
            row.getAs[Double]("refer_time_diff")
        )
    }


    /**
      * Adds the "refer" columns, i.e. the values of the previous event in the same session.
      *
      * Rows are windowed by session_id and ordered by created_time. For every row the previous row's
      * menu_code, action_code, position, action_type and created_time are exposed as refer_* columns,
      * step_id is the row number inside the session, and refer_time_diff is
      * created_time - refer_created. Missing refer_* values are replaced by '' (refer_created falls
      * back to the row's own created_time).
      *
      * @param dataFrame     source data
      * @param sparkSession  SparkSession environment (not used by this method)
      * @return the projected DataFrame with refer_* columns, step_id and refer_time_diff
      **/
    def getReferColumns(dataFrame: DataFrame ,sparkSession: SparkSession):DataFrame = {
        // Partition by session_id, then order by created_time inside each session.
        val bySessionInTimeOrder: WindowSpec = Window.partitionBy("session_id").orderBy("created_time")

        // (new refer column, source column whose previous value it carries)
        val referSources: Seq[(String, String)] = Seq(
            "refer_menu_code" -> "menu_code",
            "refer_action_code" -> "action_code",
            "refer_position" -> "position",
            "refer_action_type" -> "action_type",
            "refer_created" -> "created_time"
        )
        val withReferColumns: DataFrame = referSources.foldLeft(dataFrame) {
            case (current, (referName, sourceName)) =>
                current.withColumn(referName, lag(current(sourceName), 1).over(bySessionInTimeOrder))
        }
        val withStepId: DataFrame =
            withReferColumns.withColumn("step_id", row_number().over(bySessionInTimeOrder))

        // Fix the output column order and strip NULLs from the refer columns.
        val projected: DataFrame = withStepId.selectExpr(
            "log_session_id","session_id", "user_id", "action_type", "user_token", "menu_code", "action_code", "position", "label_value","label_class",
            "COALESCE(refer_menu_code,'') as refer_menu_code",
            "COALESCE(refer_action_code,'') as refer_action_code",
            "COALESCE(refer_position,'') as refer_position",
            "COALESCE(refer_action_type,'') as refer_action_type",
            "COALESCE(refer_created,created_time) as refer_created",
            "step_id", "app_version", "device_type", "created_time", "date_time","module_class1","module_class2")

        // refer_time_diff = created_time - refer_created
        projected.withColumn("refer_time_diff", projected("created_time") - projected("refer_created"))
    }



    /**
      * Writes the result into the Hive table, overwriting one created_day partition.
      *
      * The DataFrame is registered as the temp view `result_view` and inserted with
      * `insert overwrite ... partition(created_day=...)`; refer_time_diff is cast to int on the way.
      *
      * @param dataFrame     data to load
      * @param sparkSession  SparkSession environment used to run the insert
      * @param partitionDay  value of the created_day partition to overwrite
      * @return void
      **/
    def loadData(dataFrame: DataFrame, sparkSession: SparkSession, partitionDay:String):Unit = {
        val stagingView = "result_view"
        val targetTable = MyConfigSession.HIVE_TABLE2
        dataFrame.createOrReplaceTempView(stagingView)
        sparkSession.sql(
            s"""
               |insert overwrite table ${targetTable} partition(created_day='${partitionDay}')
               | select log_session_id, session_id,user_id,action_type,user_token,menu_code,action_code,position,label_value,label_class,
               | refer_menu_code,refer_action_code,refer_position,refer_action_type,
               | cast(refer_time_diff as int) as refer_time_diff,
               | step_id,app_version,device_type,created_time,date_time, module_class1, module_class2
               | from ${stagingView} distribute by rand()
                """.stripMargin)
    }


}
