package com.session

import java.sql
import java.sql.PreparedStatement

import com.config.MyConfigSession
import com.pica.utils.{DateUtils, StringUtils}
import com.utils.{JDBCUtil, UseUtil}
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
import org.apache.spark.storage.StorageLevel

import scala.collection.mutable.ListBuffer


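/**
 * Processes yesterday's data to compute durations and access paths; the result is loaded into
 * dw_fact_log_session_term.
 * Notes:
 * 1. Only service_name='trace2' (native) tracking data is kept (h5 tracking data is also reported once through native).
 * 2. ACTION_EQUIP_INFO data is dropped, and sessions with <= 2 action types are dropped.
 * 3. The guide page shown before the home page loads (not tracked on Android; iOS: view_class=YQGuidePageViewVC)
 *    is excluded from home-page traffic statistics.
 * 4. For ACTION_VIEW events, the page resume and stop events are used to compute the visit duration.
 * 5. For ACTION_HEART_BEAT events of type back, or BACKGROUND events, menu_code is set to back;
 *    the difference between the last back and the first back of a consecutive background run is used
 *    as the background duration.
 * 6. If menu_code is empty or does not follow the expected format, view_class is used instead when it has a value.
 *
 * @Author zhenxin.ma
 * @Date 2020/3/27 10:58
 * @Version 1.0
 */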

object SessionProcessTerm {
  /** Factory method so callers can write `SessionProcessTerm()` to obtain a fresh helper instance. */
  def apply(): SessionProcessTerm = {
    new SessionProcessTerm()
  }

  /**
   * Job entry point: builds one day of session-term data and loads it into
   * `MyConfigSession.HIVE_TABLE4`.
   *
   * Flow:
   *  1. Insert a row (initial status '0') into the MySQL job-record table.
   *  2. Read the day's `trace2` rows from `MyConfigSession.HIVE_TABLE0`, excluding
   *     `ACTION_EQUIP_INFO` and the guide/short/`YunqueApp` view classes.
   *  3. Keep only sessions with more than two distinct actions and a usable `menu_code_offset`.
   *  4. Process the data in 16 slices selected by the 2nd character of `pseudo_session`
   *     ('0'-'9', 'a'-'f'). For every slice: compute refer columns, menu end times and
   *     session begin/end times, resolve `user_id`, and write the slice to Hive
   *     (first slice overwrites the partition, the rest append).
   *  5. Update the job record with status '1' and the row count, or with status '2' and the
   *     exception message when anything fails.
   *
   * NOTE(review): a failure is recorded in the job table but not rethrown, so the JVM still
   * exits normally — confirm the scheduler relies on the record table rather than the exit code.
   *
   * @param args `args(0)` (optional) batch date such as `2019-09-12`, defaults to yesterday;
   *             `args(1)` (optional) extra SQL predicate appended to the source query,
   *             defaults to ` 1=1`. The predicate is spliced into the SQL verbatim, so it
   *             must only ever come from a trusted operator.
   */
  def main(args: Array[String]): Unit = {
    // 1. Register the run in the record table before doing any work.
    val insertSQL: String =
      s"""
               			   |insert into ${MyConfigSession.DATA_BASE}.${MyConfigSession.JDBC_TABLE} (job_id,job_name,job_type,job_scn,status,start_time)
               			   |values(0,'${MyConfigSession.HIVE_TABLE4}','3',?,'0',?)
		""".stripMargin
    // Batch number of the data to sync, formatted like 2019-09-12.
    val scnData: String = if (args.length >= 1) args(0) else DateUtils.getYesterdayDate
    // Optional extra predicate for the source query.
    val condition: String = if (args.length > 1) args(1) else " 1=1"
    println(s"scnData=${scnData}")
    // Job start time, formatted like 2019-09-12 14:03:30; also used below to locate the record row.
    val startTime: String = DateUtils.getTodayTime
    // Positional parameters for the insert statement.
    val insertArr: Array[String] = Array[String](scnData, startTime)
    val connSql: sql.Connection = JDBCUtil.getConnection()
    JDBCUtil.insertRecord(connSql, insertSQL, insertArr)
    val sessionProcessTerm: SessionProcessTerm = SessionProcessTerm()
    try {
      val sparkSession: SparkSession = sessionProcessTerm.getSparkSession("SessionProcessTerm")
      val sourceSqlTerm =
        s"""
           |select pseudo_session,user_id_int user_id,mobile,device_token,view_class,view_path,action,
           |case when action='ACTION_HEART_BEAT' then 'ACTION_HEART' else  action_type end action_type,component_tag,menu_code,menu_code_new,
           |case when component_tag ='back' then 'back' when (menu_code_new in('0','null','') and  view_class not in('0','null','')) then view_class
           |else menu_code_new end menu_code_offset ,
           |action_code,position,label_value,label_class,app_version,device_type,created_time,date_time
           | from ${MyConfigSession.HIVE_TABLE0}
           | where servicename='trace2' and action!='ACTION_EQUIP_INFO'
           | and (case when ((view_class like '%YunqueApp%' and action!='ACTION_HEART_BEAT') or LENGTH(view_class)<=3 or view_class='YQGuidePageViewVC') then '2' else '1' end)='1'
           |   and created_day='${scnData}' and ${condition}
           |""".stripMargin
      val sourceDF: DataFrame = sparkSession.sql(sourceSqlTerm)
      println("sourceDF.show==================")
      sourceDF.printSchema()
      sourceDF.createOrReplaceTempView("session_term_ods")
      // Drop sessions with <= 2 distinct actions and rows without a usable menu_code_offset.
      val filterSqlTerm =
        s"""
           |select t1.*  from session_term_ods t1
           |join (select   pseudo_session from session_term_ods group by pseudo_session having count(distinct action)>2) t2
           |on t1.pseudo_session = t2.pseudo_session
           |where   t1.menu_code_offset not in('0','null','')
           |""".stripMargin
      val selectDF = sparkSession.sql(filterSqlTerm).drop("menu_code_new")
      println("selectDF.show========")
      selectDF.printSchema()
      // Mark for caching BEFORE the first action, so count() fills the cache instead of the
      // query being evaluated once for the count and again for the first slice.
      selectDF.persist(StorageLevel.MEMORY_AND_DISK_SER)
      println(("selectDF.count=========", selectDF.count()))
      // One predicate per slice; slices are chosen by the 2nd character of pseudo_session.
      val conditionGroup = List("='0'","='1'","='2'","='3'","='4'","='5'","='6'","='7'","='8'","='9'",
        "='a'","='b'","='c'","='d'","='e'","='f'")
      // Schema of the rows produced by getReferColumns; identical for every slice.
      val referSchema = StructType(
        List(StructField("session_id", StringType, false),
          StructField("device_token", StringType, false),
          StructField("user_id", IntegerType, false),
          StructField("mobile", StringType, false),
          StructField("menu_code", StringType, false),
          StructField("menu_begin_time", StringType, false),
          StructField("action_code", StringType, false),
          StructField("position", StringType, false),
          StructField("label_value", StringType, false),
          StructField("action", StringType, false),
          StructField("action_type", StringType, false),
          StructField("action_step", StringType, false),
          StructField("device_type", StringType, false),
          StructField("app_version", StringType, false),
          StructField("created_time", StringType, false),
          StructField("date_time", StringType, false),
          StructField("refer_session_id", StringType, false)
        )
      )
      // Base query for menu end times: within a session ordered by created_time DESC, lag()
      // yields the created_time of the chronologically next row.
      val getMenuTermSql =
        """
          |select a.session_id,a.device_token,a.user_id,a.menu_code,
          |lag(a.created_time) over(partition by a.session_id order by a.created_time desc ) menu_end_time,a.created_time
          | from  refer_result_table a
          | left  join(select session_id,min(app_version) min_version from refer_result_table group by session_id) b on a.session_id=b.session_id
          |""".stripMargin
      var dataCount = 0
      // sliceCondition deliberately has its own name so it cannot be confused with `condition` above.
      for ((sliceCondition, sliceIdx) <- conditionGroup.zipWithIndex) {
        // 1-based slice number; loadData overwrites the partition only for slice 1.
        val index = sliceIdx + 1
        val slideDF = selectDF.where(s" SUBSTRING(pseudo_session,2,1)  ${sliceCondition}").repartition(100)
        println(s"-----------------------------------compute refer columns,condition=${sliceCondition}-----------------------------------------")
        val referResultRdd = sessionProcessTerm.getReferColumns(slideDF)
        val referResultDF: DataFrame = sparkSession.createDataFrame(referResultRdd, referSchema)
        println("referResultDF.show()============'")
        referResultDF.printSchema()
        referResultDF.persist(StorageLevel.MEMORY_AND_DISK_SER).createOrReplaceTempView("refer_result_table")
        println("-----------------------------------compute menu_code term-----------------------------------------")
        // Sessions whose minimum version is >= '3.4.5' use resume / heart-beat events ...
        val newVersionMenuDF = sparkSession.sql(s"${getMenuTermSql} where b.min_version>='3.4.5' and a.action in('ACTION_ACTIVITY_RESUME','ACTION_HEART_BEAT') ")
        println("newVersionMenuDF,show()======")
        // ... older sessions use every ACTION_VIEW / ACTION_HEART row.
        val oldVersionMenuDF = sparkSession.sql(s"${getMenuTermSql} where b.min_version<'3.4.5' and a.action_type in ('ACTION_VIEW','ACTION_HEART')")
        println("oldVersionMenuDF,show()======")
        val referMenuDF = newVersionMenuDF.union(oldVersionMenuDF)
        println("referMenuDF.show()=========")
        referMenuDF.printSchema()
        referMenuDF.createOrReplaceTempView("refer_menu_table")

        println("-----------------------------------compute session_id end_time-----------------------------------------")
        // getSessionTail reads the refer_result_table view registered above.
        val sessionEndDF: DataFrame = sessionProcessTerm.getSessionTail(slideDF, sparkSession)
        println("------sessionEndDF.show()-------------")
        sessionEndDF.printSchema()
        sessionEndDF.createOrReplaceTempView("session_end_table")
        // NOTE(review): row_number() restarts at 1 for every slice, so `id` values repeat
        // across the 16 slices of one day — confirm `id` is not expected to be unique.
        val getReferSql =
          s"""
             |select  concat(regexp_replace( '${scnData}',"-","") ,cast(row_number() over(partition by 1 order by a.created_time) as string)) as id,
             |a.session_id,
             |a.device_token,
             |a.user_id,
             |a.mobile,
             |a.menu_code,
             |a.menu_begin_time,
             | case when  a.action_type in('ACTION_VIEW','ACTION_HEART') and  b.menu_end_time is null  then c.session_end_time else b.menu_end_time end menu_end_time,
             |(cast((case when  a.action_type in('ACTION_VIEW','ACTION_HEART') and  b.menu_end_time is null  then c.session_end_time else b.menu_end_time end) as bigint)-cast(a.menu_begin_time as bigint))/1000 menu_time_diff,
             |a.action_type,
             |a.action_code,
             |a.position,
             |a.label_value,
             |lag(a.menu_code) over(partition by a.session_id order by a.created_time)  refer_menu_code,
             |lag(a.action_code) over(partition by a.session_id order by a.created_time) refer_action_code,
             |lag(a.position) over(partition by a.session_id order by a.created_time) refer_position,
             |lag(a.label_value) over(partition by a.session_id order by a.created_time) refer_label_value,
             |lag(a.action_type) over(partition by a.session_id order by a.created_time) refer_action_type,
             |a.action_step,
             |a.device_type,
             |a.app_version,
             |a.created_time,
             |a.date_time,
             |c.session_begin_time,
             |c.session_end_time,
             |(cast(c.session_end_time as bigint)-cast(c.session_begin_time as bigint))/1000 session_time_diff,
             |a.refer_session_id
             |from refer_result_table a
             |left join refer_menu_table b on a.session_id=b.session_id and a.device_token=b.device_token and a.user_id=b.user_id and a.menu_code=b.menu_code  and a.created_time=b.created_time and a.action_type in('ACTION_VIEW','ACTION_HEART')
             |left join session_end_table c on a.session_id = c.session_id
             | distribute by rand()
             |""".stripMargin
        val menuCodeAddDF = sparkSession.sql(getReferSql).repartition(100)
        println("menuCodeAddDF.show=======")
        menuCodeAddDF.printSchema()
        menuCodeAddDF.persist(StorageLevel.MEMORY_AND_DISK_SER)
        println("-------------------------------match user_id 逻辑-------------------------------------------------")
        val resultDF: DataFrame = sessionProcessTerm.matchUserId(menuCodeAddDF, sparkSession, scnData).repartition(5)
        println("dwFactLogSession.show=======>")
        resultDF.printSchema()
        println("-----------------------------------load data to pica_dw.dw_fact_log_session_term-----------------")
        sessionProcessTerm.loadData(resultDF, sparkSession, scnData,index)
        val resCount = resultDF.count().toInt
        println(s"${sliceCondition}的结果==${resCount}")
        dataCount += resCount
        // Both actions on this slice have finished; release its caches so the cached blocks of
        // 16 slices do not accumulate in executor memory/disk.
        menuCodeAddDF.unpersist()
        referResultDF.unpersist()
      }
      // Every slice has been processed; the shared input cache is no longer needed.
      selectDF.unpersist()
      println("----------------------------------update task record table---------------------------------------")
      // The job succeeded: mark the MySQL record row as finished.
      val updateSQL: String =
        s"""
           |update ${MyConfigSession.JDBC_TABLE} set status=?,end_time=?,data_count=? where job_name='${MyConfigSession.HIVE_TABLE4}' and start_time='${startTime}'
				""".stripMargin
      val upreSta: PreparedStatement = connSql.prepareStatement(updateSQL)
      upreSta.setString(1, "1")
      upreSta.setString(2, DateUtils.getTodayTime)
      upreSta.setInt(3, dataCount)
      upreSta.executeUpdate()
      // Closes both the statement and the connection.
      JDBCUtil.close(connSql, upreSta)
      sparkSession.stop()
    } catch {
      case e: Exception => {
        println("-----------------------------------任务异常---------------------------------------------------")
        e.printStackTrace()
        val exceptionSQL: String =
          s"""
             |update ${MyConfigSession.JDBC_TABLE} set status=?,exception=?,end_time=? where job_name='${MyConfigSession.HIVE_TABLE4}' and start_time='${startTime}'
					""".stripMargin
        // getMessage may be null (e.g. a bare NullPointerException); fall back to toString so
        // the record row always carries a usable description.
        val errorText = Option(e.getMessage).getOrElse(e.toString)
        val errorArr = Array[String]("2", errorText, DateUtils.getTodayTime)
        try {
          JDBCUtil.insertRecord(connSql, exceptionSQL, errorArr)
        } finally {
          // Close the connection even when writing the failure record itself fails.
          connSql.close()
        }
      }
    }
  }
}

class SessionProcessTerm {

  /**
   * Creates (or reuses, via `getOrCreate`) a SparkSession with Hive support enabled.
   *
   * @param appName application name set on the Spark configuration
   * @return the SparkSession built from a `SparkConf` that was first passed through
   *         `UseUtil.setConfigure`
   */
  def getSparkSession(appName: String): SparkSession = {
    val sparkConf = new SparkConf()
    sparkConf.setAppName(appName)
    // Project-level settings are applied by the shared helper before the session is built.
    UseUtil.setConfigure(sparkConf)
    SparkSession
      .builder()
      .config(sparkConf)
      .enableHiveSupport()
      .getOrCreate()
  }

  /**
   * Resolves the `user_id` column of the given rows in three stages and returns the union:
   *  1. rows whose original `user_id` joins to `pica_ds.pica_doctor.id`;
   *  2. of the remaining rows, those whose `mobile` joins to `pica_doctor.mobile_phone`;
   *  3. of the rows still unresolved, a left join on `device_token` against the equipment view
   *     (`user_id` stays '0' when nothing matches).
   *
   * Side effects: registers the temp views named by `MyConfigSession.VIEW_SESSION_ODS`,
   * `VIEW_SESSION_NO_MATCH`, `VIEW_EQUIPMENT_INFO`, `VIEW_MOBILE_PHONE` and `VIEW_DEVICE_TOKEN`.
   *
   * @param dataFrame       filtered input rows; must contain `user_id`, `mobile`, `device_token`
   * @param sparkSQLSession SparkSession used to run the SQL
   * @param created_day     date of the data being processed, e.g. "2020-03-01"
   * @return the input rows with the original id kept as `user_id_old` and a resolved string `user_id`
   **/
  def matchUserId(dataFrame: DataFrame, sparkSQLSession: SparkSession, created_day: String): DataFrame = {
    println("matchUserId开始执行-----------------------------------")

    // Stage 1: match the incoming user_id against pica_doctor; anything unmatched becomes '0'.
    val renamedInput = dataFrame.withColumnRenamed("user_id", "user_id_ods")
    renamedInput.createOrReplaceTempView(MyConfigSession.VIEW_SESSION_ODS)
    val doctorJoinSql =
      s"""
         |SELECT t.*,t.user_id_ods user_id_old, COALESCE(cast(b.id as string),'0') AS user_id  from ${MyConfigSession.VIEW_SESSION_ODS} as t
         |left join  (select  id,cast(id as string) id_str  from pica_ds.pica_doctor a where a.delete_flag = 1 and to_date(a.creat_time) <= '${created_day}') AS b on t.user_id_ods = cast(b.id as string)
            """.stripMargin
    val doctorMatchedDF = sparkSQLSession.sql(doctorJoinSql).drop("user_id_ods")

    // Rows that stage 1 could not resolve: their user_id moves into user_id_old so the next
    // join can produce a fresh user_id column.
    val unresolvedDF: Dataset[Row] =
      doctorMatchedDF.where("user_id ='' OR user_id = '0' OR LENGTH(user_id) = 24")
    unresolvedDF
      .drop("user_id_old")
      .withColumnRenamed("user_id", "user_id_old")
      .createOrReplaceTempView(MyConfigSession.VIEW_SESSION_NO_MATCH)

    // Stage 2: try to resolve the leftovers through the mobile phone number.
    val mobileJoinSql: String =
      s"""
         |SELECT ss.*,COALESCE(cast(b.id as string),'0') AS user_id  from ${MyConfigSession.VIEW_SESSION_NO_MATCH}  AS ss
         |left join (select distinct id,mobile_phone from pica_ds.pica_doctor where pica_doctor.delete_flag = 1 ) AS b on ss.mobile = b.mobile_phone
         """.stripMargin
    val byMobileDF: DataFrame = sparkSQLSession.sql(mobileJoinSql)

    // Equipment lookup: the default query when processing yesterday, otherwise the
    // date-parameterised variant with the requested day appended.
    val equipmentSql =
      if (created_day.equals(DateUtils.getYesterdayDate)) {
        MyConfigSession.EQUIPMENT_INFO_SQL
      } else {
        MyConfigSession.EQUIPMENT_INFO_SQL_ARGS + s"'${created_day}'"
      }
    println(s"equipmentInfoSql=${equipmentSql}")
    // Only rows flagged row_d = 1 are kept from the equipment query.
    sparkSQLSession.sql(equipmentSql)
      .where("row_d =1")
      .createOrReplaceTempView(MyConfigSession.VIEW_EQUIPMENT_INFO)
    byMobileDF
      .drop("user_id_old")
      .withColumnRenamed("user_id", "user_id_old")
      .createOrReplaceTempView(MyConfigSession.VIEW_MOBILE_PHONE)

    // Stage 3: whatever is still '0' after the mobile match is looked up by device_token.
    val deviceJoinSql: String =
      s""" SELECT t.*,COALESCE(cast(b.user_id as string),'0') AS user_id
         | from (select * from  ${MyConfigSession.VIEW_MOBILE_PHONE}  a where a.user_id_old= '0' ) as t
         |left join ${MyConfigSession.VIEW_EQUIPMENT_INFO} as b on t.device_token = b.device_token
            """.stripMargin
    println(s"DEVICE_TOKEN_SQL_PREF=${deviceJoinSql}")
    val byDeviceDF: DataFrame = sparkSQLSession.sql(deviceJoinSql)

    // Final result = resolved in stage 1 + resolved in stage 2 + everything from stage 3.
    val resolvedDirectlyDF: Dataset[Row] =
      doctorMatchedDF.where("user_id !='' and user_id != '0' and LENGTH(user_id) !=24")
    val resolvedByMobileDF: Dataset[Row] = byMobileDF.where("user_id !='0'")
    println("rightUserIdDF/mobilePhoneResDF/deviceTokenDF.schema===========")
    resolvedDirectlyDF.printSchema()
    resolvedByMobileDF.printSchema()
    byDeviceDF.printSchema()
    val mergedDF: Dataset[Row] = resolvedDirectlyDF.union(resolvedByMobileDF).union(byDeviceDF)
    mergedDF.createOrReplaceTempView(MyConfigSession.VIEW_DEVICE_TOKEN)
    mergedDF
  }

  /**
   * Derives the per-event columns needed downstream (session id, menu code, menu begin time,
   * action step, refer session id) and, for old app versions, splits a `pseudo_session` into
   * several sessions.
   *
   * Requirement: https://www.tapd.cn/64812329/prong/stories/view/1164812329001012031
   * (released with Android 3.4.5 / iOS 3.4.4, later versions unified to 3.4.6).
   *
   * Behaviour visible in the code:
   *  - only ACTION_VIEW / ACTION_CLICK / ACTION_EXPOSE rows, plus ACTION_HEART rows whose
   *    `menu_code_offset` is 'back', are considered;
   *  - rows are grouped by `pseudo_session` and sorted ascending by `created_time`;
   *  - for app versions below "3.3.2" (no ACTION_HEART_BEAT event there, per the original
   *    author's note) a new session starts whenever the gap to the previous event exceeds
   *    `MyConfigSession.SESSION_GAP` (30 minutes according to the original note);
   *  - `session_id` is `pseudo_session` followed by the split counter, `refer_session_id` is the
   *    previous split's id (empty for the first one);
   *  - for versions >= "3.4.5" only ACTION_ACTIVITY_RESUME / ACTION_HEART rows of the
   *    view/heart kind are emitted, and repeated view/heart rows on an unchanged menu are dropped.
   *
   * NOTE(review): versions and `created_time` are compared as plain strings, so e.g. "3.10.0"
   * sorts before "3.3.2" — confirm no such versions exist, and that `created_time` values all
   * have the same number of digits.
   *
   * @param sourceDF source rows of one slice
   * @return an RDD of 17-field rows: session_id, device_token, user_id, mobile, menu_code,
   *         menu_begin_time, action_code, position, label_value, action, action_type,
   *         action_step, device_type, app_version, created_time, date_time, refer_session_id
   **/
  def getReferColumns(sourceDF: DataFrame): RDD[Row] = {
    val actions = List("'ACTION_VIEW'", "'ACTION_CLICK'", "'ACTION_EXPOSE'") // higher versions use "'ACTION_HEART_BEAT'"
    val actionCommonDF = sourceDF.where(s" ( action_type in (${actions.mkString(",")}) or (action_type='ACTION_HEART' and menu_code_offset='back' ))")
    println("actionCommonDF.show()===========")
    actionCommonDF.printSchema()
//    actionCommonDF.where("pseudo_session='5964908742'").show(200)
    // Compute the begin time of each menu_code within a session; redundant ACTION_HEART_BEAT rows are dropped.
    val groupRdd = actionCommonDF.rdd.groupBy(r => r.getAs[String]("pseudo_session"))//.repartition(100)
    println("开始执行groupRdd.flatMap=========")
    val resRdd = groupRdd.flatMap(g => {
      val pseudo_session = g._1
      val resList: ListBuffer[Row] = new ListBuffer[Row]()
      var rowList = g._2
      rowList = rowList.toList.sortWith((x, y) => x.getAs[String]("created_time") < y.getAs[String]("created_time")) // sort ascending by created_time (string comparison)
      // "<menu visit index>_<event index within that menu visit>"
      var actionStep = "0_0"
//      var sessionBeginTime = rowList.head.getAs[String]("created_time")
      // State carried from one row of the session to the next.
      var thisMenuCode = ""
      var menuBeginTime = ""
      var prefCreatedTime = ""
      var prefActionType = ""
      // Number of session splits so far; appended to pseudo_session to form session_id.
      var count = 0

      for (row <- rowList) {
        var isSessionNew = false
        var appVersion = row.getAs[String]("app_version")
        // Versions below 3.3.2 have no ACTION_HEART_BEAT event, so the session is split by the time gap
        // between two consecutive events: a gap above the threshold (30 minutes per the original note) starts a new session.
        var createdTime = row.getAs[String]("created_time")
        if (appVersion < "3.3.2") { // version below 3.3.2: decide by time gap whether to split the session
          if (prefCreatedTime != "" && (createdTime.toLong - prefCreatedTime.toLong) > MyConfigSession.SESSION_GAP) { // this event is further than SESSION_GAP from the previous one
            count += 1
            isSessionNew = true
          }
        }
        var menuCode = row.getAs[String]("menu_code_offset")
        val action = row.getAs[String]("action")
        val actionType = row.getAs[String]("action_type")
        var sessionId = pseudo_session + count
        var referSessionId = ""
        if (count > 0) {
          referSessionId = pseudo_session + (count - 1)
        }
        // The version and action_type decide whether this row is emitted.
        var needPut = true
        if ("ACTION_VIEW".equals(actionType) || "ACTION_HEART".equals(actionType)) {
          if (appVersion >= "3.4.5") { // versions from 3.4.5 on are handled separately
            // Emit the row only if it is an ACTION_ACTIVITY_RESUME action or of type ACTION_HEART.
            if ("ACTION_ACTIVITY_RESUME".equals(action) || "ACTION_HEART".equals(actionType) ) {
              needPut = true
            }else{
              needPut = false
            }
            // Same menu as before and the previous row was also a view/heart row: redundant, skip.
            if ( thisMenuCode.equals(menuCode) && ("ACTION_HEART"==prefActionType  || "ACTION_VIEW" == prefActionType)){
              needPut = false
            }
          }else{
            // Older versions: same rule, except that the first row of a freshly split session is always kept.
            if ( thisMenuCode.equals(menuCode) && ("ACTION_HEART"==prefActionType  || "ACTION_VIEW" == prefActionType) && !isSessionNew ) {
              needPut = false
            }
          }
          if(needPut){
            menuBeginTime = createdTime
          }
//          /*
//            Exclude redundant data:
//            1. if both the previous and the current event are ACTION_HEART_BEAT rows, the current row is not counted
//            2. if both the current and the previous row are ACTION_VIEW and the menu is unchanged, the current row is not emitted
//           */
//          if ( thisMenuCode.equals(menuCode) && ("ACTION_HEART"==prefActionType  || "ACTION_VIEW" == prefActionType) && !isSessionNew ) {
//            needPut = false
//          }else{
//            needPut = true
//            menuBeginTime = createdTime
//          }
        }
        if (!thisMenuCode.equals(menuCode) || isSessionNew) {
          menuBeginTime = createdTime
          if(needPut ){// only once the row for this menu_code is known to be emitted are thisMenuCode and the menu visit index updated
            thisMenuCode = menuCode
            actionStep = (actionStep.split("_")(0).toInt + 1).toString + "_0"
          }
        } else {
          // Same menu: advance the in-menu event index unless this is a view row following a view row.
          if (!(prefActionType.equals(actionType) && "ACTION_VIEW".equals(actionType))) {
            actionStep = (actionStep.split("_")(0)) + "_" + (actionStep.split("_")(1).toInt + 1).toString
          }
        }
//        println(s"created_time=${createdTime},thisMenuCode=${thisMenuCode},actionType=${actionType},action=${action},isMenuChange=${!thisMenuCode.equals(menuCode)},prefActionType=${prefActionType},needPut=${needPut}" )
        // Remember this row for the next iteration, whether or not it is emitted.
        prefCreatedTime = createdTime
        prefActionType = actionType
        if (needPut) {
          resList += (Row(sessionId,
            row.getAs[String]("device_token"),
            row.getAs[Integer]("user_id"),
            row.getAs[String]("mobile"),
            thisMenuCode,
            menuBeginTime,
            row.getAs[String]("action_code"),
            row.getAs[String]("position"),
            row.getAs[String]("label_value"),
            action, actionType, actionStep,
            row.getAs[String]("device_type"),
            row.getAs[String]("app_version"),
            createdTime,
            row.getAs[String]("date_time"),
            referSessionId
          ))
        }
      }
      resList.iterator
    })
    resRdd
  }


  /**
   * Computes `session_begin_time` / `session_end_time` per session id, in two parts that are
   * unioned together:
   *  1. pseudo sessions that are not referenced as a split parent (no `refer_session_id` ending
   *     in '0' points at them): the session id is `pseudo_session` + '0' and the bounds are the
   *     min/max `created_time` over all rows of `sourceDF`;
   *  2. session ids belonging to pseudo sessions that were split: the bounds are the min/max
   *     `created_time` of that session id in `refer_result_table`.
   *
   * Registers `sourceDF` as temp view `select_res_table` and requires the temp view
   * `refer_result_table` to have been registered by the caller beforehand.
   *
   * @param sourceDF     source rows of the current slice
   * @param sparkSession SparkSession used to run the SQL
   * @return rows of (session_id, session_begin_time, session_end_time)
   */
  def getSessionTail(sourceDF: DataFrame, sparkSession: SparkSession): DataFrame = {
    sourceDF.createOrReplaceTempView("select_res_table")

    // Runs a query, then prints the progress banner that follows it in the job log.
    def runThenAnnounce(query: String, banner: String): DataFrame = {
      val frame = sparkSession.sql(query)
      println(banner)
      frame
    }

    // Part 1: pseudo sessions that were never split.
    val unsplitBoundsSql =
      """
        |  select concat(pseudo_session,'0') session_id,min(created_time) session_begin_time,max(created_time) session_end_time from select_res_table
        |  where pseudo_session not in (select distinct SUBSTRING(refer_session_id,1,length(refer_session_id)-1)  from refer_result_table where refer_session_id like '%0')
        |  group by  concat(pseudo_session,'0')
        |""".stripMargin
    // Part 2: every session id produced by splitting a pseudo session.
    val splitBoundsSql =
      """
        |select session_id,min(created_time) session_begin_time,max(created_time) session_end_time from refer_result_table
        | where SUBSTRING(session_id,1,length(session_id)-1)
        | in (select distinct SUBSTRING(refer_session_id,1,length(refer_session_id)-1)  from refer_result_table where refer_session_id!='')
        | group by  session_id
        |""".stripMargin

    val unsplitBoundsDF = runThenAnnounce(unsplitBoundsSql, "uniqueSessionEndDF.show()=========")
    val splitBoundsDF = runThenAnnounce(splitBoundsSql, "firstSessionEndDF.show()=========")
    unsplitBoundsDF.union(splitBoundsDF)
  }
  /**
   * Writes the given rows into partition `created_day = partitionDay` of
   * `MyConfigSession.HIVE_TABLE4`, selecting the columns as they are (no back-filling of
   * menu/session times).
   *
   * The data frame is registered as temp view `result_view` first.
   *
   * @param dataFrame    rows to load
   * @param sparkSession SparkSession used to run the insert
   * @param partitionDay partition date
   * @param index        1-based slice number: slice 1 overwrites the partition, any other value appends
   **/
  def loadData1(dataFrame: DataFrame, sparkSession: SparkSession, partitionDay: String,index:Integer): Unit = {
    dataFrame.createOrReplaceTempView("result_view")
    // The first slice replaces whatever the partition held; later slices add to it.
    val writeMode = if (index == 1) "insert overwrite" else "insert into"
    val insertStatement =
      s"""
         |${writeMode} table ${MyConfigSession.HIVE_TABLE4} partition(created_day='${partitionDay}')
         | select id,t1.session_id,device_token,user_id,mobile,menu_code,menu_begin_time,
         |    menu_end_time,
         |   menu_time_diff,
         | action_type,action_code,position,label_value,refer_menu_code,refer_action_code,refer_position,refer_label_value,refer_action_type,
         | action_step,device_type,app_version,created_time,date_time,
         |   session_begin_time,
         |   session_end_time,
         |  session_time_diff,
         | refer_session_id  from result_view t1
         """.stripMargin
    sparkSession.sql(insertStatement)
  }

  /**
   * Writes the given rows into partition `created_day = partitionDay` of
   * `MyConfigSession.HIVE_TABLE4`, back-filling missing session bounds on the way.
   *
   * For sessions whose `session_time_diff` is null, the begin/end time and the duration are
   * recomputed from the min/max `created_time` of that session in the data itself; an
   * ACTION_VIEW row without `menu_end_time` takes that recomputed session end as its menu end.
   * (Per the original author's note this covers sessions beyond the 10th split of a pseudo session.)
   *
   * The data frame is registered as temp view `result_view` first.
   *
   * @param dataFrame    rows to load
   * @param sparkSession SparkSession used to run the insert
   * @param partitionDay partition date
   * @param index        1-based slice number: slice 1 overwrites the partition, any other value appends
   */
  def loadData(dataFrame: DataFrame, sparkSession: SparkSession, partitionDay: String,index:Integer): Unit = {
    dataFrame.createOrReplaceTempView("result_view")
    // The first slice replaces whatever the partition held; later slices add to it.
    val writeMode = if (index == 1) "insert overwrite" else "insert into"
    val insertStatement =
      s"""
         |${writeMode} table ${MyConfigSession.HIVE_TABLE4} partition(created_day='${partitionDay}')
         | select id,t1.session_id,device_token,user_id,mobile,menu_code,menu_begin_time,
         |  case when t1.menu_end_time is null  and t1.action_type='ACTION_VIEW'  then t2.session_end_time else t1.menu_end_time end menu_end_time,
         | (cast((case when t1.menu_end_time is null and t1.action_type='ACTION_VIEW' then t2.session_end_time else t1.menu_end_time end) as bigint)-cast(t1.menu_begin_time as bigint))/1000 menu_time_diff,
         | action_type,action_code,position,label_value,refer_menu_code,refer_action_code,refer_position,refer_label_value,refer_action_type,
         | action_step,device_type,app_version,created_time,date_time,
         | case when t1.session_begin_time is not null then t1.session_begin_time else t2.session_begin_time end session_begin_time,
         | case when t1.session_end_time is not null then t1.session_end_time else t2.session_end_time end  session_end_time,
         | case when t1.session_time_diff is not null then t1.session_time_diff else t2.session_time_diff end  session_time_diff,
         | refer_session_id  from result_view t1
         | left join (select session_id, min(created_time) session_begin_time,max(created_time) session_end_time,
         |      (cast(max(created_time) as bigint)-cast( min(created_time) as bigint))/1000 session_time_diff
         |  from result_view where session_time_diff is null group by session_id ) t2
         | on t1.session_id=t2.session_id
         """.stripMargin
    sparkSession.sql(insertStatement)
  }
}

