package com.utils
import java.sql.{Connection, DriverManager}
import java.util.{Properties, UUID}

import org.apache.hadoop.hbase.util.{Bytes, MD5Hash}
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.sql.{Column, DataFrame, Row, SaveMode, SparkSession}
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable.ArrayBuffer

/**
  * Stateless utility helpers: ID and hash generation, adding DataFrame columns, normalising
  * network-type and action-type strings, applying Spark configuration defaults, building
  * broadcast lookups from SQL, and writing DataFrames to MySQL.
  *
  * @Author zhenxin.ma
  * @Date 2019/8/30 11:59
  * @Version 1.0
  */
object UseUtil {

	/**
	  * Generates a random identifier.
	  *
	  * @return a random UUID rendered as 32 lowercase hexadecimal characters (hyphens removed)
	  */
	def getUUID(): String =
		// UUID.toString already renders lowercase hex digits, so only the hyphens are stripped.
		UUID.randomUUID().toString.replace("-", "")

	/**
	  * Computes the MD5 digest of a string.
	  *
	  * @param a the text to hash; converted to bytes with HBase's `Bytes.toBytes`
	  * @return the digest as a hexadecimal string, as produced by `MD5Hash.getMD5AsHex`
	  */
	def getMD5(a: String): String = MD5Hash.getMD5AsHex(Bytes.toBytes(a))

	/**
	  * Adds (or replaces) columns on a DataFrame.
	  *
	  * @param df  the DataFrame to extend
	  * @param map column name -> column expression; applied one by one via `withColumn`
	  *            in the map's iteration order
	  * @return the DataFrame with every entry of `map` applied
	  */
	def addDataFrameColumn(df: DataFrame, map: Map[String, Column]): DataFrame =
		map.foldLeft(df) { case (acc, (name, column)) => acc.withColumn(name, column) }

	/**
	  * Maps a network-type label to its numeric code.
	  *
	  * Combined labels such as "2G/3G/4G" map to the code of the highest generation listed;
	  * "WIFI" and "WLAN" both map to "1".
	  *
	  * @param net the network-type label (expected to be non-null)
	  * @return the code as a string; "0" for any unrecognised value, including `null`
	  */
	def netTypeMatch(net: String): String = net match {
		case "WIFI" | "WLAN" => "1"
		case "2G" => "2"
		case "3G" => "3"
		case "4G" | "2G/3G/4G" => "4"
		case "5G" | "2G/3G/4G/5G" => "5"
		case "6G" | "2G/3G/4G/5G/6G" => "6"
		case _ => "0"
	}

	/**
	  * Applies this project's Spark tuning defaults to a configuration.
	  *
	  * The settings are applied in the order listed; each overwrites any value already
	  * present on `conf` for the same key.
	  *
	  * @param conf the Spark configuration to mutate
	  */
	def setConfigure(conf: SparkConf): Unit = {
		val settings: Seq[(String, String)] = Seq(
			// Use Kryo for serialization.
			"spark.serializer" -> classOf[KryoSerializer].getName,
			// Upper bound of the memory buffer used while serializing.
			"spark.kryoserializer.buffer.max" -> "128m",
			// Enable RDD compression.
			"spark.rdd.compress" -> "true",
			// Use snappy as the compression codec. Original author's note: the default is lz4;
			// this codec compresses well and is fast, but uses somewhat more memory.
			"spark.io.compression.codec" -> "snappy",
			// Memory buffer (block) size used when compressing.
			"spark.io.compression.snappy.blockSize" -> "64k",
			// Number of partitions Spark SQL produces on shuffle (200 is also the default).
			"spark.sql.shuffle.partitions" -> "200",
			// Threshold under which SortShuffleManager uses bypass (no sorting) mode; default 200.
			// Bypass mode is used when the partition count is below this value.
			"spark.shuffle.sort.bypassMergeThreshold" -> "210",
			// Switch to the legacy memory manager so the memory fraction below can be tuned.
			// NOTE(review): legacy memory settings are deprecated in newer Spark releases -
			// confirm they are still honoured by the Spark version in use.
			"spark.memory.useLegacyMode" -> "true",
			// Fraction of executor memory a task may use for aggregation after fetching the
			// previous stage's output (default 0.2). Raised for shuffle-heavy jobs.
			"spark.shuffle.memoryFraction" -> "0.5",
			// Default parallelism for RDDs.
			"spark.default.parallelism" -> "200",
			// Shuffle write buffer (default 32k); can be raised when memory is plentiful.
			"spark.shuffle.file.buffer" -> "64k",
			// Buffer of a shuffle-read task; bounds how much data it can fetch at a time.
			"spark.reducer.maxSizeInFlight" -> "96m",
			// Limit on the number of fields included in debug string output.
			"spark.debug.maxToStringFields" -> "100",
			// Enable adaptive execution (automatic choice of shuffle reducers); default false.
			"spark.sql.adaptive.enabled" -> "true"
		)
		settings.foreach { case (key, value) => conf.set(key, value) }
	}

	/**
	  * Normalises a raw action name to its canonical action type.
	  *
	  * @param action the raw action name
	  * @return the canonical action type, or a single space (" ") when the action is not
	  *         one of the recognised names
	  */
	def getActionType(action: String): String = action match {
		case "ACTION_APP_BACKGROUND" => "ACTION_BACKGROUND"
		case "ACTION_CLICK" | "ACTION_WEB_CLICK" | "WEB_ACTION_CLICK" => "ACTION_CLICK"
		case "ACTION_EQUIP_INFO" => "ACTION_EQUIP_INFO"
		case "ACTION_EXIT_APP" => "ACTION_EXIT"
		case "ACTION_PAGE_EXPOSE" => "ACTION_EXPOSE"
		case "ACTION_RECENT_APPS" => "ACTION_RECENT"
		case "ACTION_APP_START" => "ACTION_START"
		case "ACTION_ACTIVITY_CREATE" | "ACTION_LOAD_URL" | "ACTION_WEB_ENTER" | "ACTION_WEB_PAGE_IN" =>
			"ACTION_VIEW"
		case _ => " "
	}

	/**
	  * Runs a SQL query and broadcasts two of its columns as a key -> value lookup map.
	  *
	  * The whole result is collected to the driver before being broadcast. If a key occurs
	  * in more than one row, only one of its values is kept.
	  *
	  * @param sparkSession the session used to run the query and create the broadcast
	  * @param sourSql      the SQL query to execute
	  * @param colName1     name of the column providing the map keys (read as String)
	  * @param colName2     name of the column providing the map values (read as String)
	  * @return a broadcast variable wrapping the `Map[String, String]` built from the result
	  */
	def getBroadcast(sparkSession: SparkSession, sourSql: String, colName1: String, colName2: String): Broadcast[Map[String, String]] = {
		import sparkSession.implicits._
		val lookup: Map[String, String] = sparkSession.sql(sourSql)
			.map(row => (row.getAs[String](colName1), row.getAs[String](colName2)))
			.collect()
			.toMap
		sparkSession.sparkContext.broadcast(lookup)
	}

	/**
	  * Saves a DataFrame into a MySQL table.
	  *
	  * Connection settings are read from the properties resource at `proPath` using the keys
	  * `mysql.username`, `mysql.password`, `mysql.driver` and `mysql.url`.
	  *
	  * For `SaveMode.Overwrite` the table is truncated over a plain JDBC connection and the
	  * data is then appended, so the existing table definition is preserved. A failure of
	  * the truncate step is printed and the append is still attempted (best effort). Every
	  * other save mode is passed to the Spark JDBC writer unchanged.
	  *
	  * @param dataFrame the DataFrame to save
	  * @param tableName the target MySQL table; used as given for both truncate and write
	  * @param saveMode  the save mode: Append, Overwrite, ErrorIfExists or Ignore
	  * @param proPath   classpath path of the properties file
	  * @throws IllegalArgumentException if one of the required `mysql.*` keys is missing
	  */
	def saveASMysqlTable(dataFrame: DataFrame, tableName: String, saveMode: SaveMode, proPath: String): Unit = {
		import scala.util.control.NonFatal

		val settings: Properties = getProPerties(proPath)

		// Fail with the offending key instead of the NullPointerException that
		// Properties.setProperty raises when handed a null value.
		def required(key: String): String =
			Option(settings.getProperty(key)).getOrElse(
				throw new IllegalArgumentException(s"Missing required property '$key' in $proPath"))

		// The keys in the properties file differ from the ones Spark expects, so a second
		// Properties object is built using Spark's key names.
		val prop = new Properties
		prop.setProperty("user", required("mysql.username"))
		prop.setProperty("password", required("mysql.password"))
		prop.setProperty("driver", required("mysql.driver"))
		prop.setProperty("url", required("mysql.url"))
		val url: String = prop.getProperty("url")

		val overwrite: Boolean = saveMode == SaveMode.Overwrite
		if (overwrite) {
			try {
				val conn = DriverManager.getConnection(url, prop.getProperty("user"), prop.getProperty("password"))
				try {
					val stmt = conn.createStatement()
					// Truncate instead of letting Spark drop and recreate the table: per the
					// original author's note, recreating it turns the column types into TEXT.
					try stmt.execute(s"truncate table $tableName")
					finally stmt.close()
				} finally conn.close()
			} catch {
				case NonFatal(e) =>
					println("MySQL Error:")
					e.printStackTrace()
			}
		}

		// Overwrite was handled by the truncate above, so the rows are appended; any other
		// mode is honoured as requested by the caller.
		val writeMode: SaveMode = if (overwrite) SaveMode.Append else saveMode
		dataFrame.write.mode(writeMode).jdbc(url, tableName, prop)
	}

	/**
	  * Loads a properties file from the classpath.
	  *
	  * @param proPath resource path, e.g. `/myconfig.properties` for a file placed directly
	  *                in the resources directory
	  * @return the loaded properties
	  * @throws java.io.FileNotFoundException if no resource exists at `proPath`
	  */
	def getProPerties(proPath: String): Properties = {
		// getResourceAsStream returns null for a missing resource; report the path clearly
		// instead of failing later inside Properties.load.
		val stream = Option(this.getClass.getResourceAsStream(proPath)).getOrElse(
			throw new java.io.FileNotFoundException(s"Classpath resource not found: $proPath"))
		val properties: Properties = new Properties()
		try properties.load(stream)
		finally stream.close()
		properties
	}

}
