zhenxin.ma / study-report

Commit c1230591, authored Jan 17, 2020 by zhenxin.ma
Parent: a23da01d

    Changed the logic

Showing 1 changed file with 51 additions and 48 deletions

src/main/scala/com/data/SyncAttachRegionResult.scala (+51, -48)
@@ -57,54 +57,8 @@ object SyncAttachRegionResult {
     val broadcast: Broadcast[Array[(Long, Long, Long, Long, Long)]] =
       sparkSession.sparkContext.broadcast(ppact)
-    val reDS: Dataset[(Long, Long, Long, Long, Long, Long)] = df1.mapPartitions(it => {
-      // Store the final result; each element represents one tuple
-      val tuples: ListBuffer[(Long, Long, Long, Long, Long, Long)] =
-        ListBuffer[(Long, Long, Long, Long, Long, Long)]()
-      val list: List[Row] = it.toList
-      list.foreach(row => {
-        var count: Int = 0
-        // Note: a type conversion happens here
-        val project_id: Long = row.getAs[Int]("project_id").toLong
-        val doctor_id: Long = row.getAs[Int]("doctor_id").toLong
-        val province_id: Long = row.getAs[Long]("province_id")
-        val city_id: Long = row.getAs[Long]("city_id")
-        val county_id: Long = row.getAs[Long]("county_id")
-        val town_id: Long = row.getAs[Long]("town_id")
-        if (province_id != 0) { count = count + 1 }
-        if (city_id != 0) { count = count + 1 }
-        if (county_id != 0) { count = count + 1 }
-        if (town_id != 0) { count = count + 1 }
-        // Look the row up in the broadcast variable
-        val broad: Array[(Long, Long, Long, Long, Long)] = broadcast.value
-        broad.foreach(tuple => {
-          if (count == 0 && project_id == tuple._1) {
-            tuples += ((project_id, doctor_id, tuple._2, tuple._3, tuple._4, tuple._5))
-          } else if (count == 1 && project_id == tuple._1 && province_id == tuple._2) {
-            tuples += ((project_id, doctor_id, tuple._2, tuple._3, tuple._4, tuple._5))
-          } else if (count == 2 && project_id == tuple._1 && city_id == tuple._3) {
-            tuples += ((project_id, doctor_id, tuple._2, tuple._3, tuple._4, tuple._5))
-          } else if (count == 3 && project_id == tuple._1 && county_id == tuple._4) {
-            tuples += ((project_id, doctor_id, tuple._2, tuple._3, tuple._4, tuple._5))
-          } else if (count == 4 && project_id == tuple._1 && town_id == tuple._5) {
-            tuples += ((project_id, doctor_id, tuple._2, tuple._3, tuple._4, tuple._5))
-          }
-        })
-      })
-      tuples.iterator
-    })
-    // Convert to named columns
-    val reDF: DataFrame = reDS.toDF("project_id", "doctor_id", "province_id", "city_id", "county_id", "town_id")
+    // Build the DataFrame with named columns
+    val reDF: DataFrame = getDataFrame(df1, sparkSession, broadcast)
     // Write the result to a parquet file
     reDF.write.mode(SaveMode.Overwrite).format("parquet")
       .save(s"${SyncDataConfig.PARQUET_PATH}${SyncDataConfig.DATABASE2}.${SyncDataConfig.Hive_TABLE6}")
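The branch ladder removed above keys each row to a broadcast entry by how many of its four region levels are non-zero: an empty row matches on project_id alone, a row with count == 1 must also match the province, count == 2 the city, count == 3 the county, and count == 4 the town. A minimal, Spark-free sketch of that selection rule follows; the names RegionMatchSketch, filledLevels, and matchRegion are hypothetical and not part of the original source.

object RegionMatchSketch {
  // One broadcast entry: (project_id, province_id, city_id, county_id, town_id)
  type RegionTuple = (Long, Long, Long, Long, Long)

  // Mirrors the four `if (... != 0) count = count + 1` checks in the diff.
  def filledLevels(provinceId: Long, cityId: Long, countyId: Long, townId: Long): Int =
    Seq(provinceId, cityId, countyId, townId).count(_ != 0)

  // Selects the broadcast tuples a row attaches to, following the
  // count-based branches of the foreach loop in the diff.
  def matchRegion(projectId: Long, doctorId: Long,
                  provinceId: Long, cityId: Long, countyId: Long, townId: Long,
                  broadcastValue: Array[RegionTuple]): Seq[(Long, Long, Long, Long, Long, Long)] = {
    val count = filledLevels(provinceId, cityId, countyId, townId)
    broadcastValue.toList.collect {
      case (p, prov, city, county, town)
        if p == projectId && (count match {
          case 0 => true
          case 1 => provinceId == prov
          case 2 => cityId == city
          case 3 => countyId == county
          case 4 => townId == town
          case _ => false
        }) =>
        (projectId, doctorId, prov, city, county, town)
    }
  }

  def main(args: Array[String]): Unit = {
    val broadcastValue: Array[RegionTuple] = Array((1L, 10L, 100L, 1000L, 10000L))
    // Only the province is filled in (count == 1), so the match is on province_id.
    println(matchRegion(1L, 7L, 10L, 0L, 0L, 0L, broadcastValue))
    // Prints: List((1,7,10,100,1000,10000))
  }
}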
@@ -138,4 +92,53 @@ object SyncAttachRegionResult {
       }
     }
   }
+
+  def getDataFrame(data: DataFrame, sparkSession: SparkSession,
+                   broadcast: Broadcast[Array[(Long, Long, Long, Long, Long)]]): DataFrame = {
+    import sparkSession.implicits._
+    val reDS: Dataset[(Long, Long, Long, Long, Long, Long)] = data.mapPartitions(it => {
+      // Store the final result; each element represents one tuple
+      val tuples: ListBuffer[(Long, Long, Long, Long, Long, Long)] =
+        ListBuffer[(Long, Long, Long, Long, Long, Long)]()
+      val list: List[Row] = it.toList
+      list.foreach(row => {
+        var count: Int = 0
+        // Note: a type conversion happens here
+        val project_id: Long = row.getAs[Int]("project_id").toLong
+        val doctor_id: Long = row.getAs[Int]("doctor_id").toLong
+        val province_id: Long = row.getAs[Long]("province_id")
+        val city_id: Long = row.getAs[Long]("city_id")
+        val county_id: Long = row.getAs[Long]("county_id")
+        val town_id: Long = row.getAs[Long]("town_id")
+        if (province_id != 0) { count = count + 1 }
+        if (city_id != 0) { count = count + 1 }
+        if (county_id != 0) { count = count + 1 }
+        if (town_id != 0) { count = count + 1 }
+        // Look the row up in the broadcast variable
+        val broad: Array[(Long, Long, Long, Long, Long)] = broadcast.value
+        broad.foreach(tuple => {
+          if (count == 0 && project_id == tuple._1) {
+            tuples += ((project_id, doctor_id, tuple._2, tuple._3, tuple._4, tuple._5))
+          } else if (count == 1 && project_id == tuple._1 && province_id == tuple._2) {
+            tuples += ((project_id, doctor_id, tuple._2, tuple._3, tuple._4, tuple._5))
+          } else if (count == 2 && project_id == tuple._1 && city_id == tuple._3) {
+            tuples += ((project_id, doctor_id, tuple._2, tuple._3, tuple._4, tuple._5))
+          } else if (count == 3 && project_id == tuple._1 && county_id == tuple._4) {
+            tuples += ((project_id, doctor_id, tuple._2, tuple._3, tuple._4, tuple._5))
+          } else if (count == 4 && project_id == tuple._1 && town_id == tuple._5) {
+            tuples += ((project_id, doctor_id, tuple._2, tuple._3, tuple._4, tuple._5))
+          }
+        })
+      })
+      tuples.iterator
+    })
+    val result: DataFrame = reDS.toDF("project_id", "doctor_id", "province_id", "city_id", "county_id", "town_id")
+    result
+  }
 }
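With the matching logic extracted into getDataFrame, the helper can be exercised on its own against a local SparkSession. A rough smoke-test sketch follows, assuming the package com.data implied by the file path and a project build that puts Spark SQL on the classpath; GetDataFrameSmokeTest and the sample values are hypothetical.

import org.apache.spark.sql.{DataFrame, SparkSession}
import com.data.SyncAttachRegionResult

object GetDataFrameSmokeTest {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("getDataFrame-smoke").getOrCreate()
    import spark.implicits._

    // Column types mirror the getAs calls in the diff: Int ids, Long region ids.
    val df: DataFrame = Seq((1, 7, 10L, 0L, 0L, 0L))
      .toDF("project_id", "doctor_id", "province_id", "city_id", "county_id", "town_id")

    // One broadcast entry per (project, province, city, county, town) mapping.
    val broadcast = spark.sparkContext.broadcast(Array((1L, 10L, 100L, 1000L, 10000L)))

    // The row only has province_id set, so it should attach to the full region chain.
    val result = SyncAttachRegionResult.getDataFrame(df, spark, broadcast)
    result.show()

    spark.stop()
  }
}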