Skip to content
项目
群组
代码片段
帮助
正在加载...
帮助
提交反馈
为 GitLab 提交贡献
登录
切换导航
L
label
项目
项目
详情
动态
版本
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
成员
成员
收起侧边栏
Close sidebar
动态
分支图
统计图
提交
打开侧边栏
zhenxin.ma
label
提交
d458386b
提交
d458386b
编写于
11月 26, 2020
作者:
wuyunfeng
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
修改SessionProcess为批量执行
上级
1a0bc349
变更
1
显示空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
64 行增加
和
53 行删除
+64
-53
SessionProcess.scala
src/main/scala/com/session/SessionProcess.scala
+64
-53
未找到文件。
src/main/scala/com/session/SessionProcess.scala
浏览文件 @
d458386b
...
@@ -51,8 +51,16 @@ object SessionProcess {
...
@@ -51,8 +51,16 @@ object SessionProcess {
val
sessionProcess
:
SessionProcess
=
SessionProcess
()
val
sessionProcess
:
SessionProcess
=
SessionProcess
()
//step1:获取源数据,重新分区,产生shuffle,Spark读Hive默认的分区数太少,并对数据去重
//step1:获取源数据,重新分区,产生shuffle,Spark读Hive默认的分区数太少,并对数据去重
var
sourceDF
:
DataFrame
=
sessionProcess
.
sparkSession
.
sql
(
MyConfigSession
.
SOURCE_SQL
+
s
" and created_day='${scnData}'"
).
repartition
(
200
).
distinct
()
var
sourceDF
:
DataFrame
=
sessionProcess
.
sparkSession
.
sql
(
MyConfigSession
.
SOURCE_SQL
+
s
" and created_day='${scnData}'"
).
repartition
(
200
).
distinct
()
var
conditionGroup
=
List
(
"<='2'"
,
"between '3' and '5'"
,
"between '6' and '8'"
,
"between '9' and 'b'"
,
">='c'"
)
var
dataCount
=
0
var
index
=
0
sourceDF
.
persist
(
StorageLevel
.
MEMORY_AND_DISK_SER
)
for
(
condition
<-
conditionGroup
){
index
+=
1
val
slideDF
=
sourceDF
.
where
(
s
" SUBSTRING(pseudo_session,1,1) ${condition}"
).
repartition
(
100
)
//step2:抽取出当天pseudo_session对应的非空的device_token,doctor_id,mobile,补充到对应的pseudo_session下这几项为空的记录中
//step2:抽取出当天pseudo_session对应的非空的device_token,doctor_id,mobile,补充到对应的pseudo_session下这几项为空的记录中
val
groupRdd
=
s
ourc
eDF
.
rdd
.
groupBy
(
r
=>
r
.
getAs
[
String
](
"pseudo_session"
))
val
groupRdd
=
s
lid
eDF
.
rdd
.
groupBy
(
r
=>
r
.
getAs
[
String
](
"pseudo_session"
))
val
resRdd
=
groupRdd
.
flatMap
(
g
=>
{
val
resRdd
=
groupRdd
.
flatMap
(
g
=>
{
val
pseudo_session
=
g
.
_1
val
pseudo_session
=
g
.
_1
val
resList
:
ListBuffer
[
Row
]
=
new
ListBuffer
[
Row
]()
val
resList
:
ListBuffer
[
Row
]
=
new
ListBuffer
[
Row
]()
...
@@ -106,11 +114,10 @@ object SessionProcess {
...
@@ -106,11 +114,10 @@ object SessionProcess {
val
resDF
=
sessionProcess
.
sparkSession
.
createDataFrame
(
resRdd
,
sourceDF
.
schema
)
val
resDF
=
sessionProcess
.
sparkSession
.
createDataFrame
(
resRdd
,
sourceDF
.
schema
)
resDF
.
persist
(
StorageLevel
.
MEMORY_AND_DISK_SER
)
resDF
.
persist
(
StorageLevel
.
MEMORY_AND_DISK_SER
)
println
(
"resDF.show=======>"
)
println
(
"resDF.show=======>"
)
// resDF.show()
// resDF.show()
val
data
:
RDD
[
Row
]
=
resDF
.
rdd
.
mapPartitions
(
sessionProcess
.
filterRows
)
val
data
:
RDD
[
Row
]
=
resDF
.
rdd
.
mapPartitions
(
sessionProcess
.
filterRows
)
println
(
"---------------------------------------process columns-------------------------------------------"
)
println
(
"---------------------------------------process columns-------------------------------------------"
)
val
baseDF
:
DataFrame
=
data
.
mapPartitions
(
sessionProcess
.
processColumns
)
val
baseDF
:
DataFrame
=
data
.
mapPartitions
(
sessionProcess
.
processColumns
)
.
toDF
(
"pseudo_session"
,
"user_id"
,
"mobile"
,
"device_token"
,
"user_token"
,
"view_class"
,
"view_path"
,
.
toDF
(
"pseudo_session"
,
"user_id"
,
"mobile"
,
"device_token"
,
"user_token"
,
"view_class"
,
"view_path"
,
"action_type"
,
"component_tag"
,
"menu_code"
,
"action_code"
,
"position"
,
"label_value"
,
"label_class"
,
"module_class1"
,
"module_class2"
,
"app_version"
,
"action_type"
,
"component_tag"
,
"menu_code"
,
"action_code"
,
"position"
,
"label_value"
,
"label_class"
,
"module_class1"
,
"module_class2"
,
"app_version"
,
...
@@ -120,16 +127,20 @@ object SessionProcess {
...
@@ -120,16 +127,20 @@ object SessionProcess {
//默认缓存级别是:MEMORY_AND_DISK
//默认缓存级别是:MEMORY_AND_DISK
sessionIdDF
.
persist
(
StorageLevel
.
MEMORY_AND_DISK_SER
)
sessionIdDF
.
persist
(
StorageLevel
.
MEMORY_AND_DISK_SER
)
println
(
"sessionIdDF.show=======>"
)
println
(
"sessionIdDF.show=======>"
)
// sessionIdDF.show()
println
(
"-------------------------------match user_id 逻辑-------------------------------------------------"
)
println
(
"-------------------------------match user_id 逻辑-------------------------------------------------"
)
val
dwFactLogSession
:
DataFrame
=
sessionProcess
.
matchUserId
(
sessionIdDF
,
sessionProcess
.
sparkSession
,
scnData
)
val
dwFactLogSession
:
DataFrame
=
sessionProcess
.
matchUserId
(
sessionIdDF
,
sessionProcess
.
sparkSession
,
scnData
)
println
(
"dwFactLogSession.show=======>"
)
println
(
"dwFactLogSession.show=======>"
)
// dwFactLogSession.show()
println
(
"-----------------create view fact_log_session and load to dw_fact_log_session--------------------"
)
println
(
"-----------------create view fact_log_session and load to dw_fact_log_session--------------------"
)
dwFactLogSession
.
createOrReplaceTempView
(
"fact_log_session"
)
dwFactLogSession
.
createOrReplaceTempView
(
"fact_log_session"
)
var
insertMode
=
"insert overwrite"
if
(
index
!=
1
){
insertMode
=
"insert into"
}
val
loadDataSql
=
val
loadDataSql
=
s
"insert overwrite
table ${MyConfigSession.HIVE_TABLE1} partition(created_day='${scnData}') select * from fact_log_session distribute by rand()"
s
"${insertMode}
table ${MyConfigSession.HIVE_TABLE1} partition(created_day='${scnData}') select * from fact_log_session distribute by rand()"
sessionProcess
.
sparkSession
.
sql
(
loadDataSql
)
sessionProcess
.
sparkSession
.
sql
(
loadDataSql
)
dataCount
=
dataCount
+
dwFactLogSession
.
count
().
toInt
}
println
(
"----------------------------------update task record table---------------------------------------"
)
println
(
"----------------------------------update task record table---------------------------------------"
)
//任务执行成功,更新 Mysql record 配置表
//任务执行成功,更新 Mysql record 配置表
...
@@ -141,7 +152,7 @@ object SessionProcess {
...
@@ -141,7 +152,7 @@ object SessionProcess {
val
upreSta
:
PreparedStatement
=
connSql
.
prepareStatement
(
updateSQL
)
val
upreSta
:
PreparedStatement
=
connSql
.
prepareStatement
(
updateSQL
)
upreSta
.
setString
(
1
,
"1"
)
upreSta
.
setString
(
1
,
"1"
)
upreSta
.
setString
(
2
,
endTime
)
upreSta
.
setString
(
2
,
endTime
)
upreSta
.
setInt
(
3
,
sessionIdDF
.
count
().
toI
nt
)
upreSta
.
setInt
(
3
,
dataCou
nt
)
//更新表数据
//更新表数据
upreSta
.
executeUpdate
()
upreSta
.
executeUpdate
()
//关闭连接
//关闭连接
...
...
写
预览
Markdown
格式
0%
请重试
or
附加一个文件
附加文件
取消
您添加了
0
人
到此讨论。请谨慎行事。
先完成此消息的编辑!
取消
想要评论请
注册
或
登录