Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
transactionLogMask
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
谢森
transactionLogMask
Commits
aa763a65
Commit
aa763a65
authored
Oct 22, 2020
by
王海鹰
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
白名单逻辑优化
正则表达式完善
parent
cee08142
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
202 additions
and
140 deletions
+202
-140
src/main/java/com/zorkdata/datamask/TransactionLogMask.java
src/main/java/com/zorkdata/datamask/TransactionLogMask.java
+1
-1
src/main/java/com/zorkdata/datamask/constant/ParamConstants.java
...n/java/com/zorkdata/datamask/constant/ParamConstants.java
+1
-5
src/main/java/com/zorkdata/datamask/constant/RegExpConstants.java
.../java/com/zorkdata/datamask/constant/RegExpConstants.java
+22
-0
src/main/java/com/zorkdata/datamask/domain/HDFSLogQueryParam.java
.../java/com/zorkdata/datamask/domain/HDFSLogQueryParam.java
+8
-4
src/main/java/com/zorkdata/datamask/domain/KafkaMsgQueryParam.java
...java/com/zorkdata/datamask/domain/KafkaMsgQueryParam.java
+8
-3
src/main/java/com/zorkdata/datamask/hadoop/HadoopMask.java
src/main/java/com/zorkdata/datamask/hadoop/HadoopMask.java
+21
-13
src/main/java/com/zorkdata/datamask/kafka/KafkaMask.java
src/main/java/com/zorkdata/datamask/kafka/KafkaMask.java
+6
-9
src/main/java/com/zorkdata/datamask/util/MaskRegexConfig.java
...main/java/com/zorkdata/datamask/util/MaskRegexConfig.java
+0
-30
src/main/java/com/zorkdata/datamask/util/MaskUtil.java
src/main/java/com/zorkdata/datamask/util/MaskUtil.java
+95
-56
src/main/java/com/zorkdata/datamask/util/ParamUtils.java
src/main/java/com/zorkdata/datamask/util/ParamUtils.java
+24
-6
src/main/resources/application.yml
src/main/resources/application.yml
+16
-13
No files found.
src/main/java/com/zorkdata/datamask/TransactionLogMask.java
View file @
aa763a65
...
...
@@ -23,7 +23,7 @@ public class TransactionLogMask {
public
static
void
main
(
String
[]
args
)
throws
Exception
{
if
(
args
.
length
!=
PARAM_LENGTH
)
{
String
error
=
"参数缺失,请输入配置文件,例如: --conf
--conf
/opt/TransactionLogMask/application.yml"
;
String
error
=
"参数缺失,请输入配置文件,例如: --conf /opt/TransactionLogMask/application.yml"
;
logger
.
error
(
error
);
throw
new
RuntimeException
(
error
);
}
...
...
src/main/java/com/zorkdata/datamask/constant/ParamConstants.java
View file @
aa763a65
...
...
@@ -24,9 +24,5 @@ public interface ParamConstants {
String
HDFS
=
"hdfs"
;
String
KAFKA
=
"kafka"
;
String
NAME_REG_EXP
=
"name_reg_exp"
;
String
MOBILE_REG_EXP
=
"mobile_reg_exp"
;
String
PHONE_REG_EXP
=
"phone_reg_exp"
;
String
EMAIL_REG_EXP
=
"email_reg_exp"
;
String
FIELDS_WHITE_LIST
=
"fieldsWhiteList"
;
}
src/main/java/com/zorkdata/datamask/constant/RegExpConstants.java
0 → 100644
View file @
aa763a65
package
com.zorkdata.datamask.constant
;
/**
 * Configuration keys for the data-masking regular expressions.
 *
 * <p>Each constant names an entry under the top-level {@link #REG_EXP}
 * section of {@code application.yml}; the value stored under that key is the
 * regular expression used to detect the corresponding kind of sensitive data.
 *
 * @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
 * Date : Create in 2020/10/20 15:32
 */
public interface RegExpConstants {

    /** Top-level configuration section that groups every masking pattern. */
    String REG_EXP = "reg_exp";

    // --- person-related patterns ---

    /** Key for the personal-name pattern. */
    String NAME_REG_EXP = "name";
    /** Key for the mobile-phone-number pattern. */
    String MOBILE_REG_EXP = "mobile";
    /** Key for the landline-phone-number pattern. */
    String PHONE_REG_EXP = "phone";
    /** Key for the e-mail-address pattern. */
    String EMAIL_REG_EXP = "email";
    /** Key for the 15-digit national ID pattern. */
    String ID15_REG_EXP = "id15";
    /** Key for the 18-digit national ID pattern. */
    String ID18_REG_EXP = "id18";
    /** Key for the bank-card-number pattern. */
    String BANK_CARD_REG_EXP = "bank_card";
    /** Key for the home-address pattern. */
    String ADDRESS_REG_EXP = "address";

    // --- network-related patterns ---

    /** Key for the IPv4-address pattern. */
    String IP_REG_EXP = "ip";
    /** Key for the MAC-address pattern. */
    String MAC_REG_EXP = "mac";
}
src/main/java/com/zorkdata/datamask/domain/H
adoop
Param.java
→
src/main/java/com/zorkdata/datamask/domain/H
DFSLogQuery
Param.java
View file @
aa763a65
...
...
@@ -2,6 +2,8 @@ package com.zorkdata.datamask.domain;
import
lombok.Data
;
import
java.io.Serializable
;
/**
* @author 谢森
* @Description 参数实体类
...
...
@@ -9,7 +11,10 @@ import lombok.Data;
* @Date 2020/10/21 14:33
*/
@Data
public
class
HadoopParam
{
public
class
HDFSLogQueryParam
implements
Serializable
{
private
static
final
long
serialVersionUID
=
1L
;
private
String
source
;
private
String
hdfsSrc
;
private
String
hdfsDest
;
...
...
@@ -18,7 +23,7 @@ public class HadoopParam {
private
Long
startTime
;
private
Long
endTime
;
public
H
adoop
Param
(
String
source
,
String
hdfsSrc
,
String
hdfsDest
,
String
core
,
String
date
,
Long
startTime
,
public
H
DFSLogQuery
Param
(
String
source
,
String
hdfsSrc
,
String
hdfsDest
,
String
core
,
String
date
,
Long
startTime
,
Long
endTime
)
{
this
.
source
=
source
;
this
.
hdfsSrc
=
hdfsSrc
;
...
...
@@ -28,5 +33,4 @@ public class HadoopParam {
this
.
startTime
=
startTime
;
this
.
endTime
=
endTime
;
}
}
src/main/java/com/zorkdata/datamask/domain/KafkaParam.java
→
src/main/java/com/zorkdata/datamask/domain/Kafka
MsgQuery
Param.java
View file @
aa763a65
...
...
@@ -2,6 +2,8 @@ package com.zorkdata.datamask.domain;
import
lombok.Data
;
import
java.io.Serializable
;
/**
* @author 谢森
* @Description kafka 参数实体类
...
...
@@ -9,7 +11,10 @@ import lombok.Data;
* @Date 2020/10/21 15:07
*/
@Data
public
class
KafkaParam
{
public
class
KafkaMsgQueryParam
implements
Serializable
{
private
static
final
long
serialVersionUID
=
1L
;
private
String
servers
;
private
String
zookeeper
;
private
String
topic
;
...
...
@@ -19,7 +24,7 @@ public class KafkaParam {
private
Long
startTime
;
private
Long
endTime
;
public
KafkaParam
(
String
servers
,
String
zookeeper
,
String
topic
,
String
hdfsDest
,
String
core
,
String
date
,
public
Kafka
MsgQuery
Param
(
String
servers
,
String
zookeeper
,
String
topic
,
String
hdfsDest
,
String
core
,
String
date
,
Long
startTime
,
Long
endTime
)
{
this
.
servers
=
servers
;
this
.
zookeeper
=
zookeeper
;
...
...
src/main/java/com/zorkdata/datamask/hadoop/HadoopMask.java
View file @
aa763a65
...
...
@@ -2,9 +2,10 @@ package com.zorkdata.datamask.hadoop;
import
com.alibaba.fastjson.JSON
;
import
com.alibaba.fastjson.TypeReference
;
import
com.zorkdata.datamask.constant.ParamConstants
;
import
com.zorkdata.datamask.constant.StrConstants
;
import
com.zorkdata.datamask.domain.LogData
;
import
com.zorkdata.datamask.domain.H
adoop
Param
;
import
com.zorkdata.datamask.domain.H
DFSLogQuery
Param
;
import
com.zorkdata.datamask.domain.TransactionLog
;
import
com.zorkdata.datamask.util.DateUtils
;
import
com.zorkdata.datamask.util.MaskUtil
;
...
...
@@ -38,14 +39,15 @@ import java.io.IOException;
import
java.net.URI
;
import
java.net.URISyntaxException
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.Map
;
/**
*
@author 谢森
*
@Description hadoop 文件数据脱敏
* @
Email xiesen310@163.com
*
@Date 2020/10/21 14:29
*
Description: hdfs日志文件脱敏
*
* @
author: wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
*
Date: Create in 2020/9/23 9:30
*/
public
class
HadoopMask
{
private
static
final
Logger
logger
=
LoggerFactory
.
getLogger
(
HadoopMask
.
class
);
...
...
@@ -61,12 +63,18 @@ public class HadoopMask {
env
.
setParallelism
(
1
);
JobConf
jobConf
=
new
JobConf
();
jobConf
.
set
(
"avro.output.schema"
,
TransactionLog
.
SCHEMA
$
.
toString
(
true
));
H
adoopParam
hadoop
Param
=
ParamUtils
.
initHadoopConf
(
conf
);
H
DFSLogQueryParam
hdfsLogQuery
Param
=
ParamUtils
.
initHadoopConf
(
conf
);
ParameterTool
parameterTool
=
ParameterTool
.
fromMap
(
conf
);
env
.
getConfig
().
setGlobalJobParameters
(
parameterTool
);
List
<
String
>
logFiles
=
filterHdfsLogFiles
(
hadoopParam
.
getHdfsSrc
(),
hadoopParam
.
getDate
(),
hadoopParam
.
getStartTime
(),
hadoopParam
.
getEndTime
());
MaskUtil
maskUtil
=
ParamUtils
.
initMaskUtil
(
conf
);
String
[]
fieldsWhiteListArray
=
String
.
valueOf
(
conf
.
get
(
ParamConstants
.
FIELDS_WHITE_LIST
)).
trim
().
split
(
","
);
ArrayList
<
String
>
fieldsWhiteList
=
new
ArrayList
<
String
>(
fieldsWhiteListArray
.
length
);
Collections
.
addAll
(
fieldsWhiteList
,
fieldsWhiteListArray
);
List
<
String
>
logFiles
=
filterHdfsLogFiles
(
hdfsLogQueryParam
.
getHdfsSrc
(),
hdfsLogQueryParam
.
getDate
(),
hdfsLogQueryParam
.
getStartTime
(),
hdfsLogQueryParam
.
getEndTime
());
for
(
String
logFile
:
logFiles
)
{
/**
...
...
@@ -88,14 +96,14 @@ public class HadoopMask {
new
TypeReference
<
LogData
>()
{
});
//根据日志事件的核心信息做过滤
if
(
null
!=
h
adoopParam
.
getCore
()
&&
logData
.
getDimensions
().
get
(
"hostname"
).
indexOf
(
"c9"
)
>
-
1
)
{
if
(
null
!=
h
dfsLogQueryParam
.
getCore
()
&&
logData
.
getDimensions
().
get
(
"hostname"
).
indexOf
(
"c9"
)
>
-
1
)
{
//根据日志事件的timestamp做过滤
Long
timestamp
=
DateUtils
.
utc2timestamp
(
logData
.
getTimestamp
());
boolean
flag
=
null
!=
timestamp
&&
timestamp
>
h
adoop
Param
.
getStartTime
()
&&
timestamp
<
h
adoop
Param
.
getEndTime
()
||
Boolean
.
TRUE
;
boolean
flag
=
null
!=
timestamp
&&
timestamp
>
h
dfsLogQuery
Param
.
getStartTime
()
&&
timestamp
<
h
dfsLogQuery
Param
.
getEndTime
()
||
Boolean
.
TRUE
;
if
(
flag
)
{
Map
maskResult
=
MaskUtil
.
mask
(
logData
.
getNormalFields
()
);
Map
maskResult
=
maskUtil
.
mask
(
logData
.
getNormalFields
(),
fieldsWhiteList
);
logData
.
setNormalFields
(
maskResult
);
collector
.
collect
(
logData
);
}
...
...
@@ -105,7 +113,7 @@ public class HadoopMask {
// 获取目标hdfs的输出目录
String
logFileName
=
logFile
.
split
(
StrConstants
.
FILE_SEPARATOR
)[
logFile
.
split
(
StrConstants
.
FILE_SEPARATOR
).
length
-
1
];
String
filePath
=
h
adoopParam
.
getHdfsSrc
()
+
logFileName
.
replace
(
StrConstants
.
AVRO_SUFFIX
,
String
filePath
=
h
dfsLogQueryParam
.
getHdfsDest
()
+
logFileName
.
replace
(
StrConstants
.
AVRO_SUFFIX
,
StrConstants
.
EMPTY_STR
);
HadoopOutputFormat
hadoopOutputFormat
=
new
HadoopOutputFormat
<>(
new
AvroOutputFormat
(),
jobConf
);
FileOutputFormat
.
setOutputPath
(
jobConf
,
new
Path
(
filePath
));
...
...
src/main/java/com/zorkdata/datamask/kafka/KafkaMask.java
View file @
aa763a65
package
com.zorkdata.datamask.kafka
;
import
com.zorkdata.datamask.domain.HadoopParam
;
import
com.zorkdata.datamask.domain.KafkaParam
;
import
com.zorkdata.datamask.domain.KafkaMsgQueryParam
;
import
com.zorkdata.datamask.util.ParamUtils
;
import
org.apache.flink.api.common.serialization.SimpleStringSchema
;
import
org.apache.flink.api.java.utils.ParameterTool
;
...
...
@@ -13,7 +12,6 @@ import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import
java.text.SimpleDateFormat
;
import
java.time.ZoneId
;
import
java.util.Date
;
import
java.util.Map
;
import
java.util.Properties
;
...
...
@@ -35,14 +33,13 @@ public class KafkaMask {
env
.
setParallelism
(
1
);
SimpleDateFormat
sdf
=
new
SimpleDateFormat
(
"yyyy-MM-dd"
);
Kafka
Param
kafka
Param
=
ParamUtils
.
initKafkaConf
(
conf
);
Kafka
MsgQueryParam
kafkaMsgQuery
Param
=
ParamUtils
.
initKafkaConf
(
conf
);
ParameterTool
parameterTool
=
ParameterTool
.
fromMap
(
conf
);
env
.
getConfig
().
setGlobalJobParameters
(
parameterTool
);
Properties
props
=
new
Properties
();
props
.
put
(
"bootstrap.servers"
,
kafkaParam
.
getServers
());
props
.
put
(
"zookeeper.connect"
,
kafkaParam
.
getZookeeper
());
props
.
put
(
"bootstrap.servers"
,
kafka
MsgQuery
Param
.
getServers
());
props
.
put
(
"zookeeper.connect"
,
kafka
MsgQuery
Param
.
getZookeeper
());
props
.
put
(
"group.id"
,
"group1"
);
props
.
put
(
"enable.auto.commit"
,
false
);
props
.
put
(
"key.deserializer"
,
"org.apache.kafka.common.serialization.StringDeserializer"
);
...
...
@@ -50,11 +47,11 @@ public class KafkaMask {
props
.
put
(
"auto.offset.reset"
,
"earliest"
);
props
.
put
(
"max.poll.records"
,
1000
);
SingleOutputStreamOperator
<
String
>
dataStreamSource
=
env
.
addSource
(
new
FlinkKafkaConsumer
<>(
kafkaParam
.
getTopic
(),
env
.
addSource
(
new
FlinkKafkaConsumer
<>(
kafka
MsgQuery
Param
.
getTopic
(),
new
SimpleStringSchema
(),
props
)).
setParallelism
(
1
);
// TODO 根据date、startTime、endTime过滤
BucketingSink
<
String
>
hdfsSink
=
new
BucketingSink
<>(
kafkaParam
.
getHdfsDest
());
BucketingSink
<
String
>
hdfsSink
=
new
BucketingSink
<>(
kafka
MsgQuery
Param
.
getHdfsDest
());
//创建一个按照时间创建目录的bucketer,默认是yyyy-MM-dd--HH,时区默认是美国时间。这里我都改了,一天创建一次目录,上海时间
hdfsSink
.
setBucketer
(
new
DateTimeBucketer
<
String
>(
"yyyy-MM-dd"
,
ZoneId
.
of
(
"Asia/Shanghai"
)));
//设置每个文件的最大大小 ,默认是384M(1024 * 1024 * 384)
...
...
src/main/java/com/zorkdata/datamask/util/MaskRegexConfig.java
deleted
100644 → 0
View file @
cee08142
package
com.zorkdata.datamask.util
;
/**
 * Plain holder for the masking regular expressions read from configuration.
 *
 * <p>Carries one field per supported sensitive-data category plus the
 * field-name white list. The class declares no accessors or behavior of its
 * own — it is a pure data container.
 *
 * @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
 * Date : Create in 2020/10/19 16:43
 */
public class MaskRegexConfig {

    // Comma-separated field names that are exempt from masking.
    private String fieldsWhiteList;

    // Patterns for person-related data.
    private String nameRegExp;
    private String mobileRegExp;
    private String phoneRegExp;
    private String emailRegExp;
    private String idRegExp15;
    private String idRegExp18;
    private String addressRegExp;

    // Patterns for network-related data.
    private String ipRegExp;
    private String macRegExp;
}
src/main/java/com/zorkdata/datamask/util/MaskUtil.java
View file @
aa763a65
package
com.zorkdata.datamask.util
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
java.io.Serializable
;
import
java.util.*
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
/**
* Description
:
* Description:
*
* @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
* Date : Create in 2020/9/23 9:30
* RegularExpression
* @author: wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
* Date: Create in 2020/9/23 9:30
*/
public
class
MaskUtil
{
public
class
MaskUtil
implements
Serializable
{
private
static
final
long
serialVersionUID
=
1L
;
public
static
final
int
DEFAULT_MAP_CAPACITY
=
16
;
private
MaskRegexConfig
maskRegexConfig
;
/**
* 姓名正则
*/
static
Pattern
namePattern
=
Pattern
.
compile
(
"([\\u4e00-\\u9fa5]{1,20}|[a-zA-Z\\.\\s]{1,20})"
);
private
String
nameRegExp
;
/**
* 手机号正则
*/
static
Pattern
mobilePattern
=
Pattern
.
compile
(
"((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))"
+
"\\d{8}"
);
private
String
mobileRegExp
;
/**
* 电话号码正则
*/
static
Pattern
phonePattern
=
Pattern
.
compile
(
"(\\d{3,4}-)?\\d{6,8}"
);
private
String
phoneRegExp
;
/**
* 邮箱正则
*/
static
Pattern
emailPattern
=
Pattern
.
compile
(
"\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*"
);
private
String
emailRegExp
;
/**
* 身份证号码(15位)正则
*/
static
Pattern
idPattern15
=
Pattern
.
compile
(
"[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}"
);
private
String
idRegExp15
;
/**
* 身份证号码(18位)正则
*/
static
Pattern
idPattern18
=
Pattern
.
compile
(
"[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}"
+
"([0-9Xx])"
);
private
String
idRegExp18
;
/**
* 银行卡号码正则
*/
private
String
bankCardRegExp
;
/**
* 家庭住址正则
*/
static
Pattern
addressPattern
=
Pattern
.
compile
(
"([\\u4E00-\\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}"
);
private
String
addressRegExp
;
/**
* ip地址正则
* // static Pattern ipPattern = Pattern.compile("^((\\d|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5]|[*])\\.){3}
* // (\\d|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5]|[*])$");
*/
static
Pattern
ipPattern
=
Pattern
.
compile
(
"((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}"
+
"(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)"
);
private
String
ipRegExp
;
/**
* mac地址正则
*/
static
Pattern
macPattern
=
Pattern
.
compile
(
"([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}"
);
static
List
<
Pattern
>
patterns
=
new
ArrayList
<
Pattern
>()
{{
add
(
macPattern
);
add
(
emailPattern
);
add
(
ipPattern
);
add
(
namePattern
);
add
(
idPattern18
);
add
(
idPattern15
);
add
(
mobilePattern
);
add
(
phonePattern
);
add
(
addressPattern
);
private
String
macRegExp
;
List
<
Pattern
>
patterns
=
new
ArrayList
<
Pattern
>()
{{
}};
public
static
Map
mask
(
Map
map
)
{
public
MaskUtil
(
String
nameRegExp
,
String
mobileRegExp
,
String
phoneRegExp
,
String
emailRegExp
,
String
idRegExp15
,
String
idRegExp18
,
String
bankCardRegExp
,
String
addressRegExp
,
String
ipRegExp
,
String
macRegExp
)
{
this
.
nameRegExp
=
nameRegExp
;
this
.
mobileRegExp
=
mobileRegExp
;
this
.
phoneRegExp
=
phoneRegExp
;
this
.
emailRegExp
=
emailRegExp
;
this
.
idRegExp15
=
idRegExp15
;
this
.
idRegExp18
=
idRegExp18
;
this
.
bankCardRegExp
=
bankCardRegExp
;
this
.
addressRegExp
=
addressRegExp
;
this
.
ipRegExp
=
ipRegExp
;
this
.
macRegExp
=
macRegExp
;
}
public
Map
mask
(
Map
map
,
ArrayList
whiteList
)
{
patterns
.
add
(
Pattern
.
compile
(
this
.
nameRegExp
));
patterns
.
add
(
Pattern
.
compile
(
this
.
macRegExp
));
patterns
.
add
(
Pattern
.
compile
(
this
.
emailRegExp
));
patterns
.
add
(
Pattern
.
compile
(
this
.
ipRegExp
));
patterns
.
add
(
Pattern
.
compile
(
this
.
nameRegExp
));
patterns
.
add
(
Pattern
.
compile
(
this
.
idRegExp18
));
patterns
.
add
(
Pattern
.
compile
(
this
.
idRegExp15
));
patterns
.
add
(
Pattern
.
compile
(
this
.
bankCardRegExp
));
patterns
.
add
(
Pattern
.
compile
(
this
.
mobileRegExp
));
patterns
.
add
(
Pattern
.
compile
(
this
.
phoneRegExp
));
patterns
.
add
(
Pattern
.
compile
(
this
.
addressRegExp
));
map
.
forEach
((
k
,
v
)
->
{
if
(!
whiteList
.
contains
(
k
))
{
String
value
=
v
.
toString
();
for
(
Pattern
pattern
:
patterns
)
{
Matcher
matcher
=
pattern
.
matcher
(
value
);
...
...
@@ -86,25 +110,40 @@ public class MaskUtil {
}
}
map
.
put
(
k
,
value
);
}
else
{
map
.
put
(
k
,
v
);
}
});
return
map
;
}
public
static
void
main
(
String
[]
args
)
{
MaskUtil
maskUtil
=
new
MaskUtil
();
MaskUtil
maskUtil
=
new
MaskUtil
(
"[\\u4e00-\\u9fa5]{1,20}|[a-zA-Z\\\\.\\\\s]{1,20}"
,
"((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}"
,
"(\\d{3,4}-)?\\d{6,8}"
,
"\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*"
,
"[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}"
,
"[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])"
,
"([1-9]{1})(\\d{11}|\\d{15}|\\d{16}|\\d{17}|\\d{18})"
,
"([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}"
,
"((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)"
,
"([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}"
);
Map
map
=
new
HashMap
(
DEFAULT_MAP_CAPACITY
);
map
.
put
(
"姓名"
,
"王海鹰"
);
map
.
put
(
"身份证号"
,
"372925199008075158"
);
map
.
put
(
"手机号"
,
"15000101879"
);
map
.
put
(
"电话"
,
"021-61341606"
);
map
.
put
(
"邮箱"
,
"wanghaiying@zork.com"
);
map
.
put
(
"住址"
,
"上海市浦东新区碧波路690号1弄"
);
map
.
put
(
"住址2"
,
"上海市浦东新区张江微电子港304-2室"
);
map
.
put
(
"ip地址"
,
"192.168.70.2"
);
map
.
put
(
"mac地址"
,
"3c-78-43-25-80-bd"
);
map
.
put
(
"message"
,
"王海鹰,372925199008075158#15000101879"
);
//
map.put("身份证号", "372925199008075158");
//
map.put("手机号", "15000101879");
//
map.put("电话", "021-61341606");
//
map.put("邮箱", "wanghaiying@zork.com");
//
map.put("住址", "上海市浦东新区碧波路690号1弄");
//
map.put("住址2", "上海市浦东新区张江微电子港304-2室");
//
map.put("ip地址", "192.168.70.2");
//
map.put("mac地址", "3c-78-43-25-80-bd");
//
map.put("message", "王海鹰,372925199008075158#15000101879");
map
.
put
(
"messid"
,
"0000011404342B32233DDCDA"
);
System
.
out
.
println
(
maskUtil
.
mask
(
map
));
map
.
put
(
"bsflag"
,
"0000011404342B32233DDCDA"
);
map
.
put
(
"normalFields"
,
"13811110000-110101199003075517-上海市浦东新区张江微电子港-zorkdata@163.com-123456789-wanghaiying123-王海鹰-192.168.1.1-00-50-56-C0-00-08-6227002470170278192"
);
String
[]
fieldsWhiteListArray
=
"messid,fundid,custid,orgid,brhid,secuid,bankcode,market,ordersno,ordergroup,count,poststr,stkcode,bsflag,orderamt,price,qty,bankcode,tacode,ofcode,transacc,taacc"
.
split
(
","
);
ArrayList
<
String
>
fieldsWhiteList
=
new
ArrayList
<
String
>(
fieldsWhiteListArray
.
length
);
Collections
.
addAll
(
fieldsWhiteList
,
fieldsWhiteListArray
);
System
.
out
.
println
(
maskUtil
.
mask
(
map
,
fieldsWhiteList
));
}
}
src/main/java/com/zorkdata/datamask/util/ParamUtils.java
View file @
aa763a65
package
com.zorkdata.datamask.util
;
import
com.zorkdata.datamask.constant.ParamConstants
;
import
com.zorkdata.datamask.domain.HadoopParam
;
import
com.zorkdata.datamask.domain.KafkaParam
;
import
com.zorkdata.datamask.constant.RegExpConstants
;
import
com.zorkdata.datamask.domain.HDFSLogQueryParam
;
import
com.zorkdata.datamask.domain.KafkaMsgQueryParam
;
import
java.util.HashMap
;
import
java.util.Map
;
/**
...
...
@@ -19,7 +21,7 @@ public class ParamUtils {
*
* @param conf
*/
public
static
H
adoop
Param
initHadoopConf
(
Map
conf
)
{
public
static
H
DFSLogQuery
Param
initHadoopConf
(
Map
conf
)
{
String
source
=
String
.
valueOf
(
conf
.
get
(
ParamConstants
.
SOURCE
)).
trim
();
String
hdfsSrc
=
String
.
valueOf
(
conf
.
get
(
ParamConstants
.
HDFS_SRC
)).
trim
();
String
hdfsDest
=
String
.
valueOf
(
conf
.
get
(
ParamConstants
.
HDFS_DEST
)).
trim
();
...
...
@@ -27,10 +29,10 @@ public class ParamUtils {
String
date
=
String
.
valueOf
(
conf
.
get
(
ParamConstants
.
DATE
)).
trim
();
Long
startTime
=
Long
.
parseLong
(
String
.
valueOf
(
conf
.
get
(
ParamConstants
.
START_TIME
)).
trim
());
Long
endTime
=
Long
.
parseLong
(
String
.
valueOf
(
conf
.
get
(
ParamConstants
.
END_TIME
)).
trim
());
return
new
H
adoop
Param
(
source
,
hdfsSrc
,
hdfsDest
,
core
,
date
,
startTime
,
endTime
);
return
new
H
DFSLogQuery
Param
(
source
,
hdfsSrc
,
hdfsDest
,
core
,
date
,
startTime
,
endTime
);
}
public
static
KafkaParam
initKafkaConf
(
Map
conf
)
{
public
static
Kafka
MsgQuery
Param
initKafkaConf
(
Map
conf
)
{
String
servers
=
String
.
valueOf
(
conf
.
get
(
ParamConstants
.
SERVERS
)).
trim
();
String
zookeeper
=
String
.
valueOf
(
conf
.
get
(
ParamConstants
.
ZOOKEEPER
)).
trim
();
String
topic
=
String
.
valueOf
(
conf
.
get
(
ParamConstants
.
TOPIC
)).
trim
();
...
...
@@ -39,6 +41,22 @@ public class ParamUtils {
String
date
=
String
.
valueOf
(
conf
.
get
(
ParamConstants
.
DATE
)).
trim
();
Long
startTime
=
Long
.
parseLong
(
String
.
valueOf
(
conf
.
get
(
ParamConstants
.
START_TIME
)).
trim
());
Long
endTime
=
Long
.
parseLong
(
String
.
valueOf
(
conf
.
get
(
ParamConstants
.
END_TIME
)).
trim
());
return
new
KafkaParam
(
servers
,
zookeeper
,
topic
,
hdfsDest
,
core
,
date
,
startTime
,
endTime
);
return
new
KafkaMsgQueryParam
(
servers
,
zookeeper
,
topic
,
hdfsDest
,
core
,
date
,
startTime
,
endTime
);
}
public
static
MaskUtil
initMaskUtil
(
Map
conf
)
{
Map
regularExpressions
=
(
HashMap
)
conf
.
get
(
RegExpConstants
.
REG_EXP
);
String
nameRegExp
=
String
.
valueOf
(
regularExpressions
.
get
(
RegExpConstants
.
NAME_REG_EXP
)).
trim
();
String
mobileRegExp
=
String
.
valueOf
(
regularExpressions
.
get
(
RegExpConstants
.
MOBILE_REG_EXP
)).
trim
();
String
phoneRegExp
=
String
.
valueOf
(
regularExpressions
.
get
(
RegExpConstants
.
PHONE_REG_EXP
)).
trim
();
String
emailRegExp
=
String
.
valueOf
(
regularExpressions
.
get
(
RegExpConstants
.
EMAIL_REG_EXP
)).
trim
();
String
idRegExp15
=
String
.
valueOf
(
regularExpressions
.
get
(
RegExpConstants
.
ID15_REG_EXP
)).
trim
();
String
idRegExp18
=
String
.
valueOf
(
regularExpressions
.
get
(
RegExpConstants
.
ID18_REG_EXP
)).
trim
();
String
bankCardRegExp
=
String
.
valueOf
(
regularExpressions
.
get
(
RegExpConstants
.
BANK_CARD_REG_EXP
)).
trim
();
String
addressRegExp
=
String
.
valueOf
(
regularExpressions
.
get
(
RegExpConstants
.
ADDRESS_REG_EXP
)).
trim
();
String
ipRegExp
=
String
.
valueOf
(
regularExpressions
.
get
(
RegExpConstants
.
IP_REG_EXP
)).
trim
();
String
macRegExp
=
String
.
valueOf
(
regularExpressions
.
get
(
RegExpConstants
.
MAC_REG_EXP
)).
trim
();
return
new
MaskUtil
(
nameRegExp
,
mobileRegExp
,
phoneRegExp
,
emailRegExp
,
idRegExp15
,
idRegExp18
,
bankCardRegExp
,
addressRegExp
,
ipRegExp
,
macRegExp
);
}
}
src/main/resources/application.yml
View file @
aa763a65
...
...
@@ -5,7 +5,7 @@ source: "hdfs"
hdfs_src
:
"
hdfs://cdh-2:8020/tmp/datawarehouse4/jzjy/kcbp_biz_log"
# hdfs日志写入地址,非必传,默认写到hdfs-src目录下的output目录下
hdfs_dest
:
"
hdfs://cdh-2:8020/tmp/datawarehouse/jzjy/kcbp_biz_log/output
2
/"
hdfs_dest
:
"
hdfs://cdh-2:8020/tmp/datawarehouse/jzjy/kcbp_biz_log/output
4
/"
# 脱敏结果下载到的本地路径
download_path
:
"
/tmp"
...
...
@@ -23,25 +23,28 @@ start_time: 1601348849900
end_time
:
1601348850000
# 不做脱敏的字段白名单
fieldsWhiteList
:
"
fundid,custid,orgid,brhid,secuid,bankcode,market,ordersno,ordergroup,count,poststr,stkcode,bsflag,
\
fieldsWhiteList
:
"
messid,
fundid,custid,orgid,brhid,secuid,bankcode,market,ordersno,ordergroup,count,poststr,stkcode,bsflag,
\
orderamt,price,qty,bankcode,tacode,ofcode,transacc,taacc"
# 脱敏用的正则表达式
# 脱敏用的正则表达式
reg_exp
:
# 姓名正则
nameRegExp
:
"
[
\u4e00
-
\u9fa5
]{1,20}|[a-zA-Z
\\
.
\\
s]{1,20}"
name
:
"
[
\u4e00
-
\u9fa5
]{1,20}|[a-zA-Z
\\
.
\\
s]{1,20}"
# 手机号正则
mobileRegExp
:
"
(13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))
\\
d{8}"
mobile
:
"
(
(13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))
\\
d{8}"
# 电话号码正则
phoneRegExp
:
"
(
\\
d{3,4}-)?
\\
d{6,8}"
phone
:
"
(
\\
d{3,4}-)?
\\
d{6,8}"
# 邮箱正则
emailRegExp
:
"
\\
w+([-+.]
\\
w+)*@
\\
w+([-.]
\\
w+)*
\\
.
\\
w+([-.]
\\
w+)*"
email
:
"
\\
w+([-+.]
\\
w+)*@
\\
w+([-.]
\\
w+)*
\\
.
\\
w+([-.]
\\
w+)*"
# 身份证号码(15位)正则
idRegExp15
:
"
[1-9]
\\
d{7}((0
\\
d)|(1[0-2]))(([0|1|2]
\\
d)|3[0-1])
\\
d{3}"
#身份证号码(18位)正则
idRegExp18
:
"
[1-9]
\\
d{5}[1-9]
\\
d{3}((0
\\
d)|(1[0-2]))(([0|1|2]
\\
d)|3[0-1])
\\
d{3}([0-9Xx])"
id15
:
"
[1-9]
\\
d{7}((0
\\
d)|(1[0-2]))(([0|1|2]
\\
d)|3[0-1])
\\
d{3}"
# 身份证号码(18位)正则
id18
:
"
[1-9]
\\
d{5}[1-9]
\\
d{3}((0
\\
d)|(1[0-2]))(([0|1|2]
\\
d)|3[0-1])
\\
d{3}([0-9Xx])"
# 银行卡号
bank_card
:
"
([1-9]{1})(
\
d{11}|
\
d{15}|
\
d{16}|
\
d{17}|
\
d{18})"
# 家庭住址正则
addressRegExp
:
"
([
\u4E00
-
\u9FA5
A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}"
address
:
"
([
\u4E00
-
\u9FA5
A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}"
# ip地址正则
ipRegEx
p
:
"
((2[0-4]
\\
d|25[0-5]|[01]?
\\
d
\\
d?)
\\
.){3}(2[0-4]
\\
d|25[0-5]|[01]?
\\
d
\\
d?)"
i
p
:
"
((2[0-4]
\\
d|25[0-5]|[01]?
\\
d
\\
d?)
\\
.){3}(2[0-4]
\\
d|25[0-5]|[01]?
\\
d
\\
d?)"
# mac地址正则
macRegExp
:
"
([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}"
\ No newline at end of file
mac
:
"
([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment