Commit aa763a65 authored by 王海鹰

Optimize the whitelist logic
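The whitelist now short-circuits masking per field: keys listed in fieldsWhiteList skip every pattern, while all other fields go through the regex pipeline. A minimal, self-contained sketch of that flow follows (illustrative only: the class name and sample values are assumptions, the key names and the mobile pattern come from this diff):

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class WhitelistMaskSketch {
    public static void main(String[] args) {
        // fieldsWhiteList is configured as a single comma-separated string
        String[] arr = "messid,fundid,bsflag".trim().split(",");
        ArrayList<String> whiteList = new ArrayList<>(arr.length);
        Collections.addAll(whiteList, arr);

        Map<String, String> fields = new HashMap<>(16);
        fields.put("messid", "0000011404342B32233DDCDA"); // whitelisted, kept verbatim
        fields.put("mobile", "15000101879");              // not whitelisted, masked

        Pattern mobilePattern = Pattern.compile(
                "((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}");
        fields.forEach((k, v) -> {
            if (whiteList.contains(k)) {
                return; // whitelisted fields bypass every pattern
            }
            Matcher matcher = mobilePattern.matcher(v);
            if (matcher.find()) {
                // replace the match with an equal-length run of '*'
                StringBuilder stars = new StringBuilder();
                for (int i = 0; i < matcher.group().length(); i++) {
                    stars.append('*');
                }
                fields.put(k, v.replace(matcher.group(), stars.toString()));
            }
        });
        // mobile is now "***********" (11 stars); messid is unchanged
        System.out.println(fields);
    }
}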

Improve the regular expressions
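The reg_exp block in application.yml also moves to short key names (name, mobile, ..., bank_card), mirrored by the new RegExpConstants interface. A hand-built sketch of the lookup, in the same style as the new ParamUtils.initMaskUtil (the hard-coded map below stands in for the parsed YAML and is not part of the diff):

import java.util.HashMap;
import java.util.Map;

public class RegExpLookupSketch {
    public static void main(String[] args) {
        Map<String, Object> regExps = new HashMap<>(16);
        regExps.put("mobile", "((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}");
        regExps.put("bank_card", "([1-9]{1})(\\d{11}|\\d{15}|\\d{16}|\\d{17}|\\d{18})");

        Map<String, Object> conf = new HashMap<>(16);
        conf.put("reg_exp", regExps); // RegExpConstants.REG_EXP

        // each expression is fetched by its short key and handed to MaskUtil
        Map regularExpressions = (Map) conf.get("reg_exp");
        String mobileRegExp = String.valueOf(regularExpressions.get("mobile")).trim();
        String bankCardRegExp = String.valueOf(regularExpressions.get("bank_card")).trim();
        System.out.println(mobileRegExp);
        System.out.println(bankCardRegExp);
    }
}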
parent cee08142
@@ -23,7 +23,7 @@ public class TransactionLogMask {
     public static void main(String[] args) throws Exception {
         if (args.length != PARAM_LENGTH) {
-            String error = "参数缺失,请输入配置文件,例如: --conf --conf /opt/TransactionLogMask/application.yml";
+            String error = "参数缺失,请输入配置文件,例如: --conf /opt/TransactionLogMask/application.yml";
             logger.error(error);
             throw new RuntimeException(error);
         }
@@ -24,9 +24,5 @@ public interface ParamConstants {
     String HDFS = "hdfs";
     String KAFKA = "kafka";
-    String NAME_REG_EXP = "name_reg_exp";
-    String MOBILE_REG_EXP = "mobile_reg_exp";
-    String PHONE_REG_EXP = "phone_reg_exp";
-    String EMAIL_REG_EXP = "email_reg_exp";
+    String FIELDS_WHITE_LIST = "fieldsWhiteList";
 }
+package com.zorkdata.datamask.constant;
+
+/**
+ * Description: key names for the reg_exp section of the configuration file.
+ *
+ * @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
+ * Date : Create in 2020/10/20 15:32
+ */
+public interface RegExpConstants {
+    String REG_EXP = "reg_exp";
+    String NAME_REG_EXP = "name";
+    String MOBILE_REG_EXP = "mobile";
+    String PHONE_REG_EXP = "phone";
+    String EMAIL_REG_EXP = "email";
+    String ID15_REG_EXP = "id15";
+    String ID18_REG_EXP = "id18";
+    String BANK_CARD_REG_EXP = "bank_card";
+    String ADDRESS_REG_EXP = "address";
+    String IP_REG_EXP = "ip";
+    String MAC_REG_EXP = "mac";
+}
@@ -2,6 +2,8 @@ package com.zorkdata.datamask.domain;
 import lombok.Data;
+import java.io.Serializable;
 /**
  * @author 谢森
  * @Description Parameter entity class
@@ -9,7 +11,10 @@ import lombok.Data;
  * @Date 2020/10/21 14:33
  */
 @Data
-public class HadoopParam {
+public class HDFSLogQueryParam implements Serializable {
+    private static final long serialVersionUID = 1L;
     private String source;
     private String hdfsSrc;
     private String hdfsDest;
@@ -18,8 +23,8 @@ public class HadoopParam {
     private Long startTime;
     private Long endTime;
-    public HadoopParam(String source, String hdfsSrc, String hdfsDest, String core, String date, Long startTime,
-                       Long endTime) {
+    public HDFSLogQueryParam(String source, String hdfsSrc, String hdfsDest, String core, String date, Long startTime,
+                             Long endTime) {
         this.source = source;
         this.hdfsSrc = hdfsSrc;
         this.hdfsDest = hdfsDest;
@@ -28,5 +33,4 @@ public class HadoopParam {
         this.startTime = startTime;
         this.endTime = endTime;
     }
 }
@@ -2,6 +2,8 @@ package com.zorkdata.datamask.domain;
 import lombok.Data;
+import java.io.Serializable;
 /**
  * @author 谢森
  * @Description Kafka parameter entity class
@@ -9,7 +11,10 @@ import lombok.Data;
  * @Date 2020/10/21 15:07
  */
 @Data
-public class KafkaParam {
+public class KafkaMsgQueryParam implements Serializable {
+    private static final long serialVersionUID = 1L;
     private String servers;
     private String zookeeper;
     private String topic;
@@ -19,8 +24,8 @@ public class KafkaParam {
     private Long startTime;
     private Long endTime;
-    public KafkaParam(String servers, String zookeeper, String topic, String hdfsDest, String core, String date,
-                      Long startTime, Long endTime) {
+    public KafkaMsgQueryParam(String servers, String zookeeper, String topic, String hdfsDest, String core, String date,
+                              Long startTime, Long endTime) {
         this.servers = servers;
         this.zookeeper = zookeeper;
         this.topic = topic;
@@ -2,9 +2,10 @@ package com.zorkdata.datamask.hadoop;
 import com.alibaba.fastjson.JSON;
 import com.alibaba.fastjson.TypeReference;
+import com.zorkdata.datamask.constant.ParamConstants;
 import com.zorkdata.datamask.constant.StrConstants;
 import com.zorkdata.datamask.domain.LogData;
-import com.zorkdata.datamask.domain.HadoopParam;
+import com.zorkdata.datamask.domain.HDFSLogQueryParam;
 import com.zorkdata.datamask.domain.TransactionLog;
 import com.zorkdata.datamask.util.DateUtils;
 import com.zorkdata.datamask.util.MaskUtil;
@@ -38,14 +39,15 @@ import java.io.IOException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 /**
- * @author 谢森
- * @Description Hadoop file data masking
- * @Email xiesen310@163.com
- * @Date 2020/10/21 14:29
+ * Description: HDFS log file masking
+ *
+ * @author: wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
+ * Date: Create in 2020/9/23 9:30
  */
 public class HadoopMask {
     private static final Logger logger = LoggerFactory.getLogger(HadoopMask.class);
@@ -61,12 +63,18 @@ public class HadoopMask {
         env.setParallelism(1);
         JobConf jobConf = new JobConf();
         jobConf.set("avro.output.schema", TransactionLog.SCHEMA$.toString(true));
-        HadoopParam hadoopParam = ParamUtils.initHadoopConf(conf);
+        HDFSLogQueryParam hdfsLogQueryParam = ParamUtils.initHadoopConf(conf);
         ParameterTool parameterTool = ParameterTool.fromMap(conf);
         env.getConfig().setGlobalJobParameters(parameterTool);
-        List<String> logFiles = filterHdfsLogFiles(hadoopParam.getHdfsSrc(), hadoopParam.getDate(),
-                hadoopParam.getStartTime(), hadoopParam.getEndTime());
+        MaskUtil maskUtil = ParamUtils.initMaskUtil(conf);
+        String[] fieldsWhiteListArray = String.valueOf(conf.get(ParamConstants.FIELDS_WHITE_LIST)).trim().split(",");
+        ArrayList<String> fieldsWhiteList = new ArrayList<String>(fieldsWhiteListArray.length);
+        Collections.addAll(fieldsWhiteList, fieldsWhiteListArray);
+        List<String> logFiles = filterHdfsLogFiles(hdfsLogQueryParam.getHdfsSrc(), hdfsLogQueryParam.getDate(),
+                hdfsLogQueryParam.getStartTime(), hdfsLogQueryParam.getEndTime());
         for (String logFile : logFiles) {
             /**
@@ -88,14 +96,14 @@ public class HadoopMask {
                         new TypeReference<LogData>() {
                         });
                 // Filter on the log event's core information
-                if (null != hadoopParam.getCore() && logData.getDimensions().get("hostname").indexOf("c9") > -1) {
+                if (null != hdfsLogQueryParam.getCore() && logData.getDimensions().get("hostname").indexOf("c9") > -1) {
                     // Filter on the log event's timestamp
                     Long timestamp = DateUtils.utc2timestamp(logData.getTimestamp());
-                    boolean flag = null != timestamp && timestamp > hadoopParam.getStartTime()
-                            && timestamp < hadoopParam.getEndTime() || Boolean.TRUE;
+                    boolean flag = null != timestamp && timestamp > hdfsLogQueryParam.getStartTime()
+                            && timestamp < hdfsLogQueryParam.getEndTime() || Boolean.TRUE;
                     if (flag) {
-                        Map maskResult = MaskUtil.mask(logData.getNormalFields());
+                        Map maskResult = maskUtil.mask(logData.getNormalFields(), fieldsWhiteList);
                         logData.setNormalFields(maskResult);
                         collector.collect(logData);
                     }
@@ -105,7 +113,7 @@ public class HadoopMask {
             // Build the output path on the destination HDFS
             String logFileName =
                     logFile.split(StrConstants.FILE_SEPARATOR)[logFile.split(StrConstants.FILE_SEPARATOR).length - 1];
-            String filePath = hadoopParam.getHdfsSrc() + logFileName.replace(StrConstants.AVRO_SUFFIX,
+            String filePath = hdfsLogQueryParam.getHdfsDest() + logFileName.replace(StrConstants.AVRO_SUFFIX,
                     StrConstants.EMPTY_STR);
             HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat<>(new AvroOutputFormat(), jobConf);
             FileOutputFormat.setOutputPath(jobConf, new Path(filePath));
 package com.zorkdata.datamask.kafka;
-import com.zorkdata.datamask.domain.HadoopParam;
-import com.zorkdata.datamask.domain.KafkaParam;
+import com.zorkdata.datamask.domain.KafkaMsgQueryParam;
 import com.zorkdata.datamask.util.ParamUtils;
 import org.apache.flink.api.common.serialization.SimpleStringSchema;
 import org.apache.flink.api.java.utils.ParameterTool;
@@ -13,7 +12,6 @@ import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
 import java.text.SimpleDateFormat;
 import java.time.ZoneId;
 import java.util.Date;
-import java.util.Map;
 import java.util.Properties;
@@ -35,14 +33,13 @@ public class KafkaMask {
         env.setParallelism(1);
         SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
-        KafkaParam kafkaParam = ParamUtils.initKafkaConf(conf);
+        KafkaMsgQueryParam kafkaMsgQueryParam = ParamUtils.initKafkaConf(conf);
         ParameterTool parameterTool = ParameterTool.fromMap(conf);
         env.getConfig().setGlobalJobParameters(parameterTool);
         Properties props = new Properties();
-        props.put("bootstrap.servers", kafkaParam.getServers());
-        props.put("zookeeper.connect", kafkaParam.getZookeeper());
+        props.put("bootstrap.servers", kafkaMsgQueryParam.getServers());
+        props.put("zookeeper.connect", kafkaMsgQueryParam.getZookeeper());
         props.put("group.id", "group1");
         props.put("enable.auto.commit", false);
         props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
@@ -50,11 +47,11 @@ public class KafkaMask {
         props.put("auto.offset.reset", "earliest");
         props.put("max.poll.records", 1000);
         SingleOutputStreamOperator<String> dataStreamSource =
-                env.addSource(new FlinkKafkaConsumer<>(kafkaParam.getTopic(),
+                env.addSource(new FlinkKafkaConsumer<>(kafkaMsgQueryParam.getTopic(),
                         new SimpleStringSchema(), props)).setParallelism(1);
         // TODO filter by date, startTime, and endTime
-        BucketingSink<String> hdfsSink = new BucketingSink<>(kafkaParam.getHdfsDest());
+        BucketingSink<String> hdfsSink = new BucketingSink<>(kafkaMsgQueryParam.getHdfsDest());
         // Create a time-based bucketer. The default is one directory per hour (yyyy-MM-dd--HH) in the US time zone;
         // changed here to one directory per day, Shanghai time.
         hdfsSink.setBucketer(new DateTimeBucketer<String>("yyyy-MM-dd", ZoneId.of("Asia/Shanghai")));
         // Set the maximum size per file; the default is 384M (1024 * 1024 * 384)
+package com.zorkdata.datamask.util;
+
+/**
+ * Description: holder for the masking regular expressions loaded from configuration.
+ *
+ * @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
+ * Date : Create in 2020/10/19 16:43
+ */
+public class MaskRegexConfig {
+    private String fieldsWhiteList;
+    private String nameRegExp;
+    private String mobileRegExp;
+    private String phoneRegExp;
+    private String emailRegExp;
+    private String idRegExp15;
+    private String idRegExp18;
+    private String addressRegExp;
+    private String ipRegExp;
+    private String macRegExp;
+}
 package com.zorkdata.datamask.util;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.io.Serializable;
+import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 /**
- * Description :
+ * Description:
  *
- * @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
- * Date : Create in 2020/9/23 9:30
- * RegularExpression
+ * @author: wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
+ * Date: Create in 2020/9/23 9:30
  */
-public class MaskUtil {
+public class MaskUtil implements Serializable {
+    private static final long serialVersionUID = 1L;
     public static final int DEFAULT_MAP_CAPACITY = 16;
+    private MaskRegexConfig maskRegexConfig;
     /**
      * Name regex
      */
-    static Pattern namePattern = Pattern.compile("([\\u4e00-\\u9fa5]{1,20}|[a-zA-Z\\.\\s]{1,20})");
+    private String nameRegExp;
     /**
      * Mobile number regex
      */
-    static Pattern mobilePattern = Pattern.compile("((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))" +
-            "\\d{8}");
+    private String mobileRegExp;
     /**
      * Landline number regex
      */
-    static Pattern phonePattern = Pattern.compile("(\\d{3,4}-)?\\d{6,8}");
+    private String phoneRegExp;
     /**
      * Email regex
      */
-    static Pattern emailPattern = Pattern.compile("\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*");
+    private String emailRegExp;
     /**
      * ID card number (15-digit) regex
      */
-    static Pattern idPattern15 = Pattern.compile("[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}");
+    private String idRegExp15;
     /**
      * ID card number (18-digit) regex
      */
-    static Pattern idPattern18 = Pattern.compile("[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}" +
-            "([0-9Xx])");
+    private String idRegExp18;
+    /**
+     * Bank card number regex
+     */
+    private String bankCardRegExp;
     /**
      * Home address regex
      */
-    static Pattern addressPattern = Pattern.compile("([\\u4E00-\\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}");
+    private String addressRegExp;
     /**
      * IP address regex
      */
-    static Pattern ipPattern = Pattern.compile("((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}" +
-            "(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)");
+    private String ipRegExp;
     /**
      * MAC address regex
      */
-    static Pattern macPattern = Pattern.compile("([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}");
-    static List<Pattern> patterns = new ArrayList<Pattern>() {{
-        add(macPattern);
-        add(emailPattern);
-        add(ipPattern);
-        add(namePattern);
-        add(idPattern18);
-        add(idPattern15);
-        add(mobilePattern);
-        add(phonePattern);
-        add(addressPattern);
-    }};
+    private String macRegExp;
+    List<Pattern> patterns = new ArrayList<Pattern>();
+
+    public MaskUtil(String nameRegExp, String mobileRegExp, String phoneRegExp, String emailRegExp,
+                    String idRegExp15, String idRegExp18, String bankCardRegExp, String addressRegExp,
+                    String ipRegExp, String macRegExp) {
+        this.nameRegExp = nameRegExp;
+        this.mobileRegExp = mobileRegExp;
+        this.phoneRegExp = phoneRegExp;
+        this.emailRegExp = emailRegExp;
+        this.idRegExp15 = idRegExp15;
+        this.idRegExp18 = idRegExp18;
+        this.bankCardRegExp = bankCardRegExp;
+        this.addressRegExp = addressRegExp;
+        this.ipRegExp = ipRegExp;
+        this.macRegExp = macRegExp;
+        // Compile each pattern once here, rather than on every mask() call,
+        // so repeated calls do not keep appending duplicates to the list.
+        patterns.add(Pattern.compile(this.macRegExp));
+        patterns.add(Pattern.compile(this.emailRegExp));
+        patterns.add(Pattern.compile(this.ipRegExp));
+        patterns.add(Pattern.compile(this.nameRegExp));
+        patterns.add(Pattern.compile(this.idRegExp18));
+        patterns.add(Pattern.compile(this.idRegExp15));
+        patterns.add(Pattern.compile(this.bankCardRegExp));
+        patterns.add(Pattern.compile(this.mobileRegExp));
+        patterns.add(Pattern.compile(this.phoneRegExp));
+        patterns.add(Pattern.compile(this.addressRegExp));
+    }
+
-    public static Map mask(Map map) {
+    public Map mask(Map map, ArrayList whiteList) {
         map.forEach((k, v) -> {
-            String value = v.toString();
-            for (Pattern pattern : patterns) {
-                Matcher matcher = pattern.matcher(value);
-                if (matcher.find()) {
-                    String replaceStr = "";
-                    for (int i = 0; i < matcher.group().length(); i++) {
-                        replaceStr = replaceStr.concat("*");
-                    }
-                    value = value.replace(matcher.group(), replaceStr);
-                }
-            }
-            map.put(k, value);
+            if (!whiteList.contains(k)) {
+                String value = v.toString();
+                for (Pattern pattern : patterns) {
+                    Matcher matcher = pattern.matcher(value);
+                    if (matcher.find()) {
+                        String replaceStr = "";
+                        for (int i = 0; i < matcher.group().length(); i++) {
+                            replaceStr = replaceStr.concat("*");
+                        }
+                        value = value.replace(matcher.group(), replaceStr);
+                    }
+                }
+                map.put(k, value);
+            } else {
+                map.put(k, v);
+            }
         });
         return map;
     }

     public static void main(String[] args) {
-        MaskUtil maskUtil = new MaskUtil();
+        MaskUtil maskUtil = new MaskUtil("[\\u4e00-\\u9fa5]{1,20}|[a-zA-Z\\.\\s]{1,20}",
+                "((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}",
+                "(\\d{3,4}-)?\\d{6,8}", "\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*",
+                "[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}",
+                "[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])",
+                "([1-9]{1})(\\d{11}|\\d{15}|\\d{16}|\\d{17}|\\d{18})",
+                "([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}",
+                "((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)",
+                "([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}");
         Map map = new HashMap(DEFAULT_MAP_CAPACITY);
         map.put("姓名", "王海鹰");
-        map.put("身份证号", "372925199008075158");
-        map.put("手机号", "15000101879");
-        map.put("电话", "021-61341606");
-        map.put("邮箱", "wanghaiying@zork.com");
-        map.put("住址", "上海市浦东新区碧波路690号1弄");
-        map.put("住址2", "上海市浦东新区张江微电子港304-2室");
-        map.put("ip地址", "192.168.70.2");
-        map.put("mac地址", "3c-78-43-25-80-bd");
-        map.put("message", "王海鹰,372925199008075158#15000101879");
+        // map.put("身份证号", "372925199008075158");
+        // map.put("手机号", "15000101879");
+        // map.put("电话", "021-61341606");
+        // map.put("邮箱", "wanghaiying@zork.com");
+        // map.put("住址", "上海市浦东新区碧波路690号1弄");
+        // map.put("住址2", "上海市浦东新区张江微电子港304-2室");
+        // map.put("ip地址", "192.168.70.2");
+        // map.put("mac地址", "3c-78-43-25-80-bd");
+        // map.put("message", "王海鹰,372925199008075158#15000101879");
         map.put("messid", "0000011404342B32233DDCDA");
-        System.out.println(maskUtil.mask(map));
+        map.put("bsflag", "0000011404342B32233DDCDA");
+        map.put("normalFields", "13811110000-110101199003075517-上海市浦东新区张江微电子港-zorkdata@163.com-123456789-wanghaiying123-王海鹰-192.168.1.1-00-50-56-C0-00-08-6227002470170278192");
+        String[] fieldsWhiteListArray = "messid,fundid,custid,orgid,brhid,secuid,bankcode,market,ordersno,ordergroup,count,poststr,stkcode,bsflag,orderamt,price,qty,bankcode,tacode,ofcode,transacc,taacc".split(",");
+        ArrayList<String> fieldsWhiteList = new ArrayList<String>(fieldsWhiteListArray.length);
+        Collections.addAll(fieldsWhiteList, fieldsWhiteListArray);
+        System.out.println(maskUtil.mask(map, fieldsWhiteList));
     }
 }
 package com.zorkdata.datamask.util;
 import com.zorkdata.datamask.constant.ParamConstants;
-import com.zorkdata.datamask.domain.HadoopParam;
-import com.zorkdata.datamask.domain.KafkaParam;
+import com.zorkdata.datamask.constant.RegExpConstants;
+import com.zorkdata.datamask.domain.HDFSLogQueryParam;
+import com.zorkdata.datamask.domain.KafkaMsgQueryParam;
 import java.util.HashMap;
 import java.util.Map;
 /**
@@ -19,7 +21,7 @@ public class ParamUtils {
      *
      * @param conf
      */
-    public static HadoopParam initHadoopConf(Map conf) {
+    public static HDFSLogQueryParam initHadoopConf(Map conf) {
         String source = String.valueOf(conf.get(ParamConstants.SOURCE)).trim();
         String hdfsSrc = String.valueOf(conf.get(ParamConstants.HDFS_SRC)).trim();
         String hdfsDest = String.valueOf(conf.get(ParamConstants.HDFS_DEST)).trim();
@@ -27,10 +29,10 @@ public class ParamUtils {
         String date = String.valueOf(conf.get(ParamConstants.DATE)).trim();
         Long startTime = Long.parseLong(String.valueOf(conf.get(ParamConstants.START_TIME)).trim());
         Long endTime = Long.parseLong(String.valueOf(conf.get(ParamConstants.END_TIME)).trim());
-        return new HadoopParam(source, hdfsSrc, hdfsDest, core, date, startTime, endTime);
+        return new HDFSLogQueryParam(source, hdfsSrc, hdfsDest, core, date, startTime, endTime);
     }
-    public static KafkaParam initKafkaConf(Map conf) {
+    public static KafkaMsgQueryParam initKafkaConf(Map conf) {
         String servers = String.valueOf(conf.get(ParamConstants.SERVERS)).trim();
         String zookeeper = String.valueOf(conf.get(ParamConstants.ZOOKEEPER)).trim();
         String topic = String.valueOf(conf.get(ParamConstants.TOPIC)).trim();
@@ -39,6 +41,22 @@ public class ParamUtils {
         String date = String.valueOf(conf.get(ParamConstants.DATE)).trim();
         Long startTime = Long.parseLong(String.valueOf(conf.get(ParamConstants.START_TIME)).trim());
         Long endTime = Long.parseLong(String.valueOf(conf.get(ParamConstants.END_TIME)).trim());
-        return new KafkaParam(servers, zookeeper, topic, hdfsDest, core, date, startTime, endTime);
+        return new KafkaMsgQueryParam(servers, zookeeper, topic, hdfsDest, core, date, startTime, endTime);
     }
+    public static MaskUtil initMaskUtil(Map conf) {
+        Map regularExpressions = (HashMap) conf.get(RegExpConstants.REG_EXP);
+        String nameRegExp = String.valueOf(regularExpressions.get(RegExpConstants.NAME_REG_EXP)).trim();
+        String mobileRegExp = String.valueOf(regularExpressions.get(RegExpConstants.MOBILE_REG_EXP)).trim();
+        String phoneRegExp = String.valueOf(regularExpressions.get(RegExpConstants.PHONE_REG_EXP)).trim();
+        String emailRegExp = String.valueOf(regularExpressions.get(RegExpConstants.EMAIL_REG_EXP)).trim();
+        String idRegExp15 = String.valueOf(regularExpressions.get(RegExpConstants.ID15_REG_EXP)).trim();
+        String idRegExp18 = String.valueOf(regularExpressions.get(RegExpConstants.ID18_REG_EXP)).trim();
+        String bankCardRegExp = String.valueOf(regularExpressions.get(RegExpConstants.BANK_CARD_REG_EXP)).trim();
+        String addressRegExp = String.valueOf(regularExpressions.get(RegExpConstants.ADDRESS_REG_EXP)).trim();
+        String ipRegExp = String.valueOf(regularExpressions.get(RegExpConstants.IP_REG_EXP)).trim();
+        String macRegExp = String.valueOf(regularExpressions.get(RegExpConstants.MAC_REG_EXP)).trim();
+        return new MaskUtil(nameRegExp, mobileRegExp, phoneRegExp, emailRegExp, idRegExp15, idRegExp18,
+                bankCardRegExp, addressRegExp, ipRegExp, macRegExp);
+    }
 }
@@ -5,7 +5,7 @@ source: "hdfs"
 hdfs_src: "hdfs://cdh-2:8020/tmp/datawarehouse4/jzjy/kcbp_biz_log"
 # HDFS path the masked logs are written to; optional, defaults to the output directory under hdfs_src
-hdfs_dest: "hdfs://cdh-2:8020/tmp/datawarehouse/jzjy/kcbp_biz_log/output2/"
+hdfs_dest: "hdfs://cdh-2:8020/tmp/datawarehouse/jzjy/kcbp_biz_log/output4/"
 # Local path the masked results are downloaded to
 download_path: "/tmp"
@@ -23,25 +23,28 @@ start_time: 1601348849900
 end_time: 1601348850000
 # Whitelist of fields exempt from masking
-fieldsWhiteList: "fundid,custid,orgid,brhid,secuid,bankcode,market,ordersno,ordergroup,count,poststr,stkcode,bsflag,\
+fieldsWhiteList: "messid,fundid,custid,orgid,brhid,secuid,bankcode,market,ordersno,ordergroup,count,poststr,stkcode,bsflag,\
   orderamt,price,qty,bankcode,tacode,ofcode,transacc,taacc"
 # Regular expressions used for masking
 reg_exp:
   # Name regex
-  nameRegExp: "[\u4e00-\u9fa5]{1,20}|[a-zA-Z\\.\\s]{1,20}"
+  name: "[\u4e00-\u9fa5]{1,20}|[a-zA-Z\\.\\s]{1,20}"
   # Mobile number regex
-  mobileRegExp: "(13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}"
+  mobile: "((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}"
   # Landline number regex
-  phoneRegExp: "(\\d{3,4}-)?\\d{6,8}"
+  phone: "(\\d{3,4}-)?\\d{6,8}"
   # Email regex
-  emailRegExp: "\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*"
+  email: "\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*"
   # ID card number (15-digit) regex
-  idRegExp15: "[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}"
-  # ID card number (18-digit) regex
-  idRegExp18: "[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])"
+  id15: "[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}"
+  # ID card number (18-digit) regex
+  id18: "[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])"
+  # Bank card number regex
+  bank_card: "([1-9]{1})(\\d{11}|\\d{15}|\\d{16}|\\d{17}|\\d{18})"
   # Home address regex
-  addressRegExp: "([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}"
+  address: "([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}"
   # IP address regex
-  ipRegExp: "((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)"
+  ip: "((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)"
   # MAC address regex
-  macRegExp: "([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}"
\ No newline at end of file
+  mac: "([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}"
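How application.yml is parsed into the conf map is outside this diff; a minimal sketch, assuming SnakeYAML as the loader (an assumption, not something this commit shows):

import org.yaml.snakeyaml.Yaml;

import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Map;

public class ConfLoadSketch {
    public static void main(String[] args) throws Exception {
        try (InputStream in = new FileInputStream("/opt/TransactionLogMask/application.yml")) {
            Map<String, Object> conf = new Yaml().load(in);
            // reg_exp arrives as a nested map keyed by the short names
            // (name, mobile, ..., bank_card) introduced in this commit
            Map regExp = (Map) conf.get("reg_exp");
            System.out.println(regExp.get("mobile"));
        }
    }
}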