Commit aa763a65 authored by 王海鹰's avatar 王海鹰

白名单逻辑优化

正则表达式完善
parent cee08142
Pipeline #14431 failed with stages
in 30 seconds
...@@ -23,7 +23,7 @@ public class TransactionLogMask { ...@@ -23,7 +23,7 @@ public class TransactionLogMask {
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
if (args.length != PARAM_LENGTH) { if (args.length != PARAM_LENGTH) {
String error = "参数缺失,请输入配置文件,例如: --conf --conf /opt/TransactionLogMask/application.yml"; String error = "参数缺失,请输入配置文件,例如: --conf /opt/TransactionLogMask/application.yml";
logger.error(error); logger.error(error);
throw new RuntimeException(error); throw new RuntimeException(error);
} }
......
...@@ -24,9 +24,5 @@ public interface ParamConstants { ...@@ -24,9 +24,5 @@ public interface ParamConstants {
String HDFS ="hdfs"; String HDFS ="hdfs";
String KAFKA ="kafka"; String KAFKA ="kafka";
String NAME_REG_EXP = "name_reg_exp"; String FIELDS_WHITE_LIST = "fieldsWhiteList";
String MOBILE_REG_EXP = "mobile_reg_exp";
String PHONE_REG_EXP = "phone_reg_exp";
String EMAIL_REG_EXP = "email_reg_exp";
} }
package com.zorkdata.datamask.constant;
/**
* Description :
*
* @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
* Date : Create in 2020/10/20 15:32
*/
public interface RegExpConstants {
  • 🚫 Move constants to a class or enum. 📘

Please register or sign in to reply
String REG_EXP = "reg_exp";
String NAME_REG_EXP = "name";
String MOBILE_REG_EXP = "mobile";
String PHONE_REG_EXP = "phone";
String EMAIL_REG_EXP = "email";
String ID15_REG_EXP = "id15";
String ID18_REG_EXP = "id18";
String BANK_CARD_REG_EXP = "bank_card";
String ADDRESS_REG_EXP = "address";
String IP_REG_EXP = "ip";
String MAC_REG_EXP = "mac";
}
...@@ -2,6 +2,8 @@ package com.zorkdata.datamask.domain; ...@@ -2,6 +2,8 @@ package com.zorkdata.datamask.domain;
import lombok.Data; import lombok.Data;
import java.io.Serializable;
/** /**
* @author 谢森 * @author 谢森
* @Description 参数实体类 * @Description 参数实体类
...@@ -9,7 +11,10 @@ import lombok.Data; ...@@ -9,7 +11,10 @@ import lombok.Data;
* @Date 2020/10/21 14:33 * @Date 2020/10/21 14:33
*/ */
@Data @Data
public class HadoopParam { public class HDFSLogQueryParam implements Serializable {
  • 【HDFSLogQueryParam】不符合UpperCamelCase命名风格 📘

Please register or sign in to reply
private static final long serialVersionUID = 1L;
private String source; private String source;
private String hdfsSrc; private String hdfsSrc;
private String hdfsDest; private String hdfsDest;
...@@ -18,8 +23,8 @@ public class HadoopParam { ...@@ -18,8 +23,8 @@ public class HadoopParam {
private Long startTime; private Long startTime;
private Long endTime; private Long endTime;
public HadoopParam(String source, String hdfsSrc, String hdfsDest, String core, String date, Long startTime, public HDFSLogQueryParam(String source, String hdfsSrc, String hdfsDest, String core, String date, Long startTime,
Long endTime) { Long endTime) {
this.source = source; this.source = source;
this.hdfsSrc = hdfsSrc; this.hdfsSrc = hdfsSrc;
this.hdfsDest = hdfsDest; this.hdfsDest = hdfsDest;
...@@ -28,5 +33,4 @@ public class HadoopParam { ...@@ -28,5 +33,4 @@ public class HadoopParam {
this.startTime = startTime; this.startTime = startTime;
this.endTime = endTime; this.endTime = endTime;
} }
} }
...@@ -2,6 +2,8 @@ package com.zorkdata.datamask.domain; ...@@ -2,6 +2,8 @@ package com.zorkdata.datamask.domain;
import lombok.Data; import lombok.Data;
import java.io.Serializable;
/** /**
* @author 谢森 * @author 谢森
* @Description kafka 参数实体类 * @Description kafka 参数实体类
...@@ -9,7 +11,10 @@ import lombok.Data; ...@@ -9,7 +11,10 @@ import lombok.Data;
* @Date 2020/10/21 15:07 * @Date 2020/10/21 15:07
*/ */
@Data @Data
public class KafkaParam { public class KafkaMsgQueryParam implements Serializable {
private static final long serialVersionUID = 1L;
private String servers; private String servers;
private String zookeeper; private String zookeeper;
private String topic; private String topic;
...@@ -19,8 +24,8 @@ public class KafkaParam { ...@@ -19,8 +24,8 @@ public class KafkaParam {
private Long startTime; private Long startTime;
private Long endTime; private Long endTime;
public KafkaParam(String servers, String zookeeper, String topic, String hdfsDest, String core, String date, public KafkaMsgQueryParam(String servers, String zookeeper, String topic, String hdfsDest, String core, String date,
  • Constructor has 8 parameters, which is greater than 7 authorized. 📘

Please register or sign in to reply
Long startTime, Long endTime) { Long startTime, Long endTime) {
this.servers = servers; this.servers = servers;
this.zookeeper = zookeeper; this.zookeeper = zookeeper;
this.topic = topic; this.topic = topic;
......
...@@ -2,9 +2,10 @@ package com.zorkdata.datamask.hadoop; ...@@ -2,9 +2,10 @@ package com.zorkdata.datamask.hadoop;
import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference; import com.alibaba.fastjson.TypeReference;
import com.zorkdata.datamask.constant.ParamConstants;
import com.zorkdata.datamask.constant.StrConstants; import com.zorkdata.datamask.constant.StrConstants;
import com.zorkdata.datamask.domain.LogData; import com.zorkdata.datamask.domain.LogData;
import com.zorkdata.datamask.domain.HadoopParam; import com.zorkdata.datamask.domain.HDFSLogQueryParam;
import com.zorkdata.datamask.domain.TransactionLog; import com.zorkdata.datamask.domain.TransactionLog;
import com.zorkdata.datamask.util.DateUtils; import com.zorkdata.datamask.util.DateUtils;
import com.zorkdata.datamask.util.MaskUtil; import com.zorkdata.datamask.util.MaskUtil;
...@@ -38,14 +39,15 @@ import java.io.IOException; ...@@ -38,14 +39,15 @@ import java.io.IOException;
import java.net.URI; import java.net.URI;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/** /**
* @author 谢森 * Description: hdfs日志文件脱敏
* @Description hadoop 文件数据脱敏 *
* @Email xiesen310@163.com * @author: wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
* @Date 2020/10/21 14:29 * Date: Create in 2020/9/23 9:30
*/ */
public class HadoopMask { public class HadoopMask {
private static final Logger logger = LoggerFactory.getLogger(HadoopMask.class); private static final Logger logger = LoggerFactory.getLogger(HadoopMask.class);
...@@ -61,12 +63,18 @@ public class HadoopMask { ...@@ -61,12 +63,18 @@ public class HadoopMask {
env.setParallelism(1); env.setParallelism(1);
JobConf jobConf = new JobConf(); JobConf jobConf = new JobConf();
jobConf.set("avro.output.schema", TransactionLog.SCHEMA$.toString(true)); jobConf.set("avro.output.schema", TransactionLog.SCHEMA$.toString(true));
HadoopParam hadoopParam = ParamUtils.initHadoopConf(conf); HDFSLogQueryParam hdfsLogQueryParam = ParamUtils.initHadoopConf(conf);
ParameterTool parameterTool = ParameterTool.fromMap(conf); ParameterTool parameterTool = ParameterTool.fromMap(conf);
env.getConfig().setGlobalJobParameters(parameterTool); env.getConfig().setGlobalJobParameters(parameterTool);
List<String> logFiles = filterHdfsLogFiles(hadoopParam.getHdfsSrc(), hadoopParam.getDate(), MaskUtil maskUtil = ParamUtils.initMaskUtil(conf);
hadoopParam.getStartTime(), hadoopParam.getEndTime());
String[] fieldsWhiteListArray = String.valueOf(conf.get(ParamConstants.FIELDS_WHITE_LIST)).trim().split(",");
ArrayList< String> fieldsWhiteList = new ArrayList<String>(fieldsWhiteListArray.length);
  • 🔽 Replace the type specification in this constructor call with the diamond operator ("<>"). (sonar.java.source not set. Assuming 7 or greater.) 📘

Please register or sign in to reply
Collections.addAll(fieldsWhiteList, fieldsWhiteListArray);
List<String> logFiles = filterHdfsLogFiles(hdfsLogQueryParam.getHdfsSrc(), hdfsLogQueryParam.getDate(),
hdfsLogQueryParam.getStartTime(), hdfsLogQueryParam.getEndTime());
for (String logFile : logFiles) { for (String logFile : logFiles) {
/** /**
...@@ -88,14 +96,14 @@ public class HadoopMask { ...@@ -88,14 +96,14 @@ public class HadoopMask {
new TypeReference<LogData>() { new TypeReference<LogData>() {
}); });
//根据日志事件的核心信息做过滤 //根据日志事件的核心信息做过滤
if (null != hadoopParam.getCore() && logData.getDimensions().get("hostname").indexOf("c9") > -1) { if (null != hdfsLogQueryParam.getCore() && logData.getDimensions().get("hostname").indexOf("c9") > -1 ) {
//根据日志事件的timestamp做过滤 //根据日志事件的timestamp做过滤
Long timestamp = DateUtils.utc2timestamp(logData.getTimestamp()); Long timestamp = DateUtils.utc2timestamp(logData.getTimestamp());
boolean flag = null != timestamp && timestamp > hadoopParam.getStartTime() boolean flag = null != timestamp && timestamp > hdfsLogQueryParam.getStartTime()
&& timestamp < hadoopParam.getEndTime() || Boolean.TRUE; && timestamp < hdfsLogQueryParam.getEndTime() || Boolean.TRUE;
if (flag) { if (flag) {
Map maskResult = MaskUtil.mask(logData.getNormalFields()); Map maskResult = maskUtil.mask(logData.getNormalFields(), fieldsWhiteList);
logData.setNormalFields(maskResult); logData.setNormalFields(maskResult);
collector.collect(logData); collector.collect(logData);
} }
...@@ -105,7 +113,7 @@ public class HadoopMask { ...@@ -105,7 +113,7 @@ public class HadoopMask {
// 获取目标hdfs的输出目录 // 获取目标hdfs的输出目录
String logFileName = String logFileName =
logFile.split(StrConstants.FILE_SEPARATOR)[logFile.split(StrConstants.FILE_SEPARATOR).length - 1]; logFile.split(StrConstants.FILE_SEPARATOR)[logFile.split(StrConstants.FILE_SEPARATOR).length - 1];
String filePath = hadoopParam.getHdfsSrc() + logFileName.replace(StrConstants.AVRO_SUFFIX, String filePath = hdfsLogQueryParam.getHdfsDest() + logFileName.replace(StrConstants.AVRO_SUFFIX,
StrConstants.EMPTY_STR); StrConstants.EMPTY_STR);
HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat<>(new AvroOutputFormat(), jobConf); HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat<>(new AvroOutputFormat(), jobConf);
FileOutputFormat.setOutputPath(jobConf, new Path(filePath)); FileOutputFormat.setOutputPath(jobConf, new Path(filePath));
......
package com.zorkdata.datamask.kafka; package com.zorkdata.datamask.kafka;
import com.zorkdata.datamask.domain.HadoopParam; import com.zorkdata.datamask.domain.KafkaMsgQueryParam;
import com.zorkdata.datamask.domain.KafkaParam;
import com.zorkdata.datamask.util.ParamUtils; import com.zorkdata.datamask.util.ParamUtils;
import org.apache.flink.api.common.serialization.SimpleStringSchema; import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.utils.ParameterTool; import org.apache.flink.api.java.utils.ParameterTool;
...@@ -13,7 +12,6 @@ import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; ...@@ -13,7 +12,6 @@ import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.time.ZoneId; import java.time.ZoneId;
import java.util.Date;
import java.util.Map; import java.util.Map;
import java.util.Properties; import java.util.Properties;
...@@ -35,14 +33,13 @@ public class KafkaMask { ...@@ -35,14 +33,13 @@ public class KafkaMask {
env.setParallelism(1); env.setParallelism(1);
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
KafkaParam kafkaParam = ParamUtils.initKafkaConf(conf); KafkaMsgQueryParam kafkaMsgQueryParam = ParamUtils.initKafkaConf(conf);
ParameterTool parameterTool = ParameterTool.fromMap(conf); ParameterTool parameterTool = ParameterTool.fromMap(conf);
env.getConfig().setGlobalJobParameters(parameterTool); env.getConfig().setGlobalJobParameters(parameterTool);
Properties props = new Properties(); Properties props = new Properties();
props.put("bootstrap.servers", kafkaParam.getServers()); props.put("bootstrap.servers", kafkaMsgQueryParam.getServers());
props.put("zookeeper.connect", kafkaParam.getZookeeper()); props.put("zookeeper.connect", kafkaMsgQueryParam.getZookeeper());
props.put("group.id", "group1"); props.put("group.id", "group1");
props.put("enable.auto.commit", false); props.put("enable.auto.commit", false);
props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
...@@ -50,11 +47,11 @@ public class KafkaMask { ...@@ -50,11 +47,11 @@ public class KafkaMask {
props.put("auto.offset.reset", "earliest"); props.put("auto.offset.reset", "earliest");
props.put("max.poll.records", 1000); props.put("max.poll.records", 1000);
SingleOutputStreamOperator<String> dataStreamSource = SingleOutputStreamOperator<String> dataStreamSource =
env.addSource(new FlinkKafkaConsumer<>(kafkaParam.getTopic(), env.addSource(new FlinkKafkaConsumer<>(kafkaMsgQueryParam.getTopic(),
new SimpleStringSchema(), props)).setParallelism(1); new SimpleStringSchema(), props)).setParallelism(1);
// TODO 根据date、startTime、endTime过滤 // TODO 根据date、startTime、endTime过滤
BucketingSink<String> hdfsSink = new BucketingSink<>(kafkaParam.getHdfsDest()); BucketingSink<String> hdfsSink = new BucketingSink<>(kafkaMsgQueryParam.getHdfsDest());
//创建一个按照时间创建目录的bucketer,默认是yyyy-MM-dd--HH,时区默认是美国时间。这里我都改了,一天创建一次目录,上海时间 //创建一个按照时间创建目录的bucketer,默认是yyyy-MM-dd--HH,时区默认是美国时间。这里我都改了,一天创建一次目录,上海时间
hdfsSink.setBucketer(new DateTimeBucketer<String>("yyyy-MM-dd", ZoneId.of("Asia/Shanghai"))); hdfsSink.setBucketer(new DateTimeBucketer<String>("yyyy-MM-dd", ZoneId.of("Asia/Shanghai")));
//设置每个文件的最大大小 ,默认是384M(1024 * 1024 * 384) //设置每个文件的最大大小 ,默认是384M(1024 * 1024 * 384)
......
package com.zorkdata.datamask.util;
/**
 * Description : Holder for the masking regular expressions loaded from configuration.
 *
 * NOTE(review): every field is private with no constructor, getter, or setter,
 * so this class can neither be populated nor read as written. It appears to be
 * dead code superseded by the fields MaskUtil carries directly — confirm and
 * remove if unused (SonarQube also flags all fields as unused).
 *
 * @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
 * Date : Create in 2020/10/19 16:43
 */
public class MaskRegexConfig {
// Comma-separated field names exempt from masking — presumably mirrors the
// "fieldsWhiteList" config key; verify against application.yml.
private String fieldsWhiteList;
// Person-name regular expression.
private String nameRegExp;
// Mobile-phone-number regular expression.
private String mobileRegExp;
// Landline-phone-number regular expression.
private String phoneRegExp;
// E-mail-address regular expression.
private String emailRegExp;
// 15-digit national ID regular expression.
private String idRegExp15;
// 18-digit national ID regular expression.
private String idRegExp18;
// Home-address regular expression.
private String addressRegExp;
// IP-address regular expression.
private String ipRegExp;
// MAC-address regular expression.
private String macRegExp;
// NOTE(review): no bankCardRegExp field here, although MaskUtil and the YAML
// config both define a bank-card pattern — confirm whether it was omitted.
}
package com.zorkdata.datamask.util; package com.zorkdata.datamask.util;
import java.util.ArrayList; import java.io.Serializable;
import java.util.HashMap; import java.util.*;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
/** /**
* Description : * Description:
* *
* @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>) * @author: wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
* Date : Create in 2020/9/23 9:30 * Date: Create in 2020/9/23 9:30
* RegularExpression
*/ */
public class MaskUtil { public class MaskUtil implements Serializable {
private static final long serialVersionUID = 1L;
public static final int DEFAULT_MAP_CAPACITY = 16; public static final int DEFAULT_MAP_CAPACITY = 16;
private MaskRegexConfig maskRegexConfig;
/** /**
* 姓名正则 * 姓名正则
*/ */
static Pattern namePattern = Pattern.compile("([\\u4e00-\\u9fa5]{1,20}|[a-zA-Z\\.\\s]{1,20})"); private String nameRegExp;
/** /**
* 手机号正则 * 手机号正则
*/ */
static Pattern mobilePattern = Pattern.compile("((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))" + private String mobileRegExp;
"\\d{8}");
/** /**
* 电话号码正则 * 电话号码正则
*/ */
static Pattern phonePattern = Pattern.compile("(\\d{3,4}-)?\\d{6,8}"); private String phoneRegExp;
/** /**
* 邮箱正则 * 邮箱正则
*/ */
static Pattern emailPattern = Pattern.compile("\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*"); private String emailRegExp;
/** /**
* 身份证号码(15位)正则 * 身份证号码(15位)正则
*/ */
static Pattern idPattern15 = Pattern.compile("[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}"); private String idRegExp15;
/** /**
* 身份证号码(18位)正则 * 身份证号码(18位)正则
*/ */
static Pattern idPattern18 = Pattern.compile("[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}" + private String idRegExp18;
"([0-9Xx])");
/**
* 银行卡号码正则
*/
private String bankCardRegExp;
/** /**
* 家庭住址正则 * 家庭住址正则
*/ */
static Pattern addressPattern = Pattern.compile("([\\u4E00-\\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}"); private String addressRegExp;
/** /**
* ip地址正则 * ip地址正则
* // static Pattern ipPattern = Pattern.compile("^((\\d|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5]|[*])\\.){3}
* // (\\d|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5]|[*])$");
*/ */
static Pattern ipPattern = Pattern.compile("((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}" + private String ipRegExp;
"(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)");
/** /**
* mac地址正则 * mac地址正则
*/ */
static Pattern macPattern = Pattern.compile("([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}"); private String macRegExp;
static List<Pattern> patterns = new ArrayList<Pattern>() {{ List<Pattern> patterns = new ArrayList<Pattern>() {{
  • 🚫 Make "patterns" private or transient. 📘 Move the contents of this initializer to a standard constructor or to field initializers. 📘 Either remove or fill this block of code. 📘 🔽 Use another way to initialize this instance. 📘

Please register or sign in to reply
add(macPattern);
add(emailPattern);
add(ipPattern);
add(namePattern);
add(idPattern18);
add(idPattern15);
add(mobilePattern);
add(phonePattern);
add(addressPattern);
}}; }};
public static Map mask(Map map) { public MaskUtil(String nameRegExp, String mobileRegExp, String phoneRegExp, String emailRegExp, String idRegExp15, String idRegExp18, String bankCardRegExp, String addressRegExp, String ipRegExp, String macRegExp) {
  • Constructor has 10 parameters, which is greater than 7 authorized. 📘

Please register or sign in to reply
this.nameRegExp = nameRegExp;
this.mobileRegExp = mobileRegExp;
this.phoneRegExp = phoneRegExp;
this.emailRegExp = emailRegExp;
this.idRegExp15 = idRegExp15;
this.idRegExp18 = idRegExp18;
this.bankCardRegExp = bankCardRegExp;
this.addressRegExp = addressRegExp;
this.ipRegExp = ipRegExp;
this.macRegExp = macRegExp;
}
public Map mask(Map map, ArrayList whiteList) {
  • 🔽 The type of the "whiteList" object should be an interface such as "List" rather than the implementation "ArrayList". 📘

Please register or sign in to reply
patterns.add(Pattern.compile(this.nameRegExp));
patterns.add(Pattern.compile(this.macRegExp));
patterns.add(Pattern.compile(this.emailRegExp));
patterns.add(Pattern.compile(this.ipRegExp));
patterns.add(Pattern.compile(this.nameRegExp));
patterns.add(Pattern.compile(this.idRegExp18));
patterns.add(Pattern.compile(this.idRegExp15));
patterns.add(Pattern.compile(this.bankCardRegExp));
patterns.add(Pattern.compile(this.mobileRegExp));
patterns.add(Pattern.compile(this.phoneRegExp));
patterns.add(Pattern.compile(this.addressRegExp));
map.forEach((k, v) -> { map.forEach((k, v) -> {
String value = v.toString(); if (!whiteList.contains(k)) {
for (Pattern pattern : patterns) { String value = v.toString();
Matcher matcher = pattern.matcher(value); for (Pattern pattern : patterns) {
if (matcher.find()) { Matcher matcher = pattern.matcher(value);
String replaceStr = ""; if (matcher.find()) {
for (int i = 0; i < matcher.group().length(); i++) { String replaceStr = "";
replaceStr = replaceStr.concat("*"); for (int i = 0; i < matcher.group().length(); i++) {
replaceStr = replaceStr.concat("*");
}
value = value.replace(matcher.group(), replaceStr);
} }
value = value.replace(matcher.group(), replaceStr);
} }
map.put(k, value);
} else {
map.put(k, v);
} }
map.put(k, value);
}); });
return map; return map;
} }
public static void main(String[] args) { public static void main(String[] args) {
MaskUtil maskUtil = new MaskUtil(); MaskUtil maskUtil = new MaskUtil("[\\u4e00-\\u9fa5]{1,20}|[a-zA-Z\\\\.\\\\s]{1,20}", "((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}",
"(\\d{3,4}-)?\\d{6,8}", "\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*", "[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}",
"[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])", "([1-9]{1})(\\d{11}|\\d{15}|\\d{16}|\\d{17}|\\d{18})",
"([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}", "((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)",
"([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}");
Map map = new HashMap(DEFAULT_MAP_CAPACITY); Map map = new HashMap(DEFAULT_MAP_CAPACITY);
map.put("姓名", "王海鹰"); map.put("姓名", "王海鹰");
map.put("身份证号", "372925199008075158"); // map.put("身份证号", "372925199008075158");
  • This block of commented-out lines of code should be removed. 📘

Please register or sign in to reply
map.put("手机号", "15000101879"); // map.put("手机号", "15000101879");
map.put("电话", "021-61341606"); // map.put("电话", "021-61341606");
map.put("邮箱", "wanghaiying@zork.com"); // map.put("邮箱", "wanghaiying@zork.com");
map.put("住址", "上海市浦东新区碧波路690号1弄"); // map.put("住址", "上海市浦东新区碧波路690号1弄");
map.put("住址2", "上海市浦东新区张江微电子港304-2室"); // map.put("住址2", "上海市浦东新区张江微电子港304-2室");
map.put("ip地址", "192.168.70.2"); // map.put("ip地址", "192.168.70.2");
map.put("mac地址", "3c-78-43-25-80-bd"); // map.put("mac地址", "3c-78-43-25-80-bd");
map.put("message", "王海鹰,372925199008075158#15000101879"); // map.put("message", "王海鹰,372925199008075158#15000101879");
  • 及时清理不再使用的代码段或配置信息。 📘

Please register or sign in to reply
map.put("messid", "0000011404342B32233DDCDA"); map.put("messid", "0000011404342B32233DDCDA");
System.out.println(maskUtil.mask(map)); map.put("bsflag", "0000011404342B32233DDCDA");
map.put("normalFields", "13811110000-110101199003075517-上海市浦东新区张江微电子港-zorkdata@163.com-123456789-wanghaiying123-王海鹰-192.168.1.1-00-50-56-C0-00-08-6227002470170278192");
String[] fieldsWhiteListArray = "messid,fundid,custid,orgid,brhid,secuid,bankcode,market,ordersno,ordergroup,count,poststr,stkcode,bsflag,orderamt,price,qty,bankcode,tacode,ofcode,transacc,taacc".split(",");
ArrayList< String> fieldsWhiteList = new ArrayList<String>(fieldsWhiteListArray.length);
  • 🔽 Replace the type specification in this constructor call with the diamond operator ("<>"). (sonar.java.source not set. Assuming 7 or greater.) 📘

Please register or sign in to reply
Collections.addAll(fieldsWhiteList, fieldsWhiteListArray);
System.out.println(maskUtil.mask(map, fieldsWhiteList));
  • Replace this use of System.out or System.err by a logger. 📘

Please register or sign in to reply
} }
} }
package com.zorkdata.datamask.util; package com.zorkdata.datamask.util;
import com.zorkdata.datamask.constant.ParamConstants; import com.zorkdata.datamask.constant.ParamConstants;
import com.zorkdata.datamask.domain.HadoopParam; import com.zorkdata.datamask.constant.RegExpConstants;
import com.zorkdata.datamask.domain.KafkaParam; import com.zorkdata.datamask.domain.HDFSLogQueryParam;
import com.zorkdata.datamask.domain.KafkaMsgQueryParam;
import java.util.HashMap;
import java.util.Map; import java.util.Map;
/** /**
...@@ -19,7 +21,7 @@ public class ParamUtils { ...@@ -19,7 +21,7 @@ public class ParamUtils {
* *
* @param conf * @param conf
*/ */
public static HadoopParam initHadoopConf(Map conf) { public static HDFSLogQueryParam initHadoopConf(Map conf) {
String source = String.valueOf(conf.get(ParamConstants.SOURCE)).trim(); String source = String.valueOf(conf.get(ParamConstants.SOURCE)).trim();
String hdfsSrc = String.valueOf(conf.get(ParamConstants.HDFS_SRC)).trim(); String hdfsSrc = String.valueOf(conf.get(ParamConstants.HDFS_SRC)).trim();
String hdfsDest = String.valueOf(conf.get(ParamConstants.HDFS_DEST)).trim(); String hdfsDest = String.valueOf(conf.get(ParamConstants.HDFS_DEST)).trim();
...@@ -27,10 +29,10 @@ public class ParamUtils { ...@@ -27,10 +29,10 @@ public class ParamUtils {
String date = String.valueOf(conf.get(ParamConstants.DATE)).trim(); String date = String.valueOf(conf.get(ParamConstants.DATE)).trim();
Long startTime = Long.parseLong(String.valueOf(conf.get(ParamConstants.START_TIME)).trim()); Long startTime = Long.parseLong(String.valueOf(conf.get(ParamConstants.START_TIME)).trim());
Long endTime = Long.parseLong(String.valueOf(conf.get(ParamConstants.END_TIME)).trim()); Long endTime = Long.parseLong(String.valueOf(conf.get(ParamConstants.END_TIME)).trim());
return new HadoopParam(source, hdfsSrc, hdfsDest, core, date, startTime, endTime); return new HDFSLogQueryParam(source, hdfsSrc, hdfsDest, core, date, startTime, endTime);
} }
public static KafkaParam initKafkaConf(Map conf) { public static KafkaMsgQueryParam initKafkaConf(Map conf) {
String servers = String.valueOf(conf.get(ParamConstants.SERVERS)).trim(); String servers = String.valueOf(conf.get(ParamConstants.SERVERS)).trim();
String zookeeper = String.valueOf(conf.get(ParamConstants.ZOOKEEPER)).trim(); String zookeeper = String.valueOf(conf.get(ParamConstants.ZOOKEEPER)).trim();
String topic = String.valueOf(conf.get(ParamConstants.TOPIC)).trim(); String topic = String.valueOf(conf.get(ParamConstants.TOPIC)).trim();
...@@ -39,6 +41,22 @@ public class ParamUtils { ...@@ -39,6 +41,22 @@ public class ParamUtils {
String date = String.valueOf(conf.get(ParamConstants.DATE)).trim(); String date = String.valueOf(conf.get(ParamConstants.DATE)).trim();
Long startTime = Long.parseLong(String.valueOf(conf.get(ParamConstants.START_TIME)).trim()); Long startTime = Long.parseLong(String.valueOf(conf.get(ParamConstants.START_TIME)).trim());
Long endTime = Long.parseLong(String.valueOf(conf.get(ParamConstants.END_TIME)).trim()); Long endTime = Long.parseLong(String.valueOf(conf.get(ParamConstants.END_TIME)).trim());
return new KafkaParam(servers, zookeeper, topic, hdfsDest, core, date, startTime, endTime); return new KafkaMsgQueryParam(servers, zookeeper, topic, hdfsDest, core, date, startTime, endTime);
}
public static MaskUtil initMaskUtil(Map conf) {
Map regularExpressions = (HashMap)conf.get(RegExpConstants.REG_EXP);
String nameRegExp = String.valueOf(regularExpressions.get(RegExpConstants.NAME_REG_EXP)).trim();
String mobileRegExp = String.valueOf(regularExpressions.get(RegExpConstants.MOBILE_REG_EXP)).trim();
String phoneRegExp = String.valueOf(regularExpressions.get(RegExpConstants.PHONE_REG_EXP)).trim();
String emailRegExp = String.valueOf(regularExpressions.get(RegExpConstants.EMAIL_REG_EXP)).trim();
String idRegExp15 = String.valueOf(regularExpressions.get(RegExpConstants.ID15_REG_EXP)).trim();
String idRegExp18 = String.valueOf(regularExpressions.get(RegExpConstants.ID18_REG_EXP)).trim();
String bankCardRegExp = String.valueOf(regularExpressions.get(RegExpConstants.BANK_CARD_REG_EXP)).trim();
String addressRegExp = String.valueOf(regularExpressions.get(RegExpConstants.ADDRESS_REG_EXP)).trim();
String ipRegExp = String.valueOf(regularExpressions.get(RegExpConstants.IP_REG_EXP)).trim();
String macRegExp = String.valueOf(regularExpressions.get(RegExpConstants.MAC_REG_EXP)).trim();
return new MaskUtil(nameRegExp, mobileRegExp, phoneRegExp, emailRegExp, idRegExp15, idRegExp18, bankCardRegExp, addressRegExp, ipRegExp, macRegExp);
} }
} }
...@@ -5,7 +5,7 @@ source: "hdfs" ...@@ -5,7 +5,7 @@ source: "hdfs"
hdfs_src: "hdfs://cdh-2:8020/tmp/datawarehouse4/jzjy/kcbp_biz_log" hdfs_src: "hdfs://cdh-2:8020/tmp/datawarehouse4/jzjy/kcbp_biz_log"
# hdfs日志写入地址,非必传,默认写到hdfs-src目录下的output目录下 # hdfs日志写入地址,非必传,默认写到hdfs-src目录下的output目录下
hdfs_dest: "hdfs://cdh-2:8020/tmp/datawarehouse/jzjy/kcbp_biz_log/output2/" hdfs_dest: "hdfs://cdh-2:8020/tmp/datawarehouse/jzjy/kcbp_biz_log/output4/"
# 脱敏结果下载到的本地路径 # 脱敏结果下载到的本地路径
download_path: "/tmp" download_path: "/tmp"
...@@ -23,25 +23,28 @@ start_time: 1601348849900 ...@@ -23,25 +23,28 @@ start_time: 1601348849900
end_time: 1601348850000 end_time: 1601348850000
# 不做脱敏的字段白名单 # 不做脱敏的字段白名单
fieldsWhiteList: "fundid,custid,orgid,brhid,secuid,bankcode,market,ordersno,ordergroup,count,poststr,stkcode,bsflag,\ fieldsWhiteList: "messid,fundid,custid,orgid,brhid,secuid,bankcode,market,ordersno,ordergroup,count,poststr,stkcode,bsflag,\
orderamt,price,qty,bankcode,tacode,ofcode,transacc,taacc" orderamt,price,qty,bankcode,tacode,ofcode,transacc,taacc"
# 脱敏用的正则表达式 # 脱敏用的正则表达式
reg_exp:
# 姓名正则 # 姓名正则
nameRegExp: "[\u4e00-\u9fa5]{1,20}|[a-zA-Z\\.\\s]{1,20}" name: "[\u4e00-\u9fa5]{1,20}|[a-zA-Z\\.\\s]{1,20}"
# 手机号正则 # 手机号正则
mobileRegExp: "(13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}" mobile: "((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}"
# 电话号码正则 # 电话号码正则
phoneRegExp: "(\\d{3,4}-)?\\d{6,8}" phone: "(\\d{3,4}-)?\\d{6,8}"
# 邮箱正则 # 邮箱正则
emailRegExp: "\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*" email: "\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*"
# 身份证号码(15位)正则 # 身份证号码(15位)正则
idRegExp15: "[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}" id15: "[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}"
#身份证号码(18位)正则 # 身份证号码(18位)正则
idRegExp18: "[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])" id18: "[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])"
# 银行卡号
bank_card: "([1-9]{1})(\\d{11}|\\d{15}|\\d{16}|\\d{17}|\\d{18})" # 脱敏用的正则表达式中唯一使用单反斜杠的模式,已改为 \\d 与其余模式一致(YAML 双引号字符串中 \d 是非法转义)
# 家庭住址正则 # 家庭住址正则
addressRegExp: "([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}" address: "([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}"
# ip地址正则 # ip地址正则
ipRegExp: "((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)" ip: "((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)"
# mac地址正则 # mac地址正则
macRegExp: "([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}" mac: "([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}"
\ No newline at end of file
  • SonarQube analysis reported 151 issues

    • 🚫 19 critical
    • 83 major
    • 🔽 48 minor
    • 1 info

    Watch the comments in this conversation to review them.

    Top 30 extra issues

    Note: The following issues were found on lines that were not modified in the commit. Because these issues can't be reported as line comments, they are summarized here:

    1. 🚫 Move constants to a class or enum. 📘
    2. 🚫 Move constants to a class or enum. 📘
    3. 🚫 Add a nested comment explaining why this method is empty, throw an UnsupportedOperationException or complete the implementation. 📘
    4. 🚫 Change this "try" to a try-with-resources. (sonar.java.source not set. Assuming 7 or greater.) 📘
    5. 🚫 Refactor this code to not throw exceptions in finally blocks. 📘
    6. 🚫 Refactor this code to not throw exceptions in finally blocks. 📘
    7. 🚫 Define a constant instead of duplicating this literal "序列化失败" 15 times. 📘
    8. 🚫 Define a constant instead of duplicating this literal " {\n" 7 times. 📘
    9. 🚫 [Define a constant instead of duplicating this literal " "type": \n" 7 times. 📘
    10. 🚫 Define a constant instead of duplicating this literal " "string",\n" 4 times. 📘
    11. 🚫 Define a constant instead of duplicating this literal " "null"\n" 4 times. 📘
    12. 🚫 [Define a constant instead of duplicating this literal " ]\n" 7 times.](https://git.zorkdata.com/wanghaiying/transactionlogmask/blob/aa763a65a73ff81969cf0be645465505d846bd66/src/main/java/com/zorkdata/datamask/util/avro/LogAvroMacroDef.java#L20) 📘
    13. 🚫 Define a constant instead of duplicating this literal " },\n" 6 times. 📘
    14. 🚫 Define a constant instead of duplicating this literal " "null",\n" 3 times. 📘
    15. 🚫 Define a constant instead of duplicating this literal " {\n" 3 times. 📘
    16. 🚫 Define a constant instead of duplicating this literal " "type": "map",\n" 3 times. 📘
    17. 🚫 Define a constant instead of duplicating this literal " }\n" 3 times. 📘
    18. Define and throw a dedicated exception instead of using a generic one. 📘
    19. Remove this unused "source" private field. 📘
    20. Remove this unused "hdfsSrc" private field. 📘
    21. Remove this unused "hdfsDest" private field. 📘
    22. Remove this unused "core" private field. 📘
    23. Remove this unused "date" private field. 📘
    24. Remove this unused "startTime" private field. 📘
    25. Remove this unused "endTime" private field. 📘
    26. Remove this unused "servers" private field. 📘
    27. Remove this unused "zookeeper" private field. 📘
    28. Remove this unused "topic" private field. 📘
    29. Remove this unused "hdfsDest" private field. 📘
    30. Remove this unused "core" private field. 📘
    • ... 107 more
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment