Commit 243313f1 authored by 王海鹰

脱敏保留源数据格式;

代码规范
parent eff193d3
......@@ -37,5 +37,4 @@ public class TransactionLogMask {
KafkaMsgMaskUtil.maskKafkaMsg(conf);
}
}
}
package com.zorkdata.datamask.constant;
import java.util.Date;
/**
* Description :
* Description: 查询参数常量
*
* @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
* Date : Create in 2020/10/20 15:32
* @author: wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
* Date: Create in 2020/10/20 15:32
*/
public interface ParamConstants {
......
package com.zorkdata.datamask.constant;
/**
* Description :
* Description : 正则表达式常量
*
* @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
* Date : Create in 2020/10/20 15:32
......
......@@ -10,6 +10,4 @@ public interface StrConstants {
String FILE_SEPARATOR = "/";
String AVRO_SUFFIX = ".avro";
String EMPTY_STR = "";
}
package com.zorkdata.datamask.domain;
import lombok.Data;
import org.apache.avro.mapred.AvroWrapper;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
......@@ -12,7 +11,7 @@ import java.util.Map;
/**
* @author wanghaiying
* @Description LogData
* @Description 日志实体类
* @Email wanghaiying@zork.com.cn
* @Date 2020/9/25 10:00
*/
......@@ -62,11 +61,4 @@ public class LogData implements Serializable, WritableComparable {
/**
 * Empty implementation: nothing is read from {@code dataInput}, so this call
 * leaves every field of the object unchanged.
 * NOTE(review): the class implements WritableComparable, so this method is
 * presumably the deserialization hook — confirm that a no-op is intended.
 *
 * @param dataInput the input to read from; unused by this implementation
 * @throws IOException declared by the signature; the empty body never throws it
 */
@Override
public void readFields(DataInput dataInput) throws IOException {
}
// @Override
// public String toString() {
// return new DateTime(timestamp).toDate().getTime() + " ZorkLogData{" + "logTypeName='" + logTypeName + '\'' + ", timestamp='" + timestamp + '\'' + ", source='"
// + source + '\'' + ", offset='" + offset + '\'' + ", dimensions=" + dimensions + ", measures=" + measures
// + ", normalFields=" + normalFields + '}';
// }
}
......@@ -102,7 +102,7 @@ public class HdfsLogMaskUtil {
//根据日志事件的timestamp做过滤
Long timestamp = DateUtils.utc2timestamp(logData.getTimestamp());
boolean flag = null != timestamp && timestamp > hdfsLogQueryParam.getStartTime()
&& timestamp < hdfsLogQueryParam.getEndTime() || Boolean.TRUE;
&& timestamp < hdfsLogQueryParam.getEndTime();
if (flag) {
Map maskResult = maskUtil.mask(logData.getNormalFields(), fieldsWhiteList);
......@@ -132,7 +132,6 @@ public class HdfsLogMaskUtil {
return tupple;
}
}).output(hadoopOutputFormat);
try {
env.execute("国泰交易日志脱敏job");
} catch (Exception e) {
......
......@@ -17,6 +17,17 @@ public class MaskUtil implements Serializable {
public static final int DEFAULT_MAP_CAPACITY = 16;
/**
 * Format (separator) characters of the source data. When a matched value is
 * masked, characters contained in this list are kept as-is and every other
 * character is replaced by "*", so the masked value keeps its original shape.
 */
// TODO: move this list into the configuration file
// A plain ArrayList is used instead of double-brace initialization: the latter
// creates an anonymous ArrayList subclass that holds a hidden reference to the
// enclosing MaskUtil instance.
private List<String> dataFormats =
        new ArrayList<String>(java.util.Arrays.asList(",", ".", "@", "-"));
/**
* 姓名正则
*/
......@@ -88,7 +99,6 @@ public class MaskUtil implements Serializable {
patterns.add(Pattern.compile(this.macRegExp));
patterns.add(Pattern.compile(this.emailRegExp));
patterns.add(Pattern.compile(this.ipRegExp));
patterns.add(Pattern.compile(this.nameRegExp));
patterns.add(Pattern.compile(this.idRegExp18));
patterns.add(Pattern.compile(this.idRegExp15));
patterns.add(Pattern.compile(this.bankCardRegExp));
......@@ -104,9 +114,15 @@ public class MaskUtil implements Serializable {
if (matcher.find()) {
String replaceStr = "";
for (int i = 0; i < matcher.group().length(); i++) {
replaceStr = replaceStr.concat("*");
String s = String.valueOf(matcher.group().charAt(i));
if(dataFormats.contains(s)){
replaceStr = replaceStr.concat(s);
}else{
replaceStr = replaceStr.concat("*");
}
}
value = value.replace(matcher.group(), replaceStr);
System.out.println("\n");
}
}
map.put(k, value);
......@@ -118,15 +134,20 @@ public class MaskUtil implements Serializable {
}
public static void main(String[] args) {
MaskUtil maskUtil = new MaskUtil("[\\u4e00-\\u9fa5]{1,20}|[a-zA-Z\\\\.\\\\s]{1,20}", "((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}",
"(\\d{3,4}-)?\\d{6,8}", "\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*", "[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}",
"[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])", "([1-9]{1})(\\d{11}|\\d{15}|\\d{16}|\\d{17}|\\d{18})",
"([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}", "((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)",
MaskUtil maskUtil = new MaskUtil("[\\u4e00-\\u9fa5]{1,20}|[a-zA-Z\\\\.\\\\s]{1,20}",
"((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}",
"0\\d{2,3}-\\d{7,8}",
"[a-zA-Z0-9]+@[a-zA-Z0-9]+(\\.[a-zA-Z0-9]+)+",
"[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}",
"[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])",
"([1-9]{1})(\\d{11}|\\d{15}|\\d{16}|\\d{17}|\\d{18})",
"([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}",
"((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)",
"([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}");
Map map = new HashMap(DEFAULT_MAP_CAPACITY);
map.put("姓名", "王海鹰");
map.put("身份证号", "372925199008075158");
map.put("身份证号", "372925199101195158");
map.put("手机号", "15000101879");
map.put("电话", "021-61341606");
map.put("邮箱", "wanghaiying@zork.com");
......@@ -137,6 +158,7 @@ public class MaskUtil implements Serializable {
map.put("message", "王海鹰,372925199008075158#15000101879");
map.put("messid", "0000011404342B32233DDCDA");
map.put("bsflag", "0000011404342B32233DDCDA");
map.put("test", "wanghaiying123");
map.put("normalFields", "13811110000-110101199003075517-上海市浦东新区张江微电子港-zorkdata@163.com-123456789-wanghaiying123-王海鹰-192.168.1.1-00-50-56-C0-00-08-6227002470170278192");
String[] fieldsWhiteListArray = "messid,fundid,custid,orgid,brhid,secuid,bankcode,market,ordersno,ordergroup,count,poststr,stkcode,bsflag,orderamt,price,qty,bankcode,tacode,ofcode,transacc,taacc".split(",");
......
......@@ -33,9 +33,9 @@ reg_exp:
# 手机号正则
mobile: "((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}"
# 电话号码正则
phone: "(\\d{3,4}-)?\\d{6,8}"
phone: "0\\d{2,3}-\\d{7,8}"
# 邮箱正则
email: "\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*"
email: "[a-zA-Z0-9]+@[a-zA-Z0-9]+(\\.[a-zA-Z0-9]+)+"
# 身份证号码(15位)正则
id15: "[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}"
# 身份证号码(18位)正则
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment