Commit 243313f1 authored by 王海鹰

脱敏保留源数据格式;

代码规范
parent eff193d3
...@@ -37,5 +37,4 @@ public class TransactionLogMask { ...@@ -37,5 +37,4 @@ public class TransactionLogMask {
KafkaMsgMaskUtil.maskKafkaMsg(conf); KafkaMsgMaskUtil.maskKafkaMsg(conf);
} }
} }
} }
package com.zorkdata.datamask.constant; package com.zorkdata.datamask.constant;
import java.util.Date;
/** /**
* Description : * Description: 查询参数常量
* *
* @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>) * @author: wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
* Date : Create in 2020/10/20 15:32 * Date: Create in 2020/10/20 15:32
*/ */
public interface ParamConstants { public interface ParamConstants {
......
package com.zorkdata.datamask.constant; package com.zorkdata.datamask.constant;
/** /**
* Description : * Description : 正则表达式常量
* *
* @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>) * @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
* Date : Create in 2020/10/20 15:32 * Date : Create in 2020/10/20 15:32
......
...@@ -10,6 +10,4 @@ public interface StrConstants { ...@@ -10,6 +10,4 @@ public interface StrConstants {
String FILE_SEPARATOR = "/"; String FILE_SEPARATOR = "/";
String AVRO_SUFFIX = ".avro"; String AVRO_SUFFIX = ".avro";
String EMPTY_STR = ""; String EMPTY_STR = "";
} }
package com.zorkdata.datamask.domain; package com.zorkdata.datamask.domain;
import lombok.Data; import lombok.Data;
import org.apache.avro.mapred.AvroWrapper;
import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput; import java.io.DataInput;
...@@ -12,7 +11,7 @@ import java.util.Map; ...@@ -12,7 +11,7 @@ import java.util.Map;
/** /**
* @author wanghaiying * @author wanghaiying
* @Description LogData * @Description 日志实体类
* @Email wanghaiying@zork.com.cn * @Email wanghaiying@zork.com.cn
* @Date 2020/9/25 10:00 * @Date 2020/9/25 10:00
*/ */
...@@ -62,11 +61,4 @@ public class LogData implements Serializable, WritableComparable { ...@@ -62,11 +61,4 @@ public class LogData implements Serializable, WritableComparable {
@Override @Override
public void readFields(DataInput dataInput) throws IOException { public void readFields(DataInput dataInput) throws IOException {
} }
// @Override
// public String toString() {
// return new DateTime(timestamp).toDate().getTime() + " ZorkLogData{" + "logTypeName='" + logTypeName + '\'' + ", timestamp='" + timestamp + '\'' + ", source='"
// + source + '\'' + ", offset='" + offset + '\'' + ", dimensions=" + dimensions + ", measures=" + measures
// + ", normalFields=" + normalFields + '}';
// }
} }
...@@ -102,7 +102,7 @@ public class HdfsLogMaskUtil { ...@@ -102,7 +102,7 @@ public class HdfsLogMaskUtil {
//根据日志事件的timestamp做过滤 //根据日志事件的timestamp做过滤
Long timestamp = DateUtils.utc2timestamp(logData.getTimestamp()); Long timestamp = DateUtils.utc2timestamp(logData.getTimestamp());
boolean flag = null != timestamp && timestamp > hdfsLogQueryParam.getStartTime() boolean flag = null != timestamp && timestamp > hdfsLogQueryParam.getStartTime()
&& timestamp < hdfsLogQueryParam.getEndTime() || Boolean.TRUE; && timestamp < hdfsLogQueryParam.getEndTime();
if (flag) { if (flag) {
Map maskResult = maskUtil.mask(logData.getNormalFields(), fieldsWhiteList); Map maskResult = maskUtil.mask(logData.getNormalFields(), fieldsWhiteList);
...@@ -132,7 +132,6 @@ public class HdfsLogMaskUtil { ...@@ -132,7 +132,6 @@ public class HdfsLogMaskUtil {
return tupple; return tupple;
} }
}).output(hadoopOutputFormat); }).output(hadoopOutputFormat);
try { try {
env.execute("国泰交易日志脱敏job"); env.execute("国泰交易日志脱敏job");
} catch (Exception e) { } catch (Exception e) {
......
...@@ -17,6 +17,17 @@ public class MaskUtil implements Serializable { ...@@ -17,6 +17,17 @@ public class MaskUtil implements Serializable {
public static final int DEFAULT_MAP_CAPACITY = 16; public static final int DEFAULT_MAP_CAPACITY = 16;
/**
 * Data-format (separator) characters. The masking loop checks each character
 * of a matched value against this list: characters found here are appended to
 * the replacement unchanged, every other character becomes "*". This keeps the
 * original layout of the value (e.g. the "@" and "." of an e-mail address)
 * visible after masking.
 */
// TODO: move this list into the configuration file
// NOTE(review): double-brace initialization creates an anonymous ArrayList
// subclass that keeps a reference to the enclosing instance; the enclosing
// class is declared Serializable, so a plain list may be safer — confirm.
private List<String> dataFormats = new ArrayList<String>(){{
add(",");
add(".");
add("@");
add("-");
}};
/** /**
* 姓名正则 * 姓名正则
*/ */
...@@ -88,7 +99,6 @@ public class MaskUtil implements Serializable { ...@@ -88,7 +99,6 @@ public class MaskUtil implements Serializable {
patterns.add(Pattern.compile(this.macRegExp)); patterns.add(Pattern.compile(this.macRegExp));
patterns.add(Pattern.compile(this.emailRegExp)); patterns.add(Pattern.compile(this.emailRegExp));
patterns.add(Pattern.compile(this.ipRegExp)); patterns.add(Pattern.compile(this.ipRegExp));
patterns.add(Pattern.compile(this.nameRegExp));
patterns.add(Pattern.compile(this.idRegExp18)); patterns.add(Pattern.compile(this.idRegExp18));
patterns.add(Pattern.compile(this.idRegExp15)); patterns.add(Pattern.compile(this.idRegExp15));
patterns.add(Pattern.compile(this.bankCardRegExp)); patterns.add(Pattern.compile(this.bankCardRegExp));
...@@ -104,9 +114,15 @@ public class MaskUtil implements Serializable { ...@@ -104,9 +114,15 @@ public class MaskUtil implements Serializable {
if (matcher.find()) { if (matcher.find()) {
String replaceStr = ""; String replaceStr = "";
for (int i = 0; i < matcher.group().length(); i++) { for (int i = 0; i < matcher.group().length(); i++) {
replaceStr = replaceStr.concat("*"); String s = String.valueOf(matcher.group().charAt(i));
if(dataFormats.contains(s)){
replaceStr = replaceStr.concat(s);
}else{
replaceStr = replaceStr.concat("*");
}
} }
value = value.replace(matcher.group(), replaceStr); value = value.replace(matcher.group(), replaceStr);
System.out.println("\n");
} }
} }
map.put(k, value); map.put(k, value);
...@@ -118,15 +134,20 @@ public class MaskUtil implements Serializable { ...@@ -118,15 +134,20 @@ public class MaskUtil implements Serializable {
} }
public static void main(String[] args) { public static void main(String[] args) {
MaskUtil maskUtil = new MaskUtil("[\\u4e00-\\u9fa5]{1,20}|[a-zA-Z\\\\.\\\\s]{1,20}", "((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}", MaskUtil maskUtil = new MaskUtil("[\\u4e00-\\u9fa5]{1,20}|[a-zA-Z\\\\.\\\\s]{1,20}",
"(\\d{3,4}-)?\\d{6,8}", "\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*", "[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}", "((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}",
"[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])", "([1-9]{1})(\\d{11}|\\d{15}|\\d{16}|\\d{17}|\\d{18})", "0\\d{2,3}-\\d{7,8}",
"([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}", "((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)", "[a-zA-Z0-9]+@[a-zA-Z0-9]+(\\.[a-zA-Z0-9]+)+",
"[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}",
"[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])",
"([1-9]{1})(\\d{11}|\\d{15}|\\d{16}|\\d{17}|\\d{18})",
"([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}",
"((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)",
"([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}"); "([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}");
Map map = new HashMap(DEFAULT_MAP_CAPACITY); Map map = new HashMap(DEFAULT_MAP_CAPACITY);
map.put("姓名", "王海鹰"); map.put("姓名", "王海鹰");
map.put("身份证号", "372925199008075158"); map.put("身份证号", "372925199101195158");
map.put("手机号", "15000101879"); map.put("手机号", "15000101879");
map.put("电话", "021-61341606"); map.put("电话", "021-61341606");
map.put("邮箱", "wanghaiying@zork.com"); map.put("邮箱", "wanghaiying@zork.com");
...@@ -137,6 +158,7 @@ public class MaskUtil implements Serializable { ...@@ -137,6 +158,7 @@ public class MaskUtil implements Serializable {
map.put("message", "王海鹰,372925199008075158#15000101879"); map.put("message", "王海鹰,372925199008075158#15000101879");
map.put("messid", "0000011404342B32233DDCDA"); map.put("messid", "0000011404342B32233DDCDA");
map.put("bsflag", "0000011404342B32233DDCDA"); map.put("bsflag", "0000011404342B32233DDCDA");
map.put("test", "wanghaiying123");
map.put("normalFields", "13811110000-110101199003075517-上海市浦东新区张江微电子港-zorkdata@163.com-123456789-wanghaiying123-王海鹰-192.168.1.1-00-50-56-C0-00-08-6227002470170278192"); map.put("normalFields", "13811110000-110101199003075517-上海市浦东新区张江微电子港-zorkdata@163.com-123456789-wanghaiying123-王海鹰-192.168.1.1-00-50-56-C0-00-08-6227002470170278192");
String[] fieldsWhiteListArray = "messid,fundid,custid,orgid,brhid,secuid,bankcode,market,ordersno,ordergroup,count,poststr,stkcode,bsflag,orderamt,price,qty,bankcode,tacode,ofcode,transacc,taacc".split(","); String[] fieldsWhiteListArray = "messid,fundid,custid,orgid,brhid,secuid,bankcode,market,ordersno,ordergroup,count,poststr,stkcode,bsflag,orderamt,price,qty,bankcode,tacode,ofcode,transacc,taacc".split(",");
......
...@@ -33,9 +33,9 @@ reg_exp: ...@@ -33,9 +33,9 @@ reg_exp:
# 手机号正则 # 手机号正则
mobile: "((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}" mobile: "((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}"
# 电话号码正则 # 电话号码正则
phone: "(\\d{3,4}-)?\\d{6,8}" phone: "0\\d{2,3}-\\d{7,8}"
# 邮箱正则 # 邮箱正则
email: "\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*" email: "[a-zA-Z0-9]+@[a-zA-Z0-9]+(\\.[a-zA-Z0-9]+)+"
# 身份证号码(15位)正则 # 身份证号码(15位)正则
id15: "[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}" id15: "[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}"
# 身份证号码(18位)正则 # 身份证号码(18位)正则
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment