Commit 2a44ef5a authored by xiesen

Merge remote-tracking branch 'main/master'

parents eff193d3 243313f1
Pipeline #58759 passed with stages
in 38 seconds
......@@ -37,5 +37,4 @@ public class TransactionLogMask {
KafkaMsgMaskUtil.maskKafkaMsg(conf);
}
}
}
package com.zorkdata.datamask.constant;
import java.util.Date;
/**
* Description :
* Description: 查询参数常量
*
* @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
* Date : Create in 2020/10/20 15:32
* @author: wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
* Date: Create in 2020/10/20 15:32
*/
public interface ParamConstants {
......
package com.zorkdata.datamask.constant;
/**
* Description :
* Description : 正则表达式常量
*
* @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
* Date : Create in 2020/10/20 15:32
......
......@@ -10,6 +10,4 @@ public interface StrConstants {
String FILE_SEPARATOR = "/";
String AVRO_SUFFIX = ".avro";
String EMPTY_STR = "";
}
package com.zorkdata.datamask.domain;
import lombok.Data;
import org.apache.avro.mapred.AvroWrapper;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
......@@ -12,7 +11,7 @@ import java.util.Map;
/**
* @author wanghaiying
* @Description LogData
* @Description 日志实体类
* @Email wanghaiying@zork.com.cn
* @Date 2020/9/25 10:00
*/
......@@ -62,11 +61,4 @@ public class LogData implements Serializable, WritableComparable {
/**
 * Overrides the {@code readFields} hook of the {@code WritableComparable} contract
 * this class implements. This implementation has an empty body: nothing is read from
 * {@code dataInput} and no field of the object is modified.
 *
 * @param dataInput input handed in by the caller; not consumed by this implementation
 * @throws IOException declared by the overridden signature; the empty body never throws it
 */
@Override
public void readFields(DataInput dataInput) throws IOException {
    // NOTE(review): body is empty, so an instance is never populated through this path.
    // Presumably deserialization is handled elsewhere (e.g. via Avro) - confirm, and if
    // this hook is genuinely unsupported consider stating so explicitly.
}
// @Override
// public String toString() {
// return new DateTime(timestamp).toDate().getTime() + " ZorkLogData{" + "logTypeName='" + logTypeName + '\'' + ", timestamp='" + timestamp + '\'' + ", source='"
// + source + '\'' + ", offset='" + offset + '\'' + ", dimensions=" + dimensions + ", measures=" + measures
// + ", normalFields=" + normalFields + '}';
// }
}
......@@ -102,7 +102,7 @@ public class HdfsLogMaskUtil {
//根据日志事件的timestamp做过滤
Long timestamp = DateUtils.utc2timestamp(logData.getTimestamp());
boolean flag = null != timestamp && timestamp > hdfsLogQueryParam.getStartTime()
&& timestamp < hdfsLogQueryParam.getEndTime() || Boolean.TRUE;
&& timestamp < hdfsLogQueryParam.getEndTime();
if (flag) {
Map maskResult = maskUtil.mask(logData.getNormalFields(), fieldsWhiteList);
......@@ -132,7 +132,6 @@ public class HdfsLogMaskUtil {
return tupple;
}
}).output(hadoopOutputFormat);
try {
env.execute("国泰交易日志脱敏job");
} catch (Exception e) {
......
......@@ -17,6 +17,17 @@ public class MaskUtil implements Serializable {
public static final int DEFAULT_MAP_CAPACITY = 16;
/**
* 数据格式信息
*/
//todo 抽取到配置文件
  • 字段【dataFormats】必须使用javadoc形式的注释 📘 Complete the task associated to this TODO comment. 📘

Please register or sign in to reply
private List<String> dataFormats = new ArrayList<String>(){{
  • Move the contents of this initializer to a standard constructor or to field initializers. 📘 🔽 Use another way to initialize this instance. 📘

Please register or sign in to reply
add(",");
add(".");
add("@");
add("-");
}};
/**
* 姓名正则
*/
......@@ -88,7 +99,6 @@ public class MaskUtil implements Serializable {
patterns.add(Pattern.compile(this.macRegExp));
patterns.add(Pattern.compile(this.emailRegExp));
patterns.add(Pattern.compile(this.ipRegExp));
patterns.add(Pattern.compile(this.nameRegExp));
patterns.add(Pattern.compile(this.idRegExp18));
patterns.add(Pattern.compile(this.idRegExp15));
patterns.add(Pattern.compile(this.bankCardRegExp));
......@@ -104,9 +114,15 @@ public class MaskUtil implements Serializable {
if (matcher.find()) {
String replaceStr = "";
for (int i = 0; i < matcher.group().length(); i++) {
replaceStr = replaceStr.concat("*");
String s = String.valueOf(matcher.group().charAt(i));
if(dataFormats.contains(s)){
replaceStr = replaceStr.concat(s);
}else{
replaceStr = replaceStr.concat("*");
}
}
value = value.replace(matcher.group(), replaceStr);
System.out.println("\n");
  • Replace this use of System.out or System.err by a logger. 📘

Please register or sign in to reply
}
}
map.put(k, value);
......@@ -118,15 +134,20 @@ public class MaskUtil implements Serializable {
}
public static void main(String[] args) {
MaskUtil maskUtil = new MaskUtil("[\\u4e00-\\u9fa5]{1,20}|[a-zA-Z\\\\.\\\\s]{1,20}", "((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}",
"(\\d{3,4}-)?\\d{6,8}", "\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*", "[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}",
"[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])", "([1-9]{1})(\\d{11}|\\d{15}|\\d{16}|\\d{17}|\\d{18})",
"([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}", "((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)",
MaskUtil maskUtil = new MaskUtil("[\\u4e00-\\u9fa5]{1,20}|[a-zA-Z\\\\.\\\\s]{1,20}",
"((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}",
"0\\d{2,3}-\\d{7,8}",
"[a-zA-Z0-9]+@[a-zA-Z0-9]+(\\.[a-zA-Z0-9]+)+",
"[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}",
"[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])",
"([1-9]{1})(\\d{11}|\\d{15}|\\d{16}|\\d{17}|\\d{18})",
"([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}",
"((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)",
"([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}");
Map map = new HashMap(DEFAULT_MAP_CAPACITY);
map.put("姓名", "王海鹰");
map.put("身份证号", "372925199008075158");
map.put("身份证号", "372925199101195158");
map.put("手机号", "15000101879");
map.put("电话", "021-61341606");
map.put("邮箱", "wanghaiying@zork.com");
......@@ -137,6 +158,7 @@ public class MaskUtil implements Serializable {
map.put("message", "王海鹰,372925199008075158#15000101879");
map.put("messid", "0000011404342B32233DDCDA");
map.put("bsflag", "0000011404342B32233DDCDA");
map.put("test", "wanghaiying123");
map.put("normalFields", "13811110000-110101199003075517-上海市浦东新区张江微电子港-zorkdata@163.com-123456789-wanghaiying123-王海鹰-192.168.1.1-00-50-56-C0-00-08-6227002470170278192");
String[] fieldsWhiteListArray = "messid,fundid,custid,orgid,brhid,secuid,bankcode,market,ordersno,ordergroup,count,poststr,stkcode,bsflag,orderamt,price,qty,bankcode,tacode,ofcode,transacc,taacc".split(",");
......
......@@ -33,9 +33,9 @@ reg_exp:
# 手机号正则
mobile: "((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}"
# 电话号码正则
phone: "(\\d{3,4}-)?\\d{6,8}"
phone: "0\\d{2,3}-\\d{7,8}"
# 邮箱正则
email: "\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*"
email: "[a-zA-Z0-9]+@[a-zA-Z0-9]+(\\.[a-zA-Z0-9]+)+"
# 身份证号码(15位)正则
id15: "[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}"
# 身份证号码(18位)正则
......
  • SonarQube analysis reported 153 issues

    • 🚫 20 critical
    • 82 major
    • 🔽 49 minor
    • 2 info

    Watch the comments in this conversation to review them.

    Top 30 extra issues

    Note: The following issues were found on lines that were not modified in the commit. Because these issues can't be reported as line comments, they are summarized here:

    1. 🚫 Move constants to a class or enum. 📘
    2. 🚫 Move constants to a class or enum. 📘
    3. 🚫 Move constants to a class or enum. 📘
    4. 🚫 Add a nested comment explaining why this method is empty, throw an UnsupportedOperationException or complete the implementation. 📘
    5. 🚫 Change this "try" to a try-with-resources. (sonar.java.source not set. Assuming 7 or greater.) 📘
    6. 🚫 Refactor this code to not throw exceptions in finally blocks. 📘
    7. 🚫 Refactor this code to not throw exceptions in finally blocks. 📘
    8. 🚫 Make "patterns" private or transient. 📘
    9. 🚫 Refactor this method to reduce its Cognitive Complexity from 22 to the 15 allowed. 📘
    10. 🚫 Define a constant instead of duplicating this literal "序列化失败" 15 times. 📘
    11. 🚫 Define a constant instead of duplicating this literal " {\n" 7 times. 📘
    12. 🚫 Define a constant instead of duplicating this literal " "type": \n" 7 times. 📘
    13. 🚫 Define a constant instead of duplicating this literal " "string",\n" 4 times. 📘
    14. 🚫 Define a constant instead of duplicating this literal " "null"\n" 4 times. 📘
    15. 🚫 [Define a constant instead of duplicating this literal " ]\n" 7 times.](https://git.zorkdata.com/xiesen/transactionlogmask/blob/2a44ef5a6cce90c292a0b6c04a53d6863094ba72/src/main/java/com/zorkdata/datamask/util/avro/LogAvroMacroDef.java#L20) 📘
    16. 🚫 Define a constant instead of duplicating this literal " },\n" 6 times. 📘
    17. 🚫 Define a constant instead of duplicating this literal " "null",\n" 3 times. 📘
    18. 🚫 Define a constant instead of duplicating this literal " {\n" 3 times. 📘
    19. 🚫 Define a constant instead of duplicating this literal " "type": "map",\n" 3 times. 📘
    20. 🚫 Define a constant instead of duplicating this literal " }\n" 3 times. 📘
    21. Define and throw a dedicated exception instead of using a generic one. 📘
    22. Remove this unused "source" private field. 📘
    23. Remove this unused "hdfsSrc" private field. 📘
    24. Remove this unused "hdfsDest" private field. 📘
    25. Remove this unused "core" private field. 📘
    26. Remove this unused "date" private field. 📘
    27. Remove this unused "startTime" private field. 📘
    28. Remove this unused "endTime" private field. 📘
    29. Remove this unused "servers" private field. 📘
    30. Remove this unused "zookeeper" private field. 📘
    • ... 118 more
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment