Commit 487ccca8 authored by DeleMing

<dev>

1. 修改身份证比银行卡号先匹配
2. 新增持仓正则
3. 增加是否脱敏维度功能
4. 提取hdfs用户下载名称
parent 0cf4e4e5
Pipeline #15923 passed with stages
in 3 minutes and 18 seconds
......@@ -52,6 +52,11 @@ public class RegularExpressions implements Serializable {
*/
private String macRegExp;
/**
* 持仓信息正则
*/
private String positionExp;
  • Remove this unused "positionExp" private field. 📘

Please register or sign in to reply
public RegularExpressions(){
}
......@@ -67,5 +72,6 @@ public class RegularExpressions implements Serializable {
this.addressRegExp = MapUtils.getString(map, RegExpConstants.ADDRESS_REG_EXP);
this.ipRegExp = MapUtils.getString(map, RegExpConstants.IP_REG_EXP);
this.macRegExp = MapUtils.getString(map, RegExpConstants.MAC_REG_EXP);
this.positionExp = MapUtils.getString(map, RegExpConstants.POSITION_EXP);
}
}
......@@ -15,6 +15,7 @@ public final class ConfigConstants {
public static final String SOURCE_PARALLELISM = "source.parallelism";
public static final String TRANSFORMER_PARALLELISM = "transformer.parallelism";
public static final String SINK_PARALLELISM = "sink.parallelism";
public static final String REG_DIMENSION = "reg.dimension";
public static final String HDFS_URI = "hdfs_uri";
public static final String HDFS_USER = "hdfs_user";
......
......@@ -18,4 +18,5 @@ public final class RegExpConstants {
public static final String ADDRESS_REG_EXP = "address";
public static final String IP_REG_EXP = "ip";
public static final String MAC_REG_EXP = "mac";
public static final String POSITION_EXP = "position";
}
......@@ -23,8 +23,8 @@ public class DesensitizationFunction implements Serializable {
public DesensitizationFunction(RegularExpressions regularExpressions) {
this.regularExpressions = regularExpressions;
patterns.add(Pattern.compile(regularExpressions.getBankCardRegExp()));
patterns.add(Pattern.compile(regularExpressions.getIdRegExp()));
patterns.add(Pattern.compile(regularExpressions.getBankCardRegExp()));
patterns.add(Pattern.compile(regularExpressions.getPhoneRegExp()));
patterns.add(Pattern.compile(regularExpressions.getMobileRegExp()));
......@@ -33,6 +33,7 @@ public class DesensitizationFunction implements Serializable {
patterns.add(Pattern.compile(regularExpressions.getMacRegExp()));
patterns.add(Pattern.compile(regularExpressions.getEmailRegExp()));
patterns.add(Pattern.compile(regularExpressions.getIpRegExp()));
patterns.add(Pattern.compile(regularExpressions.getPositionExp()));
}
public Map<String, String> desensitization(Map<String, String> map,
......
......@@ -73,6 +73,10 @@ public class HdfsLogDesensitization implements Serializable {
private String hdfsDest;
private String startTime;
private String endTime;
/**
* 是否脱敏维度信息
*/
private boolean hasRegDimension;
private long startTimestamp;
private long endTimestamp;
private Map<String, String> confMap;
......@@ -101,6 +105,7 @@ public class HdfsLogDesensitization implements Serializable {
this.endTime = String.valueOf(conf.get(ConfigConstants.END_TIME));
this.startTimestamp = DateUtil.time2Timestamp(startTime);
this.endTimestamp = DateUtil.time2Timestamp(endTime);
this.hasRegDimension = Boolean.parseBoolean(conf.get(ConfigConstants.REG_DIMENSION));
this.confMap = conf;
return this;
}
......@@ -143,9 +148,16 @@ public class HdfsLogDesensitization implements Serializable {
if (null != timestamp && timestamp.compareTo(startTimestamp) >= 0 &&
timestamp.compareTo(endTimestamp) <= 0) {
Map<String, String> normalFields = logData.getNormalFields();
Map desensitization = desensitizationFunction.
desensitization(logData.getNormalFields(), fieldsWhiteList, dataFormats);
desensitization(normalFields, fieldsWhiteList, dataFormats);
logData.setNormalFields(desensitization);
if (hasRegDimension) {
Map<String, String> dimensions = logData.getDimensions();
Map desensitizationDimensions = desensitizationFunction.
desensitization(dimensions, fieldsWhiteList, dataFormats);
logData.setDimensions(desensitizationDimensions);
}
collector.collect(logData);
}
}
......
......@@ -11,6 +11,9 @@ source: "hdfs"
# 交易日志的“核心”信息,值以c开头、后面是数字序号,非必传
core: "c9"
# 维度信息是否使用正则脱敏
reg.dimension: "true"
# 查询日志起始
start_time: "2020-11-07 21:22:20"
# 查询日志结束
......@@ -29,10 +32,15 @@ hdfs_dest: "/tmp/datawarehouse/jzjy/kcbp_biz_log/output1/"
# 不做脱敏的字段白名单
fields_white_list: "funcid,count1,count2,count3,count4"
# chd下载配置
# cdh下载配置
# cdh能执行hdfs命令的机器的ip
cdh_host_ip: "192.168.70.2"
# cdh能执行hdfs命令的机器的ip的超级用户
cdh_host_user: "root"
# cdh能执行hdfs命令的机器的ip的用户密码
cdh_host_password: "NuqUtwbJUBRmUwgh"
# cdh能执行hdfs下载命令的用户名
cdh_hdfs_user: "hdfs"
# 与配置文件保持一致
download_path: "/tmp/"
......@@ -6,6 +6,7 @@ DEFAULT_SLEEP_TIME=30
CDH_HOST_IP=192.168.70.2
CDH_HOST_USER=root
CDH_HOST_PASSWORD=password
CDH_HDFS_USER=hdfs
# 与配置文件保持一致
HDFS_DEST=/tmp/datawarehouse/jzjy/kcbp_biz_log/output1/
......@@ -51,6 +52,10 @@ do
then
CDH_HOST_PASSWORD=$(echo "$v" | sed -r 's/.*"(.+)".*/\1/')
fi
if [[ "$k" == "cdh_hdfs_user" ]]
then
CDH_HDFS_USER=$(echo "$v" | sed -r 's/.*"(.+)".*/\1/')
fi
done < $DEPLOY_PATH/conf/$FLINK_TASK_CONF
echo "开始提交任务"
......@@ -77,7 +82,7 @@ expect <<EOF
"password:" { send "$CDH_HOST_PASSWORD\n" }
}
expect "]# " { send "rm -rf $SCP_PATH\n" }
expect "]# " { send "sudo -u hdfs hadoop fs -copyToLocal $HDFS_DEST $DOWNLOAD_PATH\n" }
expect "]# " { send "sudo -u $CDH_HDFS_USER hadoop fs -copyToLocal $HDFS_DEST $DOWNLOAD_PATH\n" }
expect "]# " {
send "scp -r $SCP_PATH root@$LOCAL_IP:/tmp/\n"
expect {
......
......@@ -12,8 +12,10 @@ id=[1-9]\d{5}(18|19|([23]\d))\d{2}((0[1-9])|(10|11|12))(([0-2][1-9])|10|20|30|31
# 银行卡号
bank_card=(([13-79]\d{3})|(2[1-9]\d{2})|(20[3-9]\d)|(8[01-79]\d{2}))\s?\d{4}\s?\d{4}\s?\d{4}(\s?\d{3})?
# 家庭住址正则
address=([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|自治区|自治州|区|县|镇|道|路|街|号|弄|条|室)){1,}
address=([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|自治区|自治州|区|县|镇|道|路|街|号|弄|条|室|港)){1,}
# ip地址正则
ip=((2[0-4]\d|25[0-5]|[01]?\d\d?)\.){3}(2[0-4]\d|25[0-5]|[01]?\d\d?)
# mac地址正则
mac=[A-F0-9]{2}([-:]?[A-F0-9]{2})([-:.]?[A-F0-9]{2})([-:]?[A-F0-9]{2})([-:.]?[A-F0-9]{2})([-:]?[A-F0-9]{2})
\ No newline at end of file
mac=[A-F0-9]{2}([-:]?[A-F0-9]{2})([-:.]?[A-F0-9]{2})([-:]?[A-F0-9]{2})([-:.]?[A-F0-9]{2})([-:]?[A-F0-9]{2})
# 持仓信息正则
position=仓(\d+(\.)?\d+)(万|千|手|股)
\ No newline at end of file
  • SonarQube analysis reported 113 issues

    • 🚫 24 critical
    • 61 major
    • 🔽 27 minor
    • 1 info

    Watch the comments in this conversation to review them.

    Top 30 extra issues

    Note: The following issues were found on lines that were not modified in the commit. Because these issues can't be reported as line comments, they are summarized here:

    1. 🚫 Add a default case to this switch. 📘
    2. 🚫 switch中每个case需要通过break/return等来终止 📘
    3. 🚫 switch块缺少default语句 📘
    4. 🚫 Define a constant instead of duplicating this literal " {\n" 11 times. 📘
    5. 🚫 Define a constant instead of duplicating this literal " "type": \n" 11 times. 📘
    6. 🚫 Define a constant instead of duplicating this literal " "string",\n" 6 times. 📘
    7. 🚫 Define a constant instead of duplicating this literal " "null"\n" 6 times. 📘
    8. 🚫 [Define a constant instead of duplicating this literal " ]\n" 11 times.](https://git.zorkdata.com/liaomingtao/transaction_log_desensitization/blob/487ccca8343d8f795e79208b3aab4209d59a361c/src/main/java/com/zorkdata/desensitization/avro/AvroSchemaDef.java#L23) 📘
    9. 🚫 Define a constant instead of duplicating this literal " },\n" 9 times. 📘
    10. 🚫 Define a constant instead of duplicating this literal " "null",\n" 5 times. 📘
    11. 🚫 Define a constant instead of duplicating this literal " {\n" 5 times. 📘
    12. 🚫 Define a constant instead of duplicating this literal " "type": "map",\n" 5 times. 📘
    13. 🚫 Define a constant instead of duplicating this literal " "values": "string"\n" 3 times. 📘
    14. 🚫 Define a constant instead of duplicating this literal " }\n" 5 times. 📘
    15. 🚫 Define a constant instead of duplicating this literal "序列化失败" 13 times. 📘
    16. 🚫 Refactor this method to reduce its Cognitive Complexity from 22 to the 15 allowed. 📘
    17. 🚫 Refactor this method to reduce its Cognitive Complexity from 22 to the 15 allowed. 📘
    18. 🚫 Refactor this method to reduce its Cognitive Complexity from 22 to the 15 allowed. 📘
    19. 🚫 常量【dataFormats】命名应全部大写并以下划线分隔 📘
    20. 🚫 Add a nested comment explaining why this method is empty, throw an UnsupportedOperationException or complete the implementation. 📘
    21. 🚫 Change this "try" to a try-with-resources. (sonar.java.source not set. Assuming 7 or greater.) 📘
    22. 🚫 Refactor this code to not throw exceptions in finally blocks. 📘
    23. 🚫 Refactor this code to not throw exceptions in finally blocks. 📘
    24. 🚫 Change this "try" to a try-with-resources. (sonar.java.source not set. Assuming 7 or greater.) 📘
    25. This block of commented-out lines of code should be removed. 📘
    26. 及时清理不再使用的代码段或配置信息。 📘
    27. Replace this use of System.out or System.err by a logger. 📘
    28. Replace this use of System.out or System.err by a logger. 📘
    29. String contains no format specifiers. 📘
    30. Replace this use of System.out or System.err by a logger. 📘
    • ... 79 more
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment