Commit f19557f2 authored by DeleMing

<dev>

1. 优化逻辑
parent b29c7d71
Pipeline #15939 passed with stages
in 2 minutes and 48 seconds
......@@ -99,7 +99,7 @@ public class HdfsLogDesensitization implements Serializable {
this.hdfsUri = String.valueOf(conf.get(ConfigConstants.HDFS_URI)).trim();
this.hdfsUser = String.valueOf(conf.get(ConfigConstants.HDFS_USER)).trim();
this.hdfsSrc = hdfsUri + String.valueOf(conf.get(ConfigConstants.HDFS_SRC)).trim();
this.hdfsDest = hdfsUri + String.valueOf(conf.get(ConfigConstants.HDFS_DEST)).trim();
this.hdfsDest = hdfsUri + String.valueOf(conf.get(ConfigConstants.HDFS_DEST)).trim();
this.core = String.valueOf(conf.get(ConfigConstants.CORE)).trim();
this.startTime = String.valueOf(conf.get(ConfigConstants.START_TIME));
this.endTime = String.valueOf(conf.get(ConfigConstants.END_TIME));
......@@ -142,7 +142,9 @@ public class HdfsLogDesensitization implements Serializable {
new TypeReference<LogData>() {
});
//根据日志事件的核心信息做过滤
if (null != core && logData.getDimensions().get(HOSTNAME).contains(core)) {
boolean hasCore = (null != core && logData.getDimensions().get(HOSTNAME).contains(core))
|| "*".equals(core);
if (hasCore) {
//根据日志事件的timestamp做过滤
Long timestamp = DateUtil.utc2timestamp(logData.getTimestamp());
......@@ -167,9 +169,8 @@ public class HdfsLogDesensitization implements Serializable {
String filePath = hdfsDest;
HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat<>(new AvroOutputFormat(), jobConf);
FileOutputFormat.setOutputPath(jobConf, new Path(filePath));
/**
* avro序列化算子 .writeAsText("file:///lmt/output");
*/
// avro序列化算子 .writeAsText("file:///lmt/output");
  • This block of commented-out lines of code should be removed. 📘 及时清理不再使用的代码段或配置信息。 📘

Please register or sign in to reply
maskFlatMapOperator.map(new MapFunction<Object, Tuple2<AvroWrapper<LogData>, NullWritable>>() {
@Override
public Tuple2<AvroWrapper<LogData>, NullWritable> map(Object value) throws Exception {
......
......@@ -8,7 +8,7 @@ sink.parallelism: "4"
# 数据来源,支持hdfs和kafka,必传,暂不支持kafka
source: "hdfs"
# 交易日志的“核心”信息,值以c开头、后面是数字序号,非必传
# 交易日志的“核心”信息,值以c开头、后面是数字序号,匹配维度中的hostname,不匹配hostname则传*
core: "c9"
# 维度信息是否使用正则脱敏
......
# 日志来源,支持hdfs和kafka,必传
# 任务配置
job_name: "国泰交易日志脱敏job"
# 并行度
source.parallelism: "4"
transformer.parallelism: "4"
sink.parallelism: "4"
# 数据来源,支持hdfs和kafka,必传,暂不支持kafka
source: "hdfs"
# hdfs日志源文件地址,若source为hdfs,则该地址必传
hdfs_src: "hdfs://cdh-2:8020/tmp/datawarehouse4/jzjy/kcbp_biz_log"
# hdfs日志写入地址,非必传,默认写到hdfs-src目录下的output目录下
hdfs_dest: "hdfs://cdh-2:8020/tmp/datawarehouse/jzjy/kcbp_biz_log/output7/"
# 脱敏结果下载到的本地路径
download_path: "/tmp"
# 交易日志的“核心”信息,值以c开头、后面是数字序号,非必传
core: "c9"
# 查询日志日期(默认为当天),非必传
date: 20200929
# 维度信息是否使用正则脱敏
reg.dimension: "true"
# 查询日志起始时间戳,非必传
start_time: 1601348849900
# 查询日志起始时间
start_time: "2020-11-24 00:00:00"
# 查询日志结束时间
end_time: "2020-11-24 23:00:00"
# 查询日志结束时间戳,非必传
end_time: 1601348850000
# hadoop 相关配置
# hdfs 地址,必须以斜杠结尾
hdfs_uri: "hdfs://cdh01:8020/"
# hdfs 用户名
hdfs_user: "hdfs"
# hdfs日志源文件地址,若source为hdfs,则该地址必传,必须以斜杠结尾
hdfs_src: "/tmp/datawarehouse/jzjy/kcbp_biz_log/"
# hdfs日志写入地址,非必传,默认写到hdfs_src目录下的output目录下,必须以斜杠结尾
hdfs_dest: "/tmp/datawarehouse/jzjy/kcbp_biz_log/outputlmt/"
# 不做脱敏的字段白名单
fieldsWhiteList: "messid,fundid,custid,orgid,brhid,secuid,bankcode,market,ordersno,ordergroup,count,poststr,stkcode,bsflag,orderamt,price,qty,bankcode,tacode,ofcode,transacc,taacc,indexTime,logchecktime,end_logtime,collecttime,deserializerTime,versioninfo,fmillsecond,smillsecond"
# 脱敏用的正则表达式
reg_exp:
# 姓名正则
name: "[\u4e00-\u9fa5]{1,20}|[a-zA-Z\\.\\s]{1,20}"
# 手机号正则
mobile: "((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}"
# 电话号码正则
phone: "0\\d{2,3}-\\d{7,8}"
# 邮箱正则
email: "[a-zA-Z0-9]+@[a-zA-Z0-9]+(\\.[a-zA-Z0-9]+)+"
# 身份证号码(15位)正则
id15: "[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}"
# 身份证号码(18位)正则
id18: "[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])"
# 银行卡号
bank_card: "([1-9]{1})(\\d{11}|\\d{15}|\\d{16}|\\d{17}|\\d{18})"
# 家庭住址正则
address: "([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}"
# ip地址正则
ip: "((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)"
# mac地址正则
mac: "([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}"
# 脱敏用的正则表达式
#reg_exp:
# # 姓名正则
# name: '(\u8d75|\u94b1|\u5b59|\u674e|\u5468|\u5434|\u90d1|\u738b|\u51af|\u9648|\u696e|\u536b|\u848b|\u6c88|\u97e9|\u6768|\u6731|\u79e6|\u5c24|\u8bb8|\u4f55|\u5415|\u65bd|\u5f20|\u5b54|\u66f9|\u4e25|\u534e|\u91d1|\u9b4f|\u9676|\u59dc|\u621a|\u8c22|\u90b9|\u55bb|\u67cf|\u6c34|\u7aa6|\u7ae0|\u4e91|\u82cf|\u6f58|\u845b|\u595a|\u8303|\u5f6d|\u90ce|\u9c81|\u97e6|\u660c|\u9a6c|\u82d7|\u51e4|\u82b1|\u65b9|\u4fde|\u4efb|\u8881|\u67f3|\u9146|\u9c8d|\u53f2|\u5510|\u8d39|\u5ec9|\u5c91|\u859b|\u96f7|\u8d3a|\u502a|\u6c64|\u6ed5|\u6bb7|\u7f57|\u6bd5|\u90dd|\u90ac|\u5b89|\u5e38|\u4e50|\u4e8e|\u65f6|\u5085|\u76ae|\u535e|\u9f50|\u5eb7|\u4f0d|\u4f59|\u5143|\u535c|\u987e|\u5b5f|\u5e73|\u9ec4|\u548c|\u7a46|\u8427|\u5c39|\u59da|\u90b5|\u6e5b|\u6c6a|\u7941|\u6bdb|\u79b9|\u72c4|\u7c73|\u8d1d|\u660e|\u81e7|\u8ba1|\u4f0f|\u6210|\u6234|\u8c08|\u5b8b|\u8305|\u5e9e|\u718a|\u7eaa|\u8212|\u5c48|\u9879|\u795d|\u8463|\u6881|\u675c|\u962e|\u84dd|\u95fd|\u5e2d|\u5b63|\u9ebb|\u5f3a|\u8d3e|\u8def|\u5a04|\u5371|\u6c5f|\u7ae5|\u989c|\u90ed|\u6885|\u76db|\u6797|\u5201|\u953a|\u5f90|\u4e18|\u9a86|\u9ad8|\u590f|\u8521|\u7530|\u6a0a|\u80e1|\u51cc|\u970d|\u865e|\u4e07|\u652f|\u67ef|\u661d|\u7ba1|\u5362|\u83ab|\u7ecf|\u623f|\u88d8|\u7f2a|\u5e72|\u89e3|\u5e94|\u5b97|\u4e01|\u5ba3|\u8d32|\u9093|\u90c1|\u5355|\u676d|\u6d2a|\u5305|\u8bf8|\u5de6|\u77f3|\u5d14|\u5409|\u94ae|\u9f9a|\u7a0b|\u5d47|\u90a2|\u6ed1|\u88f4|\u9646|\u8363|\u7fc1|\u8340|\u7f8a|\u65bc|\u60e0|\u7504|\u9eb9|\u5bb6|\u5c01|\u82ae|\u7fbf|\u50a8|\u9773|\u6c72|\u90b4|\u7cdc|\u677e|\u4e95|\u6bb5|\u5bcc|\u5deb|\u4e4c|\u7126|\u5df4|\u5f13|\u7267|\u9697|\u5c71|\u8c37|\u8f66|\u4faf|\u5b93|\u84ec|\u5168|\u90d7|\u73ed|\u4ef0|\u79cb|\u4ef2|\u4f0a|\u5bab|\u5b81|\u4ec7|\u683e|\u66b4|\u7518|\u659c|\u5389|\u620e|\u7956|\u6b66|\u7b26|\u5218|\u666f|\u8a79|\u675f|\u9f99|\u53f6|\u5e78|\u53f8|\u97f6|\u90dc|\u9ece|\u84df|\u8584|\u5370|\u5bbf|\u767d|\u6000|\u84b2|\u90b0|\u4ece|\u9102|\u7d22|\u54b8|\u7c4d|\u8d56|\u5353|\u853a|\u5c60|\u8499|\u6c60|\u4e54|\u9634|\u90c1|\u
80e5|\u80fd|\u82cd|\u53cc|\u95fb|\u8398|\u515a|\u7fdf|\u8c2d|\u8d21|\u52b3|\u9004|\u59ec|\u7533|\u6276|\u5835|\u5189|\u5bb0|\u90e6|\u96cd|\u90e4|\u74a9|\u6851|\u6842|\u6fee|\u725b|\u5bff|\u901a|\u8fb9|\u6248|\u71d5|\u5180|\u90cf|\u6d66|\u5c1a|\u519c|\u6e29|\u522b|\u5e84|\u664f|\u67f4|\u77bf|\u960e|\u5145|\u6155|\u8fde|\u8339|\u4e60|\u5ba6|\u827e|\u9c7c|\u5bb9|\u5411|\u53e4|\u6613|\u614e|\u6208|\u5ed6|\u5ebe|\u7ec8|\u66a8|\u5c45|\u8861|\u6b65|\u90fd|\u803f|\u6ee1|\u5f18|\u5321|\u56fd|\u6587|\u5bc7|\u5e7f|\u7984|\u9619|\u4e1c|\u6b27|\u6bb3|\u6c83|\u5229|\u851a|\u8d8a|\u5914|\u9686|\u5e08|\u5de9|\u538d|\u8042|\u6641|\u52fe|\u6556|\u878d|\u51b7|\u8a3e|\u8f9b|\u961a|\u90a3|\u7b80|\u9976|\u7a7a|\u66fe|\u6bcb|\u6c99|\u4e5c|\u517b|\u97a0|\u987b|\u4e30|\u5de2|\u5173|\u84af|\u76f8|\u67e5|\u540e|\u8346|\u7ea2|\u6e38|\u7afa|\u6743|\u9011|\u76d6|\u76ca|\u6853|\u516c|\u4e07\u4fdf|\u53f8\u9a6c|\u4e0a\u5b98|\u6b27\u9633|\u590f\u4faf|\u8bf8\u845b|\u95fb\u4eba|\u4e1c\u65b9|\u8d6b\u8fde|\u7687\u752b|\u5c09\u8fdf|\u516c\u7f8a|\u6fb9\u53f0|\u516c\u51b6|\u5b97\u653f|\u6fee\u9633|\u6df3\u4e8e|\u5355\u4e8e|\u592a\u53d4|\u7533\u5c60|\u516c\u5b59|\u4ef2\u5b59|\u8f69\u8f95|\u4ee4\u72d0|\u953a\u79bb|\u5b87\u6587|\u957f\u5b59|\u6155\u5bb9|\u9c9c\u4e8e|\u95fe\u4e18|\u53f8\u5f92|\u53f8\u7a7a|\u4e0c\u5b98|\u53f8\u5bc7|\u4ec9|\u7763|\u5b50\u8f66|\u989b\u5b59|\u7aef\u6728|\u5deb\u9a6c|\u516c\u897f|\u6f06\u96d5|\u4e50\u6b63|\u58e4\u9a77|\u516c\u826f|\u62d3\u62d4|\u5939\u8c37|\u5bb0\u7236|\u8c37\u6881|\u664b|\u695a|\u960e|\u6cd5|\u6c5d|\u9122|\u6d82|\u94a6|\u6bb5\u5e72|\u767e\u91cc|\u4e1c\u90ed|\u5357\u95e8|\u547c\u5ef6|\u5f52|\u6d77|\u7f8a\u820c|\u5fae\u751f|\u5cb3|\u5e05|\u7f11|\u4ea2|\u51b5|\u540e|\u6709|\u7434|\u6881\u4e18|\u5de6\u4e18|\u4e1c\u95e8|\u897f\u95e8|\u5546|\u725f|\u4f58|\u4f74|\u4f2f|\u8d4f|\u5357\u5bab|\u58a8|\u54c8|\u8c2f|\u7b2a|\u5e74|\u7231|\u9633|\u4f5f|\u7b2c\u4e94|\u8a00|\u798f)(\w{1,1})'
# # 手机号正则
# mobile: '((\+|00)86)?((134\d{4})|((13[0-3|5-9]|14[1|5-9]|15[0-9]|16[2|5|6|7]|17[0-8]|18[0-9]|19[0-2|5-9])\d{8}))'
# # 电话号码正则
# phone: '((((010)|(0[2-9]\d{1,2}))[-\s]?)[1-9]\d{6,7}$)|((\+?0?86\-?)?1[3|4|5|7|8][0-9]\d{8}$)'
# # 邮箱正则
# email: '([a-zA-Z0-9_-])+@([a-zA-Z0-9_-])+((\.[a-zA-Z0-9_-]{1,4}){1,4})'
# # 身份证号码正则
# id: '[1-9]\d{5}(18|19|([23]\d))\d{2}((0[1-9])|(10|11|12))(([0-2][1-9])|10|20|30|31)\d{3}[0-9Xx]$)|(^[1-9]\d{5}\d{2}((0[1-9])|(10|11|12))(([0-2][1-9])|10|20|30|31)\d{2}'
# # 银行卡号
# bank_card: '(([13-79]\d{3})|(2[1-9]\d{2})|(20[3-9]\d)|(8[01-79]\d{2}))\s?\d{4}\s?\d{4}\s?\d{4}(\s?\d{3})?$'
# # 家庭住址正则
# address: '([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|自治区|自治州|区|县|镇|道|路|街|号|弄|条|室)){1,}'
# # ip地址正则
# ip: '((2[0-4]\d|25[0-5]|[01]?\d\d?)\.){3}(2[0-4]\d|25[0-5]|[01]?\d\d?)'
# # mac地址正则
# mac: '[A-F0-9]{2}([-:]?[A-F0-9]{2})([-:.]?[A-F0-9]{2})([-:]?[A-F0-9]{2})([-:.]?[A-F0-9]{2})([-:]?[A-F0-9]{2})$'
fields_white_list: "funcid,count1,count2,count3,count4,address"
# cdh下载配置
# cdh能执行hdfs命令的机器的ip
cdh_host_ip: "192.168.30.29"
# cdh能执行hdfs命令的机器的超级用户
cdh_host_user: "root"
# cdh能执行hdfs命令的机器的超级用户密码
cdh_host_password: "NuqUtwbJUBRmUwgh"
# cdh能执行hdfs下载命令的用户名
cdh_hdfs_user: "hdfs"
# 与配置文件保持一致
download_path: "/tmp/"
\ No newline at end of file
  • SonarQube analysis reported 114 issues

    • 🚫 24 critical
    • 62 major
    • 🔽 27 minor
    • 1 info

    Watch the comments in this conversation to review them.

    Top 30 extra issues

    Note: The following issues were found on lines that were not modified in the commit. Because these issues can't be reported as line comments, they are summarized here:

    1. 🚫 Add a default case to this switch. 📘
    2. 🚫 switch中每个case需要通过break/return等来终止 📘
    3. 🚫 switch块缺少default语句 📘
    4. 🚫 Define a constant instead of duplicating this literal " {\n" 11 times. 📘
    5. 🚫 Define a constant instead of duplicating this literal " "type": \n" 11 times. 📘
    6. 🚫 Define a constant instead of duplicating this literal " "string",\n" 6 times. 📘
    7. 🚫 Define a constant instead of duplicating this literal " "null"\n" 6 times. 📘
    8. 🚫 [Define a constant instead of duplicating this literal " ]\n" 11 times.](https://git.zorkdata.com/liaomingtao/transaction_log_desensitization/blob/f19557f2029178338a115e6e5e2588a6e95a943f/src/main/java/com/zorkdata/desensitization/avro/AvroSchemaDef.java#L23) 📘
    9. 🚫 Define a constant instead of duplicating this literal " },\n" 9 times. 📘
    10. 🚫 Define a constant instead of duplicating this literal " "null",\n" 5 times. 📘
    11. 🚫 Define a constant instead of duplicating this literal " {\n" 5 times. 📘
    12. 🚫 Define a constant instead of duplicating this literal " "type": "map",\n" 5 times. 📘
    13. 🚫 Define a constant instead of duplicating this literal " "values": "string"\n" 3 times. 📘
    14. 🚫 Define a constant instead of duplicating this literal " }\n" 5 times. 📘
    15. 🚫 Define a constant instead of duplicating this literal "序列化失败" 13 times. 📘
    16. 🚫 Refactor this method to reduce its Cognitive Complexity from 22 to the 15 allowed. 📘
    17. 🚫 Refactor this method to reduce its Cognitive Complexity from 22 to the 15 allowed. 📘
    18. 🚫 Refactor this method to reduce its Cognitive Complexity from 22 to the 15 allowed. 📘
    19. 🚫 常量【dataFormats】命名应全部大写并以下划线分隔 📘
    20. 🚫 Add a nested comment explaining why this method is empty, throw an UnsupportedOperationException or complete the implementation. 📘
    21. 🚫 Change this "try" to a try-with-resources. (sonar.java.source not set. Assuming 7 or greater.) 📘
    22. 🚫 Refactor this code to not throw exceptions in finally blocks. 📘
    23. 🚫 Refactor this code to not throw exceptions in finally blocks. 📘
    24. 🚫 Change this "try" to a try-with-resources. (sonar.java.source not set. Assuming 7 or greater.) 📘
    25. This block of commented-out lines of code should be removed. 📘
    26. 及时清理不再使用的代码段或配置信息。 📘
    27. Replace this use of System.out or System.err by a logger. 📘
    28. Replace this use of System.out or System.err by a logger. 📘
    29. String contains no format specifiers. 📘
    30. Replace this use of System.out or System.err by a logger. 📘
    • ... 82 more
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment