1. Optimize the HDFS download script
@@ -54,6 +54,7 @@ public class HdfsLogDesensitization implements Serializable {
     private static final String AVRO_OUTPUT_SCHEMA = "avro.output.schema";
     private static final String HOSTNAME = "hostname";
     private String jobName;
+    private int parallelism;
     private int maxFileNum;
     private String avroOutputSchema;
@@ -70,6 +71,7 @@ public class HdfsLogDesensitization implements Serializable {
     private Map<String, String> conf;

     public HdfsLogDesensitization initConf(Map<String, String> conf) {
         this.jobName = String.valueOf(conf.get(ConfigConstants.JOB_NAME));
+        if (!conf.containsKey(PARALLELISM_KEY)) {
+            this.parallelism = DEFAULT_PARALLELISM;
+        } else {
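The hunk is cut off after the else branch. A minimal sketch of how the remainder presumably reads; PARALLELISM_KEY and DEFAULT_PARALLELISM appear in the diff, but the parsing of the configured value is an assumption, not something the diff shows:

    // Hypothetical completion of the truncated initConf branch above.
    if (!conf.containsKey(PARALLELISM_KEY)) {
        this.parallelism = DEFAULT_PARALLELISM;
    } else {
        // Assumed implementation detail: parse the configured value.
        this.parallelism = Integer.parseInt(conf.get(PARALLELISM_KEY));
    }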
@@ -107,81 +109,82 @@ public class HdfsLogDesensitization implements Serializable {
         RegularExpressions regularExpressions = new RegularExpressions(conf);
         DesensitizationFunction desensitizationFunction = new DesensitizationFunction(regularExpressions);
         List<String> logFiles = filterHdfsLogFiles(hdfsSrc, hdfsUri, hdfsUser);
-        // String logFileListString = list2String(logFiles);
-        List<String> fileStringList = changeList(logFiles);
-        for (String logFileListString : fileStringList) {
+        String logFileListString = list2String(logFiles);
+        // List<String> fileStringList = changeList(logFiles);
+        // DataSource<Tuple2<Object, Object>> dataSource = null;
+        // for (String logFileListString : fileStringList) {
+        //
+        // }
         /**
          * Read the HDFS log files and deserialize them from Avro.
          */
         HadoopInputFormat<Object, Object> hadoopInputFormat = new HadoopInputFormat<>
                 (new AvroInputFormat(), Object.class, Object.class, jobConf);
         AvroInputFormat.addInputPaths(hadoopInputFormat.getJobConf(), logFileListString);
         DataSource<Tuple2<Object, Object>> hdfsLogInput = env.createInput(hadoopInputFormat);
+        hdfsLogInput.union(hdfsLogInput);
         /**
          * Desensitization operator.
          */
         FlatMapOperator<Tuple2<Object, Object>, Object> maskFlatMapOperator =
                 hdfsLogInput.flatMap(new FlatMapFunction<Tuple2<Object, Object>, Object>() {
                     @Override
                     public void flatMap(Tuple2<Object, Object> value, Collector<Object> collector) {
                         LogData logData = JSON.parseObject(value.getField(0).toString(),
                                 new TypeReference<LogData>() {
                                 });
                         // Filter on the core information of the log event.
                         if (null != core && logData.getDimensions().get(HOSTNAME).contains(core)) {
                             // Filter on the log event's timestamp.
                             Long timestamp = DateUtil.utc2timestamp(logData.getTimestamp());
                             List<String> dataFormats = new ArrayList<String>() {{
                                 add(",");
                                 add(".");
                                 add("@");
                                 add("-");
                             }};
                             // Map desensitization = desensitizationFunction.
                             //         desensitization(logData.getNormalFields(), fieldsWhiteList, dataFormats);
                             // logData.setNormalFields(desensitization);
                             // log.error("转换数据成功,转换后数据:{}", JSON.toJSONString(logData));
                             // collector.collect(logData);
                             if (null != timestamp && timestamp.compareTo(startTimestamp) >= 0 &&
                                     timestamp.compareTo(endTimestamp) <= 0) {
                                 Map desensitization = desensitizationFunction.
                                         desensitization(logData.getNormalFields(), fieldsWhiteList, dataFormats);
                                 logData.setNormalFields(desensitization);
                                 log.error("转换数据成功,转换后数据:{}", JSON.toJSONString(logData));
                                 collector.collect(logData);
                             } else {
                                 // log.error("转换数据失败,原始数据:{}", JSON.toJSONString(logData));
                             }
                         }
                     }
                 });
-        }
         // Resolve the output directory on the target HDFS.
         String logFileName = "output.avro";
         String filePath = hdfsDest + logFileName.replace(GeneralConstants.AVRO_SUFFIX,
                 GeneralConstants.EMPTY_STR);
         HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat<>(new AvroOutputFormat(), jobConf);
         FileOutputFormat.setOutputPath(jobConf, new Path(filePath));
         /**
          * Avro serialization operator.
          */
         maskFlatMapOperator.map(new MapFunction<Object, Tuple2<AvroWrapper<LogData>, NullWritable>>() {
             @Override
             public Tuple2<AvroWrapper<LogData>, NullWritable> map(Object value) throws Exception {
                 AvroKey<LogData> key = new AvroKey<>((LogData) value);
                 Tuple2<AvroWrapper<LogData>, NullWritable> tuple = new Tuple2<>(key, NullWritable.get());
                 return tuple;
             }
         }).output(hadoopOutputFormat);
         try {
-            env.execute("国泰交易日志脱敏job");
+            env.execute(jobName);
         } catch (Exception e) {
             System.out.println(e.getMessage());
             log.error(String.valueOf(e));
         }
         // for (String logFile : logFiles) {
         //     /**
         //      * Read the HDFS log files and deserialize them from Avro.
         ...
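The core of the change is visible in the hunk above: instead of looping over the output of changeList and wiring up one read per file, the file list is joined into a single string and handed to one input format (Hadoop's addInputPaths treats its argument as a comma-separated list of paths). The list2String helper itself is not shown in the diff; a minimal sketch of what it presumably does, under that comma-joining assumption (the class name here is hypothetical):

    import java.util.List;

    // Hypothetical sketch of the list2String helper referenced in the diff;
    // its body is not part of the change. FileInputFormat.addInputPaths
    // accepts a comma-separated list of paths, so joining the file names
    // with commas lets a single input format read every file in one job.
    public final class PathJoiner {

        private PathJoiner() {
        }

        public static String list2String(List<String> logFiles) {
            // String.join handles the empty list (returns "") and adds no trailing comma.
            return String.join(",", logFiles);
        }
    }

Two smaller points worth flagging in review: the added hdfsLogInput.union(hdfsLogInput) discards its result (Flink's DataSet.union returns a new DataSet, so the statement has no effect), and the success message is logged at error level.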
SonarQube analysis reported 112 issues:

- 🚫 21 critical
- ⚠ 72 major
- 🔽 18 minor
- ℹ 1 info

Watch the comments in this conversation to review them.
Top 30 extra issues
Note: The following issues were found on lines that were not modified in the commit. Because these issues can't be reported as line comments, they are summarized here:
- 🚫 Add a default case to this switch.
- 🚫 Each case in a switch must be terminated with break/return or the like.
- 🚫 The switch block is missing a default statement.
- 🚫 Define a constant instead of duplicating this literal " {\n" 11 times.
- 🚫 Define a constant instead of duplicating this literal " "type": \n" 11 times.
- 🚫 Define a constant instead of duplicating this literal " "string",\n" 6 times.
- 🚫 Define a constant instead of duplicating this literal " "null"\n" 6 times.
- 🚫 Define a constant instead of duplicating this literal " ]\n" 11 times. (https://git.zorkdata.com/liaomingtao/transaction_log_desensitization/blob/5f09be4271e1918927fe8feee24f14215090ac45/src/main/java/com/zorkdata/desensitization/avro/AvroSchemaDef.java#L23)
- 🚫 Define a constant instead of duplicating this literal " },\n" 9 times.
- 🚫 Define a constant instead of duplicating this literal " "null",\n" 5 times.
- 🚫 Define a constant instead of duplicating this literal " {\n" 5 times.
- 🚫 Define a constant instead of duplicating this literal " "type": "map",\n" 5 times.
- 🚫 Define a constant instead of duplicating this literal " "values": "string"\n" 3 times.
- 🚫 Define a constant instead of duplicating this literal " }\n" 5 times.
- 🚫 Define a constant instead of duplicating this literal "序列化失败" 13 times.
- 🚫 Refactor this method to reduce its Cognitive Complexity from 22 to the 15 allowed.
- 🚫 Refactor this method to reduce its Cognitive Complexity from 22 to the 15 allowed.
- 🚫 Add a nested comment explaining why this method is empty, throw an UnsupportedOperationException or complete the implementation.
- 🚫 Change this "try" to a try-with-resources. (sonar.java.source not set. Assuming 7 or greater.)
- 🚫 Refactor this code to not throw exceptions in finally blocks.
- 🚫 Refactor this code to not throw exceptions in finally blocks.
- ⚠ This block of commented-out lines of code should be removed.
- ⚠ Promptly clean up code segments and configuration that are no longer used.
- ⚠ Replace this use of System.out or System.err by a logger.
- ⚠ Replace this use of System.out or System.err by a logger.
- ⚠ String contains no format specifiers.
- ⚠ Replace this use of System.out or System.err by a logger.
- ⚠ Rename "jsonObject" which hides the field declared at line 39.
- ⚠ Remove this expression which always evaluates to "true"
- ⚠ Remove this expression which always evaluates to "true"
- ... 72 more
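Several of the findings above recur: System.out instead of a logger, switch blocks without a default or a terminating break/return, and a plain try where try-with-resources is expected. A minimal sketch of the corresponding fixes, assuming SLF4J for logging; the class and method names are hypothetical, not taken from the project:

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    // Illustrative fixes for the recurring SonarQube findings listed above.
    public class SonarFixSketch {

        private static final Logger LOG = LoggerFactory.getLogger(SonarFixSketch.class);

        // "Replace this use of System.out or System.err by a logger":
        // route diagnostics through the logger, passing the exception so
        // the stack trace is preserved.
        void logFailure(Exception e) {
            LOG.error("job execution failed", e);
        }

        // "Add a default case to this switch" / "each case must be
        // terminated with break/return": every case returns, and the
        // default covers unexpected input.
        String severityLabel(int level) {
            switch (level) {
                case 0:
                    return "info";
                case 1:
                    return "minor";
                case 2:
                    return "major";
                default:
                    return "unknown";
            }
        }

        // "Change this try to a try-with-resources": the reader is closed
        // automatically, even when readLine throws.
        String readFirstLine(String path) throws IOException {
            try (BufferedReader reader = Files.newBufferedReader(Paths.get(path))) {
                return reader.readLine();
            }
        }
    }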