Commit 7bdc64f5 authored by DeleMing

<dev>

1. Modified the project; initialization now completes after the rework
parent 85d8ca72
Pipeline #21097 failed with stages in 2 minutes and 27 seconds
package com.zorkdata.desensitization;
import com.alibaba.fastjson.JSON;
import java.util.ArrayList;
import java.util.List;
/**
* @author: LiaoMingtao
* @date: 2020/10/29
*/
public class Test {
public static void main(String[] args) {
// String a = "{a},{b}";
// String[] pathStrings = getPathStrings(a);
// for (String b: pathStrings) {
// System.out.println(b);
// }
String a = "a,b,";
String[] pathStrings = getPathStrings(a);
for (String b: pathStrings) {
System.out.println(b);
System.out.printf("--");
}
}
/**
 * Splits a comma-separated path string while keeping commas that appear inside {...}
 * glob groups (so "{a,b},c" yields ["{a,b}", "c"]).
 */
private static String[] getPathStrings(String commaSeparatedPaths) {
int length = commaSeparatedPaths.length();
int curlyOpen = 0;
int pathStart = 0;
boolean globPattern = false;
List<String> pathStrings = new ArrayList<>();
for(int i = 0; i < length; ++i) {
char ch = commaSeparatedPaths.charAt(i);
switch(ch) {
case ',':
if (!globPattern) {
pathStrings.add(commaSeparatedPaths.substring(pathStart, i));
pathStart = i + 1;
}
break;
case '{':
++curlyOpen;
if (!globPattern) {
globPattern = true;
}
break;
case '}':
--curlyOpen;
if (curlyOpen == 0 && globPattern) {
globPattern = false;
}
}
}
pathStrings.add(commaSeparatedPaths.substring(pathStart, length));
System.out.println(JSON.toJSONString(pathStrings));
return (String[])pathStrings.toArray(new String[0]);
}
}
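A quick sketch of what the splitter above yields (hypothetical checks, assuming the same getPathStrings logic): commas inside {...} glob groups are preserved, commas outside still split, and a trailing comma produces a trailing empty element.
// Hypothetical examples, not part of the original test class:
String[] globbed = getPathStrings("{a,b},c");   // -> ["{a,b}", "c"]
String[] trailing = getPathStrings("a,b,");     // -> ["a", "b", ""]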
package com.zorkdata.desensitization;
import com.alibaba.fastjson.JSON;
import com.zorkdata.desensitization.config.RegularExpressions;
import com.zorkdata.desensitization.function.DesensitizationFunction;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.*;
/**
* @author: LiaoMingtao
* @date: 2020/11/7
*/
public class TestDesensitization {
public static void main(String[] args) {
RegularExpressions regularExpressions = new RegularExpressions();
regularExpressions.setIdRegExp("[1-9]\\d{5}(18|19|([23]\\d))\\d{2}((0[1-9])|(10|11|12))(([0-2][1-9])|10|20|30|31)\\d{3}[0-9Xx]|[1-9]\\d{5}\\d{2}((0[1-9])|(10|11|12))(([0-2][1-9])|10|20|30|31)\\d{2}");
regularExpressions.setBankCardRegExp("(([13-79]\\d{3})|(2[1-9]\\d{2})|(20[3-9]\\d)|(8[01-79]\\d{2}))\\s?\\d{4}\\s?\\d{4}\\s?\\d{4}(\\s?\\d{3})?");
regularExpressions.setPhoneRegExp("0\\d{2,3}-[1-9]\\d{6,7}");
regularExpressions.setMobileRegExp("((\\+|00)86)?((134\\d{4})|((13[0-3|5-9]|14[1|5-9]|15[0-9]|16[2|5|6|7]|17[0-8]|18[0-9]|19[0-2|5-9])\\d{8}))");
regularExpressions.setAddressRegExp("([\\u4E00-\\u9FA5A-Za-z0-9_]+(省|市|自治区|自治州|区|县|镇|道|路|街|号|弄|条|室|港)){1,}");
regularExpressions.setNameRegExp("[赵|钱|孙|李|周|吴|郑|王|冯|陈|褚|卫|蒋|沈|韩|杨|朱|秦|尤|许|何|吕|施|张|孔|曹|严|华|金|魏|陶|姜|戚|谢|邹|喻|柏|水|窦|章|云|苏|潘|葛|奚|范|彭|郎|鲁|韦|昌|马|苗|凤|花|方|俞|任|袁|柳|酆|鲍|史|唐|费|廉|岑|薛|雷|贺|倪|汤|滕|殷|罗|毕|郝|邬|安|常|乐|于|时|傅|皮|卞|齐|康|伍|余|元|卜|顾|孟|平|黄|和|穆|萧|尹|姚|邵|湛|汪|祁|毛|禹|狄|米|贝|明|臧|计|伏|成|戴|谈|宋|茅|庞|熊|纪|舒|屈|项|祝|董|梁|杜|阮|蓝|闵|席|季|麻|强|贾|路|娄|危|江|童|颜|郭|梅|盛|林|刁|锺|徐|邱|骆|高|夏|蔡|田|樊|胡|凌|霍|虞|万|支|柯|昝|管|卢|莫|经|房|裘|缪|干|解|应|宗|丁|宣|贲|邓|郁|单|杭|洪|包|诸|左|石|崔|吉|钮|龚|程|嵇|邢|滑|裴|陆|荣|翁|荀|羊|於|惠|甄|麴|家|封|芮|羿|储|靳|汲|邴|糜|松|井|段|富|巫|乌|焦|巴|弓|牧|隗|山|谷|车|侯|宓|蓬|全|郗|班|仰|秋|仲|伊|宫|宁|仇|栾|暴|甘|钭|历|戎|祖|武|符|刘|景|詹|束|龙|叶|幸|司|韶|郜|黎|溥|印|宿|白|怀|蒲|邰|从|鄂|索|咸|籍|卓|蔺|屠|蒙|池|乔|阳|郁|胥|能|苍|双|闻|莘|党|翟|谭|贡|劳|逄|姬|申|扶|堵|冉|宰|郦|雍|却|桑|桂|濮|牛|寿|通|边|扈|燕|冀|浦|尚|农|温|别|庄|晏|柴|瞿|充|慕|连|茹|习|宦|艾|鱼|容|向|古|易|慎|戈|廖|庾|终|暨|居|衡|步|都|耿|满|弘|匡|国|文|寇|广|禄|阙|东|欧|沃|利|蔚|越|夔|隆|师|巩|厍|聂|晁|勾|敖|融|冷|訾|辛|阚|那|简|饶|空|曾|毋|沙|乜|养|鞠|须|丰|巢|关|蒯|相|荆|红|游|竺|权|司马|上官|欧阳|夏侯|诸葛|闻人|东方|赫连|皇甫|尉迟|公羊|澹台|公冶宗政|濮阳|淳于|单于|太叔|申屠|公孙|仲孙|轩辕|令狐|钟离|宇文|长孙|慕容|司徒|司空|召|有|舜|岳|黄辰|寸|贰|皇|侨|彤|竭|端|赫|实|甫|集|象|翠|狂|辟|典|良|函|芒|苦|其|京|中|夕|乌孙|完颜|富察|费莫|蹇|称|诺|来|多|繁|戊|朴|回|毓|鉏|税|荤|靖|绪|愈|硕|牢|买|但|巧|枚|撒|泰|秘|亥|绍|以|壬|森|斋|释|奕|姒|朋|求|羽|用|占|真|穰|翦|闾|漆|贵|代|贯|旁|崇|栋|告|休|褒|谏|锐|皋|闳|在|歧|禾|示|是|委|钊|频|嬴|呼|大|威|昂|律|冒|保|系|抄|定|化|莱|校|么|抗|祢|綦|悟|宏|功|庚|务|敏|捷|拱|兆|丑|丙|畅|苟|随|类|卯|俟|友|答|乙|允|甲|留|尾|佼|玄|乘|裔|延|植|环|矫|赛|昔|侍|度|旷|遇|偶|前|由|咎|塞|敛|受|泷|袭|衅|叔|圣|御|夫|仆|镇|藩|邸|府|掌|首|员|焉|戏|可|智|尔|凭|悉|进|笃|厚|仁|业|肇|资|合|仍|九|衷|哀|刑|俎|仵|圭|夷|徭|蛮|汗|孛|乾|帖|罕|洛|淦|洋|邶|郸|郯|邗|邛|剑|虢|隋|蒿|茆|菅|苌|树|桐|锁|钟|机|盘|铎|斛|玉|线|针|箕|庹|绳|磨|蒉|瓮|弭|刀|疏|牵|浑|恽|势|世|仝|同|蚁|止|戢|睢|冼|种|涂|肖|己|泣|潜|卷|脱|谬|蹉|赧|浮|顿|说|次|错|念|夙|斯|完|丹|表|聊|源|姓|吾|寻|展|出|不|户|闭|才|无|书|学|愚|本|性|雪|霜|烟|寒|少|字|桥|板|斐|独|千|诗|嘉|扬|善|揭|祈|析|赤|紫|青|柔|刚|奇|拜|佛|陀|弥|阿|素|长|僧|隐|仙|隽|宇|祭|酒|淡|塔|琦|闪|始|星|南|天|接|波|碧|速|禚|腾|潮|镜|似|澄|潭|謇|纵|渠|奈|风|春|濯|沐|茂|英|兰|檀|藤|枝|检|生|折|登|驹|骑|貊|虎|肥|鹿|雀|野|禽|飞|节|宜|鲜|粟|栗|豆|帛|官|布|衣|藏|宝|钞|银|门|盈|庆|喜|及|普|建|营|巨|望|希|道|载|声|漫|犁|力|贸|勤|革|改|兴|亓|睦|修|信|闽|北|守|坚|勇|汉|练|尉|士|旅|五|令|将|旗|军|行|奉|敬|恭|仪|母|堂|丘|义|礼|慈|孝|理|伦|卿|问|永|辉|位|让|尧|依|犹|介|承|市|所|苑|杞|剧|第|零|谌|招|续|达|忻|六|鄞|战|迟|候|宛|励|粘|萨|邝|覃|辜|初|楼|城|区|局|台|原|考|妫|纳|泉|老|清|德|卑|过|麦|曲|竹|百|福|言|第五|佟|爱|年|笪|谯|哈|墨|连|南宫|赏|伯|佴|佘|牟|商|西门|东门|左丘|梁丘|琴|后|况|亢|缑|帅|微生|羊舌|海|归|呼延|南门|东郭|百里|钦|鄢|汝|法|闫|楚|晋|谷梁|宰父|夹谷|拓跋|壤驷|乐正|漆雕|公西|巫马|端木|颛孙|子车|督|仉|司寇|亓官|三小|鲜于|锺离|盖|逯|库|郏|逢|阴|薄|厉|稽|闾丘|公良|段干|开|光|操|瑞|眭|泥|运|摩|伟|铁|迮][\\u4e00-\\u9fa5]");
regularExpressions.setMacRegExp("[A-F0-9]{2}([-:]?[A-F0-9]{2})([-:.]?[A-F0-9]{2})([-:]?[A-F0-9]{2})([-:.]?[A-F0-9]{2})([-:]?[A-F0-9]{2})");
regularExpressions.setEmailRegExp("([a-zA-Z0-9_-])+@([a-zA-Z0-9_-])+((\\.[a-zA-Z0-9_-]{1,4}){1,4})");
regularExpressions.setIpRegExp("((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)");
regularExpressions.setPositionExp("仓(\\d+(\\.)?\\d+)(万|千|手|股)");
DesensitizationFunction desensitizationFunction = new DesensitizationFunction(regularExpressions);
String[] fieldsWhiteListArray = "funcid,count1,count2,count3,count".split(",");
List<String> whiteList = new ArrayList<>(fieldsWhiteListArray.length);
Collections.addAll(whiteList, fieldsWhiteListArray);
List<String> dataFormats = new ArrayList<String>(){{
add(",");
add(".");
add("@");
add("-");
add(":");
}};
Map<String, String> map = new HashMap<>(4);
map.put("name", "廖鸣韬");
map.put("name2", "王海鹰");
map.put("mobile", "18570332683");
map.put("phone", "0730-7512340");
map.put("email", "liaomingtao@zork.com.cn");
map.put("id", "430621194711110423");
map.put("bankId", "6222600260001072123");
map.put("address", "上海市浦东新区张江路");
map.put("ip", "192.168.70.1");
map.put("mac", "00:0C:29:01:98:27");
map.put("funcid", "1111");
map.put("count1", "普通字段");
map.put("count2", "普通字段4");
map.put("count3", "普通字段5");
map.put("count4", "普通字段6");
// overwrites the "count4" entry above; only this value is kept
map.put("count4", "持空仓1000万");
map.put("message", "廖鸣韬|18570332683|0730-7638844|liaomingtao@zork.com.cn|430621194711110423|6222600260001072123|上海市浦东新区张江路|192.168.70.1|00:0C:29:01:98:27|1111");
// overwrites the "message" entry above; only this value is desensitized
map.put("message", "13811110000|110101199003075517|上海市浦东新区张江微电子港|zorkdata@163.com|123456789|wanghaiying123-王海鹰-192.168.1.1-00-50-56-C0-00-08-6227002470170278192-持仓1000万");
Map<String, String> stringStringMap = desensitizationFunction.desensitization(map, whiteList, dataFormats);
System.out.println(JSON.toJSONString(stringStringMap));
}
/**
 * Deep copy via Java serialization.
 *
 * @param obj the map to copy (keys and values must be Serializable)
 * @return a deep copy of obj; an empty map if obj is empty; null if serialization fails
 */
public static HashMap<String, Object> clone(Map<String, Object> obj) {
HashMap<String, Object> clonedObj = null;
if (obj.isEmpty()) {
clonedObj = new HashMap<>(50);
} else {
try {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
ObjectOutputStream oos = new ObjectOutputStream(baos);
oos.writeObject(obj);
oos.close();
ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
ObjectInputStream ois = new ObjectInputStream(bais);
clonedObj = (HashMap<String, Object>) ois.readObject();
ois.close();
} catch (Exception e) {
e.printStackTrace();
}
}
return clonedObj;
}
}
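A minimal usage sketch of the serialization-based deep copy above; the nested map value is illustrative, and the assumption is that every key and value in the source map implements Serializable (otherwise writeObject fails and the method returns null).
// Hypothetical usage of clone(); HashMap and String are both Serializable.
Map<String, Object> source = new HashMap<>();
source.put("nested", new HashMap<>(Collections.singletonMap("k", "v")));
HashMap<String, Object> copy = TestDesensitization.clone(source);
((Map<String, Object>) copy.get("nested")).put("k", "changed"); // source stays untouched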
package com.zorkdata.desensitization;
import com.zorkdata.desensitization.config.JobConfig;
import com.zorkdata.desensitization.config.JobInitConfig;
import com.zorkdata.desensitization.exception.ZorkException;
import com.zorkdata.desensitization.hadoop.HdfsLogDesensitization;
import com.zorkdata.desensitization.utils.YmlUtils;
import lombok.extern.slf4j.Slf4j;
import java.util.Map;
...@@ -16,30 +16,23 @@ import java.util.Map;
@Slf4j
public class TransactionLogDesensitization {
private static final int PARAM_LENGTH = 2;
public static void main(String[] args) throws Exception {
long start = System.currentTimeMillis();
// validate the arguments
if (args.length != PARAM_LENGTH) {
String error = "参数缺失,请输入配置文件,例如: " +
"--conf E:\\Codes\\fork\\transaction_log_desensitization\\src\\main\\resources\\application.yml";
log.error(error);
throw new ZorkException(error);
}
Map<String, String> conf = YmlUtils.getParams(args);
JobInitConfig jobInitConfig = new JobInitConfig(conf);
JobConfig jobConfig = new JobConfig(jobInitConfig);
new HdfsLogDesensitization().initJobConfig(jobConfig).desensitizationHdfsLog();
Thread.sleep(1000);
long stop = System.currentTimeMillis();
log.info("耗时统计: {} ms", stop - start);
}
}
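With PARAM_LENGTH now 2, the job expects exactly the --conf flag followed by a YAML path; a hypothetical invocation (the path is illustrative only):
// Hypothetical launch; only "--conf <application.yml>" is passed now.
TransactionLogDesensitization.main(new String[]{
"--conf", "/opt/desensitization/application.yml"
});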
package com.zorkdata.desensitization.config;
import com.zorkdata.desensitization.constans.GeneralConstants;
import lombok.Data;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
/**
* @author: LiaoMingtao
* @date: 2021/2/23
*/
@Data
public class JobConfig implements Serializable {
private static final long serialVersionUID = 693924914570906529L;
public JobConfig(JobInitConfig jobInitConfig) {
this.setJobName(jobInitConfig.getJobName());
this.setSourceParallelism(jobInitConfig.getSourceParallelism());
this.setTransformerParallelism(jobInitConfig.getTransformerParallelism());
this.setSinkParallelism(jobInitConfig.getSinkParallelism());
this.setAvroOutputSchema(jobInitConfig.getAvroOutputSchema());
this.setHdfsUri(jobInitConfig.getHdfsUri());
this.setHdfsUser(jobInitConfig.getHdfsUser());
this.setHdfsSrc(jobInitConfig.getHdfsUri() + jobInitConfig.getHdfsSrc());
this.setHdfsDest(jobInitConfig.getHdfsUri() + jobInitConfig.getHdfsDest());
this.setMatchHostname(jobInitConfig.getMatchHostname());
this.setStartTime(jobInitConfig.getStartTime());
this.setEndTime(jobInitConfig.getEndTime());
this.setStartTimestamp(jobInitConfig.getStartTimestamp());
this.setEndTimestamp(jobInitConfig.getEndTimestamp());
String password = jobInitConfig.getPassword() + GeneralConstants.COMMA;
String name = jobInitConfig.getName() + GeneralConstants.COMMA;
String mobile = jobInitConfig.getMobile() + GeneralConstants.COMMA;
String phone = jobInitConfig.getPhone() + GeneralConstants.COMMA;
String email = jobInitConfig.getEmail() + GeneralConstants.COMMA;
String id = jobInitConfig.getId() + GeneralConstants.COMMA;
String address = jobInitConfig.getAddress() + GeneralConstants.COMMA;
String bankCard = jobInitConfig.getBankCard() + GeneralConstants.COMMA;
String ip = jobInitConfig.getIp() + GeneralConstants.COMMA;
String mac = jobInitConfig.getMac() + GeneralConstants.COMMA;
String position = jobInitConfig.getPosition() + GeneralConstants.COMMA;
String allKey = password + name + mobile + phone + email + id + address + bankCard + ip + mac + position;
String[] keys = allKey.split(GeneralConstants.COMMA);
List<String> keyList = new ArrayList<>();
for (String key : keys) {
if (!GeneralConstants.EMPTY_STR.equals(key)) {
keyList.add(key);
}
}
keyList = keyList.stream().distinct().collect(Collectors.toList());
this.setDesensitizationKeyList(keyList);
}
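The comma-join / split / distinct sequence above could also be written with an ordered set; a hypothetical alternative only, assuming the same JobInitConfig getters and GeneralConstants values, and ignoring possible null groups:
// Hypothetical equivalent of the key-list construction above: collect the configured
// key strings, drop empty entries, and de-duplicate while keeping insertion order.
Set<String> keys = new LinkedHashSet<>();
for (String group : Arrays.asList(jobInitConfig.getPassword(), jobInitConfig.getName(),
jobInitConfig.getMobile(), jobInitConfig.getPhone(), jobInitConfig.getEmail(),
jobInitConfig.getId(), jobInitConfig.getAddress(), jobInitConfig.getBankCard(),
jobInitConfig.getIp(), jobInitConfig.getMac(), jobInitConfig.getPosition())) {
for (String key : group.split(GeneralConstants.COMMA)) {
if (!GeneralConstants.EMPTY_STR.equals(key)) {
keys.add(key);
}
}
}
this.setDesensitizationKeyList(new ArrayList<>(keys));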
private String jobName;
  • Remove this unused "jobName" private field.
private int sourceParallelism;
  • Remove this unused "sourceParallelism" private field.
private int transformerParallelism;
  • Remove this unused "transformerParallelism" private field.
private int sinkParallelism;
  • Remove this unused "sinkParallelism" private field.
private String avroOutputSchema;
  • Remove this unused "avroOutputSchema" private field.
private String hdfsUri;
  • Remove this unused "hdfsUri" private field.
private String hdfsUser;
  • Remove this unused "hdfsUser" private field.
private String hdfsSrc;
  • Remove this unused "hdfsSrc" private field.
private String hdfsDest;
  • Remove this unused "hdfsDest" private field.
private String matchHostname;
  • Remove this unused "matchHostname" private field.
private String startTime;
  • Remove this unused "startTime" private field.
private String endTime;
  • Remove this unused "endTime" private field.
private long startTimestamp;
  • Remove this unused "startTimestamp" private field.
private long endTimestamp;
  • Remove this unused "endTimestamp" private field.
private List<String> desensitizationKeyList;
  • Remove this unused "desensitizationKeyList" private field.
}
package com.zorkdata.desensitization.config;
import com.zorkdata.desensitization.avro.AvroSchemaDef;
import com.zorkdata.desensitization.constans.ConfigConstants;
import com.zorkdata.desensitization.utils.DateUtils;
import lombok.Data;
import org.apache.avro.Schema;
import org.apache.commons.collections.MapUtils;
import java.io.Serializable;
import java.util.Map;
/**
* @author: LiaoMingtao
* @date: 2021/2/23
*/
@Data
public class JobInitConfig implements Serializable {
private static final long serialVersionUID = -1959581564693543666L;
public JobInitConfig(Map<String, String> conf) {
this.jobName = String.valueOf(conf.get(ConfigConstants.JOB_NAME));
this.sourceParallelism = Integer.parseInt(conf.get(ConfigConstants.SOURCE_PARALLELISM));
this.transformerParallelism = Integer.parseInt(conf.get(ConfigConstants.TRANSFORMER_PARALLELISM));
this.sinkParallelism = Integer.parseInt(conf.get(ConfigConstants.SINK_PARALLELISM));
this.avroOutputSchema = new Schema.Parser().parse(AvroSchemaDef.ZORK_LOG_SCHEMA).toString(true);
this.hdfsUri = String.valueOf(conf.get(ConfigConstants.HDFS_URI)).trim();
this.hdfsUser = String.valueOf(conf.get(ConfigConstants.HDFS_USER)).trim();
this.hdfsSrc = String.valueOf(conf.get(ConfigConstants.HDFS_SRC)).trim();
this.hdfsDest = String.valueOf(conf.get(ConfigConstants.HDFS_DEST)).trim();
this.matchHostname = String.valueOf(conf.get(ConfigConstants.MATCH_HOSTNAME)).trim();
this.startTime = String.valueOf(conf.get(ConfigConstants.START_TIME));
this.endTime = String.valueOf(conf.get(ConfigConstants.END_TIME));
this.startTimestamp = DateUtils.time2Timestamp(startTime);
this.endTimestamp = DateUtils.time2Timestamp(endTime);
this.name = MapUtils.getString(conf, ConfigConstants.NAME_KEY);
this.mobile = MapUtils.getString(conf, ConfigConstants.MOBILE_KEY);
this.phone = MapUtils.getString(conf, ConfigConstants.PHONE_KEY);
this.email = MapUtils.getString(conf, ConfigConstants.EMAIL_KEY);
this.id = MapUtils.getString(conf, ConfigConstants.ID_KEY);
this.bankCard = MapUtils.getString(conf, ConfigConstants.BANK_CARD_KEY);
this.address = MapUtils.getString(conf, ConfigConstants.ADDRESS_KEY);
this.ip = MapUtils.getString(conf, ConfigConstants.IP_KEY);
this.mac = MapUtils.getString(conf, ConfigConstants.MAC_KEY);
this.position = MapUtils.getString(conf, ConfigConstants.POSITION_KEY);
this.password = MapUtils.getString(conf, ConfigConstants.PASSWORD_KEY);
}
private String jobName;
  • Remove this unused "jobName" private field.
private int sourceParallelism;
  • Remove this unused "sourceParallelism" private field.
private int transformerParallelism;
  • Remove this unused "transformerParallelism" private field.
private int sinkParallelism;
  • Remove this unused "sinkParallelism" private field.
private String avroOutputSchema;
  • Remove this unused "avroOutputSchema" private field.
private String hdfsUri;
  • Remove this unused "hdfsUri" private field.
private String hdfsUser;
  • Remove this unused "hdfsUser" private field.
private String hdfsSrc;
  • Remove this unused "hdfsSrc" private field.
private String hdfsDest;
  • Remove this unused "hdfsDest" private field.
private String matchHostname;
  • Remove this unused "matchHostname" private field.
private String startTime;
private String endTime;
private long startTimestamp;
  • Remove this unused "startTimestamp" private field.
private long endTimestamp;
  • Remove this unused "endTimestamp" private field.
private String name;
  • Remove this unused "name" private field.
private String mobile;
  • Remove this unused "mobile" private field.
private String phone;
  • Remove this unused "phone" private field.
private String email;
  • Remove this unused "email" private field.
/**
 * ID card number
 */
private String id;
  • Remove this unused "id" private field.
private String bankCard;
  • Remove this unused "bankCard" private field.
private String address;
  • Remove this unused "address" private field.
private String ip;
  • Remove this unused "ip" private field.
private String mac;
  • Remove this unused "mac" private field.
/**
 * position (holdings) information
 */
private String position;
  • Remove this unused "position" private field.
private String password;
  • Remove this unused "password" private field.
}
...@@ -21,10 +21,23 @@ public final class ConfigConstants {
public static final String HDFS_USER = "hdfs_user";
public static final String HDFS_SRC = "hdfs_src";
public static final String HDFS_DEST = "hdfs_dest";
public static final String MATCH_HOSTNAME = "match.hostname";
public static final String CORE = "core";
public static final String START_TIME = "start_time";
public static final String END_TIME = "end_time";
public static final String NAME_KEY = "name";
public static final String MOBILE_KEY = "mobile";
public static final String PHONE_KEY = "phone";
public static final String EMAIL_KEY = "email";
public static final String ID_KEY = "id";
public static final String BANK_CARD_KEY = "bank_card";
public static final String ADDRESS_KEY = "address";
public static final String IP_KEY = "ip";
public static final String MAC_KEY = "mac";
public static final String POSITION_KEY = "position";
public static final String PASSWORD_KEY = "password";
  • Remove this hard-coded password.
public static final String SERVERS = "servers";
public static final String ZOOKEEPER = "zookeeper";
public static final String TOPIC = "topic";
...
package com.zorkdata.desensitization.function;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import com.zorkdata.desensitization.config.JobConfig;
import com.zorkdata.desensitization.schmea.LogData;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
/**
 * @author: LiaoMingtao
 * @date: 2021/2/24
 */
public class DesensitizationFunction<T, R> extends RichFlatMapFunction<Tuple2<Object, Object>, LogData> {
  • T is not used in the class. R is not used in the class.
private JobConfig jobConfig;
  • Remove this unused "jobConfig" private field.
public DesensitizationFunction(JobConfig jobConfig) {
this.jobConfig = jobConfig;
}
@Override
public void flatMap(Tuple2<Object, Object> value, Collector<LogData> collector) throws Exception {
LogData logData = JSON.parseObject(value.getField(0).toString(), new TypeReference<LogData>() {
});
collector.collect(logData);
}
}
package com.zorkdata.desensitization.function;
import com.zorkdata.desensitization.config.RegularExpressions;
import java.io.Serializable;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author: LiaoMingtao
* @date: 2020/10/26
*/
public class DesensitizationFunctionOld implements Serializable {
private static final long serialVersionUID = 1L;
private RegularExpressions regularExpressions;
  • Remove this unused "regularExpressions" private field.
private List<Pattern> patterns = new ArrayList<>();
@Deprecated
public DesensitizationFunctionOld(RegularExpressions regularExpressions) {
  • Add the missing @deprecated Javadoc tag. Do not forget to remove this deprecated code someday.
this.regularExpressions = regularExpressions;
patterns.add(Pattern.compile(regularExpressions.getIdRegExp()));
patterns.add(Pattern.compile(regularExpressions.getBankCardRegExp()));
patterns.add(Pattern.compile(regularExpressions.getPhoneRegExp()));
patterns.add(Pattern.compile(regularExpressions.getMobileRegExp()));
patterns.add(Pattern.compile(regularExpressions.getAddressRegExp()));
patterns.add(Pattern.compile(regularExpressions.getPositionExp()));
patterns.add(Pattern.compile(regularExpressions.getNameRegExp()));
patterns.add(Pattern.compile(regularExpressions.getMacRegExp()));
patterns.add(Pattern.compile(regularExpressions.getEmailRegExp()));
patterns.add(Pattern.compile(regularExpressions.getIpRegExp()));
}
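A sketch of the Javadoc the analyzer asks for on the deprecated members (the wording is hypothetical):
/**
 * Legacy regex-based masking kept for reference only.
 *
 * @deprecated superseded by the Flink-based DesensitizationFunction; remove once unused.
 */
@Deprecated
public DesensitizationFunctionOld(RegularExpressions regularExpressions) {
// ... unchanged constructor body ...
}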
@Deprecated
public Map<String, String> desensitization(Map<String, String> map,
  • 🚫 Refactor this method to reduce its Cognitive Complexity from 22 to the 15 allowed. Add the missing @deprecated Javadoc tag. Do not forget to remove this deprecated code someday.
List<String> whiteList, List<String> dataFormats) {
Iterator<Map.Entry<String, String>> entryIterator = map.entrySet().iterator();
while (entryIterator.hasNext()) {
Map.Entry<String, String> next = entryIterator.next();
String key = next.getKey();
String value = next.getValue();
if (!whiteList.contains(key)) {
// perform the masking
for (Pattern pattern : patterns) {
Matcher matcher = pattern.matcher(value);
if (matcher.find()) {
String replaceStr = "";
for (int i = 0; i < matcher.group().length(); i++) {
String s = String.valueOf(matcher.group().charAt(i));
if(dataFormats.contains(s)){
replaceStr = replaceStr.concat(s);
}else{
replaceStr = replaceStr.concat("*");
}
}
value = value.replace(matcher.group(), replaceStr);
map.put(key, value);
}
}
}
}
return map;
}
public Map desensitizationTemp(Map map, List<String> whiteList, List<String> dataFormats) {
  • 🚫 Refactor this method to reduce its Cognitive Complexity from 22 to the 15 allowed.
map.forEach((k, v) -> {
if (!whiteList.contains(k)) {
  • A "List" cannot contain a "K".
String value = v.toString();
for (Pattern pattern : patterns) {
Matcher matcher = pattern.matcher(value);
if (matcher.find()) {
String replaceStr = "";
for (int i = 0; i < matcher.group().length(); i++) {
String s = String.valueOf(matcher.group().charAt(i));
if(dataFormats.contains(s)){
replaceStr = replaceStr.concat(s);
}else{
replaceStr = replaceStr.concat("*");
}
}
value = value.replace(matcher.group(), replaceStr);
System.out.println("\n");
  • Replace this use of System.out or System.err by a logger.
}
}
map.put(k, value);
} else {
map.put(k, v);
}
});
return map;
}
}
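The per-character replacement inside both desensitization methods can be read as a small helper; a hypothetical extraction (mask is not a method in this commit) showing the effect on two of the demo values: characters listed in dataFormats survive, everything else inside a regex match becomes '*'.
// Hypothetical helper equivalent to the inner loop above.
private static String mask(String matched, List<String> dataFormats) {
StringBuilder replaced = new StringBuilder(matched.length());
for (char c : matched.toCharArray()) {
replaced.append(dataFormats.contains(String.valueOf(c)) ? c : '*');
}
return replaced.toString();
}
// mask("0730-7512340", dataFormats)            -> "****-*******"
// mask("liaomingtao@zork.com.cn", dataFormats) -> "***********@****.***.**"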
package com.zorkdata.desensitization.hadoop;
import com.zorkdata.desensitization.config.JobConfig;
import com.zorkdata.desensitization.constans.GeneralConstants;
import com.zorkdata.desensitization.exception.ZorkException;
import com.zorkdata.desensitization.function.DesensitizationFunction;
import com.zorkdata.desensitization.schmea.LogData;
import com.zorkdata.desensitization.utils.DateUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.avro.mapred.AvroInputFormat;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroOutputFormat;
import org.apache.avro.mapred.AvroWrapper;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.hadoop.mapred.HadoopInputFormat;
...@@ -23,8 +18,6 @@ import org.apache.flink.api.java.hadoop.mapred.HadoopOutputFormat;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.operators.FlatMapOperator;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
...@@ -38,168 +31,122 @@ import java.io.IOException;
import java.io.Serializable;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
/**
 * @author: LiaoMingtao
 * @date: 2021/2/24
 */
@Slf4j
public class HdfsLogDesensitization implements Serializable {
private static final long serialVersionUID = -6253583122681202967L;
private static final String AVRO_OUTPUT_SCHEMA = "avro.output.schema";
private JobConfig jobConfig;
public HdfsLogDesensitization initJobConfig(JobConfig jobConfig) {
this.jobConfig = jobConfig;
return this;
}
public void desensitizationHdfsLog() {
desensitizationHdfsLog(this.jobConfig);
}
public void desensitizationHdfsLog(JobConfig jobConfig) {
// initialise the Flink job environment
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
JobConf jobConf = new JobConf();
jobConf.set(AVRO_OUTPUT_SCHEMA, jobConfig.getAvroOutputSchema());
// source
// 1. resolve the input folders from the configured time range
List<String> logFiles = filterHdfsLogFiles(jobConfig.getHdfsSrc(), jobConfig.getHdfsUri(), jobConfig.getHdfsUser());
String logFileListString = list2String(logFiles);
HadoopInputFormat<Object, Object> hadoopInputFormat = new HadoopInputFormat<>
(new AvroInputFormat(), Object.class, Object.class, jobConf);
AvroInputFormat.addInputPaths(hadoopInputFormat.getJobConf(), logFileListString);
// 2. create the DataSource
DataSource<Tuple2<Object, Object>> hdfsLogInput = env
.createInput(hadoopInputFormat).setParallelism(jobConfig.getSourceParallelism());
// transformer
FlatMapOperator<Tuple2<Object, Object>, LogData> flatMapOperator =
hdfsLogInput.flatMap(new DesensitizationFunction<Tuple2<Object, Object>, LogData>(jobConfig));
// sink
// resolve the output directory on the target HDFS
String filePath = jobConfig.getHdfsDest();
HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat<>(new AvroOutputFormat(), jobConf);
FileOutputFormat.setOutputPath(jobConf, new Path(filePath));
// Avro serialisation operator (.writeAsText("file:///lmt/output") writes locally)
flatMapOperator.map(new MapFunction<LogData, Tuple2<AvroWrapper<LogData>, NullWritable>>() {
@Override
public Tuple2<AvroWrapper<LogData>, NullWritable> map(LogData value) throws Exception {
AvroKey<LogData> key = new AvroKey<>(value);
return new Tuple2<>(key, NullWritable.get());
}
}).setParallelism(jobConfig.getTransformerParallelism())
.writeAsText("file:///lmt/output", org.apache.flink.core.fs.FileSystem.WriteMode.OVERWRITE)
// .output(hadoopOutputFormat)
.setParallelism(jobConfig.getSinkParallelism());
try {
env.execute(jobConfig.getJobName());
} catch (Exception e) {
log.error(String.valueOf(e));
}
}
/**
 * Collect the full paths of all HDFS log files.
 *
 * @param hdfsSrc  HDFS directory, e.g. /tmp/
 * @param hdfsUri  HDFS URI, e.g. hdfs://cdh-2:8020/
 * @param hdfsUser HDFS user name, e.g. hdfs
 * @return the full paths of all HDFS log files
 */
private List<String> filterHdfsLogFiles(String hdfsSrc, String hdfsUri, String hdfsUser) {
  • 🚫 Refactor this method to reduce its Cognitive Complexity from 19 to the 15 allowed.
if (!hdfsSrc.endsWith(GeneralConstants.FILE_SEPARATOR)) {
hdfsSrc += GeneralConstants.FILE_SEPARATOR;
  • 🔽 Introduce a new variable instead of reusing the parameter "hdfsSrc".
}
String path = hdfsSrc;
Configuration conf = new Configuration();
List<String> logFiles = new ArrayList<>();
FileSystem fileSystem = null;
List<String> betweenDate = DateUtils.getBetweenDate(jobConfig.getStartTime(), jobConfig.getEndTime());
List<String> dateList = DateUtils.date2date(betweenDate);
if (!dateList.isEmpty()) {
try {
fileSystem = FileSystem.get(new URI(hdfsUri), conf, hdfsUser);
for (String item : dateList) {
path = hdfsSrc + item;
List<String> hdfsLogFiles = null;
try {
  • Extract this nested try block into a separate method.
hdfsLogFiles = getHdfsLogFilesByPath(fileSystem, path);
logFiles.addAll(hdfsLogFiles);
} catch (ZorkException e) {
e.printStackTrace();
log.error(String.valueOf(e));
}
}
} catch (IOException e) {
log.error(String.valueOf(e));
} catch (InterruptedException e) {
  • Either re-interrupt this method or rethrow the "InterruptedException". 🔽 Combine this catch with the one at line 130, which has the same body. (sonar.java.source not set. Assuming 7 or greater.)
log.error(String.valueOf(e));
} catch (URISyntaxException e) {
  • 🔽 Combine this catch with the one at line 130, which has the same body. (sonar.java.source not set. Assuming 7 or greater.)
log.error(String.valueOf(e));
} finally {
if (null != fileSystem) {
try {
fileSystem.close();
} catch (IOException e) {
log.error(String.valueOf(e));
}
}
}
} else {
log.warn("{} -- {} 时间段内无数据,请注意时间范围", jobConfig.getStartTime(), jobConfig.getEndTime());
}
return logFiles;
}
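A sketch of the exception handling the analyzer suggests for the block above: multi-catch for the handlers that share a body, plus re-interrupting the thread instead of swallowing the InterruptedException (shape only, assuming Java 7 or later):
try {
fileSystem = FileSystem.get(new URI(hdfsUri), conf, hdfsUser);
// ... collect the files for each date as above ...
} catch (IOException | URISyntaxException e) {
log.error(String.valueOf(e));
} catch (InterruptedException e) {
Thread.currentThread().interrupt();   // re-interrupt rather than swallow
log.error(String.valueOf(e));
}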
/**
...@@ -250,76 +197,25 @@ public class HdfsLogDesensitization implements Serializable {
 * @param path the directory path
 * @return the full paths of all files under the directory
 */
private List<String> getHdfsLogFilesByPath(FileSystem fileSystem, String path) throws ZorkException {
List<String> logFiles = new ArrayList<>();
try {
RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator =
fileSystem.listFiles(new Path(path), false);
while (locatedFileStatusRemoteIterator.hasNext()) {
LocatedFileStatus next = locatedFileStatusRemoteIterator.next();
long modificationTime = next.getModificationTime();
// filter by modification time: keep only files inside the user-specified window
if (modificationTime > jobConfig.getStartTimestamp()) {
Path hdfsFilePath = next.getPath();
logFiles.add(hdfsFilePath.toString());
}
}
} catch (IOException e) {
log.error(String.valueOf(e));
throw new ZorkException(String.format("IO流异常:%s", e.getMessage()));
}
return logFiles;
}
public HdfsLogDesensitization() {
}
}
// package com.zorkdata.desensitization.hadoop;
//
// import com.alibaba.fastjson.JSON;
  • This block of commented-out lines of code should be removed.
// import com.alibaba.fastjson.TypeReference;
// import com.zorkdata.desensitization.avro.AvroSchemaDef;
// import com.zorkdata.desensitization.config.RegularExpressions;
// import com.zorkdata.desensitization.constans.ConfigConstants;
// import com.zorkdata.desensitization.constans.GeneralConstants;
// import com.zorkdata.desensitization.function.DesensitizationFunctionOld;
// import com.zorkdata.desensitization.schmea.LogData;
// import com.zorkdata.desensitization.utils.DateUtils;
// import lombok.extern.slf4j.Slf4j;
// import org.apache.avro.Schema;
// import org.apache.avro.mapred.AvroInputFormat;
// import org.apache.avro.mapred.AvroKey;
// import org.apache.avro.mapred.AvroOutputFormat;
// import org.apache.avro.mapred.AvroWrapper;
// import org.apache.flink.api.common.functions.FlatMapFunction;
// import org.apache.flink.api.common.functions.MapFunction;
// import org.apache.flink.api.java.ExecutionEnvironment;
// import org.apache.flink.api.java.hadoop.mapred.HadoopInputFormat;
// import org.apache.flink.api.java.hadoop.mapred.HadoopOutputFormat;
// import org.apache.flink.api.java.operators.DataSource;
// import org.apache.flink.api.java.operators.FlatMapOperator;
// import org.apache.flink.api.java.tuple.Tuple2;
// import org.apache.flink.api.java.utils.ParameterTool;
// import org.apache.flink.util.Collector;
// import org.apache.hadoop.conf.Configuration;
// import org.apache.hadoop.fs.FileSystem;
// import org.apache.hadoop.fs.LocatedFileStatus;
// import org.apache.hadoop.fs.Path;
// import org.apache.hadoop.fs.RemoteIterator;
// import org.apache.hadoop.io.NullWritable;
// import org.apache.hadoop.mapred.FileOutputFormat;
// import org.apache.hadoop.mapred.JobConf;
//
// import java.io.IOException;
// import java.io.Serializable;
// import java.net.URI;
// import java.net.URISyntaxException;
// import java.util.*;
//
// /**
// * @author: LiaoMingtao
// * @date: 2020/10/26
// */
// @Slf4j
// public class HdfsLogDesensitizationOld implements Serializable {
//
// private static final long serialVersionUID = 1L;
//
// private static final String AVRO_OUTPUT_SCHEMA = "avro.output.schema";
// private static final String HOSTNAME = "hostname";
// private static final List<String> dataFormats = new ArrayList<String>() {{
// add(",");
// add(".");
// add("@");
// add("-");
// add(":");
// }};
//
// private String jobName;
// private int sourceParallelism;
// private int transformerParallelism;
// private int sinkParallelism;
// private int maxFileNum;
// private String avroOutputSchema;
// private List<String> fieldsWhiteList;
// private String core;
// private String hdfsUri;
// private String hdfsUser;
// private String hdfsSrc;
// private String hdfsDest;
// private String startTime;
// private String endTime;
// /**
// * 是否脱敏维度信息
// */
// private boolean hasRegDimension;
// private long startTimestamp;
// private long endTimestamp;
// private Map<String, String> confMap;
// private Map<String, String> regularMap;
//
// public HdfsLogDesensitizationOld initRegular(Map<String, String> regularMap) {
// this.regularMap = regularMap;
// return this;
// }
//
// public HdfsLogDesensitizationOld initConf(Map<String, String> conf) {
// this.jobName = String.valueOf(conf.get(ConfigConstants.JOB_NAME));
// this.sourceParallelism = Integer.parseInt(conf.get(ConfigConstants.SOURCE_PARALLELISM));
// this.transformerParallelism = Integer.parseInt(conf.get(ConfigConstants.TRANSFORMER_PARALLELISM));
// this.sinkParallelism = Integer.parseInt(conf.get(ConfigConstants.SINK_PARALLELISM));
// String[] fieldsWhiteListArray = String.valueOf(conf.get(ConfigConstants.FIELDS_WHITE_LIST))
// .trim().split(GeneralConstants.COMMA);
// this.fieldsWhiteList = new ArrayList<>(Arrays.asList(fieldsWhiteListArray));
// this.avroOutputSchema = new Schema.Parser().parse(AvroSchemaDef.ZORK_LOG_SCHEMA).toString(true);
// this.hdfsUri = String.valueOf(conf.get(ConfigConstants.HDFS_URI)).trim();
// this.hdfsUser = String.valueOf(conf.get(ConfigConstants.HDFS_USER)).trim();
// this.hdfsSrc = hdfsUri + String.valueOf(conf.get(ConfigConstants.HDFS_SRC)).trim();
// this.hdfsDest = hdfsUri + String.valueOf(conf.get(ConfigConstants.HDFS_DEST)).trim();
// this.core = String.valueOf(conf.get(ConfigConstants.CORE)).trim();
// this.startTime = String.valueOf(conf.get(ConfigConstants.START_TIME));
// this.endTime = String.valueOf(conf.get(ConfigConstants.END_TIME));
// this.startTimestamp = DateUtils.time2Timestamp(startTime);
// this.endTimestamp = DateUtils.time2Timestamp(endTime);
// this.hasRegDimension = Boolean.parseBoolean(conf.get(ConfigConstants.REG_DIMENSION));
// this.confMap = conf;
// return this;
// }
//
// public void desensitizationHdfsLog() throws Exception {
// desensitizationHdfsLog(this.confMap);
// }
//
// public void desensitizationHdfsLog(Map<String, String> conf) throws Exception {
// // 初始化env
// ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// JobConf jobConf = new JobConf();
// jobConf.set(AVRO_OUTPUT_SCHEMA, this.avroOutputSchema);
// ParameterTool parameterTool = ParameterTool.fromMap(conf);
// env.getConfig().setGlobalJobParameters(parameterTool);
// RegularExpressions regularExpressions = new RegularExpressions(this.regularMap);
// DesensitizationFunctionOld desensitizationFunctionOld = new DesensitizationFunctionOld(regularExpressions);
//
// // source
// List<String> logFiles = filterHdfsLogFiles(hdfsSrc, hdfsUri, hdfsUser);
// String logFileListString = list2String(logFiles);
// HadoopInputFormat<Object, Object> hadoopInputFormat = new HadoopInputFormat<>
// (new AvroInputFormat(), Object.class, Object.class, jobConf);
// AvroInputFormat.addInputPaths(hadoopInputFormat.getJobConf(), logFileListString);
// DataSource<Tuple2<Object, Object>> hdfsLogInput = env
// .createInput(hadoopInputFormat).setParallelism(sourceParallelism);
//
// // transformer
// FlatMapOperator<Tuple2<Object, Object>, Object> maskFlatMapOperator =
// hdfsLogInput.flatMap(new FlatMapFunction<Tuple2<Object, Object>, Object>() {
// @Override
// public void flatMap(Tuple2<Object, Object> value, Collector<Object> collector) {
// LogData logData = JSON.parseObject(value.getField(0).toString(),
// new TypeReference<LogData>() {
// });
// //根据日志事件的核心信息做过滤
// boolean hasCore = (null != core && logData.getDimensions().get(HOSTNAME).contains(core))
// || "*".equals(core);
// if (hasCore) {
// //根据日志事件的timestamp做过滤
// Long timestamp = DateUtils.utc2timestamp(logData.getTimestamp());
//
// if (null != timestamp && timestamp.compareTo(startTimestamp) >= 0 &&
// timestamp.compareTo(endTimestamp) <= 0) {
// Map<String, String> normalFields = logData.getNormalFields();
// Map desensitization = desensitizationFunctionOld.
// desensitization(normalFields, fieldsWhiteList, dataFormats);
// logData.setNormalFields(desensitization);
// if (hasRegDimension) {
// Map<String, String> dimensions = logData.getDimensions();
// Map desensitizationDimensions = desensitizationFunctionOld.
// desensitization(dimensions, fieldsWhiteList, dataFormats);
// logData.setDimensions(desensitizationDimensions);
// }
// collector.collect(logData);
// }
// }
// }
// }).setParallelism(transformerParallelism);
// // 获取目标hdfs的输出目录
// String filePath = hdfsDest;
// HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat<>(new AvroOutputFormat(), jobConf);
// FileOutputFormat.setOutputPath(jobConf, new Path(filePath));
//
// // avro序列化算子 .writeAsText("file:///lmt/output");
// maskFlatMapOperator.map(new MapFunction<Object, Tuple2<AvroWrapper<LogData>, NullWritable>>() {
// @Override
// public Tuple2<AvroWrapper<LogData>, NullWritable> map(Object value) throws Exception {
// AvroKey<LogData> key = new AvroKey<>((LogData) value);
// Tuple2<AvroWrapper<LogData>, NullWritable> tuple = new Tuple2<>(key, NullWritable.get());
// return tuple;
// }
// }).setParallelism(transformerParallelism).output(hadoopOutputFormat).setParallelism(sinkParallelism);
// try {
// env.execute(jobName);
// } catch (Exception e) {
// log.error(String.valueOf(e));
// }
// }
//
// /**
// * 分组list
// *
// * @param list 演示list
// * @return List<String>
// */
// private List<String> changeList(List<String> list) {
// List<String> resultList = new ArrayList<>();
// List<List<String>> lists = subList(list, maxFileNum);
// lists.forEach(item -> {
// String tempString = list2String(item);
// resultList.add(tempString);
// });
// return resultList;
// }
//
// /**
// * 将List按照每组n个元素进行分组
// *
// * @param sourceList 原始list
// * @param n n个元素
// * @param <T> 泛型
// * @return List<List < T>>
// */
// private <T> List<List<T>> subList(List<T> sourceList, int n) {
// List<List<T>> rsList = new ArrayList<>();
// if (n <= 0) {
// rsList.add(sourceList);
// return rsList;
// }
// int listSize = sourceList.size();
// int groupNum = (sourceList.size() / n) + 1;
// for (int i = 0; i < groupNum; i++) {
// List<T> subList = new ArrayList<>();
// for (int j = i * n; j < (i + 1) * n; j++) {
// if (j < listSize) {
// subList.add(sourceList.get(j));
// }
// }
// rsList.add(subList);
// }
// if (rsList.get(rsList.size() - 1).isEmpty()) {
// rsList.remove(rsList.size() - 1);
// }
// return rsList;
// }
//
// /**
// * Convert a List<String> to a comma-separated String
// *
// * @param list list<string>
// * @return String
// */
// private String list2String(List<String> list) {
  • This block of commented-out lines of code should be removed. 📘

// return String.join(GeneralConstants.COMMA, list);
// }
//
// /**
// * Get files under a path
// *
// * @param fileSystem the file system
// * @param path the directory path
// * @return full paths of all files under the directory
// */
// private List<String> getHdfsLogFilesByPath(FileSystem fileSystem, String path) {
  • This block of commented-out lines of code should be removed. 📘

// List<String> logFiles = new ArrayList<>();
// try {
// RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator = fileSystem.listFiles(new Path(path),
// false);
  • This block of commented-out lines of code should be removed. 📘

// while (locatedFileStatusRemoteIterator.hasNext()) {
// LocatedFileStatus next = locatedFileStatusRemoteIterator.next();
// long modificationTime = next.getModificationTime();
// // Filter by file modification time to keep only files within the user-specified time range
// if (modificationTime > startTimestamp) {
  • This block of commented-out lines of code should be removed. 📘

// Path hdfsFilePath = next.getPath();
// logFiles.add(hdfsFilePath.toString());
// }
// }
// } catch (IOException e) {
// log.error(String.valueOf(e));
// }
// return logFiles;
// }
//
// /**
// * Get all file paths of the hdfs log files
// *
// * @param hdfsSrc hdfs address
// * @param hdfsUri hdfs URI
// * @param hdfsUser hdfs user name
// * @return all file paths of the hdfs log files
// */
// @Deprecated
// private List<String> filterHdfsLogFiles(String hdfsSrc, String hdfsUri, String hdfsUser) {
  • This block of commented-out lines of code should be removed. 📘

// // hdfs://cdh-2:8020/ hdfs
// if (!hdfsSrc.endsWith(GeneralConstants.FILE_SEPARATOR)) {
  • This block of commented-out lines of code should be removed. 📘

// hdfsSrc += GeneralConstants.FILE_SEPARATOR;
// }
// String path = hdfsSrc;
// Configuration conf = new Configuration();
// List<String> logFiles = new ArrayList<>();
// FileSystem fileSystem = null;
// List<String> betweenDate = DateUtils.getBetweenDate(startTime, endTime);
// List<String> dateList = DateUtils.date2date(betweenDate);
// if (dateList.size() > 0) {
// try {
// fileSystem = FileSystem.get(new URI(hdfsUri), conf, hdfsUser);
// for (String item : dateList) {
// path = hdfsSrc + item;
// List<String> hdfsLogFiles = getHdfsLogFilesByPath(fileSystem, path);
// logFiles.addAll(hdfsLogFiles);
// }
// } catch (IOException e) {
// log.error(String.valueOf(e));
// } catch (InterruptedException e) {
// log.error(String.valueOf(e));
// } catch (URISyntaxException e) {
// log.error(String.valueOf(e));
// } finally {
// if (null != fileSystem) {
// try {
// fileSystem.close();
// } catch (IOException e) {
// log.error(String.valueOf(e));
// }
// }
// }
// } else {
// log.warn("{} -- {} 时间段内无数据,请注意时间范围", startTime, endTime);
// }
//
// return logFiles;
  • This block of commented-out lines of code should be removed. 📘

// }
//
// public HdfsLogDesensitizationOld() {
  • This block of commented-out lines of code should be removed. 📘

//
// }
  • This block of commented-out lines of code should be removed. 📘

// }
...@@ -11,7 +11,7 @@ import java.util.*;
* @date: 2020/10/22
*/
@Slf4j
-public class DateUtil {
+public class DateUtils {
  • Add a private constructor to hide the implicit public one. 📘

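The same pattern this commit already applies to YmlUtils works here: declare a private constructor so the utility class cannot be instantiated. A minimal sketch (the isBlank helper is illustrative only, not code from this repository):

public class DateUtils {

    // Hide the implicit public constructor; the class only exposes static helpers.
    private DateUtils() {
    }

    // Illustrative placeholder for the class's real static helpers.
    public static boolean isBlank(String value) {
        return value == null || value.trim().isEmpty();
    }
}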
private static final String NULL = "";
private static final String BAR_STRING = "-";
...
package com.zorkdata.desensitization.utils;
import com.alibaba.fastjson.JSON;
import com.zorkdata.desensitization.constans.GeneralConstants;
import com.zorkdata.desensitization.exception.ZorkException;
import lombok.extern.slf4j.Slf4j;
...@@ -14,20 +13,12 @@ import java.util.*;
* @date: 2020/8/7
*/
@Slf4j
-public class PropertiesUtil {
+public class PropertiesUtils {
  • Add a private constructor to hide the implicit public one. 📘

private static final int DEFAULT_PARAMS_MAP_LENGTH = 10;
private static final String REGULAR = "regular";
public static void main(String[] args) {
List<String> propertiesContentList = PropertiesUtil.getPropertiesContentList("/regular");
System.out.println(JSON.toJSONString(propertiesContentList));
Map<String, String> propertiesMap = getPropertiesMap(propertiesContentList);
System.out.println(JSON.toJSONString(propertiesMap));
}
/**
* Get the configuration map
*
...@@ -39,8 +30,8 @@ public class PropertiesUtil {
ParameterTool parameterTool = ParameterTool.fromArgs(args);
configPath = parameterTool.get(REGULAR);
log.info("read config path is {}", configPath);
-List<String> propertiesContentList = PropertiesUtil.getPropertiesContentList(configPath);
+List<String> propertiesContentList = PropertiesUtils.getPropertiesContentList(configPath);
-Map<String, String> confMap = PropertiesUtil.getPropertiesMap(propertiesContentList);
+Map<String, String> confMap = PropertiesUtils.getPropertiesMap(propertiesContentList);
if (confMap.isEmpty()) {
log.error("配置文件regular不存在,系统退出");
throw new ZorkException("配置文件regular不存在,系统退出");
...@@ -114,7 +105,7 @@ public class PropertiesUtil {
InputStream inputStream = null;
StringBuilder stringBuilder = new StringBuilder();
try {
-inputStream = PropertiesUtil.class.getResourceAsStream(propertiesFileName);
+inputStream = PropertiesUtils.class.getResourceAsStream(propertiesFileName);
// holds each byte read, i.e. the read result
int result = -1;
while ((result = inputStream.read()) != -1) {
...@@ -144,7 +135,7 @@ public class PropertiesUtil {
Properties properties = new Properties();
InputStream inputStream = null;
try {
-inputStream = PropertiesUtil.class.getResourceAsStream(propertiesFileName);
+inputStream = PropertiesUtils.class.getResourceAsStream(propertiesFileName);
properties.load(inputStream);
} catch (IOException e) {
e.printStackTrace();
...
...@@ -13,9 +13,9 @@ import java.util.Map;
* @date: 2020/10/22
*/
@Slf4j
-public class YmlUtil {
+public class YmlUtils {
-private YmlUtil() {
+private YmlUtils() {
}
private static final String YML_SUFFIX = "yml";
...
# Job configuration
job_name: "国泰交易日志脱敏job"
# Parallelism
source.parallelism: "4"
transformer.parallelism: "4"
sink.parallelism: "4"
# Data source; hdfs and kafka are supported; required; kafka is not supported yet
source: "hdfs"
# "Core" info of the transaction log: a value starting with c followed by a numeric index, matched against the hostname dimension; pass * if hostname matching is not needed
core: "c9"
# Whether dimension info is desensitized with regular expressions
reg.dimension: "true"
# Query log start time
start_time: "2020-11-07 21:22:20"
# Query log end time
end_time: "2020-11-07 23:40:30"
# Job configuration
job_name: "国泰交易日志脱敏job"
# Parallelism (HdfsLogDesensitization)
source.parallelism: "4"
transformer.parallelism: "4"
sink.parallelism: "1"
# Whether to filter the hostname field in the dimensions; * means no filtering; to keep only hostnames containing e.g. 'c9', set it to c9
match.hostname: "*"
# Password keys
password: "trdpwd,newpwd,oldfundpwd,newfundpwd,bankpwd,dynamicpwd1,dynamicpwd2,dynamicpwd,fundpwd,newbankpwd"
# Name keys
name: "custname,otherlinkman,longname,YXYHMC,deputyname"
# Mobile number keys
mobile: "netaddr,telno,mobileno,faxno,hometelno,worktelno,fax,mobil,call"
# Phone number keys
phone: "netaddr,telno,mobileno,faxno,hometelno,worktelno,fax,mobil,call"
# Email keys
email: "email"
# ID card number keys
id: "idno,deputyidno"
# Bank card number keys
bank_card: "lmbankid"
# Home address keys
address: "addr,workaddr,otheraddr,othertelno"
# IP address keys
ip: "printip,ipaddr,operipaddr,clientip"
# MAC address keys
mac: "netaddr,clientmac"
# Position info keys
position: "orderamt,orderqty,orderprice,fundeffect,stkeffect,profitcost,oddqty,sumqty,ordercount,orderqty,orderamt,fundbal,fundavl,marketvalue,fund,stkvalue,stkqty,stkbal,stkavl,mktval,matchamt,fundeffect,fee_yhs,fee_jsxf,fee_sxf,fee_ghf,fee_qsf,fee_jygf,feefront,fee_jsf,fee_zgf,fundbjhgavl,bb_matchclearamt,nHQSBSL,fundeffect,fundbal,fundeffect,bondbal,bondavl,maxdraw,marketvalue,matchqty,funddraw"
# Local path to which avro data is downloaded
download_path: "/tmp/"
# hadoop related configuration
# hdfs address; must end with a slash
hdfs_uri: "hdfs://cdh-2:8020/"
...@@ -27,10 +48,7 @@ hdfs_user: "hdfs"
# hdfs log source file path; required when source is hdfs; must end with a slash
hdfs_src: "/tmp/datawarehouse4/jzjy/kcbp_biz_log/"
# hdfs log output path; optional, defaults to the output directory under hdfs-src; must end with a slash
-hdfs_dest: "/tmp/datawarehouse/jzjy/kcbp_biz_log/output1/"
+hdfs_dest: "/tmp/datawarehouse/jzjy/kcbp_biz_log/output3/"
# Whitelist of fields that are not desensitized
fields_white_list: "funcid,count1,count2,count3,count4"
# cdh download configuration
# IP of the cdh host that can run hdfs commands
...@@ -41,6 +59,3 @@ cdh_host_user: "root"
cdh_host_password: "NuqUtwbJUBRmUwgh"
# User name that can run hdfs download commands on cdh
cdh_hdfs_user: "hdfs"
# Keep consistent with the configuration file
download_path: "/tmp/"
# Job configuration
job_name: "国泰交易日志脱敏job"
# Parallelism
source.parallelism: "4"
transformer.parallelism: "4"
sink.parallelism: "4"
# Data source; hdfs and kafka are supported; required; kafka is not supported yet
source: "hdfs"
# "Core" info of the transaction log: a value starting with c followed by a numeric index, matched against the hostname dimension; pass * if hostname matching is not needed
core: "c9"
# Whether dimension info is desensitized with regular expressions
reg.dimension: "true"
# Query log start time
start_time: "2020-11-07 21:22:20"
# Query log end time
end_time: "2020-11-07 23:40:30"
# hadoop related configuration
# hdfs address; must end with a slash
hdfs_uri: "hdfs://cdh-2:8020/"
# hdfs user name
hdfs_user: "hdfs"
# hdfs log source file path; required when source is hdfs; must end with a slash
hdfs_src: "/tmp/datawarehouse4/jzjy/kcbp_biz_log/"
# hdfs log output path; optional, defaults to the output directory under hdfs-src; must end with a slash
hdfs_dest: "/tmp/datawarehouse/jzjy/kcbp_biz_log/output1/"
# Whitelist of fields that are not desensitized
fields_white_list: "funcid,count1,count2,count3,count4"
# cdh download configuration
# IP of the cdh host that can run hdfs commands
cdh_host_ip: "192.168.70.2"
# Superuser on the cdh host that can run hdfs commands
cdh_host_user: "root"
# Password for that user on the cdh host
cdh_host_password: "NuqUtwbJUBRmUwgh"
# User name that can run hdfs download commands on cdh
cdh_hdfs_user: "hdfs"
# Keep consistent with the configuration file
download_path: "/tmp/"
  • SonarQube analysis reported 174 issues

    • 1 blocker
    • 🚫 20 critical
    • 130 major
    • 🔽 21 minor
    • 2 info

    Watch the comments in this conversation to review them.

    Top 30 extra issues

    Note: The following issues were found on lines that were not modified in the commit. Because these issues can't be reported as line comments, they are summarized here:

    1. 🚫 Define a constant instead of duplicating this literal " {\n" 11 times. 📘 (see the constant-extraction sketch below this list)
    2. 🚫 Define a constant instead of duplicating this literal " "type": \n" 11 times. 📘
    3. 🚫 Define a constant instead of duplicating this literal " "string",\n" 6 times. 📘
    4. 🚫 Define a constant instead of duplicating this literal " "null"\n" 6 times. 📘
    5. 🚫 [Define a constant instead of duplicating this literal " ]\n" 11 times.](https://git.zorkdata.com/liaomingtao/transaction-log-desensitization/blob/7bdc64f5f5802beac0ab4f93b898b7cf98d58177/src/main/java/com/zorkdata/desensitization/avro/AvroSchemaDef.java#L23) 📘
    6. 🚫 Define a constant instead of duplicating this literal " },\n" 9 times. 📘
    7. 🚫 Define a constant instead of duplicating this literal " "null",\n" 5 times. 📘
    8. 🚫 Define a constant instead of duplicating this literal " {\n" 5 times. 📘
    9. 🚫 Define a constant instead of duplicating this literal " "type": "map",\n" 5 times. 📘
    10. 🚫 Define a constant instead of duplicating this literal " "values": "string"\n" 3 times. 📘
    11. 🚫 Define a constant instead of duplicating this literal " }\n" 5 times. 📘
    12. 🚫 Define a constant instead of duplicating this literal "序列化失败" 13 times. 📘
    13. 🚫 Refactor this method to reduce its Cognitive Complexity from 22 to the 15 allowed. 📘
    14. 🚫 Change this "try" to a try-with-resources. (sonar.java.source not set. Assuming 7 or greater.) 📘 (see the try-with-resources sketch below this list)
    15. 🚫 Refactor this code to not throw exceptions in finally blocks. 📘 (see the closeQuietly sketch below this list)
    16. 🚫 Refactor this code to not throw exceptions in finally blocks. 📘
    17. 🚫 Change this "try" to a try-with-resources. (sonar.java.source not set. Assuming 7 or greater.) 📘
    18. Rename "jsonObject" which hides the field declared at line 39. 📘 (see the field-hiding sketch below this list)
    19. Remove this expression which always evaluates to "true". 📘
    20. Remove this expression which always evaluates to "true". 📘
    21. This block of commented-out lines of code should be removed. 📘
    22. Clean up code blocks or configuration that are no longer used in a timely manner. 📘
    23. Remove this expression which always evaluates to "true". 📘
    24. Iterate over the "entrySet" instead of the "keySet". 📘 (see the entrySet sketch below this list)
    25. Remove this conditional structure or edit its code blocks so that they're not all the same. 📘
    26. Iterate over the "entrySet" instead of the "keySet". 📘
    27. Remove this conditional structure or edit its code blocks so that they're not all the same. 📘
    28. Iterate over the "entrySet" instead of the "keySet". 📘
    29. Remove this conditional structure or edit its code blocks so that they're not all the same. 📘
    30. Remove this unused private "bigDecimal2Double" method. 📘
    • ... 47 more
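Regarding issues 1-12 (duplicated string literals): the usual fix is to extract each repeated literal into a single named constant and reference it everywhere. A minimal sketch, using the "序列化失败" message reported 13 times; the ErrorMessages holder class is a hypothetical name, not code from this repository:

public final class ErrorMessages {

    // The literal that was repeated 13 times now lives in exactly one place.
    public static final String SERIALIZATION_FAILED = "序列化失败";

    private ErrorMessages() {
    }
}

Call sites then reference ErrorMessages.SERIALIZATION_FAILED instead of repeating the string, so a wording change touches one line.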
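Regarding issues 14 and 17: a try-with-resources block closes the stream automatically even when reading fails, which removes the manual close bookkeeping. A minimal sketch in the spirit of the getResourceAsStream code touched by this commit; PropertiesLoader is a hypothetical name, not the project's class:

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

public final class PropertiesLoader {

    private PropertiesLoader() {
    }

    // The stream declared in the try header is closed automatically, even if load() throws.
    public static Properties load(String resourceName) throws IOException {
        Properties properties = new Properties();
        try (InputStream in = PropertiesLoader.class.getResourceAsStream(resourceName)) {
            if (in != null) {
                properties.load(in);
            }
        }
        return properties;
    }
}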
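Regarding issues 15 and 16: an exception thrown from a finally block replaces whatever exception was already propagating from the try block, so cleanup code should log its own failure rather than rethrow. A minimal sketch; closeQuietly and CloseSketch are hypothetical names, not from this repository:

import java.io.Closeable;
import java.io.IOException;

public final class CloseSketch {

    private CloseSketch() {
    }

    // Safe to call from a finally block: it never throws, so it cannot mask the original exception.
    public static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            // Log and continue instead of rethrowing from cleanup code.
            System.err.println("close failed: " + e.getMessage());
        }
    }
}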
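Regarding issue 18: a local variable named jsonObject shadows the field of the same name, which makes it easy to read or assign the wrong one; the fix is simply a distinct local name. A minimal sketch; the class and field below are illustrative, not the code the warning points at:

public class FieldHidingSketch {

    private String jsonObject = "{}";

    public void update(String raw) {
        // A distinct local name avoids hiding the jsonObject field declared above.
        String trimmedJson = raw.trim();
        this.jsonObject = trimmedJson;
    }
}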
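Regarding issues 24, 26 and 28: iterating the entrySet yields key and value in one pass, whereas iterating the keySet forces an extra map lookup per key. A minimal sketch reusing field names from the desensitization config; EntrySetSketch is a hypothetical class, not the project's code:

import java.util.HashMap;
import java.util.Map;

public final class EntrySetSketch {

    private EntrySetSketch() {
    }

    // entrySet() avoids the per-key get() that a keySet() loop would need.
    public static void printAll(Map<String, String> fields) {
        for (Map.Entry<String, String> entry : fields.entrySet()) {
            System.out.println(entry.getKey() + "=" + entry.getValue());
        }
    }

    public static void main(String[] args) {
        Map<String, String> fields = new HashMap<>();
        fields.put("custname", "***");
        fields.put("idno", "***");
        printAll(fields);
    }
}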