Commit 7bdc64f5 authored by DeleMing's avatar DeleMing

<dev>

1. Reworked the project; initialization completed afterwards
parent 85d8ca72
Pipeline #21097 failed with stages
in 2 minutes and 27 seconds
package com.zorkdata.desensitization;
import com.alibaba.fastjson.JSON;
import java.util.ArrayList;
import java.util.List;
/**
* @author: LiaoMingtao
* @date: 2020/10/29
*/
public class Test {
public static void main(String[] args) {
// String a = "{a},{b}";
// String[] pathStrings = getPathStrings(a);
// for (String b: pathStrings) {
// System.out.println(b);
// }
String a = "a,b,";
String[] pathStrings = getPathStrings(a);
for (String b: pathStrings) {
System.out.println(b);
System.out.print("--");
}
}
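// Expected results of getPathStrings below, derived from its switch/loop logic:
//   "a,b,"    -> ["a", "b", ""]     (a trailing comma yields an empty last element)
//   "{a},{b}" -> ["{a}", "{b}"]     (commas inside curly-brace globs are not split on)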
private static String[] getPathStrings(String commaSeparatedPaths) {
int length = commaSeparatedPaths.length();
int curlyOpen = 0;
int pathStart = 0;
boolean globPattern = false;
List<String> pathStrings = new ArrayList<>();
for(int i = 0; i < length; ++i) {
char ch = commaSeparatedPaths.charAt(i);
switch(ch) {
case ',':
if (!globPattern) {
pathStrings.add(commaSeparatedPaths.substring(pathStart, i));
pathStart = i + 1;
}
break;
case '{':
++curlyOpen;
if (!globPattern) {
globPattern = true;
}
break;
case '}':
--curlyOpen;
if (curlyOpen == 0 && globPattern) {
globPattern = false;
}
}
}
pathStrings.add(commaSeparatedPaths.substring(pathStart, length));
System.out.println(JSON.toJSONString(pathStrings));
return (String[])pathStrings.toArray(new String[0]);
}
}
package com.zorkdata.desensitization;
import com.alibaba.fastjson.JSON;
import com.zorkdata.desensitization.config.RegularExpressions;
import com.zorkdata.desensitization.function.DesensitizationFunction;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.*;
/**
* @author: LiaoMingtao
* @date: 2020/11/7
*/
public class TestDesensitization {
public static void main(String[] args) {
RegularExpressions regularExpressions = new RegularExpressions();
regularExpressions.setIdRegExp("[1-9]\\d{5}(18|19|([23]\\d))\\d{2}((0[1-9])|(10|11|12))(([0-2][1-9])|10|20|30|31)\\d{3}[0-9Xx]|[1-9]\\d{5}\\d{2}((0[1-9])|(10|11|12))(([0-2][1-9])|10|20|30|31)\\d{2}");
regularExpressions.setBankCardRegExp("(([13-79]\\d{3})|(2[1-9]\\d{2})|(20[3-9]\\d)|(8[01-79]\\d{2}))\\s?\\d{4}\\s?\\d{4}\\s?\\d{4}(\\s?\\d{3})?");
regularExpressions.setPhoneRegExp("0\\d{2,3}-[1-9]\\d{6,7}");
regularExpressions.setMobileRegExp("((\\+|00)86)?((134\\d{4})|((13[0-3|5-9]|14[1|5-9]|15[0-9]|16[2|5|6|7]|17[0-8]|18[0-9]|19[0-2|5-9])\\d{8}))");
regularExpressions.setAddressRegExp("([\\u4E00-\\u9FA5A-Za-z0-9_]+(省|市|自治区|自治州|区|县|镇|道|路|街|号|弄|条|室|港)){1,}");
regularExpressions.setNameRegExp("[赵|钱|孙|李|周|吴|郑|王|冯|陈|褚|卫|蒋|沈|韩|杨|朱|秦|尤|许|何|吕|施|张|孔|曹|严|华|金|魏|陶|姜|戚|谢|邹|喻|柏|水|窦|章|云|苏|潘|葛|奚|范|彭|郎|鲁|韦|昌|马|苗|凤|花|方|俞|任|袁|柳|酆|鲍|史|唐|费|廉|岑|薛|雷|贺|倪|汤|滕|殷|罗|毕|郝|邬|安|常|乐|于|时|傅|皮|卞|齐|康|伍|余|元|卜|顾|孟|平|黄|和|穆|萧|尹|姚|邵|湛|汪|祁|毛|禹|狄|米|贝|明|臧|计|伏|成|戴|谈|宋|茅|庞|熊|纪|舒|屈|项|祝|董|梁|杜|阮|蓝|闵|席|季|麻|强|贾|路|娄|危|江|童|颜|郭|梅|盛|林|刁|锺|徐|邱|骆|高|夏|蔡|田|樊|胡|凌|霍|虞|万|支|柯|昝|管|卢|莫|经|房|裘|缪|干|解|应|宗|丁|宣|贲|邓|郁|单|杭|洪|包|诸|左|石|崔|吉|钮|龚|程|嵇|邢|滑|裴|陆|荣|翁|荀|羊|於|惠|甄|麴|家|封|芮|羿|储|靳|汲|邴|糜|松|井|段|富|巫|乌|焦|巴|弓|牧|隗|山|谷|车|侯|宓|蓬|全|郗|班|仰|秋|仲|伊|宫|宁|仇|栾|暴|甘|钭|历|戎|祖|武|符|刘|景|詹|束|龙|叶|幸|司|韶|郜|黎|溥|印|宿|白|怀|蒲|邰|从|鄂|索|咸|籍|卓|蔺|屠|蒙|池|乔|阳|郁|胥|能|苍|双|闻|莘|党|翟|谭|贡|劳|逄|姬|申|扶|堵|冉|宰|郦|雍|却|桑|桂|濮|牛|寿|通|边|扈|燕|冀|浦|尚|农|温|别|庄|晏|柴|瞿|充|慕|连|茹|习|宦|艾|鱼|容|向|古|易|慎|戈|廖|庾|终|暨|居|衡|步|都|耿|满|弘|匡|国|文|寇|广|禄|阙|东|欧|沃|利|蔚|越|夔|隆|师|巩|厍|聂|晁|勾|敖|融|冷|訾|辛|阚|那|简|饶|空|曾|毋|沙|乜|养|鞠|须|丰|巢|关|蒯|相|荆|红|游|竺|权|司马|上官|欧阳|夏侯|诸葛|闻人|东方|赫连|皇甫|尉迟|公羊|澹台|公冶宗政|濮阳|淳于|单于|太叔|申屠|公孙|仲孙|轩辕|令狐|钟离|宇文|长孙|慕容|司徒|司空|召|有|舜|岳|黄辰|寸|贰|皇|侨|彤|竭|端|赫|实|甫|集|象|翠|狂|辟|典|良|函|芒|苦|其|京|中|夕|乌孙|完颜|富察|费莫|蹇|称|诺|来|多|繁|戊|朴|回|毓|鉏|税|荤|靖|绪|愈|硕|牢|买|但|巧|枚|撒|泰|秘|亥|绍|以|壬|森|斋|释|奕|姒|朋|求|羽|用|占|真|穰|翦|闾|漆|贵|代|贯|旁|崇|栋|告|休|褒|谏|锐|皋|闳|在|歧|禾|示|是|委|钊|频|嬴|呼|大|威|昂|律|冒|保|系|抄|定|化|莱|校|么|抗|祢|綦|悟|宏|功|庚|务|敏|捷|拱|兆|丑|丙|畅|苟|随|类|卯|俟|友|答|乙|允|甲|留|尾|佼|玄|乘|裔|延|植|环|矫|赛|昔|侍|度|旷|遇|偶|前|由|咎|塞|敛|受|泷|袭|衅|叔|圣|御|夫|仆|镇|藩|邸|府|掌|首|员|焉|戏|可|智|尔|凭|悉|进|笃|厚|仁|业|肇|资|合|仍|九|衷|哀|刑|俎|仵|圭|夷|徭|蛮|汗|孛|乾|帖|罕|洛|淦|洋|邶|郸|郯|邗|邛|剑|虢|隋|蒿|茆|菅|苌|树|桐|锁|钟|机|盘|铎|斛|玉|线|针|箕|庹|绳|磨|蒉|瓮|弭|刀|疏|牵|浑|恽|势|世|仝|同|蚁|止|戢|睢|冼|种|涂|肖|己|泣|潜|卷|脱|谬|蹉|赧|浮|顿|说|次|错|念|夙|斯|完|丹|表|聊|源|姓|吾|寻|展|出|不|户|闭|才|无|书|学|愚|本|性|雪|霜|烟|寒|少|字|桥|板|斐|独|千|诗|嘉|扬|善|揭|祈|析|赤|紫|青|柔|刚|奇|拜|佛|陀|弥|阿|素|长|僧|隐|仙|隽|宇|祭|酒|淡|塔|琦|闪|始|星|南|天|接|波|碧|速|禚|腾|潮|镜|似|澄|潭|謇|纵|渠|奈|风|春|濯|沐|茂|英|兰|檀|藤|枝|检|生|折|登|驹|骑|貊|虎|肥|鹿|雀|野|禽|飞|节|宜|鲜|粟|栗|豆|帛|官|布|衣|藏|宝|钞|银|门|盈|庆|喜|及|普|建|营|巨|望|希|道|载|声|漫|犁|力|贸|勤|革|改|兴|亓|睦|修|信|闽|北|守|坚|勇|汉|练|尉|士|旅|五|令|将|旗|军|行|奉|敬|恭|仪|母|堂|丘|义|礼|慈|孝|理|伦|卿|问|永|辉|位|让|尧|依|犹|介|承|市|所|苑|杞|剧|第|零|谌|招|续|达|忻|六|鄞|战|迟|候|宛|励|粘|萨|邝|覃|辜|初|楼|城|区|局|台|原|考|妫|纳|泉|老|清|德|卑|过|麦|曲|竹|百|福|言|第五|佟|爱|年|笪|谯|哈|墨|连|南宫|赏|伯|佴|佘|牟|商|西门|东门|左丘|梁丘|琴|后|况|亢|缑|帅|微生|羊舌|海|归|呼延|南门|东郭|百里|钦|鄢|汝|法|闫|楚|晋|谷梁|宰父|夹谷|拓跋|壤驷|乐正|漆雕|公西|巫马|端木|颛孙|子车|督|仉|司寇|亓官|三小|鲜于|锺离|盖|逯|库|郏|逢|阴|薄|厉|稽|闾丘|公良|段干|开|光|操|瑞|眭|泥|运|摩|伟|铁|迮][\\u4e00-\\u9fa5]");
regularExpressions.setMacRegExp("[A-F0-9]{2}([-:]?[A-F0-9]{2})([-:.]?[A-F0-9]{2})([-:]?[A-F0-9]{2})([-:.]?[A-F0-9]{2})([-:]?[A-F0-9]{2})");
regularExpressions.setEmailRegExp("([a-zA-Z0-9_-])+@([a-zA-Z0-9_-])+((\\.[a-zA-Z0-9_-]{1,4}){1,4})");
regularExpressions.setIpRegExp("((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)");
regularExpressions.setPositionExp("仓(\\d+(\\.)?\\d+)(万|千|手|股)");
DesensitizationFunction desensitizationFunction = new DesensitizationFunction(regularExpressions);
String[] fieldsWhiteListArray = "funcid,count1,count2,count3,count".split(",");
List<String> whiteList = new ArrayList<>(fieldsWhiteListArray.length);
Collections.addAll(whiteList, fieldsWhiteListArray);
List<String> dataFormats = new ArrayList<String>(){{
add(",");
add(".");
add("@");
add("-");
add(":");
}};
Map<String, String> map = new HashMap<>(4);
map.put("name", "廖鸣韬");
map.put("name2", "王海鹰");
map.put("mobile", "18570332683");
map.put("phone", "0730-7512340");
map.put("email", "liaomingtao@zork.com.cn");
map.put("id", "430621194711110423");
map.put("bankId", "6222600260001072123");
map.put("address", "上海市浦东新区张江路");
map.put("ip", "192.168.70.1");
map.put("mac", "00:0C:29:01:98:27");
map.put("funcid", "1111");
map.put("count1", "普通字段");
map.put("count2", "普通字段4");
map.put("count3", "普通字段5");
map.put("count4", "普通字段6");
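// note: the "count4" entry below overwrites the one above, so only the second value is tested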
map.put("count4", "持空仓1000万");
map.put("message", "廖鸣韬|18570332683|0730-7638844|liaomingtao@zork.com.cn|430621194711110423|6222600260001072123|上海市浦东新区张江路|192.168.70.1|00:0C:29:01:98:27|1111");
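// note: the "message" entry below overwrites the one above, so only the second value is desensitized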
map.put("message", "13811110000|110101199003075517|上海市浦东新区张江微电子港|zorkdata@163.com|123456789|wanghaiying123-王海鹰-192.168.1.1-00-50-56-C0-00-08-6227002470170278192-持仓1000万");
Map<String, String> stringStringMap = desensitizationFunction.desensitization(map, whiteList, dataFormats);
System.out.println(JSON.toJSONString(stringStringMap));
}
/**
* Deep copy of a map via Java serialization.
*
* @param obj the map to copy
* @return a deep copy of the map, or null if serialization fails
*/
public static HashMap<String, Object> clone(Map<String, Object> obj) {
HashMap<String, Object> clonedObj = null;
if (obj.isEmpty()) {
clonedObj = new HashMap<>(50);
} else {
try {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
ObjectOutputStream oos = new ObjectOutputStream(baos);
oos.writeObject(obj);
oos.close();
ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
ObjectInputStream ois = new ObjectInputStream(bais);
clonedObj = (HashMap<String, Object>) ois.readObject();
ois.close();
} catch (Exception e) {
e.printStackTrace();
}
}
return clonedObj;
}
}
package com.zorkdata.desensitization;
import com.zorkdata.desensitization.constans.ConfigConstants;
import com.zorkdata.desensitization.config.JobConfig;
import com.zorkdata.desensitization.config.JobInitConfig;
import com.zorkdata.desensitization.exception.ZorkException;
import com.zorkdata.desensitization.hadoop.HdfsLogDesensitization;
import com.zorkdata.desensitization.utils.PropertiesUtil;
import com.zorkdata.desensitization.utils.YmlUtil;
import com.zorkdata.desensitization.utils.YmlUtils;
import lombok.extern.slf4j.Slf4j;
import java.util.Map;
......@@ -16,30 +16,23 @@ import java.util.Map;
@Slf4j
public class TransactionLogDesensitization {
private static final int PARAM_LENGTH = 4;
private static final int PARAM_LENGTH = 2;
public static void main(String[] args) throws Exception{
public static void main(String[] args) throws Exception {
long start = System.currentTimeMillis();
// Parameter validation
if (args.length != PARAM_LENGTH) {
String error = "参数缺失,请输入配置文件,例如: " +
"--conf E:\\Codes\\fork\\transaction_log_desensitization\\src\\main\\resources\\application.yml " +
"--regular E:\\Codes\\fork\\transaction_log_desensitization\\src\\main\\resources\\regular ";
"--conf E:\\Codes\\fork\\transaction_log_desensitization\\src\\main\\resources\\application.yml";
log.error(error);
throw new ZorkException(error);
}
try {
Map<String, String> conf = YmlUtil.getParams(args);
Map<String, String> regularMap = PropertiesUtil.getPropertiesMap(args);
String source = conf.get(ConfigConstants.SOURCE);
if (ConfigConstants.HDFS.equals(source)) {
new HdfsLogDesensitization().initConf(conf).initRegular(regularMap).desensitizationHdfsLog();
}
if (ConfigConstants.KAFKA.equals(source)) {
// TODO kafka
}
} catch (ZorkException e) {
log.info(String.valueOf(e));
}
Map<String, String> conf = YmlUtils.getParams(args);
JobInitConfig jobInitConfig = new JobInitConfig(conf);
JobConfig jobConfig = new JobConfig(jobInitConfig);
new HdfsLogDesensitization().initJobConfig(jobConfig).desensitizationHdfsLog();
Thread.sleep(1000);
long stop = System.currentTimeMillis();
System.out.println("耗时统计:" + (stop - start) + "ms");
log.info("耗时统计: {} ms", stop - start);
}
}
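For reference, a minimal sketch of launching the job after this change, following the usage hinted at in the error message above (jar name and path are placeholders, not taken from the commit):

// Illustrative invocation; only --conf is expected now that the --regular argument has been dropped
// java -cp transaction-log-desensitization.jar \
//      com.zorkdata.desensitization.TransactionLogDesensitization \
//      --conf /path/to/application.yml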
package com.zorkdata.desensitization.config;
import com.zorkdata.desensitization.constans.GeneralConstants;
import lombok.Data;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
/**
* @author: LiaoMingtao
* @date: 2021/2/23
*/
@Data
public class JobConfig implements Serializable {
private static final long serialVersionUID = 693924914570906529L;
public JobConfig(JobInitConfig jobInitConfig) {
this.setJobName(jobInitConfig.getJobName());
this.setSourceParallelism(jobInitConfig.getSourceParallelism());
this.setTransformerParallelism(jobInitConfig.getTransformerParallelism());
this.setSinkParallelism(jobInitConfig.getSinkParallelism());
this.setAvroOutputSchema(jobInitConfig.getAvroOutputSchema());
this.setHdfsUri(jobInitConfig.getHdfsUri());
this.setHdfsUser(jobInitConfig.getHdfsUser());
this.setHdfsSrc(jobInitConfig.getHdfsUri() + jobInitConfig.getHdfsSrc());
this.setHdfsDest(jobInitConfig.getHdfsUri() + jobInitConfig.getHdfsDest());
this.setMatchHostname(jobInitConfig.getMatchHostname());
this.setStartTime(jobInitConfig.getStartTime());
this.setEndTime(jobInitConfig.getEndTime());
this.setStartTimestamp(jobInitConfig.getStartTimestamp());
this.setEndTimestamp(jobInitConfig.getEndTimestamp());
String password = jobInitConfig.getPassword() + GeneralConstants.COMMA;
String name = jobInitConfig.getName() + GeneralConstants.COMMA;
String mobile = jobInitConfig.getMobile() + GeneralConstants.COMMA;
String phone = jobInitConfig.getPhone() + GeneralConstants.COMMA;
String email = jobInitConfig.getEmail() + GeneralConstants.COMMA;
String id = jobInitConfig.getId() + GeneralConstants.COMMA;
String address = jobInitConfig.getAddress() + GeneralConstants.COMMA;
String bankCard = jobInitConfig.getBankCard() + GeneralConstants.COMMA;
String ip = jobInitConfig.getIp() + GeneralConstants.COMMA;
String mac = jobInitConfig.getMac() + GeneralConstants.COMMA;
String position = jobInitConfig.getPosition() + GeneralConstants.COMMA;
String allKey = password + name + mobile + phone + email + id + address + bankCard + ip + mac + position;
String[] keys = allKey.split(GeneralConstants.COMMA);
List<String> keyList = new ArrayList<>();
for (String key : keys) {
if (!GeneralConstants.EMPTY_STR.equals(key)) {
keyList.add(key);
}
}
keyList = keyList.stream().distinct().collect(Collectors.toList());
this.setDesensitizationKeyList(keyList);
}
private String jobName;
  • Remove this unused "jobName" private field. 📘

private int sourceParallelism;
  • Remove this unused "sourceParallelism" private field. 📘

private int transformerParallelism;
  • Remove this unused "transformerParallelism" private field. 📘

private int sinkParallelism;
  • Remove this unused "sinkParallelism" private field. 📘

private String avroOutputSchema;
  • Remove this unused "avroOutputSchema" private field. 📘

private String hdfsUri;
  • Remove this unused "hdfsUri" private field. 📘

private String hdfsUser;
  • Remove this unused "hdfsUser" private field. 📘

private String hdfsSrc;
  • Remove this unused "hdfsSrc" private field. 📘

private String hdfsDest;
  • Remove this unused "hdfsDest" private field. 📘

private String matchHostname;
  • Remove this unused "matchHostname" private field. 📘

private String startTime;
  • Remove this unused "startTime" private field. 📘

private String endTime;
  • Remove this unused "endTime" private field. 📘

private long startTimestamp;
  • Remove this unused "startTimestamp" private field. 📘

private long endTimestamp;
  • Remove this unused "endTimestamp" private field. 📘

private List<String> desensitizationKeyList;
  • Remove this unused "desensitizationKeyList" private field. 📘

}
package com.zorkdata.desensitization.config;
import com.zorkdata.desensitization.avro.AvroSchemaDef;
import com.zorkdata.desensitization.constans.ConfigConstants;
import com.zorkdata.desensitization.utils.DateUtils;
import lombok.Data;
import org.apache.avro.Schema;
import org.apache.commons.collections.MapUtils;
import java.io.Serializable;
import java.util.Map;
/**
* @author: LiaoMingtao
* @date: 2021/2/23
*/
@Data
public class JobInitConfig implements Serializable {
private static final long serialVersionUID = -1959581564693543666L;
public JobInitConfig(Map<String, String> conf) {
this.jobName = String.valueOf(conf.get(ConfigConstants.JOB_NAME));
this.sourceParallelism = Integer.parseInt(conf.get(ConfigConstants.SOURCE_PARALLELISM));
this.transformerParallelism = Integer.parseInt(conf.get(ConfigConstants.TRANSFORMER_PARALLELISM));
this.sinkParallelism = Integer.parseInt(conf.get(ConfigConstants.SINK_PARALLELISM));
this.avroOutputSchema = new Schema.Parser().parse(AvroSchemaDef.ZORK_LOG_SCHEMA).toString(true);
this.hdfsUri = String.valueOf(conf.get(ConfigConstants.HDFS_URI)).trim();
this.hdfsUser = String.valueOf(conf.get(ConfigConstants.HDFS_USER)).trim();
this.hdfsSrc = String.valueOf(conf.get(ConfigConstants.HDFS_SRC)).trim();
this.hdfsDest = String.valueOf(conf.get(ConfigConstants.HDFS_DEST)).trim();
this.matchHostname = String.valueOf(conf.get(ConfigConstants.MATCH_HOSTNAME)).trim();
this.startTime = String.valueOf(conf.get(ConfigConstants.START_TIME));
this.endTime = String.valueOf(conf.get(ConfigConstants.END_TIME));
this.startTimestamp = DateUtils.time2Timestamp(startTime);
this.endTimestamp = DateUtils.time2Timestamp(endTime);
this.name = MapUtils.getString(conf, ConfigConstants.NAME_KEY);
this.mobile = MapUtils.getString(conf, ConfigConstants.MOBILE_KEY);
this.phone = MapUtils.getString(conf, ConfigConstants.PHONE_KEY);
this.email = MapUtils.getString(conf, ConfigConstants.EMAIL_KEY);
this.id = MapUtils.getString(conf, ConfigConstants.ID_KEY);
this.bankCard = MapUtils.getString(conf, ConfigConstants.BANK_CARD_KEY);
this.address = MapUtils.getString(conf, ConfigConstants.ADDRESS_KEY);
this.ip = MapUtils.getString(conf, ConfigConstants.IP_KEY);
this.mac = MapUtils.getString(conf, ConfigConstants.MAC_KEY);
this.position = MapUtils.getString(conf, ConfigConstants.POSITION_KEY);
this.password = MapUtils.getString(conf, ConfigConstants.PASSWORD_KEY);
}
private String jobName;
  • Remove this unused "jobName" private field. 📘

private int sourceParallelism;
  • Remove this unused "sourceParallelism" private field. 📘

private int transformerParallelism;
  • Remove this unused "transformerParallelism" private field. 📘

private int sinkParallelism;
  • Remove this unused "sinkParallelism" private field. 📘

private String avroOutputSchema;
  • Remove this unused "avroOutputSchema" private field. 📘

private String hdfsUri;
  • Remove this unused "hdfsUri" private field. 📘

private String hdfsUser;
  • Remove this unused "hdfsUser" private field. 📘

private String hdfsSrc;
  • Remove this unused "hdfsSrc" private field. 📘

private String hdfsDest;
  • Remove this unused "hdfsDest" private field. 📘

private String matchHostname;
  • Remove this unused "matchHostname" private field. 📘

private String startTime;
private String endTime;
private long startTimestamp;
  • Remove this unused "startTimestamp" private field. 📘

private long endTimestamp;
  • Remove this unused "endTimestamp" private field. 📘

private String name;
  • Remove this unused "name" private field. 📘

private String mobile;
  • Remove this unused "mobile" private field. 📘

private String phone;
  • Remove this unused "phone" private field. 📘

private String email;
  • Remove this unused "email" private field. 📘

/**
* ID card number
*/
private String id;
  • Remove this unused "id" private field. 📘

private String bankCard;
  • Remove this unused "bankCard" private field. 📘

private String address;
  • Remove this unused "address" private field. 📘

private String ip;
  • Remove this unused "ip" private field. 📘

private String mac;
  • Remove this unused "mac" private field. 📘

/**
* Position (holdings) information
*/
private String position;
  • Remove this unused "position" private field. 📘

private String password;
  • Remove this unused "password" private field. 📘

}
......@@ -21,10 +21,23 @@ public final class ConfigConstants {
public static final String HDFS_USER = "hdfs_user";
public static final String HDFS_SRC = "hdfs_src";
public static final String HDFS_DEST = "hdfs_dest";
public static final String MATCH_HOSTNAME = "match.hostname";
public static final String CORE = "core";
public static final String START_TIME = "start_time";
public static final String END_TIME = "end_time";
public static final String NAME_KEY = "name";
public static final String MOBILE_KEY = "mobile";
public static final String PHONE_KEY = "phone";
public static final String EMAIL_KEY = "email";
public static final String ID_KEY = "id";
public static final String BANK_CARD_KEY = "bank_card";
public static final String ADDRESS_KEY = "address";
public static final String IP_KEY = "ip";
public static final String MAC_KEY = "mac";
public static final String POSITION_KEY = "position";
public static final String PASSWORD_KEY = "password";
  • Remove this hard-coded password. 📘

public static final String SERVERS = "servers";
public static final String ZOOKEEPER = "zookeeper";
public static final String TOPIC = "topic";
......
package com.zorkdata.desensitization.function;
import com.alibaba.fastjson.JSON;
import com.zorkdata.desensitization.config.RegularExpressions;
import com.alibaba.fastjson.TypeReference;
import com.zorkdata.desensitization.config.JobConfig;
import com.zorkdata.desensitization.schmea.LogData;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
import java.io.Serializable;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author: LiaoMingtao
* @date: 2020/10/26
* @date: 2021/2/24
*/
public class DesensitizationFunction implements Serializable {
public class DesensitizationFunction<T, R> extends RichFlatMapFunction<Tuple2<Object, Object>, LogData> {
  • T is not used in the class. 📘 R is not used in the class. 📘
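A minimal sketch of the declaration without the unused type parameters (same Flink signature, only <T, R> dropped; this is an illustration, not the committed code):

public class DesensitizationFunction extends RichFlatMapFunction<Tuple2<Object, Object>, LogData> {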

private static final long serialVersionUID = 1L;
private JobConfig jobConfig;
  • Remove this unused "jobConfig" private field. 📘

private RegularExpressions regularExpressions;
private List<Pattern> patterns = new ArrayList<>();
public DesensitizationFunction(RegularExpressions regularExpressions) {
this.regularExpressions = regularExpressions;
patterns.add(Pattern.compile(regularExpressions.getIdRegExp()));
patterns.add(Pattern.compile(regularExpressions.getBankCardRegExp()));
patterns.add(Pattern.compile(regularExpressions.getPhoneRegExp()));
patterns.add(Pattern.compile(regularExpressions.getMobileRegExp()));
patterns.add(Pattern.compile(regularExpressions.getAddressRegExp()));
patterns.add(Pattern.compile(regularExpressions.getPositionExp()));
patterns.add(Pattern.compile(regularExpressions.getNameRegExp()));
patterns.add(Pattern.compile(regularExpressions.getMacRegExp()));
patterns.add(Pattern.compile(regularExpressions.getEmailRegExp()));
patterns.add(Pattern.compile(regularExpressions.getIpRegExp()));
public DesensitizationFunction(JobConfig jobConfig) {
this.jobConfig = jobConfig;
}
public Map<String, String> desensitization(Map<String, String> map,
List<String> whiteList, List<String> dataFormats) {
Iterator<Map.Entry<String, String>> entryIterator = map.entrySet().iterator();
while (entryIterator.hasNext()) {
Map.Entry<String, String> next = entryIterator.next();
String key = next.getKey();
String value = next.getValue();
if (!whiteList.contains(key)) {
// Perform the desensitization (masking)
for (Pattern pattern : patterns) {
Matcher matcher = pattern.matcher(value);
if (matcher.find()) {
String replaceStr = "";
for (int i = 0; i < matcher.group().length(); i++) {
String s = String.valueOf(matcher.group().charAt(i));
if(dataFormats.contains(s)){
replaceStr = replaceStr.concat(s);
}else{
replaceStr = replaceStr.concat("*");
}
}
value = value.replace(matcher.group(), replaceStr);
map.put(key, value);
}
}
}
}
return map;
}
public Map desensitizationTemp(Map map, List<String> whiteList, List<String> dataFormats) {
map.forEach((k, v) -> {
if (!whiteList.contains(k)) {
String value = v.toString();
for (Pattern pattern : patterns) {
Matcher matcher = pattern.matcher(value);
if (matcher.find()) {
String replaceStr = "";
for (int i = 0; i < matcher.group().length(); i++) {
String s = String.valueOf(matcher.group().charAt(i));
if(dataFormats.contains(s)){
replaceStr = replaceStr.concat(s);
}else{
replaceStr = replaceStr.concat("*");
}
}
value = value.replace(matcher.group(), replaceStr);
System.out.println("\n");
}
}
map.put(k, value);
} else {
map.put(k, v);
}
@Override
public void flatMap(Tuple2<Object, Object> value, Collector<LogData> collector) throws Exception {
LogData logData = JSON.parseObject(value.getField(0).toString(), new TypeReference<LogData>() {
});
return map;
collector.collect(logData);
}
}
package com.zorkdata.desensitization.function;
import com.zorkdata.desensitization.config.RegularExpressions;
import java.io.Serializable;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author: LiaoMingtao
* @date: 2020/10/26
*/
public class DesensitizationFunctionOld implements Serializable {
private static final long serialVersionUID = 1L;
private RegularExpressions regularExpressions;
  • Remove this unused "regularExpressions" private field. 📘

private List<Pattern> patterns = new ArrayList<>();
@Deprecated
public DesensitizationFunctionOld(RegularExpressions regularExpressions) {
  • Add the missing @deprecated Javadoc tag. 📘 Do not forget to remove this deprecated code someday. 📘
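A minimal sketch of the suggested Javadoc, pointing at the replacement class introduced in this commit (wording is illustrative):

/**
 * @deprecated superseded by {@link DesensitizationFunction}; remove once callers have migrated.
 */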

this.regularExpressions = regularExpressions;
patterns.add(Pattern.compile(regularExpressions.getIdRegExp()));
patterns.add(Pattern.compile(regularExpressions.getBankCardRegExp()));
patterns.add(Pattern.compile(regularExpressions.getPhoneRegExp()));
patterns.add(Pattern.compile(regularExpressions.getMobileRegExp()));
patterns.add(Pattern.compile(regularExpressions.getAddressRegExp()));
patterns.add(Pattern.compile(regularExpressions.getPositionExp()));
patterns.add(Pattern.compile(regularExpressions.getNameRegExp()));
patterns.add(Pattern.compile(regularExpressions.getMacRegExp()));
patterns.add(Pattern.compile(regularExpressions.getEmailRegExp()));
patterns.add(Pattern.compile(regularExpressions.getIpRegExp()));
}
@Deprecated
public Map<String, String> desensitization(Map<String, String> map,
  • 🚫 Refactor this method to reduce its Cognitive Complexity from 22 to the 15 allowed. 📘 Add the missing @deprecated Javadoc tag. 📘 Do not forget to remove this deprecated code someday. 📘

List<String> whiteList, List<String> dataFormats) {
Iterator<Map.Entry<String, String>> entryIterator = map.entrySet().iterator();
while (entryIterator.hasNext()) {
Map.Entry<String, String> next = entryIterator.next();
String key = next.getKey();
String value = next.getValue();
if (!whiteList.contains(key)) {
// Perform the desensitization (masking)
for (Pattern pattern : patterns) {
Matcher matcher = pattern.matcher(value);
if (matcher.find()) {
String replaceStr = "";
for (int i = 0; i < matcher.group().length(); i++) {
String s = String.valueOf(matcher.group().charAt(i));
if(dataFormats.contains(s)){
replaceStr = replaceStr.concat(s);
}else{
replaceStr = replaceStr.concat("*");
}
}
value = value.replace(matcher.group(), replaceStr);
map.put(key, value);
}
}
}
}
return map;
}
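One way to address the cognitive-complexity finding above is to extract the per-match masking from the nested loops into a helper; a minimal sketch (the helper name is illustrative, the behavior mirrors the inner loop):

private String maskMatch(String matched, List<String> dataFormats) {
    StringBuilder masked = new StringBuilder(matched.length());
    for (int i = 0; i < matched.length(); i++) {
        String s = String.valueOf(matched.charAt(i));
        // keep separators listed in dataFormats, mask every other character
        masked.append(dataFormats.contains(s) ? s : "*");
    }
    return masked.toString();
}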
public Map desensitizationTemp(Map map, List<String> whiteList, List<String> dataFormats) {
  • 🚫 Refactor this method to reduce its Cognitive Complexity from 22 to the 15 allowed. 📘

map.forEach((k, v) -> {
if (!whiteList.contains(k)) {
  • A "List" cannot contain a "K" 📘

String value = v.toString();
for (Pattern pattern : patterns) {
Matcher matcher = pattern.matcher(value);
if (matcher.find()) {
String replaceStr = "";
for (int i = 0; i < matcher.group().length(); i++) {
String s = String.valueOf(matcher.group().charAt(i));
if(dataFormats.contains(s)){
replaceStr = replaceStr.concat(s);
}else{
replaceStr = replaceStr.concat("*");
}
}
value = value.replace(matcher.group(), replaceStr);
System.out.println("\n");
  • Replace this use of System.out or System.err by a logger. 📘
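// A minimal sketch of the logger-based alternative suggested above (assumes an org.slf4j Logger
// named "log", e.g. declared via Lombok's @Slf4j as elsewhere in this project):
// log.debug("masked value: {}", value);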

}
}
map.put(k, value);
} else {
map.put(k, v);
}
});
return map;
}
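The raw-type finding above ("A 'List' cannot contain a 'K'") disappears once the map's generics are declared; a minimal sketch of the signature, assuming String keys as used by the callers in this commit:

public Map<String, Object> desensitizationTemp(Map<String, Object> map, List<String> whiteList, List<String> dataFormats) {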
}
......@@ -11,7 +11,7 @@ import java.util.*;
* @date: 2020/10/22
*/
@Slf4j
public class DateUtil {
public class DateUtils {
  • Add a private constructor to hide the implicit public one. 📘
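A minimal sketch of the suggested fix, mirroring the private constructor YmlUtils already declares below:

private DateUtils() {
}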

private static final String NULL = "";
private static final String BAR_STRING = "-";
......
package com.zorkdata.desensitization.utils;
import com.alibaba.fastjson.JSON;
import com.zorkdata.desensitization.constans.GeneralConstants;
import com.zorkdata.desensitization.exception.ZorkException;
import lombok.extern.slf4j.Slf4j;
......@@ -14,20 +13,12 @@ import java.util.*;
* @date: 2020/8/7
*/
@Slf4j
public class PropertiesUtil {
public class PropertiesUtils {
  • Add a private constructor to hide the implicit public one. 📘

private static final int DEFAULT_PARAMS_MAP_LENGTH = 10;
private static final String REGULAR = "regular";
public static void main(String[] args) {
List<String> propertiesContentList = PropertiesUtil.getPropertiesContentList("/regular");
System.out.println(JSON.toJSONString(propertiesContentList));
Map<String, String> propertiesMap = getPropertiesMap(propertiesContentList);
System.out.println(JSON.toJSONString(propertiesMap));
}
/**
* Load the properties file into a map
*
......@@ -39,8 +30,8 @@ public class PropertiesUtil {
ParameterTool parameterTool = ParameterTool.fromArgs(args);
configPath = parameterTool.get(REGULAR);
log.info("read config path is {}", configPath);
List<String> propertiesContentList = PropertiesUtil.getPropertiesContentList(configPath);
Map<String, String> confMap = PropertiesUtil.getPropertiesMap(propertiesContentList);
List<String> propertiesContentList = PropertiesUtils.getPropertiesContentList(configPath);
Map<String, String> confMap = PropertiesUtils.getPropertiesMap(propertiesContentList);
if (confMap.isEmpty()) {
log.error("配置文件regular不存在,系统退出");
throw new ZorkException("配置文件regular不存在,系统退出");
......@@ -114,7 +105,7 @@ public class PropertiesUtil {
InputStream inputStream = null;
StringBuilder stringBuilder = new StringBuilder();
try {
inputStream = PropertiesUtil.class.getResourceAsStream(propertiesFileName);
inputStream = PropertiesUtils.class.getResourceAsStream(propertiesFileName);
// holds each byte read from the stream
int result = -1;
while ((result = inputStream.read()) != -1) {
......@@ -144,7 +135,7 @@ public class PropertiesUtil {
Properties properties = new Properties();
InputStream inputStream = null;
try {
inputStream = PropertiesUtil.class.getResourceAsStream(propertiesFileName);
inputStream = PropertiesUtils.class.getResourceAsStream(propertiesFileName);
properties.load(inputStream);
} catch (IOException e) {
e.printStackTrace();
......
......@@ -13,9 +13,9 @@ import java.util.Map;
* @date: 2020/10/22
*/
@Slf4j
public class YmlUtil {
public class YmlUtils {
private YmlUtil() {
private YmlUtils() {
}
private static final String YML_SUFFIX = "yml";
......
# Job configuration
job_name: "国泰交易日志脱敏job"
# Parallelism
source.parallelism: "4"
transformer.parallelism: "4"
sink.parallelism: "4"
# Data source; hdfs and kafka are supported (required). kafka is not supported yet
source: "hdfs"
# "Core" identifier of the transaction log: a value starting with "c" followed by a number, matched against the hostname dimension; pass * to skip hostname matching
core: "c9"
# Whether dimension fields are desensitized with regular expressions
reg.dimension: "true"
# Start of the log query window
start_time: "2020-11-07 21:22:20"
# End of the log query window
end_time: "2020-11-07 23:40:30"
# Job configuration
job_name: "国泰交易日志脱敏job"
# Parallelism (HdfsLogDesensitization)
source.parallelism: "4"
transformer.parallelism: "4"
sink.parallelism: "1"
# Whether to filter on the hostname dimension field: "*" disables filtering; to keep only hostnames containing e.g. "c9", set this to c9
match.hostname: "*"
# Password field keys
password: "trdpwd,newpwd,oldfundpwd,newfundpwd,bankpwd,dynamicpwd1,dynamicpwd2,dynamicpwd,fundpwd,newbankpwd"
# Name field keys
name: "custname,otherlinkman,longname,YXYHMC,deputyname"
# Mobile number field keys
mobile: "netaddr,telno,mobileno,faxno,hometelno,worktelno,fax,mobil,call"
# Phone number field keys
phone: "netaddr,telno,mobileno,faxno,hometelno,worktelno,fax,mobil,call"
# Email field keys
email: "email"
# ID card number field keys
id: "idno,deputyidno"
# Bank card number field keys
bank_card: "lmbankid"
# Home address field keys
address: "addr,workaddr,otheraddr,othertelno"
# IP address field keys
ip: "printip,ipaddr,operipaddr,clientip"
# MAC address field keys
mac: "netaddr,clientmac"
# Position (holdings) field keys
position: "orderamt,orderqty,orderprice,fundeffect,stkeffect,profitcost,oddqty,sumqty,ordercount,orderqty,orderamt,fundbal,fundavl,marketvalue,fund,stkvalue,stkqty,stkbal,stkavl,mktval,matchamt,fundeffect,fee_yhs,fee_jsxf,fee_sxf,fee_ghf,fee_qsf,fee_jygf,feefront,fee_jsf,fee_zgf,fundbjhgavl,bb_matchclearamt,nHQSBSL,fundeffect,fundbal,fundeffect,bondbal,bondavl,maxdraw,marketvalue,matchqty,funddraw"
# Local path to which avro data is downloaded
download_path: "/tmp/"
# Hadoop-related configuration
# HDFS URI; must end with a slash
hdfs_uri: "hdfs://cdh-2:8020/"
......@@ -27,10 +48,7 @@ hdfs_user: "hdfs"
# HDFS source path for the logs; required when source is hdfs; must end with a slash
hdfs_src: "/tmp/datawarehouse4/jzjy/kcbp_biz_log/"
# HDFS output path; optional, defaults to an output directory under hdfs_src; must end with a slash
hdfs_dest: "/tmp/datawarehouse/jzjy/kcbp_biz_log/output1/"
# Whitelist of fields that are not desensitized
fields_white_list: "funcid,count1,count2,count3,count4"
hdfs_dest: "/tmp/datawarehouse/jzjy/kcbp_biz_log/output3/"
# CDH download configuration
# IP of the host that can run hdfs commands
......@@ -41,6 +59,3 @@ cdh_host_user: "root"
cdh_host_password: "NuqUtwbJUBRmUwgh"
# User that can run the hdfs download command
cdh_hdfs_user: "hdfs"
# Keep consistent with the config file
download_path: "/tmp/"
# Job configuration
job_name: "国泰交易日志脱敏job"
# Parallelism
source.parallelism: "4"
transformer.parallelism: "4"
sink.parallelism: "4"
# Data source; hdfs and kafka are supported (required). kafka is not supported yet
source: "hdfs"
# "Core" identifier of the transaction log: a value starting with "c" followed by a number, matched against the hostname dimension; pass * to skip hostname matching
core: "c9"
# Whether dimension fields are desensitized with regular expressions
reg.dimension: "true"
# Start of the log query window
start_time: "2020-11-07 21:22:20"
# End of the log query window
end_time: "2020-11-07 23:40:30"
# Hadoop-related configuration
# HDFS URI; must end with a slash
hdfs_uri: "hdfs://cdh-2:8020/"
# HDFS user name
hdfs_user: "hdfs"
# HDFS source path for the logs; required when source is hdfs; must end with a slash
hdfs_src: "/tmp/datawarehouse4/jzjy/kcbp_biz_log/"
# HDFS output path; optional, defaults to an output directory under hdfs_src; must end with a slash
hdfs_dest: "/tmp/datawarehouse/jzjy/kcbp_biz_log/output1/"
# Whitelist of fields that are not desensitized
fields_white_list: "funcid,count1,count2,count3,count4"
# CDH download configuration
# IP of the host that can run hdfs commands
cdh_host_ip: "192.168.70.2"
# Superuser of the host that can run hdfs commands
cdh_host_user: "root"
# Password for that user on the host
cdh_host_password: "NuqUtwbJUBRmUwgh"
# User that can run the hdfs download command
cdh_hdfs_user: "hdfs"
# Keep consistent with the config file
download_path: "/tmp/"
  • SonarQube analysis reported 174 issues

    • 1 blocker
    • 🚫 20 critical
    • 130 major
    • 🔽 21 minor
    • 2 info

    Watch the comments in this conversation to review them.

    Top 30 extra issues

    Note: The following issues were found on lines that were not modified in the commit. Because these issues can't be reported as line comments, they are summarized here:

    1. 🚫 Define a constant instead of duplicating this literal " {\n" 11 times. 📘
    2. 🚫 Define a constant instead of duplicating this literal " "type": \n" 11 times. 📘
    3. 🚫 Define a constant instead of duplicating this literal " "string",\n" 6 times. 📘
    4. 🚫 Define a constant instead of duplicating this literal " "null"\n" 6 times. 📘
    5. 🚫 [Define a constant instead of duplicating this literal " ]\n" 11 times.](https://git.zorkdata.com/liaomingtao/transaction-log-desensitization/blob/7bdc64f5f5802beac0ab4f93b898b7cf98d58177/src/main/java/com/zorkdata/desensitization/avro/AvroSchemaDef.java#L23) 📘
    6. 🚫 Define a constant instead of duplicating this literal " },\n" 9 times. 📘
    7. 🚫 Define a constant instead of duplicating this literal " "null",\n" 5 times. 📘
    8. 🚫 Define a constant instead of duplicating this literal " {\n" 5 times. 📘
    9. 🚫 Define a constant instead of duplicating this literal " "type": "map",\n" 5 times. 📘
    10. 🚫 Define a constant instead of duplicating this literal " "values": "string"\n" 3 times. 📘
    11. 🚫 Define a constant instead of duplicating this literal " }\n" 5 times. 📘
    12. 🚫 Define a constant instead of duplicating this literal "序列化失败" 13 times. 📘
    13. 🚫 Refactor this method to reduce its Cognitive Complexity from 22 to the 15 allowed. 📘
    14. 🚫 Change this "try" to a try-with-resources. (sonar.java.source not set. Assuming 7 or greater.) 📘 (see the sketch after this list)
    15. 🚫 Refactor this code to not throw exceptions in finally blocks. 📘
    16. 🚫 Refactor this code to not throw exceptions in finally blocks. 📘
    17. 🚫 Change this "try" to a try-with-resources. (sonar.java.source not set. Assuming 7 or greater.) 📘
    18. Rename "jsonObject" which hides the field declared at line 39. 📘
    19. Remove this expression which always evaluates to "true" 📘
    20. Remove this expression which always evaluates to "true" 📘
    21. This block of commented-out lines of code should be removed. 📘
    22. Clean up code blocks or configuration that are no longer used in a timely manner. 📘
    23. Remove this expression which always evaluates to "true" 📘
    24. Iterate over the "entrySet" instead of the "keySet". 📘
    25. Remove this conditional structure or edit its code blocks so that they're not all the same. 📘
    26. Iterate over the "entrySet" instead of the "keySet". 📘
    27. Remove this conditional structure or edit its code blocks so that they're not all the same. 📘
    28. Iterate over the "entrySet" instead of the "keySet". 📘
    29. Remove this conditional structure or edit its code blocks so that they're not all the same. 📘
    30. Remove this unused private "bigDecimal2Double" method. 📘
    • ... 47 more
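Items 14 and 17 above point at the manual stream handling in PropertiesUtils; a minimal sketch of the try-with-resources form (simplified; variable names follow the snippet shown earlier, not the exact committed code):

try (InputStream inputStream = PropertiesUtils.class.getResourceAsStream(propertiesFileName)) {
    int result;
    while ((result = inputStream.read()) != -1) {
        stringBuilder.append((char) result);
    }
} catch (IOException e) {
    log.error("failed to read {}", propertiesFileName, e);
}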