Commit 487ccca8 authored by DeleMing

<dev>

1. 修改身份证比银行卡号先匹配
2. 新增持仓正则
3. 增加是否脱敏维度功能
4. 提取hdfs用户下载名称
parent 0cf4e4e5
Pipeline #15923 passed with stages
in 3 minutes and 18 seconds
...@@ -4,6 +4,10 @@ import com.alibaba.fastjson.JSON; ...@@ -4,6 +4,10 @@ import com.alibaba.fastjson.JSON;
import com.zorkdata.desensitization.config.RegularExpressions; import com.zorkdata.desensitization.config.RegularExpressions;
import com.zorkdata.desensitization.function.DesensitizationFunction; import com.zorkdata.desensitization.function.DesensitizationFunction;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.*; import java.util.*;
/** /**
...@@ -14,19 +18,19 @@ public class TestDesensitization { ...@@ -14,19 +18,19 @@ public class TestDesensitization {
public static void main(String[] args) { public static void main(String[] args) {
RegularExpressions regularExpressions = new RegularExpressions(); RegularExpressions regularExpressions = new RegularExpressions();
regularExpressions.setNameRegExp("[赵|钱|孙|李|周|吴|郑|王|冯|陈|褚|卫|蒋|沈|韩|杨|朱|秦|尤|许|何|吕|施|张|孔|曹|严|华|金|魏|陶|姜|戚|谢|邹|喻|柏|水|窦|章|云|苏|潘|葛|奚|范|彭|郎|鲁|韦|昌|马|苗|凤|花|方|俞|任|袁|柳|酆|鲍|史|唐|费|廉|岑|薛|雷|贺|倪|汤|滕|殷|罗|毕|郝|邬|安|常|乐|于|时|傅|皮|卞|齐|康|伍|余|元|卜|顾|孟|平|黄|和|穆|萧|尹|姚|邵|湛|汪|祁|毛|禹|狄|米|贝|明|臧|计|伏|成|戴|谈|宋|茅|庞|熊|纪|舒|屈|项|祝|董|梁|杜|阮|蓝|闵|席|季|麻|强|贾|路|娄|危|江|童|颜|郭|梅|盛|林|刁|锺|徐|邱|骆|高|夏|蔡|田|樊|胡|凌|霍|虞|万|支|柯|昝|管|卢|莫|经|房|裘|缪|干|解|应|宗|丁|宣|贲|邓|郁|单|杭|洪|包|诸|左|石|崔|吉|钮|龚|程|嵇|邢|滑|裴|陆|荣|翁|荀|羊|於|惠|甄|麴|家|封|芮|羿|储|靳|汲|邴|糜|松|井|段|富|巫|乌|焦|巴|弓|牧|隗|山|谷|车|侯|宓|蓬|全|郗|班|仰|秋|仲|伊|宫|宁|仇|栾|暴|甘|钭|历|戎|祖|武|符|刘|景|詹|束|龙|叶|幸|司|韶|郜|黎|溥|印|宿|白|怀|蒲|邰|从|鄂|索|咸|籍|卓|蔺|屠|蒙|池|乔|阳|郁|胥|能|苍|双|闻|莘|党|翟|谭|贡|劳|逄|姬|申|扶|堵|冉|宰|郦|雍|却|桑|桂|濮|牛|寿|通|边|扈|燕|冀|浦|尚|农|温|别|庄|晏|柴|瞿|充|慕|连|茹|习|宦|艾|鱼|容|向|古|易|慎|戈|廖|庾|终|暨|居|衡|步|都|耿|满|弘|匡|国|文|寇|广|禄|阙|东|欧|沃|利|蔚|越|夔|隆|师|巩|厍|聂|晁|勾|敖|融|冷|訾|辛|阚|那|简|饶|空|曾|毋|沙|乜|养|鞠|须|丰|巢|关|蒯|相|荆|红|游|竺|权|司马|上官|欧阳|夏侯|诸葛|闻人|东方|赫连|皇甫|尉迟|公羊|澹台|公冶宗政|濮阳|淳于|单于|太叔|申屠|公孙|仲孙|轩辕|令狐|钟离|宇文|长孙|慕容|司徒|司空|召|有|舜|岳|黄辰|寸|贰|皇|侨|彤|竭|端|赫|实|甫|集|象|翠|狂|辟|典|良|函|芒|苦|其|京|中|夕|乌孙|完颜|富察|费莫|蹇|称|诺|来|多|繁|戊|朴|回|毓|鉏|税|荤|靖|绪|愈|硕|牢|买|但|巧|枚|撒|泰|秘|亥|绍|以|壬|森|斋|释|奕|姒|朋|求|羽|用|占|真|穰|翦|闾|漆|贵|代|贯|旁|崇|栋|告|休|褒|谏|锐|皋|闳|在|歧|禾|示|是|委|钊|频|嬴|呼|大|威|昂|律|冒|保|系|抄|定|化|莱|校|么|抗|祢|綦|悟|宏|功|庚|务|敏|捷|拱|兆|丑|丙|畅|苟|随|类|卯|俟|友|答|乙|允|甲|留|尾|佼|玄|乘|裔|延|植|环|矫|赛|昔|侍|度|旷|遇|偶|前|由|咎|塞|敛|受|泷|袭|衅|叔|圣|御|夫|仆|镇|藩|邸|府|掌|首|员|焉|戏|可|智|尔|凭|悉|进|笃|厚|仁|业|肇|资|合|仍|九|衷|哀|刑|俎|仵|圭|夷|徭|蛮|汗|孛|乾|帖|罕|洛|淦|洋|邶|郸|郯|邗|邛|剑|虢|隋|蒿|茆|菅|苌|树|桐|锁|钟|机|盘|铎|斛|玉|线|针|箕|庹|绳|磨|蒉|瓮|弭|刀|疏|牵|浑|恽|势|世|仝|同|蚁|止|戢|睢|冼|种|涂|肖|己|泣|潜|卷|脱|谬|蹉|赧|浮|顿|说|次|错|念|夙|斯|完|丹|表|聊|源|姓|吾|寻|展|出|不|户|闭|才|无|书|学|愚|本|性|雪|霜|烟|寒|少|字|桥|板|斐|独|千|诗|嘉|扬|善|揭|祈|析|赤|紫|青|柔|刚|奇|拜|佛|陀|弥|阿|素|长|僧|隐|仙|隽|宇|祭|酒|淡|塔|琦|闪|始|星|南|天|接|波|碧|速|禚|腾|潮|镜|似|澄|潭|謇|纵|渠|奈|风|春|濯|沐|茂|英|兰|檀|藤|枝|检|生|折|登|驹|骑|貊|虎|肥|鹿|雀|野|禽|飞|节|宜|鲜|粟|栗|豆|帛|官|布|衣|藏|宝|钞|银|门|盈|庆|喜|及|普|建|营|巨|望|希|道|载|声|漫|犁|力|贸|勤|革|改|兴|亓|睦|修|信|闽|北|守|坚|勇|汉|练|尉|士|旅|五|令|将|旗|军|行|奉|敬|恭|仪|母|堂|丘|义|礼|慈|孝|理|伦|卿|问|永|辉|位|让|尧|依|犹|介|承|市|所|苑|杞|剧|第|零|谌|招|续|达|忻|六|鄞|战|迟|候|宛|励|粘|萨|邝|覃|辜|初|楼|城|区|局|台|原|考|妫|纳|泉|老|清|德|卑|过|麦|曲|竹|百|福|言|第五|佟|爱|年|笪|谯|哈|墨|连|南宫|赏|伯|佴|佘|牟|商|西门|东门|左丘|梁丘|琴|后|况|亢|缑|帅|微生|羊舌|海|归|呼延|南门|东郭
|百里|钦|鄢|汝|法|闫|楚|晋|谷梁|宰父|夹谷|拓跋|壤驷|乐正|漆雕|公西|巫马|端木|颛孙|子车|督|仉|司寇|亓官|三小|鲜于|锺离|盖|逯|库|郏|逢|阴|薄|厉|稽|闾丘|公良|段干|开|光|操|瑞|眭|泥|运|摩|伟|铁|迮][\\u4e00-\\u9fa5]"); regularExpressions.setIdRegExp("[1-9]\\d{5}(18|19|([23]\\d))\\d{2}((0[1-9])|(10|11|12))(([0-2][1-9])|10|20|30|31)\\d{3}[0-9Xx]|[1-9]\\d{5}\\d{2}((0[1-9])|(10|11|12))(([0-2][1-9])|10|20|30|31)\\d{2}");
regularExpressions.setBankCardRegExp("(([13-79]\\d{3})|(2[1-9]\\d{2})|(20[3-9]\\d)|(8[01-79]\\d{2}))\\s?\\d{4}\\s?\\d{4}\\s?\\d{4}(\\s?\\d{3})?");
regularExpressions.setPhoneRegExp("0\\d{2,3}-[1-9]\\d{6,7}"); regularExpressions.setPhoneRegExp("0\\d{2,3}-[1-9]\\d{6,7}");
regularExpressions.setMobileRegExp("((\\+|00)86)?((134\\d{4})|((13[0-3|5-9]|14[1|5-9]|15[0-9]|16[2|5|6|7]|17[0-8]|18[0-9]|19[0-2|5-9])\\d{8}))"); regularExpressions.setMobileRegExp("((\\+|00)86)?((134\\d{4})|((13[0-3|5-9]|14[1|5-9]|15[0-9]|16[2|5|6|7]|17[0-8]|18[0-9]|19[0-2|5-9])\\d{8}))");
regularExpressions.setAddressRegExp("([\\u4E00-\\u9FA5A-Za-z0-9_]+(省|市|自治区|自治州|区|县|镇|道|路|街|号|弄|条|室|港)){1,}");
regularExpressions.setNameRegExp("[赵|钱|孙|李|周|吴|郑|王|冯|陈|褚|卫|蒋|沈|韩|杨|朱|秦|尤|许|何|吕|施|张|孔|曹|严|华|金|魏|陶|姜|戚|谢|邹|喻|柏|水|窦|章|云|苏|潘|葛|奚|范|彭|郎|鲁|韦|昌|马|苗|凤|花|方|俞|任|袁|柳|酆|鲍|史|唐|费|廉|岑|薛|雷|贺|倪|汤|滕|殷|罗|毕|郝|邬|安|常|乐|于|时|傅|皮|卞|齐|康|伍|余|元|卜|顾|孟|平|黄|和|穆|萧|尹|姚|邵|湛|汪|祁|毛|禹|狄|米|贝|明|臧|计|伏|成|戴|谈|宋|茅|庞|熊|纪|舒|屈|项|祝|董|梁|杜|阮|蓝|闵|席|季|麻|强|贾|路|娄|危|江|童|颜|郭|梅|盛|林|刁|锺|徐|邱|骆|高|夏|蔡|田|樊|胡|凌|霍|虞|万|支|柯|昝|管|卢|莫|经|房|裘|缪|干|解|应|宗|丁|宣|贲|邓|郁|单|杭|洪|包|诸|左|石|崔|吉|钮|龚|程|嵇|邢|滑|裴|陆|荣|翁|荀|羊|於|惠|甄|麴|家|封|芮|羿|储|靳|汲|邴|糜|松|井|段|富|巫|乌|焦|巴|弓|牧|隗|山|谷|车|侯|宓|蓬|全|郗|班|仰|秋|仲|伊|宫|宁|仇|栾|暴|甘|钭|历|戎|祖|武|符|刘|景|詹|束|龙|叶|幸|司|韶|郜|黎|溥|印|宿|白|怀|蒲|邰|从|鄂|索|咸|籍|卓|蔺|屠|蒙|池|乔|阳|郁|胥|能|苍|双|闻|莘|党|翟|谭|贡|劳|逄|姬|申|扶|堵|冉|宰|郦|雍|却|桑|桂|濮|牛|寿|通|边|扈|燕|冀|浦|尚|农|温|别|庄|晏|柴|瞿|充|慕|连|茹|习|宦|艾|鱼|容|向|古|易|慎|戈|廖|庾|终|暨|居|衡|步|都|耿|满|弘|匡|国|文|寇|广|禄|阙|东|欧|沃|利|蔚|越|夔|隆|师|巩|厍|聂|晁|勾|敖|融|冷|訾|辛|阚|那|简|饶|空|曾|毋|沙|乜|养|鞠|须|丰|巢|关|蒯|相|荆|红|游|竺|权|司马|上官|欧阳|夏侯|诸葛|闻人|东方|赫连|皇甫|尉迟|公羊|澹台|公冶宗政|濮阳|淳于|单于|太叔|申屠|公孙|仲孙|轩辕|令狐|钟离|宇文|长孙|慕容|司徒|司空|召|有|舜|岳|黄辰|寸|贰|皇|侨|彤|竭|端|赫|实|甫|集|象|翠|狂|辟|典|良|函|芒|苦|其|京|中|夕|乌孙|完颜|富察|费莫|蹇|称|诺|来|多|繁|戊|朴|回|毓|鉏|税|荤|靖|绪|愈|硕|牢|买|但|巧|枚|撒|泰|秘|亥|绍|以|壬|森|斋|释|奕|姒|朋|求|羽|用|占|真|穰|翦|闾|漆|贵|代|贯|旁|崇|栋|告|休|褒|谏|锐|皋|闳|在|歧|禾|示|是|委|钊|频|嬴|呼|大|威|昂|律|冒|保|系|抄|定|化|莱|校|么|抗|祢|綦|悟|宏|功|庚|务|敏|捷|拱|兆|丑|丙|畅|苟|随|类|卯|俟|友|答|乙|允|甲|留|尾|佼|玄|乘|裔|延|植|环|矫|赛|昔|侍|度|旷|遇|偶|前|由|咎|塞|敛|受|泷|袭|衅|叔|圣|御|夫|仆|镇|藩|邸|府|掌|首|员|焉|戏|可|智|尔|凭|悉|进|笃|厚|仁|业|肇|资|合|仍|九|衷|哀|刑|俎|仵|圭|夷|徭|蛮|汗|孛|乾|帖|罕|洛|淦|洋|邶|郸|郯|邗|邛|剑|虢|隋|蒿|茆|菅|苌|树|桐|锁|钟|机|盘|铎|斛|玉|线|针|箕|庹|绳|磨|蒉|瓮|弭|刀|疏|牵|浑|恽|势|世|仝|同|蚁|止|戢|睢|冼|种|涂|肖|己|泣|潜|卷|脱|谬|蹉|赧|浮|顿|说|次|错|念|夙|斯|完|丹|表|聊|源|姓|吾|寻|展|出|不|户|闭|才|无|书|学|愚|本|性|雪|霜|烟|寒|少|字|桥|板|斐|独|千|诗|嘉|扬|善|揭|祈|析|赤|紫|青|柔|刚|奇|拜|佛|陀|弥|阿|素|长|僧|隐|仙|隽|宇|祭|酒|淡|塔|琦|闪|始|星|南|天|接|波|碧|速|禚|腾|潮|镜|似|澄|潭|謇|纵|渠|奈|风|春|濯|沐|茂|英|兰|檀|藤|枝|检|生|折|登|驹|骑|貊|虎|肥|鹿|雀|野|禽|飞|节|宜|鲜|粟|栗|豆|帛|官|布|衣|藏|宝|钞|银|门|盈|庆|喜|及|普|建|营|巨|望|希|道|载|声|漫|犁|力|贸|勤|革|改|兴|亓|睦|修|信|闽|北|守|坚|勇|汉|练|尉|士|旅|五|令|将|旗|军|行|奉|敬|恭|仪|母|堂|丘|义|礼|慈|孝|理|伦|卿|问|永|辉|位|让|尧|依|犹|介|承|市|所|苑|杞|剧|第|零|谌|招|续|达|忻|六|鄞|战|迟|候|宛|励|粘|萨|邝|覃|辜|初|楼|城|区|局|台|原|考|妫|纳|泉|老|清|德|卑|过|麦|曲|竹|百|福|言|第五|佟|爱|年|笪|谯|哈|墨|连|南宫|赏|伯|佴|佘|牟|商|西门|东门|左丘|梁丘|琴|后|况|亢|缑|帅|微生|羊舌|海|归|呼延|南门|东郭
|百里|钦|鄢|汝|法|闫|楚|晋|谷梁|宰父|夹谷|拓跋|壤驷|乐正|漆雕|公西|巫马|端木|颛孙|子车|督|仉|司寇|亓官|三小|鲜于|锺离|盖|逯|库|郏|逢|阴|薄|厉|稽|闾丘|公良|段干|开|光|操|瑞|眭|泥|运|摩|伟|铁|迮][\\u4e00-\\u9fa5]");
regularExpressions.setMacRegExp("[A-F0-9]{2}([-:]?[A-F0-9]{2})([-:.]?[A-F0-9]{2})([-:]?[A-F0-9]{2})([-:.]?[A-F0-9]{2})([-:]?[A-F0-9]{2})");
regularExpressions.setEmailRegExp("([a-zA-Z0-9_-])+@([a-zA-Z0-9_-])+((\\.[a-zA-Z0-9_-]{1,4}){1,4})"); regularExpressions.setEmailRegExp("([a-zA-Z0-9_-])+@([a-zA-Z0-9_-])+((\\.[a-zA-Z0-9_-]{1,4}){1,4})");
regularExpressions.setBankCardRegExp("(([13-79]\\d{3})|(2[1-9]\\d{2})|(20[3-9]\\d)|(8[01-79]\\d{2}))\\s?\\d{4}\\s?\\d{4}\\s?\\d{4}(\\s?\\d{3})?");
regularExpressions.setIdRegExp("[1-9]\\d{5}(18|19|([23]\\d))\\d{2}((0[1-9])|(10|11|12))(([0-2][1-9])|10|20|30|31)\\d{3}[0-9Xx]|[1-9]\\d{5}\\d{2}((0[1-9])|(10|11|12))(([0-2][1-9])|10|20|30|31)\\d{2}");
regularExpressions.setAddressRegExp("([\\u4E00-\\u9FA5A-Za-z0-9_]+(省|市|自治区|自治州|区|县|镇|道|路|街|号|弄|条|室)){1,}");
regularExpressions.setIpRegExp("((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)"); regularExpressions.setIpRegExp("((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)");
regularExpressions.setMacRegExp("[A-F0-9]{2}([-:]?[A-F0-9]{2})([-:.]?[A-F0-9]{2})([-:]?[A-F0-9]{2})([-:.]?[A-F0-9]{2})([-:]?[A-F0-9]{2})"); regularExpressions.setPositionExp("仓(\\d+(\\.)?\\d+)(万|千|手|股)");
DesensitizationFunction desensitizationFunction = new DesensitizationFunction(regularExpressions); DesensitizationFunction desensitizationFunction = new DesensitizationFunction(regularExpressions);
String[] fieldsWhiteListArray = "message,funcid,count1,count2,count3,count4".split(","); String[] fieldsWhiteListArray = "funcid,count1,count2,count3,count".split(",");
List< String> whiteList = new ArrayList<>(fieldsWhiteListArray.length); List< String> whiteList = new ArrayList<>(fieldsWhiteListArray.length);
Collections.addAll(whiteList, fieldsWhiteListArray); Collections.addAll(whiteList, fieldsWhiteListArray);
List<String> dataFormats = new ArrayList<String>(){{ List<String> dataFormats = new ArrayList<String>(){{
...@@ -39,6 +43,7 @@ public class TestDesensitization { ...@@ -39,6 +43,7 @@ public class TestDesensitization {
Map<String, String> map = new HashMap<>(4); Map<String, String> map = new HashMap<>(4);
map.put("name", "廖鸣韬"); map.put("name", "廖鸣韬");
map.put("name2", "王海鹰");
map.put("mobile", "18570332683"); map.put("mobile", "18570332683");
map.put("phone", "0730-7512340"); map.put("phone", "0730-7512340");
map.put("email", "liaomingtao@zork.com.cn"); map.put("email", "liaomingtao@zork.com.cn");
...@@ -52,10 +57,39 @@ public class TestDesensitization { ...@@ -52,10 +57,39 @@ public class TestDesensitization {
map.put("count2", "普通字段4"); map.put("count2", "普通字段4");
map.put("count3", "普通字段5"); map.put("count3", "普通字段5");
map.put("count4", "普通字段6"); map.put("count4", "普通字段6");
map.put("count4", "持仓1000万");
map.put("message", "廖鸣韬|18570332683|0730-7638844|liaomingtao@zork.com.cn|430621194711110423|6222600260001072123|上海市浦东新区张江路|192.168.70.1|00:0C:29:01:98:27|1111"); map.put("message", "廖鸣韬|18570332683|0730-7638844|liaomingtao@zork.com.cn|430621194711110423|6222600260001072123|上海市浦东新区张江路|192.168.70.1|00:0C:29:01:98:27|1111");
map.put("message", "13811110000|110101199003075517|上海市浦东新区张江微电子港|zorkdata@163.com|123456789|wanghaiying123-王海鹰-192.168.1.1-00-50-56-C0-00-08-6227002470170278192-持仓1000万");
Map<String, String> stringStringMap = desensitizationFunction.desensitization(map, whiteList, dataFormats); Map<String, String> stringStringMap = desensitizationFunction.desensitization(map, whiteList, dataFormats);
System.out.println(JSON.toJSONString(stringStringMap)); System.out.println(JSON.toJSONString(stringStringMap));
} }
/**
* Deep-copies a map by pushing it through Java object serialization and reading it back.
*
* An empty input skips serialization and yields a fresh, empty HashMap (initial capacity 50).
* Otherwise the map is written to an in-memory byte array and deserialized into a new object.
* Any exception raised inside the round trip (including a failed cast of the deserialized
* object to HashMap) is caught, its stack trace is printed, and {@code null} is returned.
* A {@code null} argument is not guarded: {@code obj.isEmpty()} runs outside the try block.
*
* @param obj the map to copy; NOTE(review): the map and its contents presumably need to be
*            Serializable for writeObject to succeed - confirm against callers
* @return an independent copy of {@code obj}, an empty HashMap when {@code obj} is empty,
*         or {@code null} when the serialization round trip fails
*/
public static HashMap<String, Object> clone(Map<String, Object> obj) {
  • 🔽 The return type of this method should be an interface such as "Map" rather than the implementation "HashMap". 📘

Please register or sign in to reply
HashMap<String, Object> clonedObj = null;
// Nothing to copy: hand back a new empty map instead of serializing.
if (obj.isEmpty()) {
clonedObj = new HashMap<>(50);
} else {
try {
// Serialize into memory, then deserialize to obtain an independent object graph.
ByteArrayOutputStream baos = new ByteArrayOutputStream();
ObjectOutputStream oos = new ObjectOutputStream(baos);
oos.writeObject(obj);
  • Make the "java.util.Map" class "Serializable" or don't write it. 📘

Please register or sign in to reply
oos.close();
ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
ObjectInputStream ois = new ObjectInputStream(bais);
clonedObj = (HashMap<String, Object>) ois.readObject();
ois.close();
} catch (Exception e) {
e.printStackTrace();
  • 🔽 Use a logger to log this exception. 📘

Please register or sign in to reply
}
}
return clonedObj;
}
} }
...@@ -52,6 +52,11 @@ public class RegularExpressions implements Serializable { ...@@ -52,6 +52,11 @@ public class RegularExpressions implements Serializable {
*/ */
private String macRegExp; private String macRegExp;
/**
* 持仓信息正则
*/
private String positionExp;
  • Remove this unused "positionExp" private field. 📘

Please register or sign in to reply
public RegularExpressions(){ public RegularExpressions(){
} }
...@@ -67,5 +72,6 @@ public class RegularExpressions implements Serializable { ...@@ -67,5 +72,6 @@ public class RegularExpressions implements Serializable {
this.addressRegExp = MapUtils.getString(map, RegExpConstants.ADDRESS_REG_EXP); this.addressRegExp = MapUtils.getString(map, RegExpConstants.ADDRESS_REG_EXP);
this.ipRegExp = MapUtils.getString(map, RegExpConstants.IP_REG_EXP); this.ipRegExp = MapUtils.getString(map, RegExpConstants.IP_REG_EXP);
this.macRegExp = MapUtils.getString(map, RegExpConstants.MAC_REG_EXP); this.macRegExp = MapUtils.getString(map, RegExpConstants.MAC_REG_EXP);
this.positionExp = MapUtils.getString(map, RegExpConstants.POSITION_EXP);
} }
} }
...@@ -15,6 +15,7 @@ public final class ConfigConstants { ...@@ -15,6 +15,7 @@ public final class ConfigConstants {
public static final String SOURCE_PARALLELISM = "source.parallelism"; public static final String SOURCE_PARALLELISM = "source.parallelism";
public static final String TRANSFORMER_PARALLELISM = "transformer.parallelism"; public static final String TRANSFORMER_PARALLELISM = "transformer.parallelism";
public static final String SINK_PARALLELISM = "sink.parallelism"; public static final String SINK_PARALLELISM = "sink.parallelism";
public static final String REG_DIMENSION = "reg.dimension";
public static final String HDFS_URI = "hdfs_uri"; public static final String HDFS_URI = "hdfs_uri";
public static final String HDFS_USER = "hdfs_user"; public static final String HDFS_USER = "hdfs_user";
......
...@@ -18,4 +18,5 @@ public final class RegExpConstants { ...@@ -18,4 +18,5 @@ public final class RegExpConstants {
public static final String ADDRESS_REG_EXP = "address"; public static final String ADDRESS_REG_EXP = "address";
public static final String IP_REG_EXP = "ip"; public static final String IP_REG_EXP = "ip";
public static final String MAC_REG_EXP = "mac"; public static final String MAC_REG_EXP = "mac";
public static final String POSITION_EXP = "position";
} }
...@@ -23,8 +23,8 @@ public class DesensitizationFunction implements Serializable { ...@@ -23,8 +23,8 @@ public class DesensitizationFunction implements Serializable {
public DesensitizationFunction(RegularExpressions regularExpressions) { public DesensitizationFunction(RegularExpressions regularExpressions) {
this.regularExpressions = regularExpressions; this.regularExpressions = regularExpressions;
patterns.add(Pattern.compile(regularExpressions.getBankCardRegExp()));
patterns.add(Pattern.compile(regularExpressions.getIdRegExp())); patterns.add(Pattern.compile(regularExpressions.getIdRegExp()));
patterns.add(Pattern.compile(regularExpressions.getBankCardRegExp()));
patterns.add(Pattern.compile(regularExpressions.getPhoneRegExp())); patterns.add(Pattern.compile(regularExpressions.getPhoneRegExp()));
patterns.add(Pattern.compile(regularExpressions.getMobileRegExp())); patterns.add(Pattern.compile(regularExpressions.getMobileRegExp()));
...@@ -33,6 +33,7 @@ public class DesensitizationFunction implements Serializable { ...@@ -33,6 +33,7 @@ public class DesensitizationFunction implements Serializable {
patterns.add(Pattern.compile(regularExpressions.getMacRegExp())); patterns.add(Pattern.compile(regularExpressions.getMacRegExp()));
patterns.add(Pattern.compile(regularExpressions.getEmailRegExp())); patterns.add(Pattern.compile(regularExpressions.getEmailRegExp()));
patterns.add(Pattern.compile(regularExpressions.getIpRegExp())); patterns.add(Pattern.compile(regularExpressions.getIpRegExp()));
patterns.add(Pattern.compile(regularExpressions.getPositionExp()));
} }
public Map<String, String> desensitization(Map<String, String> map, public Map<String, String> desensitization(Map<String, String> map,
......
...@@ -73,6 +73,10 @@ public class HdfsLogDesensitization implements Serializable { ...@@ -73,6 +73,10 @@ public class HdfsLogDesensitization implements Serializable {
private String hdfsDest; private String hdfsDest;
private String startTime; private String startTime;
private String endTime; private String endTime;
/**
* 是否脱敏维度信息
*/
private boolean hasRegDimension;
private long startTimestamp; private long startTimestamp;
private long endTimestamp; private long endTimestamp;
private Map<String, String> confMap; private Map<String, String> confMap;
...@@ -101,6 +105,7 @@ public class HdfsLogDesensitization implements Serializable { ...@@ -101,6 +105,7 @@ public class HdfsLogDesensitization implements Serializable {
this.endTime = String.valueOf(conf.get(ConfigConstants.END_TIME)); this.endTime = String.valueOf(conf.get(ConfigConstants.END_TIME));
this.startTimestamp = DateUtil.time2Timestamp(startTime); this.startTimestamp = DateUtil.time2Timestamp(startTime);
this.endTimestamp = DateUtil.time2Timestamp(endTime); this.endTimestamp = DateUtil.time2Timestamp(endTime);
this.hasRegDimension = Boolean.parseBoolean(conf.get(ConfigConstants.REG_DIMENSION));
this.confMap = conf; this.confMap = conf;
return this; return this;
} }
...@@ -143,9 +148,16 @@ public class HdfsLogDesensitization implements Serializable { ...@@ -143,9 +148,16 @@ public class HdfsLogDesensitization implements Serializable {
if (null != timestamp && timestamp.compareTo(startTimestamp) >= 0 && if (null != timestamp && timestamp.compareTo(startTimestamp) >= 0 &&
timestamp.compareTo(endTimestamp) <= 0) { timestamp.compareTo(endTimestamp) <= 0) {
Map<String, String> normalFields = logData.getNormalFields();
Map desensitization = desensitizationFunction. Map desensitization = desensitizationFunction.
desensitization(logData.getNormalFields(), fieldsWhiteList, dataFormats); desensitization(normalFields, fieldsWhiteList, dataFormats);
logData.setNormalFields(desensitization); logData.setNormalFields(desensitization);
if (hasRegDimension) {
Map<String, String> dimensions = logData.getDimensions();
Map desensitizationDimensions = desensitizationFunction.
desensitization(dimensions, fieldsWhiteList, dataFormats);
logData.setDimensions(desensitizationDimensions);
}
collector.collect(logData); collector.collect(logData);
} }
} }
......
...@@ -11,6 +11,9 @@ source: "hdfs" ...@@ -11,6 +11,9 @@ source: "hdfs"
# 交易日志的“核心”信息,值以c开头、后面是数字序号,非必传 # 交易日志的“核心”信息,值以c开头、后面是数字序号,非必传
core: "c9" core: "c9"
# 维度信息是否使用正则脱敏
reg.dimension: "true"
# 查询日志起始 # 查询日志起始
start_time: "2020-11-07 21:22:20" start_time: "2020-11-07 21:22:20"
# 查询日志结束 # 查询日志结束
...@@ -29,10 +32,15 @@ hdfs_dest: "/tmp/datawarehouse/jzjy/kcbp_biz_log/output1/" ...@@ -29,10 +32,15 @@ hdfs_dest: "/tmp/datawarehouse/jzjy/kcbp_biz_log/output1/"
# 不做脱敏的字段白名单 # 不做脱敏的字段白名单
fields_white_list: "funcid,count1,count2,count3,count4" fields_white_list: "funcid,count1,count2,count3,count4"
# chd下载配置 # cdh下载配置
# cdh能执行hdfs命令的机器的ip
cdh_host_ip: "192.168.70.2" cdh_host_ip: "192.168.70.2"
# cdh能执行hdfs命令的机器的ip的超级用户
cdh_host_user: "root" cdh_host_user: "root"
# cdh能执行hdfs命令的机器的ip的用户密码
cdh_host_password: "NuqUtwbJUBRmUwgh" cdh_host_password: "NuqUtwbJUBRmUwgh"
# cdh能执行hdfs下载命令的用户名
cdh_hdfs_user: "hdfs"
# 与配置文件保持一致 # 与配置文件保持一致
download_path: "/tmp/" download_path: "/tmp/"
...@@ -6,6 +6,7 @@ DEFAULT_SLEEP_TIME=30 ...@@ -6,6 +6,7 @@ DEFAULT_SLEEP_TIME=30
CDH_HOST_IP=192.168.70.2 CDH_HOST_IP=192.168.70.2
CDH_HOST_USER=root CDH_HOST_USER=root
CDH_HOST_PASSWORD=password CDH_HOST_PASSWORD=password
CDH_HDFS_USER=hdfs
# 与配置文件保持一致 # 与配置文件保持一致
HDFS_DEST=/tmp/datawarehouse/jzjy/kcbp_biz_log/output1/ HDFS_DEST=/tmp/datawarehouse/jzjy/kcbp_biz_log/output1/
...@@ -51,6 +52,10 @@ do ...@@ -51,6 +52,10 @@ do
then then
CDH_HOST_PASSWORD=$(echo "$v" | sed -r 's/.*"(.+)".*/\1/') CDH_HOST_PASSWORD=$(echo "$v" | sed -r 's/.*"(.+)".*/\1/')
fi fi
if [[ "$k" == "cdh_hdfs_user" ]]
then
CDH_HDFS_USER=$(echo "$v" | sed -r 's/.*"(.+)".*/\1/')
fi
done < $DEPLOY_PATH/conf/$FLINK_TASK_CONF done < $DEPLOY_PATH/conf/$FLINK_TASK_CONF
echo "开始提交任务" echo "开始提交任务"
...@@ -77,7 +82,7 @@ expect <<EOF ...@@ -77,7 +82,7 @@ expect <<EOF
"password:" { send "$CDH_HOST_PASSWORD\n" } "password:" { send "$CDH_HOST_PASSWORD\n" }
} }
expect "]# " { send "rm -rf $SCP_PATH\n" } expect "]# " { send "rm -rf $SCP_PATH\n" }
expect "]# " { send "sudo -u hdfs hadoop fs -copyToLocal $HDFS_DEST $DOWNLOAD_PATH\n" } expect "]# " { send "sudo -u $CDH_HDFS_USER hadoop fs -copyToLocal $HDFS_DEST $DOWNLOAD_PATH\n" }
expect "]# " { expect "]# " {
send "scp -r $SCP_PATH root@$LOCAL_IP:/tmp/\n" send "scp -r $SCP_PATH root@$LOCAL_IP:/tmp/\n"
expect { expect {
......
...@@ -12,8 +12,10 @@ id=[1-9]\d{5}(18|19|([23]\d))\d{2}((0[1-9])|(10|11|12))(([0-2][1-9])|10|20|30|31 ...@@ -12,8 +12,10 @@ id=[1-9]\d{5}(18|19|([23]\d))\d{2}((0[1-9])|(10|11|12))(([0-2][1-9])|10|20|30|31
# 银行卡号 # 银行卡号
bank_card=(([13-79]\d{3})|(2[1-9]\d{2})|(20[3-9]\d)|(8[01-79]\d{2}))\s?\d{4}\s?\d{4}\s?\d{4}(\s?\d{3})? bank_card=(([13-79]\d{3})|(2[1-9]\d{2})|(20[3-9]\d)|(8[01-79]\d{2}))\s?\d{4}\s?\d{4}\s?\d{4}(\s?\d{3})?
# 家庭住址正则 # 家庭住址正则
address=([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|自治区|自治州|区|县|镇|道|路|街|号|弄|条|室)){1,} address=([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|自治区|自治州|区|县|镇|道|路|街|号|弄|条|室|港)){1,}
# ip地址正则 # ip地址正则
ip=((2[0-4]\d|25[0-5]|[01]?\d\d?)\.){3}(2[0-4]\d|25[0-5]|[01]?\d\d?) ip=((2[0-4]\d|25[0-5]|[01]?\d\d?)\.){3}(2[0-4]\d|25[0-5]|[01]?\d\d?)
# mac地址正则 # mac地址正则
mac=[A-F0-9]{2}([-:]?[A-F0-9]{2})([-:.]?[A-F0-9]{2})([-:]?[A-F0-9]{2})([-:.]?[A-F0-9]{2})([-:]?[A-F0-9]{2}) mac=[A-F0-9]{2}([-:]?[A-F0-9]{2})([-:.]?[A-F0-9]{2})([-:]?[A-F0-9]{2})([-:.]?[A-F0-9]{2})([-:]?[A-F0-9]{2})
\ No newline at end of file # 持仓信息正则
position=仓(\d+(\.)?\d+)(万|千|手|股)
\ No newline at end of file
  • SonarQube analysis reported 113 issues

    • 🚫 24 critical
    • 61 major
    • 🔽 27 minor
    • 1 info

    Watch the comments in this conversation to review them.

    Top 30 extra issues

    Note: The following issues were found on lines that were not modified in the commit. Because these issues can't be reported as line comments, they are summarized here:

    1. 🚫 Add a default case to this switch. 📘
    2. 🚫 switch中每个case需要通过break/return等来终止 📘
    3. 🚫 switch块缺少default语句 📘
    4. 🚫 Define a constant instead of duplicating this literal " {\n" 11 times. 📘
    5. 🚫 Define a constant instead of duplicating this literal " "type": [\n" 11 times. 📘
    6. 🚫 Define a constant instead of duplicating this literal " "string",\n" 6 times. 📘
    7. 🚫 Define a constant instead of duplicating this literal " "null"\n" 6 times. 📘
    8. 🚫 [Define a constant instead of duplicating this literal " \]\n" 11 times.](https://git.zorkdata.com/liaomingtao/transaction_log_desensitization/blob/487ccca8343d8f795e79208b3aab4209d59a361c/src/main/java/com/zorkdata/desensitization/avro/AvroSchemaDef.java#L23) 📘
    9. 🚫 Define a constant instead of duplicating this literal " },\n" 9 times. 📘
    10. 🚫 Define a constant instead of duplicating this literal " "null",\n" 5 times. 📘
    11. 🚫 Define a constant instead of duplicating this literal " {\n" 5 times. 📘
    12. 🚫 Define a constant instead of duplicating this literal " "type": "map",\n" 5 times. 📘
    13. 🚫 Define a constant instead of duplicating this literal " "values": "string"\n" 3 times. 📘
    14. 🚫 Define a constant instead of duplicating this literal " }\n" 5 times. 📘
    15. 🚫 Define a constant instead of duplicating this literal "序列化失败" 13 times. 📘
    16. 🚫 Refactor this method to reduce its Cognitive Complexity from 22 to the 15 allowed. 📘
    17. 🚫 Refactor this method to reduce its Cognitive Complexity from 22 to the 15 allowed. 📘
    18. 🚫 Refactor this method to reduce its Cognitive Complexity from 22 to the 15 allowed. 📘
    19. 🚫 常量【dataFormats】命名应全部大写并以下划线分隔 📘
    20. 🚫 Add a nested comment explaining why this method is empty, throw an UnsupportedOperationException or complete the implementation. 📘
    21. 🚫 Change this "try" to a try-with-resources. (sonar.java.source not set. Assuming 7 or greater.) 📘
    22. 🚫 Refactor this code to not throw exceptions in finally blocks. 📘
    23. 🚫 Refactor this code to not throw exceptions in finally blocks. 📘
    24. 🚫 Change this "try" to a try-with-resources. (sonar.java.source not set. Assuming 7 or greater.) 📘
    25. This block of commented-out lines of code should be removed. 📘
    26. 及时清理不再使用的代码段或配置信息。 📘
    27. Replace this use of System.out or System.err by a logger. 📘
    28. Replace this use of System.out or System.err by a logger. 📘
    29. String contains no format specifiers. 📘
    30. Replace this use of System.out or System.err by a logger. 📘
    • ... 79 more
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment