Commit 1e097d43 authored by 王海鹰

commit

parent a8f1c55e
@@ -26,7 +26,6 @@ under the License.
     <packaging>jar</packaging>
     <name>Guotai Transaction Log Mask Job</name>
     <properties>
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
         <flink.version>1.8.1</flink.version>
@@ -60,20 +59,20 @@ under the License.
             <groupId>org.apache.flink</groupId>
             <artifactId>flink-java</artifactId>
             <version>${flink.version}</version>
-            <scope>provided</scope>
+            <!-- <scope>provided</scope>-->
         </dependency>
         <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-shaded-hadoop-2 -->
         <dependency>
             <groupId>org.apache.flink</groupId>
             <artifactId>flink-shaded-hadoop-2</artifactId>
             <version>2.6.5-10.0</version>
-            <scope>provided</scope>
+            <!-- <scope>provided</scope>-->
         </dependency>
         <dependency>
             <groupId>org.apache.flink</groupId>
             <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
             <version>${flink.version}</version>
-            <scope>provided</scope>
+            <!-- <scope>provided</scope>-->
         </dependency>
         <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka -->
@@ -103,7 +102,6 @@ under the License.
             <version>${flink.version}</version>
         </dependency>
         -->
         <!-- https://mvnrepository.com/artifact/org.apache.avro/avro -->
         <dependency>
             <groupId>org.apache.avro</groupId>
@@ -159,6 +157,12 @@ under the License.
             <version>1.18.12</version>
             <!-- <scope>provided</scope>-->
         </dependency>
+        <dependency>
+            <groupId>org.yaml</groupId>
+            <artifactId>snakeyaml</artifactId>
+            <version>1.16</version>
+        </dependency>
     </dependencies>
     <build>
......
package com.zorkdata.datamask.constants;
/**
* Description :
*
* @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
* Date : Create in 2020/10/20 15:32
*/
public interface Constants {
String SOURCE = "source";
String HDFS_SRC = "hdfs_src";
String HDFS_DEST = "hdfs_dest";
String CORE = "core";
String DATE = "date";
String START_TIME = "startTime";
String END_TIME = "endTime";
}
package com.zorkdata.datamask.domain;
import lombok.Data;
+import org.apache.avro.mapred.AvroWrapper;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
@@ -29,110 +30,38 @@ public class LogData implements Serializable, WritableComparable {
     */
    private String timestamp;
    /**
-     * source
+     * 事件来源 (event source)
     */
    private String source;
-    @Override
-    public int compareTo(Object o) {
-        return 0;
-    }
-    @Override
-    public void write(DataOutput dataOutput) throws IOException {
-    }
-    @Override
-    public void readFields(DataInput dataInput) throws IOException {
-    }
    /**
-     * offset 偏移量
+     * 偏移量 (offset)
     */
    private String offset;
    /**
-     * dimensions 维度
+     * 维度 (dimensions)
     */
    private Map<String, String> dimensions;
    /**
-     * measures
+     * 指标 (measures)
     */
    private Map<String, Double> measures;
    /**
-     * normalFields
+     * 普通列 (normal fields)
     */
    private Map<String, String> normalFields;
-    // public LogData() {
-    // }
-    // public String getLogTypeName() {
-    //     return logTypeName;
-    // }
-    //
-    // public void setLogTypeName(String logTypeName) {
-    //     this.logTypeName = logTypeName;
-    // }
-    //
-    // public String getTimestamp() {
-    //     return timestamp;
-    // }
-    //
-    // public void setTimestamp(String timestamp) {
-    //     this.timestamp = timestamp;
-    // }
-    //
-    // public String getSource() {
-    //     return source;
-    // }
-    //
-    // public void setSource(String source) {
-    //     this.source = source;
-    // }
-    //
-    // public String getOffset() {
-    //     return offset;
-    // }
-    //
-    // public void setOffset(String offset) {
-    //     this.offset = offset;
-    // }
-    //
-    // public Map<String, String> getDimensions() {
-    //     return dimensions;
-    // }
-    //
-    // public void setDimensions(Map<String, String> dimensions) {
-    //     this.dimensions = new HashMap<>(50);
-    //     for (Map.Entry entry : dimensions.entrySet()) {
-    //         this.dimensions.put(String.valueOf(entry.getKey()), String.valueOf(entry.getValue()));
-    //     }
-    // }
-    //
-    // public Map<String, Double> getMeasures() {
-    //     return measures;
-    // }
-    //
-    // public void setMeasures(Map<String, Double> measures) {
-    //     this.measures = new HashMap<>(50);
-    //     for (Map.Entry entry : measures.entrySet()) {
-    //         this.measures.put(String.valueOf(entry.getKey()), Double.valueOf(String.valueOf(entry.getValue())));
-    //     }
-    // }
-    //
-    // public Map<String, String> getNormalFields() {
-    //     return normalFields;
-    // }
-    //
-    // public void setNormalFields(Map<String, String> normalFields) {
-    //     this.normalFields = new HashMap<>(50);
-    //     for (Map.Entry entry : normalFields.entrySet()) {
-    //         this.normalFields.put(String.valueOf(entry.getKey()), String.valueOf(entry.getValue()));
-    //     }
-    // }
+    @Override
+    public int compareTo(Object o) {
+        return 0;
+    }
+    @Override
+    public void write(DataOutput dataOutput) throws IOException {
+    }
+    @Override
+    public void readFields(DataInput dataInput) throws IOException {
+    }
    // @Override
    // public String toString() {
......
package com.zorkdata.datamask.function;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import com.zorkdata.datamask.domain.LogData;
import com.zorkdata.datamask.util.avro.AvroDeserializer;
import com.zorkdata.datamask.util.avro.AvroDeserializerFactory;
import lombok.extern.slf4j.Slf4j;
import org.apache.avro.generic.GenericRecord;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.util.Collector;
/**
* @author xiese
* @Description Avro2StrFlatMapFunction
* @Email xiesen310@163.com
* @Date 2020/9/26 23:14
*/
@Slf4j
public class Avro2StrFlatMapFunction implements FlatMapFunction<String, LogData> {
@Override
public void flatMap(String value, Collector<LogData> out) throws Exception {
try {
if (null != value) {
AvroDeserializer logsDeserializer = AvroDeserializerFactory.getLogsDeserializer();
GenericRecord record = logsDeserializer.deserializing(value.getBytes());
// System.out.println("----------record---------"+record);
if (null != record) {
LogData logData = JSON.parseObject(record.toString(), new TypeReference<LogData>() {
});
// System.out.println("----------logData---------"+logData);
// out.collect(JSON.toJSONString(logData));
out.collect(logData);
}
}
} catch (Exception e) {
log.error("avro 反序列化失败,错误信息: {}", e.getMessage(), e);
}
}
}
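For orientation, a minimal sketch of wiring this function into a Flink 1.8 streaming job; the in-memory source, class name and job name below are placeholders, since the commit does not show the actual job class:

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import com.zorkdata.datamask.domain.LogData;
import com.zorkdata.datamask.function.Avro2StrFlatMapFunction;

public class Avro2StrFlatMapFunctionDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Placeholder source; in the real job each element would carry Avro-encoded log bytes.
        DataStream<String> raw = env.fromElements("avro-payload-1", "avro-payload-2");
        DataStream<LogData> logs = raw.flatMap(new Avro2StrFlatMapFunction());
        logs.print();
        env.execute("transaction-log-mask-demo");
    }
}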
package com.zorkdata.datamask.util;
/**
* @author 谢森
* @Description 配置文件工具类
* @Email xiesen@zork.com.cn
*/
public class ConfigUtils {
public static String getString(String value, String defaultValue) {
String result = value == null || value.equals("") || value.equals("null") ? defaultValue : value;
return result;
}
public static Integer getInteger(Integer value, Integer defaultValue) {
// Fall back to the default when value is null (avoids an unboxing NPE) or negative
Integer result = (value == null || value < 0) ? defaultValue : value;
return result;
}
public static Double getDouble(Double value, Double defaultValue) {
Double result = value == null ? defaultValue : value;
return result;
}
public static Float getFloat(Float value, Float defaultValue) {
Float result = value == null ? defaultValue : value;
return result;
}
public static Long getLong(Long value, Long defaultValue) {
Long result = value == null ? defaultValue : value;
return result;
}
public static Boolean getBoolean(Boolean value, Boolean defaultValue) {
Boolean result = value == null ? defaultValue : value;
return result;
}
}
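A quick illustration of how these defaults behave; the values below are arbitrary examples:

import com.zorkdata.datamask.util.ConfigUtils;

public class ConfigUtilsDemo {
    public static void main(String[] args) {
        // Empty/"null" strings and negative integers fall back to the supplied default.
        System.out.println(ConfigUtils.getString("", "hdfs://nameservice1")); // hdfs://nameservice1
        System.out.println(ConfigUtils.getInteger(-1, 4));                    // 4
        System.out.println(ConfigUtils.getBoolean(null, Boolean.TRUE));       // true
    }
}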
package com.zorkdata.datamask.util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.constructor.SafeConstructor;
import java.io.*;
import java.net.URL;
import java.util.*;
/**
* ClassName: LoadConf
* Email: zhuzhigang@zork.com.cn
* Date: 2019\6\27 0027
*
* @author: zhuzhigang
**/
public class LoadConf {
private static final Logger LOG = LoggerFactory.getLogger(com.zorkdata.datamask.util.LoadConf.class);
public LoadConf() {
}
public static List<URL> findResources(String name) {
try {
Enumeration<URL> resources = Thread.currentThread().getContextClassLoader().getResources(name);
ArrayList ret = new ArrayList();
while (resources.hasMoreElements()) {
ret.add(resources.nextElement());
}
return ret;
} catch (IOException var3) {
throw new RuntimeException(var3);
}
}
public static Map findAndReadYaml(String name, boolean mustExist, boolean canMultiple) {
InputStream in = null;
boolean confFileEmpty = false;
try {
in = getConfigFileInputStream(name, canMultiple);
if (null != in) {
Yaml yaml = new Yaml(new SafeConstructor());
Map ret = (Map) yaml.load(new InputStreamReader(in));
if (null != ret) {
HashMap var7 = new HashMap(ret);
return var7;
}
confFileEmpty = true;
}
if (mustExist) {
if (confFileEmpty) {
throw new RuntimeException("Config file " + name + " doesn't have any valid storm configs");
} else {
throw new RuntimeException("Could not find config file on classpath " + name);
}
} else {
HashMap var19 = new HashMap();
return var19;
}
} catch (IOException var17) {
throw new RuntimeException(var17);
} finally {
if (null != in) {
try {
in.close();
} catch (IOException var16) {
throw new RuntimeException(var16);
}
}
}
}
public static InputStream getConfigFileInputStream(String configFilePath, boolean canMultiple) throws IOException {
if (null == configFilePath) {
throw new IOException("Could not find config file, name not specified");
} else {
HashSet<URL> resources = new HashSet(findResources(configFilePath));
if (resources.isEmpty()) {
File configFile = new File(configFilePath);
return configFile.exists() ? new FileInputStream(configFile) : null;
} else if (resources.size() > 1 && !canMultiple) {
throw new IOException("Found multiple " + configFilePath + " resources. You're probably bundling the Storm jars with your topology jar. " + resources);
} else {
LOG.info("Using " + configFilePath + " from resources");
URL resource = (URL) resources.iterator().next();
return resource.openStream();
}
}
}
public static InputStream getConfigFileInputStream(String configFilePath) throws IOException {
return getConfigFileInputStream(configFilePath, true);
}
public static Map LoadYaml(String confPath) {
return findAndReadYaml(confPath, true, true);
}
public static Map LoadProperty(String prop) {
InputStream in = null;
Properties properties = new Properties();
try {
in = getConfigFileInputStream(prop);
properties.load(in);
} catch (FileNotFoundException var12) {
throw new RuntimeException("No such file " + prop);
} catch (Exception var13) {
throw new RuntimeException("Failed to read config file");
} finally {
if (null != in) {
try {
in.close();
} catch (IOException var11) {
throw new RuntimeException(var11);
}
}
}
Map ret = new HashMap();
ret.putAll(properties);
return ret;
}
}
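A sketch of how this loader might be combined with the Constants keys and ConfigUtils defaults added in this commit; the file name and the fallback values are assumptions, not taken from the commit:

import java.util.Map;
import com.zorkdata.datamask.constants.Constants;
import com.zorkdata.datamask.util.ConfigUtils;
import com.zorkdata.datamask.util.LoadConf;

public class LoadConfDemo {
    public static void main(String[] args) {
        // Assumes an application.yaml resource on the classpath whose keys match Constants.
        Map conf = LoadConf.LoadYaml("application.yaml");
        String hdfsSrc = ConfigUtils.getString((String) conf.get(Constants.HDFS_SRC), "hdfs:///tmp/src");
        String hdfsDest = ConfigUtils.getString((String) conf.get(Constants.HDFS_DEST), "hdfs:///tmp/dest");
        String date = ConfigUtils.getString((String) conf.get(Constants.DATE), "2020-10-20");
        System.out.println(hdfsSrc + " -> " + hdfsDest + " @ " + date);
    }
}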
package com.zorkdata.datamask.util;
/**
* Description :
*
* @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
* Date : Create in 2020/10/19 16:43
*/
public class MaskRegexConfig {
private String fieldsWhiteList;
private String nameRegExp;
private String mobileRegExp;
private String phoneRegExp;
private String emailRegExp;
private String idRegExp15;
private String idRegExp18;
private String addressRegExp;
private String ipRegExp;
private String macRegExp;
}
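MaskRegexConfig currently has no accessors, while the commented-out lines in MaskUtil (e.g. Pattern.compile(maskRegexConfig.getNameRegExp())) assume getters exist. A minimal sketch, assuming Lombok (already a dependency in the POM) is the intended route:

import lombok.Data;

// Sketch only: @Data would generate the getters/setters that the commented-out
// maskRegexConfig.getXxxRegExp() calls in MaskUtil expect.
@Data
public class MaskRegexConfig {
    private String fieldsWhiteList;
    private String nameRegExp;
    private String mobileRegExp;
    private String phoneRegExp;
    private String emailRegExp;
    private String idRegExp15;
    private String idRegExp18;
    private String addressRegExp;
    private String ipRegExp;
    private String macRegExp;
}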
@@ -16,63 +16,75 @@ import java.util.regex.Pattern;
 */
public class MaskUtil {
+    private MaskRegexConfig maskRegexConfig;
    /**
     * 姓名正则 (name regex)
     */
    static Pattern namePattern = Pattern.compile("([\\u4e00-\u9fa5]{1,20}|[a-zA-Z\\.\\s]{1,20})");
+    // Pattern namePattern = Pattern.compile(maskRegexConfig.getNameRegExp());
    /**
     * 手机号正则 (mobile number regex)
     */
    static Pattern mobilePattern = Pattern.compile("((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}");
+    // Pattern mobilePattern = Pattern.compile(maskRegexConfig.getMobileRegExp());
    /**
     * 电话号码正则 (landline number regex)
     */
    static Pattern phonePattern = Pattern.compile("(\\d{3,4}-)?\\d{6,8}");
+    // Pattern phonePattern = Pattern.compile(maskRegexConfig.getPhoneRegExp());
    /**
     * 邮箱正则 (email regex)
     */
    static Pattern emailPattern = Pattern.compile("\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*");
+    // Pattern emailPattern = Pattern.compile(maskRegexConfig.getEmailRegExp());
    /**
     * 身份证号码(15位)正则 (15-digit ID card number regex)
     */
-    // static Pattern idPattern15 = Pattern.compile("\\d{17}[0-9Xx]|\\d{15}");
    static Pattern idPattern15 = Pattern.compile("[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}");
+    // Pattern idPattern15 = Pattern.compile(maskRegexConfig.getIdRegExp15());
    /**
     * 身份证号码(18位)正则 (18-digit ID card number regex)
     */
    static Pattern idPattern18 = Pattern.compile("[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])");
+    // Pattern idPattern18 = Pattern.compile(maskRegexConfig.getIdRegExp18());
    /**
     * 家庭住址正则 (home address regex)
     */
-    static Pattern addressPattern = Pattern.compile("([\\u4E00-\\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号)){2,}");
+    static Pattern addressPattern = Pattern.compile("([\\u4E00-\\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}");
+    // Pattern addressPattern = Pattern.compile(maskRegexConfig.getAddressRegExp());
    /**
     * ip地址正则 (IP address regex)
     */
    // static Pattern ipPattern = Pattern.compile("^((\\d|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5]|[*])\\.){3}(\\d|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5]|[*])$");
    static Pattern ipPattern = Pattern.compile("((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)");
+    // Pattern ipPattern = Pattern.compile(maskRegexConfig.getIpRegExp());
+    /**
+     * mac地址正则 (MAC address regex)
+     */
    static Pattern macPattern = Pattern.compile("([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}");
+    // Pattern macPattern = Pattern.compile(maskRegexConfig.getMacRegExp());
-    static List<Pattern> patterns = new ArrayList<Pattern>(){{
+    static List<Pattern> patterns = new ArrayList<Pattern>() {{
+        add(macPattern);
+        add(emailPattern);
+        add(ipPattern);
        add(namePattern);
        add(idPattern18);
        add(idPattern15);
        add(mobilePattern);
        add(phonePattern);
-        add(emailPattern);
        add(addressPattern);
-        add(ipPattern);
-        add(macPattern);
    }};
-    public static Map mask(Map map){
+    public static Map mask(Map map) {
        map.forEach((k, v) -> {
            String value = v.toString();
-            for(Pattern pattern:patterns){
+            for (Pattern pattern : patterns) {
                Matcher matcher = pattern.matcher(value);
-                if (matcher.find()){
+                if (matcher.find()) {
                    String replaceStr = "";
-                    for(int i=0; i < matcher.group().length(); i++){
+                    for (int i = 0; i < matcher.group().length(); i++) {
                        replaceStr = replaceStr.concat("*");
                    }
                    // System.out.println(replaceStr);
@@ -85,17 +97,21 @@ public class MaskUtil {
    }
    public static void main(String[] args) {
+        MaskUtil maskUtil = new MaskUtil();
        Map map = new HashMap();
-        // map.put("姓名", "王海鹰");
-        // map.put("身份证号", "372925199008075158");
-        // map.put("手机号", "15000101879");
-        // map.put("电话", "021-61341606");
-        // map.put("邮箱", "wanghaiying@zork.com.cn");
-        // map.put("住址", "上海市浦东新区碧波路690号");
-        // map.put("ip地址", "192.168.70.2");
-        // map.put("mac地址", "3c-78-43-25-80-bd");
-        map.put("message", "王海鹰-372925199008075158-15000101879");
-        System.out.println(mask(map));
+        map.put("姓名", "王海鹰");
+        map.put("身份证号", "372925199008075158");
+        map.put("手机号", "15000101879");
+        map.put("电话", "021-61341606");
+        map.put("邮箱", "wanghaiying@zork.com");
+        map.put("住址", "上海市浦东新区碧波路690号1弄");
+        map.put("住址2", "上海市浦东新区张江微电子港304-2室");
+        map.put("ip地址", "192.168.70.2");
+        map.put("mac地址", "3c-78-43-25-80-bd");
+        map.put("message", "王海鹰,372925199008075158#15000101879");
+        map.put("messid", "0000011404342B32233DDCDA");
+        System.out.println(maskUtil.mask(map));
        // String mobile = "15000101879";
        //
......
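A hedged sketch of how mask() might be applied to one LogData record while honouring the fieldsWhiteList from the configuration; the whitelist handling is an assumption about intent, not code from this commit:

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import com.zorkdata.datamask.domain.LogData;
import com.zorkdata.datamask.util.MaskUtil;

public class MaskStepSketch {
    // Masks every non-whitelisted entry of the record's normalFields map in place.
    public static void maskRecord(LogData logData, String fieldsWhiteList) {
        Set<String> whiteList = new HashSet<>(Arrays.asList(fieldsWhiteList.split(",")));
        Map<String, String> fields = logData.getNormalFields();   // getter assumed from Lombok @Data
        Map<String, String> toMask = new HashMap<>();
        fields.forEach((k, v) -> {
            if (!whiteList.contains(k)) {
                toMask.put(k, v);
            }
        });
        fields.putAll(MaskUtil.mask(toMask));                      // masked values overwrite the originals
    }
}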
package com.zorkdata.datamask.util;
import org.apache.flink.api.java.utils.ParameterTool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
/**
* @author 谢森
* @Description 参数读取工具类
* @Email xiesen@zork.com.cn
*/
public class ZorkParameterUtil {
private static final Logger logger = LoggerFactory.getLogger(com.zorkdata.datamask.util.ZorkParameterUtil.class);
/**
* 读取参数
*
* @param args 参数名称
* 这里默认使用 configPath 参数来标识配置文件的路径
* @return
*/
public static Map<String, String> readParameter(String[] args) {
Map<String, String> conf = null;
String configPath;
try {
ParameterTool parameterTool = ParameterTool.fromArgs(args);
configPath = parameterTool.get("configPath");
} catch (Exception e) {
// configPath = "/etc/flinkConfig.yaml";
configPath = "D:\\zork\\transactionLogMask\\src\\main\\resources\\application.yml";
}
logger.info("read config path is " + configPath);
// Accept both .yaml and .yml (the fallback path above ends in .yml) and guard against a missing configPath
if (configPath == null || !(configPath.endsWith("yaml") || configPath.endsWith("yml"))) {
System.err.println("Please input correct configuration file and flink run mode!");
System.exit(-1);
} else {
conf = LoadConf.LoadYaml(configPath);
if (conf == null) {
logger.error("配置文件 " + configPath + " 不存在,系统退出 (config file does not exist, exiting)");
System.exit(-1);
}
}
return conf;
}
}
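For completeness, a hedged sketch of an entry point consuming this reader; the class name MaskJob is a placeholder and the launch command in the comment uses the /etc/flinkConfig.yaml path mentioned above:

import java.util.Map;
import com.zorkdata.datamask.util.ZorkParameterUtil;

// Placeholder entry point: started e.g. as "flink run ... --configPath /etc/flinkConfig.yaml"
public class MaskJob {
    public static void main(String[] args) {
        Map<String, String> conf = ZorkParameterUtil.readParameter(args);
        System.out.println("loaded " + conf.size() + " config entries");
    }
}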
# 不做脱敏的字段白名单 (whitelist of fields that are not masked)
fieldsWhiteList=fundid,custid,orgid,brhid,secuid,bankcode,market,ordersno,ordergroup,count,poststr,stkcode,bsflag,\
orderamt,price,qty,bankcode,tacode,ofcode,transacc,taacc
# 脱敏用的正则表达式 (regular expressions used for masking)
# 姓名正则 (name regex)
nameRegExp = "[\u4e00-\u9fa5]{1,20}|[a-zA-Z\\.\\s]{1,20}"
# 手机号正则 (mobile number regex)
mobileRegExp = "((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}"
# 电话号码正则 (landline number regex)
phoneRegExp = "(\\d{3,4}-)?\\d{6,8}"
# 邮箱正则 (email regex)
emailRegExp = "\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*"
# 身份证号码(15位)正则 (15-digit ID card number regex)
idRegExp15 = "[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}"
# 身份证号码(18位)正则 (18-digit ID card number regex)
idRegExp18 = "[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])"
# 家庭住址正则 (home address regex)
addressRegExp = "([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}"
# ip地址正则 (IP address regex)
ipRegExp = "((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)"
# mac地址正则 (MAC address regex)
macRegExp = "([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}"
\ No newline at end of file
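Finally, a hedged sketch of reading this file; the resource name mask.properties is an assumption (the commit does not show the file name), and the LoadConf.LoadProperty helper from this commit does the actual loading. Note that java.util.Properties keeps the surrounding double quotes as part of each value, so they would need stripping before being passed to Pattern.compile.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import com.zorkdata.datamask.util.LoadConf;

public class MaskConfigSketch {
    public static Set<String> loadWhiteList() {
        // Assumed resource name; Properties.load already resolves the trailing-backslash line continuation.
        Map props = LoadConf.LoadProperty("mask.properties");
        String raw = (String) props.get("fieldsWhiteList");
        return new HashSet<>(Arrays.asList(raw.split(",")));
    }
}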