Commit 7582bbaa authored by 王海鹰's avatar 王海鹰

<dev> 设置从配置文件中读取参数

parent 1e097d43
......@@ -63,6 +63,14 @@ public class TransactionLogMask {
private static Long startTime;
private static Long endTime;
private static String namePattern = "";
private static String mobilePattern = "";
private static String phonePattern = "";
private static String emailPattern = "";
public static void main(String[] args) throws Exception {
if ("hdfs".equals(source)) {
maskHdfsLog(args);
......@@ -126,15 +134,14 @@ public class TransactionLogMask {
LogData logData = JSON.parseObject(value.getField(0).toString(), new TypeReference<LogData>() {
});
//根据日志事件的核心信息做过滤
if (null != core && logData.getDimensions().get("hostname").indexOf("c9") == -1) {
return;
}
//根据日志事件的timestamp做过滤
Long timestamp = utc2timestamp(logData.getTimestamp());
if (null != timestamp && timestamp > startTime && timestamp < endTime || Boolean.TRUE) {
Map maskResult = MaskUtil.mask(logData.getNormalFields());
logData.setNormalFields(maskResult);
collector.collect(logData);
if (null != core && logData.getDimensions().get("hostname").indexOf("c9") > -1) {
//根据日志事件的timestamp做过滤
Long timestamp = utc2timestamp(logData.getTimestamp());
if (null != timestamp && timestamp > startTime && timestamp < endTime || Boolean.TRUE) {
Map maskResult = MaskUtil.mask(logData.getNormalFields());
logData.setNormalFields(maskResult);
collector.collect(logData);
}
}
}
});
......
......@@ -12,6 +12,12 @@ public interface Constants {
String HDFS_DEST = "hdfs_dest";
String CORE = "core";
String DATE = "date";
String START_TIME = "startTime";
String END_TIME = "endTime";
String START_TIME = "start_time";
String END_TIME = "end_time";
String NAME_REG_EXP = "name_reg_exp";
String MOBILE_REG_EXP = "mobile_reg_exp";
String PHONE_REG_EXP = "phone_reg_exp";
String EMAIL_REG_EXP = "email_reg_exp";
}
......@@ -18,7 +18,11 @@ import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
import java.io.*;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.TimeZone;
/**
* Description :
......@@ -146,7 +150,7 @@ public class AvroTest {
Schema schema = new Schema.Parser().parse(new File("d:\\log.avro"));
GenericRecord emp = new GenericData.Record(schema);
File file = new File("d:\\part-0-0.avro");
File file = new File("d:\\1 (1).avro");
DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(file, datumReader);
......@@ -156,5 +160,20 @@ public class AvroTest {
emp = dataFileReader.next();
System.out.println(emp);
}
// Long aLong = utc2Local("2020-09-29T09:36:20.626+08:00");
// System.out.println(aLong);
}
/**
 * Converts a timestamp string shaped like {@code 2020-09-29T09:36:20.626+08:00}
 * into epoch milliseconds.
 *
 * <p>NOTE(review): the trailing {@code +08:00} in the pattern contains no pattern
 * letters, so {@link SimpleDateFormat} matches it as literal text and the
 * date/time fields are interpreted in UTC rather than at UTC+8. This behavior is
 * kept as-is; confirm with callers whether the offset should actually be applied.
 *
 * @param utcTime the timestamp text to parse; may be {@code null}
 * @return the parsed instant as epoch milliseconds, or {@code null} when the
 *         input is {@code null} or does not match the expected format
 */
public static Long utc2Local(String utcTime) {
    if (utcTime == null) {
        return null;
    }
    // SimpleDateFormat is not thread-safe, so a fresh instance is built per call.
    SimpleDateFormat utcFormater = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS+08:00");
    utcFormater.setTimeZone(TimeZone.getTimeZone("UTC"));
    try {
        return utcFormater.parse(utcTime).getTime();
    } catch (ParseException e) {
        System.out.println("时间戳格式转换异常:" + utcTime + e.getMessage());
        // Previously execution fell through and dereferenced a null Date (NPE).
        return null;
    }
}
}
......@@ -20,7 +20,7 @@ public class MaskUtil {
/**
* 姓名正则
*/
static Pattern namePattern = Pattern.compile("([\\u4e00-\u9fa5]{1,20}|[a-zA-Z\\.\\s]{1,20})");
static Pattern namePattern = Pattern.compile("([\\u4e00-\\u9fa5]{1,20}|[a-zA-Z\\.\\s]{1,20})");
// Pattern namePattern = Pattern.compile(maskRegexConfig.getNameRegExp());
/**
* 手机号正则
......
......@@ -34,7 +34,7 @@ public class ZorkParameterUtil {
logger.info("read config path is " + configPath);
if (!configPath.endsWith("yaml")) {
if (!configPath.endsWith("yml")) {
System.err.println("Please input correct configuration file and flink run mode!");
System.exit(-1);
} else {
......
# 不做脱敏的字段白名单
fieldsWhiteList=fundid,custid,orgid,brhid,secuid,bankcode,market,ordersno,ordergroup,count,poststr,stkcode,bsflag,\
orderamt,price,qty,bankcode,tacode,ofcode,transacc,taacc
# 脱敏用的正则表达式
# 姓名正则
nameRegExp = "[\一-龥]{1,20}|[a-zA-Z\\.\\s]{1,20}"
# 手机号正则
mobileRegExp = "(13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}"
# 电话号码正则
phoneRegExp = "(\\d{3,4}-)?\\d{6,8}"
# 邮箱正则
emailRegExp = "\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*"
# 身份证号码(15位)正则
idRegExp15 = "[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}"
#身份证号码(18位)正则
idRegExp18 = "[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])"
# 家庭住址正则
addressRegExp = "([\一-\龥A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}"
# ip地址正则
ipRegExp = "((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)"
# mac地址正则
macRegExp = "([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}"
\ No newline at end of file
# 日志来源,支持hdfs和kafka,必传
source: "hdfs"
# hdfs日志源文件地址,若source为hdfs,则该地址必传
hdfs_src: "hdfs://cdh-2:8020/tmp/datawarehouse4/jzjy/kcbp_biz_log"
# hdfs日志写入地址,非必传,默认写到hdfs-src目录下的output目录下
hdfs_dest: "hdfs://cdh-2:8020/tmp/datawarehouse/jzjy/kcbp_biz_log/output2/"
# 脱敏结果下载到的本地路径
download_path: "/tmp"
# 交易日志的“核心”信息,值以c开头、后面是数字序号,非必传
core: "c9"
# 查询日志日期(默认为当天),非必传
date: 20200929
# 查询日志起始时间戳,非必传
start_time: 1601348849900
# 查询日志结束时间戳,非必传
end_time: 1601348850000
# 不做脱敏的字段白名单
fieldsWhiteList=fundid,custid,orgid,brhid,secuid,bankcode,market,ordersno,ordergroup,count,poststr,stkcode,bsflag,\
orderamt,price,qty,bankcode,tacode,ofcode,transacc,taacc
fieldsWhiteList: "fundid,custid,orgid,brhid,secuid,bankcode,market,ordersno,ordergroup,count,poststr,stkcode,bsflag,\
orderamt,price,qty,bankcode,tacode,ofcode,transacc,taacc"
# 脱敏用的正则表达式
# 姓名正则
nameRegExp = "[\一-龥]{1,20}|[a-zA-Z\\.\\s]{1,20}"
nameRegExp: "[\u4e00-\u9fa5]{1,20}|[a-zA-Z\\.\\s]{1,20}"
# 手机号正则
mobileRegExp = "(13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}"
# The prefix alternatives are wrapped in one group so the parentheses balance
# and the trailing \d{8} applies to every prefix.
mobileRegExp: "((13[0-9])|(14[5,7])|(15[0-3,5-9])|(17[0,3,5-8])|(18[0-9])|(147))\\d{8}"
# 电话号码正则
phoneRegExp = "(\\d{3,4}-)?\\d{6,8}"
phoneRegExp: "(\\d{3,4}-)?\\d{6,8}"
# 邮箱正则
emailRegExp = "\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*"
emailRegExp: "\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*"
# 身份证号码(15位)正则
idRegExp15 = "[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}"
idRegExp15: "[1-9]\\d{7}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}"
#身份证号码(18位)正则
idRegExp18 = "[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])"
idRegExp18: "[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0|1|2]\\d)|3[0-1])\\d{3}([0-9Xx])"
# 家庭住址正则
addressRegExp = "([\一-\龥A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}"
addressRegExp: "([\u4E00-\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号|弄|条|室)){2,}"
# ip地址正则
ipRegExp = "((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)"
ipRegExp: "((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)"
# mac地址正则
macRegExp = "([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}"
\ No newline at end of file
macRegExp: "([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}"
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment