Commit 01a8a8e5 authored by 王海鹰

init

# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/
package com.zorkdata.datamask.domain;
import lombok.Data;
import org.joda.time.DateTime;
import java.io.Serializable;
import java.util.Map;
/**
* @author wanghaiying
* @Description LogData
* @Email wanghaiying@zork.com.cn
* @Date 2020/9/25 10:00
*/
@Data
@SuppressWarnings("all")
public class LogData implements Serializable {
private static final long serialVersionUID = 1L;
/**
* logTypeName: log type
*/
private String logTypeName;
/**
* timestamp: event timestamp
*/
private String timestamp;
/**
* source
*/
private String source;
/**
* offset: offset within the source
*/
private String offset;
/**
* dimensions: dimension fields
*/
private Map<String, String> dimensions;
/**
* measures
*/
private Map<String, Double> measures;
/**
* normalFields
*/
private Map<String, String> normalFields;
@Override
public String toString() {
return new DateTime(timestamp).toDate().getTime() + " LogData{" + "logTypeName='" + logTypeName + '\'' + ", timestamp='" + timestamp + '\'' + ", source='"
+ source + '\'' + ", offset='" + offset + '\'' + ", dimensions=" + dimensions + ", measures=" + measures
+ ", normalFields=" + normalFields + '}';
}
}
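Downstream (see Avro2StrFlatMapFunction), instances of this class are materialized from the JSON rendering of an Avro GenericRecord via fastjson. As a quick standalone sanity check of that mapping, a minimal sketch; the class name and field values are illustrative, not part of the project:

// A minimal sketch, assuming fastjson on the classpath; class name and values are made up.
import com.alibaba.fastjson.JSON;
import com.zorkdata.datamask.domain.LogData;

public class LogDataJsonCheck {
    public static void main(String[] args) {
        String json = "{\"logTypeName\":\"kcbp_biz_log\","
                + "\"timestamp\":\"2020-09-18T13:59:53.000+08:00\","
                + "\"dimensions\":{\"appsystem\":\"jzjy\"}}";
        LogData data = JSON.parseObject(json, LogData.class);
        // Lombok @Data supplies the getters used here.
        System.out.println(data.getLogTypeName() + " -> " + data.getDimensions());
        System.out.println(JSON.toJSONString(data));
    }
}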
{
"namespace": "com.zork.logs",
"type": "record",
"name": "logs",
"fields": [
{
"name": "logTypeName",
"type": [
"string",
"null"
]
},
{
"name": "timestamp",
"type": [
"string",
"null"
]
},
{
"name": "source",
"type": [
"string",
"null"
]
},
{
"name": "offset",
"type": [
"string",
"null"
]
},
{
"name": "dimensions",
"type": [
"null",
{
"type": "map",
"values": "string"
}
]
},
{
"name": "measures",
"type": [
"null",
{
"type": "map",
"values": "double"
}
]
},
{
"name": "normalfields",
"type": [
"null",
{
"type": "map",
"values": "string"
}
]
}
]
}
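This schema drives both the serializer and the deserializer below (it is duplicated as a string constant in LogAvroMacroDef). For reference, a record conforming to it can be built and binary-encoded with the plain Avro generic API roughly as follows; this is a sketch with illustrative values, not the project's own serializer:

import java.io.ByteArrayOutputStream;
import java.util.Collections;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;

public class LogsSchemaSketch {
    public static byte[] encode(String schemaJson) throws Exception {
        Schema schema = new Schema.Parser().parse(schemaJson);
        GenericRecord record = new GenericData.Record(schema);
        record.put("logTypeName", "kcbp_biz_log");
        record.put("timestamp", "2020-09-18T13:59:53.000+08:00");
        record.put("dimensions", Collections.singletonMap("appsystem", "jzjy"));
        // Fields left unset (source, offset, measures, normalFields) fall back to the null branch of their unions.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        new GenericDatumWriter<GenericRecord>(schema).write(record, encoder);
        encoder.flush();
        return out.toByteArray();
    }
}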
package com.zorkdata.datamask.function;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import com.zorkdata.datamask.domain.LogData;
import com.zorkdata.datamask.util.avro.AvroDeserializer;
import com.zorkdata.datamask.util.avro.AvroDeserializerFactory;
import lombok.extern.slf4j.Slf4j;
import org.apache.avro.generic.GenericRecord;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.util.Collector;
/**
* @author xiese
* @Description Avro2StrFlatMapFunction
* @Email xiesen310@163.com
* @Date 2020/9/26 23:14
*/
@Slf4j
public class Avro2StrFlatMapFunction implements FlatMapFunction<String, LogData> {
@Override
public void flatMap(String value, Collector<LogData> out) throws Exception {
        try {
            if (null != value) {
                // The incoming String is expected to carry the raw Avro-encoded bytes.
                AvroDeserializer logsDeserializer = AvroDeserializerFactory.getLogsDeserializer();
                GenericRecord record = logsDeserializer.deserializing(value.getBytes());
                if (null != record) {
                    // GenericRecord#toString() renders the record as JSON; fastjson maps it onto LogData.
                    LogData logData = JSON.parseObject(record.toString(), new TypeReference<LogData>() {
                    });
                    out.collect(logData);
                }
            }
        } catch (Exception e) {
            log.error("Avro deserialization failed: {}", e.getMessage(), e);
        }
}
}
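For context, this function is meant to sit behind a string source in a Flink job. A minimal wiring sketch follows; the topic name, bootstrap servers, and the Kafka connector choice are assumptions. Note that carrying Avro binary through a String (SimpleStringSchema plus value.getBytes()) can corrupt bytes, so a byte[]-based DeserializationSchema may be the safer choice:

import java.util.Properties;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

public class Avro2StrJobSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092"); // assumption
        props.setProperty("group.id", "datamask");                // assumption
        env.addSource(new FlinkKafkaConsumer<>("logs", new SimpleStringSchema(), props)) // topic name assumed
                .flatMap(new Avro2StrFlatMapFunction())
                .print();
        env.execute("avro-to-logdata-sketch");
    }
}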
package com.zorkdata.datamask.util;
import com.zorkdata.datamask.domain.LogData;
import com.zorkdata.datamask.util.avro.AvroDeserializerFactory;
import com.zorkdata.datamask.util.avro.AvroSerializerFactory;
import org.apache.avro.generic.GenericRecord;
import java.io.*;
import java.util.HashMap;
/**
 * Description : Avro serialization / deserialization smoke test.
 * Reference: https://www.cnblogs.com/fillPv/p/5009737.html
 * @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
 * Date : Create in 2020/9/23 4:43
 */
// java -jar avro-tools-1.10.0.jar compile schema log.avro .
public class AvroTest {
public static void main(String[] args) {
        // Build a sample LogData record.
        LogData transactionLog = new LogData();
transactionLog.setLogTypeName("kcbp_biz_log");
transactionLog.setTimestamp("2020-09-18T13:59:53.000+08:00");
transactionLog.setSource("d:\\\\kcbp\\\\log\\\\run\\\\20200918\\\\runlog23.log");
transactionLog.setOffset("165683111");
        HashMap<String, String> dimensions = new HashMap<String, String>() {{
put("appsystem", "jzjy");
put("appprogramname", "jzc9-kcbp1_9600");
put("hostname", "jzc9-kcbp1");
put("func", "");
put("nodeid", "");
put("operway", "W");
}};
transactionLog.setDimensions(dimensions);
        HashMap<String, Double> measures = new HashMap<String, Double>() {{
put("latence", 0.0);
put("latency", 1.0);
put("spendtime", 0.5);
}};
transactionLog.setMeasures(measures);
        HashMap<String, String> normalFields = new HashMap<String, String>() {{
put("indexTime", "2020-09-18T13:59:54.524+08:00");
put("bsflag", "");
put("productcode", "");
put("developercode", "");
put("fmillsecond", "");
put("inputtype", "");
put("logchecktime", "");
put("message", "身份证号码:372925199008075158,地址:上海浦东新区张江高科碧波路690号,手机号:15000101879,邮箱:wanghaiying@zork.com.cn");
put("end_logtime", "");
put("smillsecond", "585606599");
put("featurecode", "");
put("orgid", "");
put("authcode", "");
put("collecttime", "2020-09-18T13:59:53.529+08:00");
put("fundid", "");
put("deserializerTime", "2020-09-18T13:59:53.671+08:00");
put("messid", "0000011404342B32233DDCDA");
put("custid", "");
put("netputr", "");
put("versioninfo", "");
put("beg_logtime", "20200918-135953");
put("authinfo", "");
}};
        transactionLog.setNormalFields(normalFields);
        // Serialize the record to Avro binary using the log schema.
byte[] kcbp_biz_logs = AvroSerializerFactory.getLogAvroSerializer().serializingLog("kcbp_biz_log", "2020-09-18T13:59:53.000+08:00",
"d:\\\\kcbp\\\\log\\\\run\\\\20200918\\\\runlog23.log", "165683111", dimensions, measures, normalFields);
        // Deserialize: read Avro bytes back from a local file.
        File file = new File("c:\\part-0-0.avro");
        byte[] byteBuffer = new byte[(int) file.length()];
        try (FileInputStream fileInputStream = new FileInputStream(file)) {
            int read = fileInputStream.read(byteBuffer);
            if (read != byteBuffer.length) {
                System.err.println("short read: " + read + " of " + byteBuffer.length + " bytes");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
GenericRecord genericRecord = AvroDeserializerFactory.getLogsDeserializer().deserializing(byteBuffer);
System.out.println(genericRecord);
}
}
package com.zorkdata.datamask.util;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Description : regular-expression based masking of sensitive values.
 *
 * @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
 * Date : Create in 2020/9/23 9:30
 */
public class MaskUtil {
    /**
     * Name pattern (Chinese or Latin).
     */
    static Pattern namePattern = Pattern.compile("^([\\u4e00-\\u9fa5]{1,20}|[a-zA-Z\\.\\s]{1,20})$");
    /**
     * Mobile number pattern.
     */
    static Pattern mobilePattern = Pattern.compile("^((13[0-9])|(14[57])|(15[0-35-9])|(17[035-8])|(18[0-9]))\\d{8}$");
    /**
     * Landline number pattern.
     */
    static Pattern phonePattern = Pattern.compile("^(\\d{3,4}-)?\\d{6,8}$");
    /**
     * Email pattern.
     */
    static Pattern emailPattern = Pattern.compile("^\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*$");
    /**
     * 15-digit ID card number pattern.
     */
    static Pattern idPattern15 = Pattern.compile("^[1-9]\\d{7}((0\\d)|(1[0-2]))(([0-2]\\d)|3[0-1])\\d{3}$");
    /**
     * 18-digit ID card number pattern.
     */
    static Pattern idPattern18 = Pattern.compile("^[1-9]\\d{5}[1-9]\\d{3}((0\\d)|(1[0-2]))(([0-2]\\d)|3[0-1])\\d{3}([0-9Xx])$");
    /**
     * Home address pattern.
     */
    static Pattern addressPattern = Pattern.compile("^([\\u4E00-\\u9FA5A-Za-z0-9_]+(省|市|区|县|道|路|街|号)){2,}$");
    /**
     * IP address pattern.
     */
    static Pattern ipPattern = Pattern.compile("((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)");
    /**
     * MAC address pattern (hyphen-separated).
     */
    static Pattern macPattern = Pattern.compile("([A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}");
    static List<Pattern> patterns = new ArrayList<Pattern>() {{
        add(namePattern);
        add(mobilePattern);
        add(phonePattern);
        add(emailPattern);
        add(idPattern15);
        add(idPattern18);
        add(addressPattern);
        add(ipPattern);
        add(macPattern);
    }};
    public static Map<String, String> mask(Map<String, String> map) {
        map.forEach((k, v) -> {
            String value = v;
            for (Pattern pattern : patterns) {
                Matcher matcher = pattern.matcher(v);
                if (matcher.matches()) {
                    // Replace the matched value with an equal-length run of '*'.
                    StringBuilder replaceStr = new StringBuilder();
                    for (int i = 0; i < matcher.group().length(); i++) {
                        replaceStr.append('*');
                    }
                    value = value.replace(matcher.group(), replaceStr.toString());
                }
            }
            map.put(k, value);
        });
        return map;
    }
public static void main(String[] args) {
        Map<String, String> map = new HashMap<>();
map.put("姓名", "王海鹰");
map.put("身份证号", "372925199008075158");
map.put("手机号", "15000101879");
map.put("电话", "021-61341606");
map.put("邮箱", "wanghaiying@zork.com.cn");
map.put("住址", "上海市浦东新区碧波路690号");
map.put("ip地址", "192.168.70.2");
map.put("mac地址", "3c-78-43-25-80-bd");
System.out.println(mask(map));
// String mobile = "15000101879";
//
// Pattern pattern = Pattern.compile("(13\\d|14[579]|15[^4\\D]|17[^49\\D]|18\\d)\\d{8}");
// Matcher m = pattern.matcher(mobile);
////
// System.out.println(m.matches());
// if(m.matches()){
// System.out.println(m.group());
//
// String replaceStr = "";
// for(int i=0; i < m.group().length(); i++){
// replaceStr = replaceStr.concat("*");
// }
// System.out.println(replaceStr);
// mobile = mobile.replaceAll(m.group(), replaceStr);
// System.out.println(mobile);
// }
}
}
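Tying MaskUtil back into the pipeline: since LogData's dimensions and normalFields are Map<String, String>, mask can be applied per record inside the same Flink job. A minimal sketch (the class name is hypothetical; mask mutates the map in place and returns it):

import com.zorkdata.datamask.domain.LogData;
import com.zorkdata.datamask.util.MaskUtil;
import org.apache.flink.api.common.functions.MapFunction;

public class MaskMapFunctionSketch implements MapFunction<LogData, LogData> {
    @Override
    public LogData map(LogData value) {
        // Mask PII in the free-form fields; dimensions get the same treatment.
        if (value.getNormalFields() != null) {
            MaskUtil.mask(value.getNormalFields());
        }
        if (value.getDimensions() != null) {
            MaskUtil.mask(value.getDimensions());
        }
        return value;
    }
}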
package com.zorkdata.datamask.util.avro;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author xiesen
* @Description Avro deserializer
* @Email xiesen310@163.com
* @Date 2020/6/28 9:31
*/
public class AvroDeserializer {
private static final Logger LOGGER = LoggerFactory.getLogger(AvroDeserializer.class);
public JSONObject jsonObject;
public JSONArray jsonArray;
public Schema schema;
public String[] keys;
    public AvroDeserializer(String schema) {
        getKeysFromJson(schema);
    }
    /**
     * Parses the schema and caches the field names declared in it.
     *
     * @param schema the Avro schema as a JSON string
     */
    void getKeysFromJson(String schema) {
this.jsonObject = JSONObject.parseObject(schema);
this.schema = new Schema.Parser().parse(schema);
this.jsonArray = this.jsonObject.getJSONArray("fields");
this.keys = new String[this.jsonArray.size()];
for (int i = 0; i < this.jsonArray.size(); i++) {
this.keys[i] = this.jsonArray.getJSONObject(i).get("name").toString();
}
}
    /**
     * Deserializes an Avro-encoded message body (e.g. a Kafka record value).
     *
     * @param body the Avro-encoded bytes
     * @return the decoded record, or null if decoding failed
     */
public GenericRecord deserializing(byte[] body) {
DatumReader<GenericData.Record> datumReader = new GenericDatumReader<GenericData.Record>(this.schema);
Decoder decoder = DecoderFactory.get().binaryDecoder(body, null);
GenericData.Record result = null;
try {
result = datumReader.read(null, decoder);
} catch (Exception e) {
            LOGGER.error("Avro deserialization failed", e);
}
return result;
}
}
package com.zorkdata.datamask.util.avro;
/**
* @author xiesen
* @Description Avro deserializer factory
* @Email xiesen310@163.com
* @Date 2020/6/28 9:31
*/
public class AvroDeserializerFactory {
private static AvroDeserializer logs = null;
private static AvroDeserializer metrics = null;
public static void init() {
logs = null;
metrics = null;
}
/**
* getLogsDeserializer
*
* @return
*/
public static AvroDeserializer getLogsDeserializer() {
if (logs == null) {
logs = new AvroDeserializer(LogAvroMacroDef.metadata);
}
return logs;
}
    /**
     * getMetricDeserializer (metrics schema not wired up yet)
     *
     * @return
     */
// public static AvroDeserializer getMetricDeserializer() {
// if (metrics == null) {
// metrics = new AvroDeserializer(MetricAvroMacroDef.metadata);
// }
// return metrics;
// }
}
package com.zorkdata.datamask.util.avro;
/**
* @author xiesen
* @Description Avro serializer factory
* @Email xiesen310@163.com
* @Date 2020/6/28 9:32
*/
@SuppressWarnings("all")
public class AvroSerializerFactory {
private static AvroSerializer metricMetadata = null;
private static AvroSerializer logMetadata = null;
public static AvroSerializer getLogAvroSerializer() {
if (logMetadata == null) {
logMetadata = new AvroSerializer(LogAvroMacroDef.metadata);
}
return logMetadata;
}
// public static AvroSerializer getMetricAvroSerializer() {
// if (metricMetadata == null) {
// metricMetadata = new AvroSerializer(MetricAvroMacroDef.metadata);
// }
// return metricMetadata;
// }
}
package com.zorkdata.datamask.util.avro;
/**
* @author xiesen
* @Description Log set schema definition
* @Email xiesen310@163.com
* @Date 2020/6/28 9:33
*/
public class LogAvroMacroDef {
public static String metadata = "{\n" +
" \"namespace\": \"com.zork.logs\",\n" +
" \"type\": \"record\",\n" +
" \"name\": \"logs\",\n" +
" \"fields\": [\n" +
" {\n" +
" \"name\": \"logTypeName\",\n" +
" \"type\": [\n" +
" \"string\",\n" +
" \"null\"\n" +
" ]\n" +
" },\n" +
" {\n" +
" \"name\": \"timestamp\",\n" +
" \"type\": [\n" +
" \"string\",\n" +
" \"null\"\n" +
" ]\n" +
" },\n" +
" {\n" +
" \"name\": \"source\",\n" +
" \"type\": [\n" +
" \"string\",\n" +
" \"null\"\n" +
" ]\n" +
" },\n" +
" {\n" +
" \"name\": \"offset\",\n" +
" \"type\": [\n" +
" \"string\",\n" +
" \"null\"\n" +
" ]\n" +
" },\n" +
" {\n" +
" \"name\": \"dimensions\",\n" +
" \"type\": [\n" +
" \"null\",\n" +
" {\n" +
" \"type\": \"map\",\n" +
" \"values\": \"string\"\n" +
" }\n" +
" ]\n" +
" },\n" +
" {\n" +
" \"name\": \"measures\",\n" +
" \"type\": [\n" +
" \"null\",\n" +
" {\n" +
" \"type\": \"map\",\n" +
" \"values\": \"double\"\n" +
" }\n" +
" ]\n" +
" },\n" +
" {\n" +
" \"name\": \"normalFields\",\n" +
" \"type\": [\n" +
" \"null\",\n" +
" {\n" +
" \"type\": \"map\",\n" +
" \"values\": \"string\"\n" +
" }\n" +
" ]\n" +
" }\n" +
" ]\n" +
"}";
}
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
log4j.rootLogger=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n