package com.zorkdata.datamask;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import com.zorkdata.datamask.domain.LogData;
import com.zorkdata.datamask.util.MaskUtil;
import com.zorkdata.datamask.util.avro.AvroSerializerFactory;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.mapred.AvroInputFormat;
import org.apache.avro.mapred.AvroTextOutputFormat;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.hadoop.mapred.HadoopInputFormat;
import org.apache.flink.api.java.hadoop.mapred.HadoopOutputFormat;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.operators.FlatMapOperator;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.formats.avro.AvroOutputFormat;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.fs.bucketing.BucketingSink;
import org.apache.flink.streaming.connectors.fs.bucketing.DateTimeBucketer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.SimpleDateFormat;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Properties;

/**
 * Description : 国泰交易日志脱敏job
 *
 * @author : wanghaiying (<a href="wanghaiying@zorkdata.com.cn">wanghaiying@zorkdata.com.cn</a>)
 * Date : Create in 2020/9/18 17:35
 */
public class TransactionLogMask {
    public static void main(String[] args) throws Exception {
        ParameterTool params = ParameterTool.fromArgs(args);
        String source = params.get("source", "hdfs");

        if ("hdfs".equals(source)) {
            maskHdfsLog(params);
        } else if ("kafka".equals(source)) {
            maskKafkaLog(params);
        }
    }

    /**
     * hdfs日志文件脱敏
     *
     * @param params 请求参数
     * @return void
     */
    public static void maskHdfsLog(ParameterTool params) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        JobConf jobConf = new JobConf();

        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        String hdfsSrc = params.get("hdfs-src");
        String hdfsDest = params.get("hdfs-dest");
        String core = params.get("core", "c1");
        String date = params.get("date", sdf.format(new Date()));
        String startTime = params.get("startTime");
        String endTime = params.get("endTime");

//        List<String> logFiles = filterHdfsLogFiles(hdfsSrc, date, startTime, endTime);
        List<String> logFiles = new ArrayList<String>() {
            {
                add(hdfsSrc);
            }
        };

        for (String logFile : logFiles) {
            HadoopInputFormat<Object, Object> hadoopInputFormat = new HadoopInputFormat<Object, Object>
                    (new AvroInputFormat(), Object.class, Object.class, jobConf);

            AvroInputFormat.addInputPath(hadoopInputFormat.getJobConf(), new Path(logFile));
            DataSource<Tuple2<Object, Object>> input = env.createInput(hadoopInputFormat);

            FlatMapOperator<Tuple2<Object, Object>, Object> map = input.flatMap(new FlatMapFunction<Tuple2<Object, Object>, Object>() {
                @Override
                public void flatMap(Tuple2<Object, Object> value, Collector<Object> collector) throws Exception {
                    collector.collect(value.f0);
                }
            });

            /**
             * 脱敏算子
             */
            FlatMapOperator<Tuple2<Object, Object>, Object> maskFlatMapOperator = input.flatMap(new FlatMapFunction<Tuple2<Object, Object>, Object>() {
                @Override
                public void flatMap(Tuple2<Object, Object> value, Collector<Object> out) throws Exception {
                    LogData logData = JSON.parseObject(value.getField(0).toString(), new TypeReference<LogData>() {
                    });
                    logData.setNormalFields(MaskUtil.mask(logData.getNormalFields()));
                    out.collect(logData);
                }
            });


            FlatMapOperator<Object, String> objectStringFlatMapOperator = maskFlatMapOperator.flatMap(new FlatMapFunction<Object, String>() {
                @Override
                public void flatMap(Object value, Collector<String> collector) throws Exception {
                    LogData logData = (LogData) value;
                    collector.collect(JSON.toJSONString(logData));
                }
            });

//            objectStringFlatMapOperator.print();

            String logFileName = logFile.split("/")[logFile.split("/").length - 1];

            String filePath = hdfsDest + logFileName;
            System.out.println("---------------writepath-----------------:" + filePath);
            objectStringFlatMapOperator.writeAsText(filePath, org.apache.flink.core.fs.FileSystem.WriteMode.OVERWRITE).name("hadoop-sink");

            try {
                env.execute("国泰交易日志脱敏job");
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * kafka消息数据脱敏
     *
     * @param params 请求参数
     * @return void
     */
    public static void maskKafkaLog(ParameterTool params) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        String servers = params.get("servers");
        String zookeeper = params.get("zookeeper");
        String topic = params.get("topic");
        String hdfsDest = params.get("hdfs-dest");
        String core = params.get("core", "c1");
        String date = params.get("date", sdf.format(new Date()));
        String startTime = params.get("startTime");
        String endTime = params.get("endTime");

        Properties props = new Properties();
        props.put("bootstrap.servers", servers);
        props.put("zookeeper.connect", zookeeper);
        props.put("group.id", "group1");
        props.put("enable.auto.commit", false);
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("auto.offset.reset", "earliest");
        props.put("max.poll.records", 1000);
        SingleOutputStreamOperator<String> dataStreamSource = env.addSource(new FlinkKafkaConsumer<>(topic, new SimpleStringSchema(), props)).setParallelism(1);

        // TODO 根据date、startTime、endTime过滤
        BucketingSink<String> hdfsSink = new BucketingSink<>(hdfsDest);
        //创建一个按照时间创建目录的bucketer,默认是yyyy-MM-dd--HH，时区默认是美国时间。这里我都改了，一天创建一次目录，上海时间
        hdfsSink.setBucketer(new DateTimeBucketer<String>("yyyy-MM-dd", ZoneId.of("Asia/Shanghai")));
        //设置每个文件的最大大小 ,默认是384M(1024 * 1024 * 384)
        hdfsSink.setBatchSize(1024 * 1024 * 384);
        //设置多少时间，就换一个文件写
        hdfsSink.setBatchRolloverInterval(1000 * 60 * 60);
        hdfsSink.setPendingSuffix("ccc");
        hdfsSink.setInactiveBucketThreshold(60 * 1000L);
        hdfsSink.setInactiveBucketCheckInterval(60 * 1000L);
        hdfsSink.setAsyncTimeout(60 * 1000);
        dataStreamSource.addSink(hdfsSink);
        try {
            env.execute("国泰交易日志脱敏job");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * 过滤hdfs日志文件
     *
     * @param hdfs      hdfs地址
     * @param date      日期
     * @param startTime 起始时间
     * @param endTime   结束时间
     * @return hdfs文件列表
     */
    private static List<String> filterHdfsLogFiles(String hdfs, String date, String startTime, String endTime) {
        if (!hdfs.endsWith("/")) {
            hdfs += "/";
        }
        String path = hdfs;
        if (null != date) {
            path = hdfs + date;
        }
        Configuration conf = new Configuration();
//        conf.set("dfs.replication", "3");
//        conf.set("fs.defaultFS", "hdfs://cdh-2:8020");
//        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
//        conf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
        System.out.println("---------------conf-----------------:" + conf.toString());

        List<String> logFiles = new ArrayList<>();
        try {
//            FileSystem fileSystem = FileSystem.get(conf);
            FileSystem fileSystem = null;
            try {
                fileSystem = FileSystem.get(new URI("hdfs://cdh-2:8020/"), conf, "hdfs");
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator = fileSystem.listFiles(new Path(path), false);
            while (locatedFileStatusRemoteIterator.hasNext()) {
                LocatedFileStatus next = locatedFileStatusRemoteIterator.next();
                Path path1 = next.getPath();
                System.out.println("---------------path1-----------------:" + path1.toString());
                logFiles.add(path1.toString());
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
        return logFiles;
    }

    private static LogData avroDeserialize(byte[] data) throws IOException {
        DatumReader<LogData> reader = new SpecificDatumReader<LogData>(LogData.class);
        Decoder decoder = DecoderFactory.get().binaryDecoder(data, null);
        LogData transactionLog = reader.read(null, decoder);
        System.out.println(transactionLog);
        return transactionLog;
    }
}