package org.apache.carbondata.spark.load;

import java.util.Comparator;
import java.util.List;
import org.apache.carbondata.common.logging.LogServiceFactory;
import org.apache.carbondata.converter.SparkDataTypeConverterImpl;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.row.CarbonRow;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.datatype.StructType;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
import org.apache.carbondata.core.segmentmeta.SegmentMetaDataInfo;
import org.apache.carbondata.core.statusmanager.LoadMetadataDetails;
import org.apache.carbondata.core.statusmanager.SegmentStatus;
import org.apache.carbondata.core.util.CarbonProperties;
import org.apache.carbondata.core.util.CarbonTaskInfo;
import org.apache.carbondata.core.util.CarbonUtil;
import org.apache.carbondata.core.util.DataTypeUtil;
import org.apache.carbondata.core.util.TaskMetricsMap;
import org.apache.carbondata.core.util.ThreadLocalTaskInfo;
import org.apache.carbondata.hadoop.CarbonProjection;
import org.apache.carbondata.hadoop.api.CarbonTableOutputFormat;
import org.apache.carbondata.processing.datatypes.GenericDataType;
import org.apache.carbondata.processing.loading.CarbonDataLoadConfiguration;
import org.apache.carbondata.processing.loading.DataField;
import org.apache.carbondata.processing.loading.DataLoadProcessBuilder;
import org.apache.carbondata.processing.loading.FailureCauses;
import org.apache.carbondata.processing.loading.csvinput.CSVInputFormat;
import org.apache.carbondata.processing.loading.model.CarbonLoadModel;
import org.apache.carbondata.processing.sort.sortdata.NewRowComparator;
import org.apache.carbondata.processing.sort.sortdata.NewRowComparatorForNormalDims;
import org.apache.carbondata.processing.sort.sortdata.SortParameters;
import org.apache.carbondata.processing.util.CarbonDataProcessorUtil;
import org.apache.carbondata.processing.util.TableOptionConstant;
import org.apache.carbondata.spark.rdd.CarbonScanRDD;
import org.apache.carbondata.spark.rdd.CarbonScanRDD$;
import org.apache.carbondata.spark.rdd.InsertTaskCompletionListener;
import org.apache.carbondata.spark.rdd.StringArrayRow;
import org.apache.carbondata.spark.util.CommonUtil$;
import org.apache.carbondata.spark.util.Util;
import org.apache.carbondata.store.CarbonRowReadSupport;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.log4j.Logger;
import org.apache.spark.CarbonInputMetrics;
import org.apache.spark.DataSkewRangePartitioner;
import org.apache.spark.SparkContext;
import org.apache.spark.TaskContext$;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.execution.command.ExecutionErrors;
import org.apache.spark.sql.util.SparkSQLUtil$;
import org.apache.spark.sql.util.SparkTypeConverter$;
import org.apache.spark.storage.StorageLevel$;
import org.apache.spark.util.CollectionAccumulator;
import org.apache.spark.util.LongAccumulator;
import org.apache.spark.util.SerializableConfiguration;
import scala.Function1;
import scala.Option;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.collection.JavaConverters$;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.Buffer;
import scala.collection.mutable.Buffer$;
import scala.collection.mutable.Map$;
import scala.collection.mutable.StringBuilder;
import scala.math.Ordering;
import scala.math.PartialOrdering;
import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;
import scala.reflect.package$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.RichInt$;
import scala.runtime.ScalaRunTime$;
import scala.runtime.VolatileObjectRef;

/* compiled from: DataLoadProcessBuilderOnSpark.scala */
/* loaded from: input_file:org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark$.class */
public final class DataLoadProcessBuilderOnSpark$ {
    /**
     * Singleton instance of this object. The private constructor assigns {@code this} to it;
     * the {@code = null} initializer is what the decompiler emitted.
     */
    public static final DataLoadProcessBuilderOnSpark$ MODULE$ = null;
    /** Logger obtained in the constructor from {@code LogServiceFactory} for this class's canonical name. */
    private final Logger LOGGER;

    // Instantiating the class runs the private constructor, which stores the instance in MODULE$.
    static {
        new DataLoadProcessBuilderOnSpark$();
    }

    /**
     * Lazily creates the row {@link Ordering} cached in {@code volatileObjectRef}.
     *
     * <p>While synchronized on this object, the holder's {@code elem} is checked and, when it is
     * still {@code null}, set to an anonymous {@code Ordering<Object[]>} whose {@code compare}
     * delegates to {@code comparator}. The content of the holder is then returned.
     *
     * <p>NOTE(review): this is decompiler output. The {@code ??} type below is a JADX placeholder
     * (see the warnings that follow), so the method is not compilable as written.
     *
     * @param comparator row comparator the created ordering delegates to
     * @param volatileObjectRef holder that caches the ordering instance
     * @return the ordering stored in {@code volatileObjectRef}
     */
    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v5 */
    private DataLoadProcessBuilderOnSpark$RowOrdering$2$ RowOrdering$1$lzycompute(final Comparator comparator, VolatileObjectRef volatileObjectRef) {
        ?? r0 = this;
        synchronized (r0) {
            // Re-check under the lock so the ordering is only created when the holder is still empty.
            if (volatileObjectRef.elem == null) {
                volatileObjectRef.elem = new Ordering<Object[]>(comparator) { // from class: org.apache.carbondata.spark.load.DataLoadProcessBuilderOnSpark$RowOrdering$2$
                    private final Comparator rowComparator$1;

                    // The methods below, up to mkOrderingOps, forward to the static helpers on Ordering.class.

                    /* renamed from: tryCompare, reason: merged with bridge method [inline-methods] */
                    public Some m3929tryCompare(Object obj, Object obj2) {
                        return Ordering.class.tryCompare(this, obj, obj2);
                    }

                    public boolean lteq(Object obj, Object obj2) {
                        return Ordering.class.lteq(this, obj, obj2);
                    }

                    public boolean gteq(Object obj, Object obj2) {
                        return Ordering.class.gteq(this, obj, obj2);
                    }

                    public boolean lt(Object obj, Object obj2) {
                        return Ordering.class.lt(this, obj, obj2);
                    }

                    public boolean gt(Object obj, Object obj2) {
                        return Ordering.class.gt(this, obj, obj2);
                    }

                    public boolean equiv(Object obj, Object obj2) {
                        return Ordering.class.equiv(this, obj, obj2);
                    }

                    public Object max(Object obj, Object obj2) {
                        return Ordering.class.max(this, obj, obj2);
                    }

                    public Object min(Object obj, Object obj2) {
                        return Ordering.class.min(this, obj, obj2);
                    }

                    /* renamed from: reverse, reason: merged with bridge method [inline-methods] */
                    public Ordering<Object[]> m3928reverse() {
                        return Ordering.class.reverse(this);
                    }

                    public <U> Ordering<U> on(Function1<U, Object[]> function1) {
                        return Ordering.class.on(this, function1);
                    }

                    public Ordering.Ops mkOrderingOps(Object obj) {
                        return Ordering.class.mkOrderingOps(this, obj);
                    }

                    // The only method with its own logic: row comparison is delegated to the captured comparator.
                    public int compare(Object[] objArr, Object[] objArr2) {
                        return this.rowComparator$1.compare(objArr, objArr2);
                    }

                    // Instance initializer: captures the comparator, then runs the $init$ hooks of the two traits.
                    {
                        this.rowComparator$1 = comparator;
                        PartialOrdering.class.$init$(this);
                        Ordering.class.$init$(this);
                    }
                };
            }
            BoxedUnit boxedUnit = BoxedUnit.UNIT;
            r0 = r0;
            return (DataLoadProcessBuilderOnSpark$RowOrdering$2$) volatileObjectRef.elem;
        }
    }

    /**
     * Accessor for the logger field of this singleton.
     *
     * @return the logger assigned in the constructor
     */
    private Logger LOGGER() {
        return LOGGER;
    }

    /**
     * Runs a global-sort data load as a Spark job and reports the resulting segment status.
     *
     * <p>The input is the RDD of the supplied data set when {@code option} is defined; otherwise a
     * CSV scan RDD is built through {@code CsvRDDHelper$}. The rows then pass through a chain of
     * partition-level functions (synthetic closure classes whose bodies are not visible here), are
     * sorted with {@code sortBy} into the resolved number of partitions and handed to
     * {@code runJob}. Afterwards the step accumulators are logged and the result is built by
     * {@code updateLoadStatus}.
     *
     * @param sparkSession session providing the Spark context
     * @param option optional input data set; when empty the input is read through the CSV scan RDD
     * @param carbonLoadModel load model, broadcast to the tasks
     * @param configuration Hadoop configuration; the application name is written into it
     * @param collectionAccumulator accumulator passed on to the function given to {@code runJob}
     * @return single-element array produced by {@code updateLoadStatus}
     */
    public Tuple2<String, Tuple2<LoadMetadataDetails, ExecutionErrors>>[] loadDataUsingGlobalSort(SparkSession sparkSession, Option<Dataset<Row>> option, CarbonLoadModel carbonLoadModel, Configuration configuration, CollectionAccumulator<Map<String, SegmentMetaDataInfo>> collectionAccumulator) {
        VolatileObjectRef orderingHolder = VolatileObjectRef.zero();

        // Pick the input: the caller's data set if present, the CSV scan otherwise.
        final boolean readFromCsv = !option.isDefined();
        RDD inputRdd;
        if (readFromCsv) {
            inputRdd = CsvRDDHelper$.MODULE$.csvFileScanRDD(sparkSession, carbonLoadModel, configuration).map(new DataLoadProcessBuilderOnSpark$$anonfun$2(carbonLoadModel.getCsvHeaderColumns().length), ClassTag$.MODULE$.apply(StringArrayRow.class));
        } else {
            inputRdd = ((Dataset) option.get()).rdd();
        }

        SparkContext sc = sparkSession.sparkContext();
        Broadcast modelBroadcast = sc.broadcast(carbonLoadModel, ClassTag$.MODULE$.apply(CarbonLoadModel.class));
        LongAccumulator partialSuccessAccum = sc.longAccumulator("Partial Success Accumulator");
        LongAccumulator inputStepRowCounter = sc.longAccumulator("Input Processor Accumulator");
        LongAccumulator convertStepRowCounter = sc.longAccumulator("Convert Processor Accumulator");
        LongAccumulator sortStepRowCounter = sc.longAccumulator("Sort Processor Accumulator");
        LongAccumulator writeStepRowCounter = sc.longAccumulator("Write Processor Accumulator");
        configuration.set(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, sparkSession.sparkContext().appName());
        Broadcast<SerializableConfiguration> confBroadcast = SparkSQLUtil$.MODULE$.broadCastHadoopConf(sc, configuration);

        // Input step: the non-CSV path needs one extra mapPartitions pass before the indexed one.
        RDD inputStepRdd;
        if (readFromCsv) {
            inputStepRdd = inputRdd.mapPartitionsWithIndex(new DataLoadProcessBuilderOnSpark$$anonfun$3(modelBroadcast, inputStepRowCounter), inputRdd.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(CarbonRow.class));
        } else {
            RDD objectRows = inputRdd.mapPartitions(new DataLoadProcessBuilderOnSpark$$anonfun$4(modelBroadcast), inputRdd.mapPartitions$default$2(), ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Object.class)));
            inputStepRdd = objectRows.mapPartitionsWithIndex(new DataLoadProcessBuilderOnSpark$$anonfun$5(modelBroadcast, inputStepRowCounter), objectRows.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(CarbonRow.class));
        }

        // Convert step followed by a filter.
        RDD convertedRdd = inputStepRdd.mapPartitionsWithIndex(new DataLoadProcessBuilderOnSpark$$anonfun$6(modelBroadcast, partialSuccessAccum, convertStepRowCounter, confBroadcast), inputStepRdd.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(CarbonRow.class)).filter(new DataLoadProcessBuilderOnSpark$$anonfun$7());

        CarbonDataLoadConfiguration loadConfiguration = DataLoadProcessBuilder.createConfiguration(carbonLoadModel);
        SortParameters sortParameters = SortParameters.createSortParameters(loadConfiguration);
        Comparator rowComparator;
        if (sortParameters.getNoDictionaryCount() > 0) {
            rowComparator = new NewRowComparator(sortParameters.getNoDictionarySortColumn(), sortParameters.getNoDictDataType());
        } else {
            rowComparator = new NewRowComparatorForNormalDims(sortParameters.getDimColCount());
        }

        // Resolve the partition count: a non-positive configured value falls back to the RDD's own.
        int numPartitions = CarbonDataProcessorUtil.getGlobalSortPartitions(loadConfiguration.getDataLoadProperty(CarbonCommonConstants.LOAD_GLOBAL_SORT_PARTITIONS));
        if (numPartitions <= 0) {
            numPartitions = convertedRdd.partitions().length;
        } else if (System.getProperty("useIndexServer") != null) {
            // Result intentionally discarded; only the partition computation is triggered.
            convertedRdd.partitions();
        }

        // NOTE(review): presumably cached so the input is not recomputed by sortBy — confirm.
        final boolean cacheConvertedRdd = numPartitions > 1;
        if (cacheConvertedRdd) {
            convertedRdd.persist(StorageLevel$.MODULE$.fromString(CarbonProperties.getInstance().getGlobalSortRddStorageLevel()));
        }

        RDD sortedRdd = convertedRdd.sortBy(new DataLoadProcessBuilderOnSpark$$anonfun$8(), convertedRdd.sortBy$default$2(), numPartitions, RowOrdering$1(rowComparator, orderingHolder), package$.MODULE$.classTag(ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Object.class))));
        sc.runJob(sortedRdd.mapPartitionsWithIndex(new DataLoadProcessBuilderOnSpark$$anonfun$9(modelBroadcast, sortStepRowCounter), sortedRdd.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(CarbonRow.class)), new DataLoadProcessBuilderOnSpark$$anonfun$loadDataUsingGlobalSort$1(carbonLoadModel, collectionAccumulator, modelBroadcast, writeStepRowCounter, confBroadcast), ClassTag$.MODULE$.Unit());

        if (cacheConvertedRdd) {
            convertedRdd.unpersist(false);
        }

        LOGGER().info("Total rows processed in step Input Processor: " + inputStepRowCounter.value());
        LOGGER().info("Total rows processed in step Data Converter: " + convertStepRowCounter.value());
        LOGGER().info("Total rows processed in step Sort Processor: " + sortStepRowCounter.value());
        LOGGER().info("Total rows processed in step Data Writer: " + writeStepRowCounter.value());
        return updateLoadStatus(carbonLoadModel, partialSuccessAccum);
    }

    /**
     * Runs a global-sort insert for an RDD of {@code InternalRow}s and reports the segment status.
     *
     * <p>The rows are mapped to object arrays, sorted with {@code sortBy} using a comparator
     * generated by {@code GlobalSortHelper$} for the table's sort columns, and handed to
     * {@code runJob}. The closure classes used along the way are synthetic and their bodies are
     * not visible here.
     *
     * @param sparkSession session providing the Spark context
     * @param rdd input rows
     * @param carbonLoadModel load model, broadcast to the tasks
     * @param configuration Hadoop configuration; the application name is written into it
     * @param collectionAccumulator accumulator passed on to the function given to {@code runJob}
     * @return single-element array produced by {@code updateLoadStatus}
     */
    public Tuple2<String, Tuple2<LoadMetadataDetails, ExecutionErrors>>[] insertDataUsingGlobalSortWithInternalRow(SparkSession sparkSession, RDD<InternalRow> rdd, CarbonLoadModel carbonLoadModel, Configuration configuration, CollectionAccumulator<Map<String, SegmentMetaDataInfo>> collectionAccumulator) {
        SparkContext sc = sparkSession.sparkContext();
        Broadcast modelBroadcast = sc.broadcast(carbonLoadModel, ClassTag$.MODULE$.apply(CarbonLoadModel.class));
        LongAccumulator partialSuccessAccum = sc.longAccumulator("Partial Success Accumulator");
        LongAccumulator sortStepRowCounter = sc.longAccumulator("Sort Processor Accumulator");
        LongAccumulator writeStepRowCounter = sc.longAccumulator("Write Processor Accumulator");
        configuration.set(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, sparkSession.sparkContext().appName());
        Broadcast<SerializableConfiguration> confBroadcast = SparkSQLUtil$.MODULE$.broadCastHadoopConf(sc, configuration);
        CarbonDataLoadConfiguration loadConfiguration = DataLoadProcessBuilder.createConfiguration(carbonLoadModel);

        // Schema fields of the target table, plus a per-field value derived from them.
        Seq schemaFields = Predef$.MODULE$.refArrayOps(Util.convertToSparkSchemaFromColumnSchema(carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable(), true).fields()).toSeq();
        Seq mappedFields = (Seq) schemaFields.map(new DataLoadProcessBuilderOnSpark$$anonfun$10(), Seq$.MODULE$.canBuildFrom());
        scala.collection.mutable.Map<String, GenericDataType<?>> complexTypes = Map$.MODULE$.apply(Nil$.MODULE$);
        CommonUtil$.MODULE$.convertComplexDataType(complexTypes, loadConfiguration);
        RDD objectRows = rdd.map(new DataLoadProcessBuilderOnSpark$$anonfun$11(schemaFields, complexTypes), ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Object.class)));

        // A non-positive configured partition count falls back to that of the incoming RDD.
        int numPartitions = CarbonDataProcessorUtil.getGlobalSortPartitions(loadConfiguration.getDataLoadProperty(CarbonCommonConstants.LOAD_GLOBAL_SORT_PARTITIONS));
        if (numPartitions <= 0) {
            numPartitions = rdd.partitions().length;
        }

        final boolean cacheObjectRows = numPartitions > 1;
        if (cacheObjectRows) {
            objectRows.persist(StorageLevel$.MODULE$.fromString(CarbonProperties.getInstance().getGlobalSortRddStorageLevel()));
        }

        int sortColumnCount = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().getSortColumns().size();
        // Only the leading sortColumnCount fields take part in the ordering; each is paired with its index.
        Seq leadingFields = (Seq) mappedFields.take(sortColumnCount);
        Seq sortFieldValues = (Seq) leadingFields.map(new DataLoadProcessBuilderOnSpark$$anonfun$insertDataUsingGlobalSortWithInternalRow$1(), Seq$.MODULE$.canBuildFrom());
        Seq indexedSortFields = (Seq) sortFieldValues.zipWithIndex(Seq$.MODULE$.canBuildFrom());
        RDD sortedRdd = objectRows.sortBy(new DataLoadProcessBuilderOnSpark$$anonfun$12(sortColumnCount), true, numPartitions, GlobalSortHelper$.MODULE$.generateRowComparator(indexedSortFields), package$.MODULE$.classTag(ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Object.class))));

        sc.runJob(sortedRdd.mapPartitionsWithIndex(new DataLoadProcessBuilderOnSpark$$anonfun$13(carbonLoadModel, sortStepRowCounter), sortedRdd.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(CarbonRow.class)), new DataLoadProcessBuilderOnSpark$$anonfun$insertDataUsingGlobalSortWithInternalRow$2(carbonLoadModel, collectionAccumulator, modelBroadcast, writeStepRowCounter, confBroadcast), ClassTag$.MODULE$.Unit());

        if (cacheObjectRows) {
            objectRows.unpersist(false);
        }

        LOGGER().info("Total rows processed in step Sort Processor: " + sortStepRowCounter.value());
        LOGGER().info("Total rows processed in step Data Writer: " + writeStepRowCounter.value());
        return updateLoadStatus(carbonLoadModel, partialSuccessAccum);
    }

    /**
     * Copies the leading {@code i} elements of {@code objArr} into a newly allocated array.
     *
     * @param objArr source row; must hold at least {@code i} elements, otherwise
     *     {@code System.arraycopy} throws
     * @param i number of leading elements to copy
     * @return a new array of length {@code i} holding the same element references
     */
    public Object[] getKey(Object[] objArr, int i) {
        final Object[] key = new Object[i];
        System.arraycopy(objArr, 0, key, 0, i);
        return key;
    }

    /**
     * Builds the single-element load result from the partial-success accumulator.
     *
     * <p>When the accumulator value equals zero the segment is marked {@code SUCCESS} and the key
     * is {@code <tableName>_Success}. Otherwise the segment is marked {@code LOAD_PARTIAL_SUCCESS},
     * the key is {@code <tableName>_Partial_Success} and the failure cause is set to
     * {@code BAD_RECORDS}.
     *
     * @param carbonLoadModel supplies the segment id and table name
     * @param longAccumulator partial-success accumulator
     * @return array with one (key, (load details, execution errors)) tuple
     */
    private Tuple2<String, Tuple2<LoadMetadataDetails, ExecutionErrors>>[] updateLoadStatus(CarbonLoadModel carbonLoadModel, LongAccumulator longAccumulator) {
        LoadMetadataDetails segmentDetails = new LoadMetadataDetails();
        segmentDetails.setLoadName(carbonLoadModel.getSegmentId());

        final boolean fullySuccessful = BoxesRunTime.equalsNumObject(longAccumulator.value(), BoxesRunTime.boxToInteger(0));
        final String statusKey;
        if (fullySuccessful) {
            statusKey = carbonLoadModel.getTableName() + "_" + "Success";
            segmentDetails.setSegmentStatus(SegmentStatus.SUCCESS);
        } else {
            statusKey = carbonLoadModel.getTableName() + "_" + "Partial_Success";
            segmentDetails.setSegmentStatus(SegmentStatus.LOAD_PARTIAL_SUCCESS);
        }

        ExecutionErrors executionErrors = new ExecutionErrors(FailureCauses.NONE, "");
        if (!fullySuccessful) {
            executionErrors.failureCauses_$eq(FailureCauses.BAD_RECORDS);
        }
        return new Tuple2[]{new Tuple2<>(statusKey, new Tuple2(segmentDetails, executionErrors))};
    }

    /**
     * Runs a range-partitioned data load from CSV input and reports the resulting segment status.
     *
     * <p>The CSV scan RDD passes through two partition-level functions and a filter, is keyed by
     * the range column, repartitioned with a {@code DataSkewRangePartitioner} built from a sample
     * RDD, and handed to {@code runJob}. The closure classes involved are synthetic and their
     * bodies are not visible here.
     *
     * @param sparkSession session providing the Spark context
     * @param carbonLoadModel load model, broadcast to the tasks
     * @param configuration Hadoop configuration; the application name is written into it
     * @param collectionAccumulator accumulator passed on to the function given to {@code runJob}
     * @return single-element array produced by {@code updateLoadStatus}
     */
    public Tuple2<String, Tuple2<LoadMetadataDetails, ExecutionErrors>>[] loadDataUsingRangeSort(SparkSession sparkSession, CarbonLoadModel carbonLoadModel, Configuration configuration, CollectionAccumulator<Map<String, SegmentMetaDataInfo>> collectionAccumulator) {
        SparkContext sc = sparkSession.sparkContext();
        Broadcast<CarbonLoadModel> modelBroadcast = sc.broadcast(carbonLoadModel, ClassTag$.MODULE$.apply(CarbonLoadModel.class));
        LongAccumulator partialSuccessAccum = sc.longAccumulator("Partial Success Accumulator");
        LongAccumulator inputStepRowCounter = sc.longAccumulator("Input Processor Accumulator");
        LongAccumulator convertStepRowCounter = sc.longAccumulator("Convert Processor Accumulator");
        // Registered and logged below, but not handed to any closure in this method.
        LongAccumulator sortStepRowCounter = sc.longAccumulator("Sort Processor Accumulator");
        LongAccumulator writeStepRowCounter = sc.longAccumulator("Write Processor Accumulator");
        configuration.set(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, sparkSession.sparkContext().appName());

        RDD<InternalRow> csvRows = CsvRDDHelper$.MODULE$.csvFileScanRDD(sparkSession, carbonLoadModel, configuration);
        RDD inputStepRdd = csvRows.mapPartitionsWithIndex(new DataLoadProcessBuilderOnSpark$$anonfun$14(modelBroadcast, inputStepRowCounter), csvRows.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(CarbonRow.class));
        Broadcast<SerializableConfiguration> confBroadcast = SparkSQLUtil$.MODULE$.broadCastHadoopConf(sc, configuration);
        RDD<CarbonRow> convertedRdd = inputStepRdd.mapPartitionsWithIndex(new DataLoadProcessBuilderOnSpark$$anonfun$15(modelBroadcast, partialSuccessAccum, convertStepRowCounter, confBroadcast), inputStepRdd.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(CarbonRow.class)).filter(new DataLoadProcessBuilderOnSpark$$anonfun$16());

        CarbonDataLoadConfiguration loadConfiguration = DataLoadProcessBuilder.createConfiguration(carbonLoadModel);
        RDD keyedRdd = convertedRdd.keyBy(new DataLoadProcessBuilderOnSpark$$anonfun$17(indexOfColumn(carbonLoadModel.getRangePartitionColumn(), loadConfiguration.getDataFields())));
        int numPartitions = getNumPartitions(loadConfiguration, carbonLoadModel, convertedRdd);
        Ordering<Object> rangeColumnOrdering = createOrderingForColumn(carbonLoadModel.getRangePartitionColumn());
        RDD<Tuple2<Object, Object>> sampleRdd = getSampleRDD(sparkSession, carbonLoadModel, configuration, loadConfiguration, modelBroadcast);

        ClassTag keyTag = ClassTag$.MODULE$.Object();
        ClassTag valueTag = ClassTag$.MODULE$.apply(CarbonRow.class);
        // Default-argument accessor call kept from the compiled code; its result is unused.
        RDD$.MODULE$.rddToPairRDDFunctions$default$4(keyedRdd);
        RDD rangePartitionedRdd = RDD$.MODULE$.rddToPairRDDFunctions(keyedRdd, keyTag, valueTag, (Ordering) null).partitionBy(new DataSkewRangePartitioner(numPartitions, sampleRdd, false, rangeColumnOrdering, package$.MODULE$.classTag(ClassTag$.MODULE$.Object())));
        RDD rowsToWrite = rangePartitionedRdd.map(new DataLoadProcessBuilderOnSpark$$anonfun$18(), ClassTag$.MODULE$.apply(CarbonRow.class));
        sc.runJob(rowsToWrite, new DataLoadProcessBuilderOnSpark$$anonfun$loadDataUsingRangeSort$1(carbonLoadModel, collectionAccumulator, modelBroadcast, writeStepRowCounter, confBroadcast), ClassTag$.MODULE$.Unit());

        LOGGER().info("Total rows processed in step Input Processor: " + inputStepRowCounter.value());
        LOGGER().info("Total rows processed in step Data Converter: " + convertStepRowCounter.value());
        LOGGER().info("Total rows processed in step Sort Processor: " + sortStepRowCounter.value());
        LOGGER().info("Total rows processed in step Data Writer: " + writeStepRowCounter.value());
        return updateLoadStatus(carbonLoadModel, partialSuccessAccum);
    }

    /**
     * Builds the RDD of (key, value) tuples used as the sample for range partitioning.
     *
     * <p>The position of the range column in the CSV header is written into a copy of
     * {@code configuration} under {@code CSVInputFormat.SELECT_COLUMN_INDEX}; the CSV scan RDD
     * created from that copy is then passed through two partition-level functions, a filter and
     * a final map (synthetic closure classes whose bodies are not visible here).
     *
     * @param sparkSession session used to create the scan RDD and the configuration broadcast
     * @param carbonLoadModel load model supplying the range partition column
     * @param configuration base Hadoop configuration; copied, not modified
     * @param carbonDataLoadConfiguration not read by this method; a fresh configuration is created
     *     from {@code carbonLoadModel} instead
     * @param broadcast broadcast load model handed to the partition-level functions
     * @return RDD of tuples produced by the final map
     */
    private RDD<Tuple2<Object, Object>> getSampleRDD(SparkSession sparkSession, CarbonLoadModel carbonLoadModel, Configuration configuration, CarbonDataLoadConfiguration carbonDataLoadConfiguration, Broadcast<CarbonLoadModel> broadcast) {
        CarbonDataLoadConfiguration loadConfiguration = DataLoadProcessBuilder.createConfiguration(carbonLoadModel);
        String[] headerColumns = loadConfiguration.getHeader();
        CarbonColumn rangeColumn = carbonLoadModel.getRangePartitionColumn();

        // Both lookups call get() on the find result, so a missing match raises from Option.get().
        Option headerMatch = RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), headerColumns.length).find(new DataLoadProcessBuilderOnSpark$$anonfun$1(headerColumns, rangeColumn));
        int rangeColumnIndex = BoxesRunTime.unboxToInt(headerMatch.get());
        DataField rangeField = (DataField) Predef$.MODULE$.refArrayOps(loadConfiguration.getDataFields()).find(new DataLoadProcessBuilderOnSpark$$anonfun$19(rangeColumn)).get();

        // Work on a copy so the caller's configuration is left untouched.
        Configuration sampleConf = new Configuration(configuration);
        sampleConf.set(CSVInputFormat.SELECT_COLUMN_INDEX, String.valueOf(rangeColumnIndex));

        RDD<InternalRow> csvRows = CsvRDDHelper$.MODULE$.csvFileScanRDD(sparkSession, carbonLoadModel, sampleConf);
        RDD inputStepRdd = csvRows.mapPartitionsWithIndex(new DataLoadProcessBuilderOnSpark$$anonfun$20(broadcast, rangeField), csvRows.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(CarbonRow.class));
        // The broadcast is built from the original configuration, not from the copy above.
        RDD convertedRdd = inputStepRdd.mapPartitionsWithIndex(new DataLoadProcessBuilderOnSpark$$anonfun$21(broadcast, rangeField, SparkSQLUtil$.MODULE$.broadCastHadoopConf(sparkSession.sparkContext(), configuration)), inputStepRdd.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(CarbonRow.class));
        RDD filteredRdd = convertedRdd.filter(new DataLoadProcessBuilderOnSpark$$anonfun$22());
        return filteredRdd.map(new DataLoadProcessBuilderOnSpark$$anonfun$getSampleRDD$1(), ClassTag$.MODULE$.apply(Tuple2.class));
    }

    /**
     * Resolves the number of partitions for a range-sort load.
     *
     * <p>A positive configured global-sort partition count is used as is. Otherwise the count is
     * the number of partitions of {@code rdd} when the model's total size is not positive, and
     * the size-based estimate from {@code getNumPartitionsBasedOnSize} when it is.
     *
     * @param carbonDataLoadConfiguration load configuration holding the partition property
     * @param carbonLoadModel load model supplying total size and table
     * @param rdd RDD whose partition count is the fallback
     * @return the resolved partition count
     */
    private int getNumPartitions(CarbonDataLoadConfiguration carbonDataLoadConfiguration, CarbonLoadModel carbonLoadModel, RDD<CarbonRow> rdd) {
        int configuredPartitions = CarbonDataProcessorUtil.getGlobalSortPartitions(carbonDataLoadConfiguration.getDataLoadProperty(CarbonCommonConstants.LOAD_GLOBAL_SORT_PARTITIONS));
        if (configuredPartitions > 0) {
            return configuredPartitions;
        }
        if (carbonLoadModel.getTotalSize() <= 0) {
            return rdd.partitions().length;
        }
        return getNumPartitionsBasedOnSize(carbonLoadModel.getTotalSize(), carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable(), carbonLoadModel, false);
    }

    /**
     * Estimates the number of partitions needed for {@code d} bytes of data.
     *
     * <p>The split size is {@code max(blockletSize, blockSize - blockletSize) * scaleFactor}, with
     * both sizes taken from {@code carbonTable} and converted from MB to bytes. The result is
     * {@code ceil(d / splitSize)} narrowed to {@code int}.
     *
     * <p>The MB-to-byte conversion is done in {@code long} arithmetic. With an {@code int}
     * literal the product would be computed as {@code int} whenever the getters return
     * {@code int}, and would overflow for sizes of 2048 MB or more.
     *
     * @param d total data size
     * @param carbonTable table supplying the block and blocklet sizes in MB
     * @param carbonLoadModel load model supplying the scale factor
     * @param z when {@code true} the scale factor is fixed to 1
     * @return the estimated partition count
     */
    public int getNumPartitionsBasedOnSize(double d, CarbonTable carbonTable, CarbonLoadModel carbonLoadModel, boolean z) {
        final long bytesPerMb = 1024L * 1024L;
        final long blockSizeInBytes = bytesPerMb * carbonTable.getBlockSizeInMB();
        final long blockletSizeInBytes = bytesPerMb * carbonTable.getBlockletSizeInMB();

        // A scale factor of 0 on the model means "not set": fall back to the configured property.
        final int scaleFactor;
        if (z) {
            scaleFactor = 1;
        } else if (carbonLoadModel.getScaleFactor() == 0) {
            scaleFactor = CarbonProperties.getInstance().getRangeColumnScaleFactor();
        } else {
            scaleFactor = carbonLoadModel.getScaleFactor();
        }

        final long splitSizeInBytes = Math.max(blockletSizeInBytes, blockSizeInBytes - blockletSizeInBytes) * scaleFactor;
        return (int) Math.ceil(d / splitSizeInBytes);
    }

    /**
     * Finds the position in {@code dataFieldArr} selected by the synthetic predicate built from
     * {@code carbonColumn}.
     *
     * <p>Indices {@code 0 .. dataFieldArr.length - 1} are searched; {@code get()} is called on
     * the result without a presence check, so a missing match raises from {@code Option.get()}.
     *
     * @param carbonColumn column to look for
     * @param dataFieldArr fields to search
     * @return the first index accepted by the predicate
     */
    private int indexOfColumn(CarbonColumn carbonColumn, DataField[] dataFieldArr) {
        Option matchingIndex = RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), dataFieldArr.length).find(new DataLoadProcessBuilderOnSpark$$anonfun$indexOfColumn$1(carbonColumn, dataFieldArr));
        return BoxesRunTime.unboxToInt(matchingIndex.get());
    }

    /**
     * Chooses the ordering used to compare values of {@code carbonColumn}.
     *
     * <ul>
     *   <li>non-dimension column: {@code PrimitiveOrdering} of the column's data type;</li>
     *   <li>dimension of type {@code DATE}: {@code PrimitiveOrdering} of {@code INT};</li>
     *   <li>other dimension with a primitive data type: {@code PrimitiveOrdering} of that type;</li>
     *   <li>any other dimension: {@code ByteArrayOrdering}.</li>
     * </ul>
     *
     * @param carbonColumn column to create the ordering for
     * @return the ordering for values of that column
     */
    private Ordering<Object> createOrderingForColumn(CarbonColumn carbonColumn) {
        if (!Predef$.MODULE$.Boolean2boolean(carbonColumn.isDimension())) {
            return new PrimitiveOrdering(carbonColumn.getDataType());
        }

        DataType dimensionType = ((CarbonDimension) carbonColumn).getDataType();
        DataType dateType = DataTypes.DATE;
        // Null-safe equality, mirroring the compiled comparison.
        boolean isDate = dimensionType != null ? dimensionType.equals(dateType) : dateType == null;
        if (isDate) {
            return new PrimitiveOrdering(DataTypes.INT);
        }
        if (DataTypeUtil.isPrimitiveColumn(carbonColumn.getDataType())) {
            return new PrimitiveOrdering(carbonColumn.getDataType());
        }
        return new ByteArrayOrdering();
    }

    /**
     * Prepares the current Spark task: registers an {@code InsertTaskCompletionListener},
     * initializes the task metrics thread-local and installs a {@code CarbonTaskInfo} with a
     * freshly generated id.
     *
     * @param str forwarded to the listener as its fourth constructor argument
     * @param str2 forwarded to the listener as its fifth constructor argument
     * @param collectionAccumulator accumulator forwarded to the listener
     */
    public void setTaskListener(String str, String str2, CollectionAccumulator<Map<String, SegmentMetaDataInfo>> collectionAccumulator) {
        // The first two listener arguments are deliberately null, as in the compiled code.
        TaskContext$.MODULE$.get().addTaskCompletionListener(new InsertTaskCompletionListener(null, null, collectionAccumulator, str, str2));
        TaskMetricsMap.initializeThreadLocal();

        final CarbonTaskInfo taskInfo = new CarbonTaskInfo();
        taskInfo.setTaskId(CarbonUtil.generateUUID());
        ThreadLocalTaskInfo.setCarbonTaskInfo(taskInfo);
    }

    /**
     * Creates a load model for {@code carbonTable} from a fresh Hadoop configuration.
     *
     * <p>Database name, table name, table and input schema are written into the configuration
     * through {@code CarbonTableOutputFormat}, from which the load model is then obtained. The
     * model's null format, bad-record logger flag, bad-record action and empty-data flag are set
     * to fixed values, and the table property {@code global_sort_partitions} is copied over when
     * present.
     *
     * @param sparkSession session whose session state supplies the Hadoop configuration
     * @param carbonTable table to load into
     * @return the configured load model
     */
    public CarbonLoadModel createLoadModelForGlobalSort(SparkSession sparkSession, CarbonTable carbonTable) {
        Configuration hadoopConf = SparkSQLUtil$.MODULE$.sessionState(sparkSession).newHadoopConf();
        CarbonTableOutputFormat.setDatabaseName(hadoopConf, carbonTable.getDatabaseName());
        CarbonTableOutputFormat.setTableName(hadoopConf, carbonTable.getTableName());
        CarbonTableOutputFormat.setCarbonTable(hadoopConf, carbonTable);

        // Input schema: one entry per create-order column, mapped by a synthetic closure.
        Buffer schemaFields = (Buffer) ((TraversableLike) JavaConverters$.MODULE$.asScalaBufferConverter(carbonTable.getCreateOrderColumn()).asScala()).map(new DataLoadProcessBuilderOnSpark$$anonfun$23(), Buffer$.MODULE$.canBuildFrom());
        List schemaFieldList = (List) JavaConverters$.MODULE$.bufferAsJavaListConverter(schemaFields).asJava();
        CarbonTableOutputFormat.setInputSchema(hadoopConf, new StructType(schemaFieldList));

        CarbonLoadModel model = CarbonTableOutputFormat.getLoadModel(hadoopConf);
        model.setSerializationNullFormat(TableOptionConstant.SERIALIZATION_NULL_FORMAT.getName() + ",\\N");
        model.setBadRecordsLoggerEnable(TableOptionConstant.BAD_RECORDS_LOGGER_ENABLE.getName() + ",false");
        model.setBadRecordsAction(TableOptionConstant.BAD_RECORDS_ACTION.getName() + ",force");
        model.setIsEmptyDataBadRecord("IS_EMPTY_DATA_BAD_RECORD,false");

        String globalSortPartitions = carbonTable.getTableInfo().getFactTable().getTableProperties().get("global_sort_partitions");
        if (globalSortPartitions != null) {
            model.setGlobalSortPartitions(globalSortPartitions);
        }
        return model;
    }

    /**
     * Creates a data set over the given input splits of {@code carbonTable}.
     *
     * <p>A {@code CarbonScanRDD} projecting the table's create-order columns is built for the
     * splits, each row is mapped to an {@code InternalRow} by a synthetic closure, and the result
     * is turned into a data set with the matching Spark schema through
     * {@code SparkSQLUtil$.execute}.
     *
     * @param sparkSession session used for the scan and the resulting data set
     * @param carbonTable table to read
     * @param seq input splits to scan
     * @return the data set produced by {@code SparkSQLUtil$.execute}
     */
    public Dataset<Row> createInputDataFrame(SparkSession sparkSession, CarbonTable carbonTable, Seq<InputSplit> seq) {
        String[] columnNames = (String[]) ((TraversableOnce) ((TraversableLike) JavaConverters$.MODULE$.asScalaBufferConverter(carbonTable.getCreateOrderColumn()).asScala()).map(new DataLoadProcessBuilderOnSpark$$anonfun$24(), Buffer$.MODULE$.canBuildFrom())).toArray(ClassTag$.MODULE$.apply(String.class));
        org.apache.spark.sql.types.StructType sparkSchema = SparkTypeConverter$.MODULE$.createSparkSchema(carbonTable, Predef$.MODULE$.wrapRefArray(columnNames));

        // The third and eighth constructor arguments are null, as in the compiled code.
        CarbonScanRDD scanRdd = new CarbonScanRDD(sparkSession, new CarbonProjection(columnNames), null, carbonTable.getAbsoluteTableIdentifier(), carbonTable.getTableInfo().serialize(), carbonTable.getTableInfo(), new CarbonInputMetrics(), null, SparkDataTypeConverterImpl.class, CarbonRowReadSupport.class, (List) JavaConverters$.MODULE$.seqAsJavaListConverter(seq).asJava(), CarbonScanRDD$.MODULE$.$lessinit$greater$default$12(), ClassTag$.MODULE$.apply(CarbonRow.class));
        RDD internalRows = scanRdd.map(new DataLoadProcessBuilderOnSpark$$anonfun$25(), ClassTag$.MODULE$.apply(InternalRow.class));
        return SparkSQLUtil$.MODULE$.execute(internalRows, sparkSchema, sparkSession);
    }

    /**
     * Returns the row ordering cached in {@code volatileObjectRef}, creating it on first use.
     *
     * @param comparator comparator handed to the lazy initializer when the holder is empty
     * @param volatileObjectRef holder caching the ordering
     * @return the cached or newly created ordering
     */
    private final DataLoadProcessBuilderOnSpark$RowOrdering$2$ RowOrdering$1(Comparator comparator, VolatileObjectRef volatileObjectRef) {
        // Fast path is skipped while the holder is empty; the initializer re-checks under a lock.
        if (volatileObjectRef.elem == null) {
            return RowOrdering$1$lzycompute(comparator, volatileObjectRef);
        }
        return (DataLoadProcessBuilderOnSpark$RowOrdering$2$) volatileObjectRef.elem;
    }

    /**
     * Private constructor, invoked once from the static initializer.
     *
     * <p>Stores the new instance in {@code MODULE$} and obtains the logger for this class's
     * canonical name.
     *
     * <p>NOTE(review): assigning the {@code final} field {@code MODULE$} here is a decompiler
     * artifact and is not valid Java as written.
     */
    private DataLoadProcessBuilderOnSpark$() {
        // The singleton reference is published before the logger field is initialized.
        MODULE$ = this;
        this.LOGGER = LogServiceFactory.getLogService(getClass().getCanonicalName());
    }
}
