/*
* Copyright (C) 2016 Christian Schilling (schillic@informatik.uni-freiburg.de)
* Copyright (C) 2016 University of Freiburg
*
* This file is part of the ULTIMATE Util Library.
*
* The ULTIMATE Util Library is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The ULTIMATE Util Library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with the ULTIMATE Util Library. If not, see <http://www.gnu.org/licenses/>.
*
* Additional permission under GNU GPL version 3 section 7:
* If you modify the ULTIMATE Util Library, or any covered work, by linking
* or combining it with Eclipse RCP (or a modified version of Eclipse RCP),
* containing parts covered by the terms of the Eclipse Public License, the
* licensors of the ULTIMATE Util Library grant you additional permission
* to convey the resulting work.
*/
package de.uni_freiburg.informatik.ultimate.util.csv;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.function.Predicate;
/**
* Encapsulates a collection of {@link ICsvProvider}s.
*
* NOTE: Data contains shallow copies, i.e., modifications affect both the original data and this wrapper. Use the
* {@link #copy()} method to avoid such problems.
*
* @author Christian Schilling (schillic@informatik.uni-freiburg.de)
* @param
* CSV provider type
*/
public class CsvProviderPartition {
private Collection> mCsvs;
/**
* Constructor from an existing CSV partition.
*
* @param csvPartition
* CSV partition
*/
public CsvProviderPartition(final Collection> csvPartition) {
mCsvs = csvPartition;
}
/**
* Constructor from an existing CSV where grouping is applied with respect to a given column. This means that all
* rows with the same entry in that column are in the same group.
*
* @param csv
* CSV provider
* @param column
* aggregation column
*/
public CsvProviderPartition(final ICsvProvider csv, final String column) {
mCsvs = groupByColumnKeyAndThreshold(csv, column, null);
}
/**
* Constructor from an existing CSV where grouping is applied with respect to an integer split of one column.
*
* This constructor only makes sense if the values in the respective row are numeric data.
*
* @param csv
* CSV provider
* @param column
* aggregation column
* @param thresholds
* threshold values for different bins
*/
public CsvProviderPartition(final ICsvProvider csv, final String column, final int[] thresholds) {
mCsvs = groupByColumnKeyAndThreshold(csv, column, thresholds);
}
public Iterable> getCsvs() {
return mCsvs;
}
/**
* @return The number of CSVs in the partition.
*/
public int size() {
return mCsvs.size();
}
/**
* @return a single CSV provider containing all groups from the partition.
*/
public ICsvProvider toCsvProvider() {
if (mCsvs.isEmpty()) {
return new SimpleCsvProvider<>(Collections.emptyList());
}
// we assume that all CSVs have the same column titles
final SimpleCsvProvider result = new SimpleCsvProvider<>(mCsvs.iterator().next().getColumnTitles());
for (final ICsvProvider csv : mCsvs) {
final int numberOfRows = csv.getRowHeaders().size();
for (int i = 0; i < numberOfRows; ++i) {
result.addRow(csv.getRowHeaders().get(i), csv.getRow(i));
}
}
return result;
}
/**
* @return A fresh object with copied data.
*/
public CsvProviderPartition copy() {
final Collection> partitionCopy = new ArrayList<>();
for (final ICsvProvider csv : mCsvs) {
final ICsvProvider csvCopy = new SimpleCsvProvider<>(csv.getColumnTitles());
partitionCopy.add(csvCopy);
final int numberOfRows = csv.getRowHeaders().size();
for (int i = 0; i < numberOfRows; ++i) {
csvCopy.addRow(new ArrayList<>(csv.getRow(i)));
}
}
return new CsvProviderPartition<>(partitionCopy);
}
/**
* @param transformer
* Transformer which is applied to each group.
*/
public void transform(final ICsvProviderTransformer transformer) {
final List> transformedCsvs = new ArrayList<>(mCsvs.size());
for (final ICsvProvider csv : mCsvs) {
transformedCsvs.add(transformer.transform(csv));
}
mCsvs = transformedCsvs;
}
/**
* @param predicate
* Predicate on CSV; returns {@code true} iff the CSV should remain, otherwise the CSV is discarded.
*/
public void filterGroups(final Predicate> predicate) {
final Collection> filteredCsvs = new ArrayList<>(mCsvs.size());
for (final ICsvProvider csv : mCsvs) {
if (predicate.test(csv)) {
filteredCsvs.add(csv);
}
}
if (filteredCsvs.size() < mCsvs.size()) {
mCsvs = filteredCsvs;
}
}
/**
* NOTE: The method has two use cases. Either {@code thresholds} is {@code null}, then we use a {@link HashMap} with
* the entry in the defined column as key. Or {@code thresholds} has a value, then we use thresholds to pack the
* data (assumed to be {@code Integer}s) into bins.
*/
private List> groupByColumnKeyAndThreshold(final ICsvProvider csv, final String column,
final int[] thresholds) {
final int columnIndex = csv.getColumnTitles().indexOf(column);
if (columnIndex == -1) {
throw new IllegalArgumentException("The CSV key does not exist: " + column);
}
final Map> key2group;
final Map> bin2group;
if (thresholds == null) {
key2group = new HashMap<>();
bin2group = null;
} else {
key2group = null;
bin2group = new TreeMap<>();
}
final int numberOfRows = csv.getRowHeaders().size();
for (int i = 0; i < numberOfRows; ++i) {
final List row = csv.getRow(i);
final T entry = row.get(columnIndex);
assert thresholds == null || entry instanceof Integer;
final int bin = getBin(entry, thresholds);
ICsvProvider group = (thresholds == null) ? key2group.get(entry) : bin2group.get(bin);
final String rowTitle;
if (group == null) {
group = new SimpleCsvProvider<>(csv.getColumnTitles());
if (thresholds == null) {
key2group.put(entry, group);
rowTitle = entry.toString();
} else {
bin2group.put(bin, group);
final String lower = bin == 0 ? "(-\\infty" : "[" + Integer.toString(thresholds[bin - 1]);
final String upper =
bin == thresholds.length ? "\\infty)" : Integer.toString(thresholds[bin]) + "]";
rowTitle = "$n \\in " + lower + "; " + upper + "$";
}
} else {
final List rowHeaders = group.getRowHeaders();
rowTitle = rowHeaders.isEmpty()
? entry.toString()
: (rowHeaders.get(0) == null ? entry.toString() : rowHeaders.get(0));
}
group.addRow(rowTitle, new ArrayList<>(row));
}
final List> result = new ArrayList<>();
Collection> csvs;
if (thresholds == null) {
csvs = key2group.values();
} else {
csvs = bin2group.values();
}
for (final ICsvProvider group : csvs) {
result.add(group);
}
return result;
}
private int getBin(final T entryRaw, final int[] thresholds) {
if (thresholds == null) {
return 0;
}
final int entry = Integer.parseInt(entryRaw.toString());
for (int i = 0; i < thresholds.length; ++i) {
if (entry < thresholds[i]) {
return i;
}
}
return thresholds.length;
}
/**
* Checks that all entries in a CSV are non-null. Since CSVs can originate from text files, we also check that the
* {@link #toString()} representation is different from "null".
*
* @author Christian Schilling (schillic@informatik.uni-freiburg.de)
*/
public class AllEntriesNonNullFilter implements Predicate> {
@Override
public boolean test(final ICsvProvider csv) {
final int numberOfRows = csv.getRowHeaders().size();
for (int i = 0; i < numberOfRows; ++i) {
final List row = csv.getRow(i);
for (final T entry : row) {
if (entry == null || "null".equals(entry.toString())) {
return false;
}
}
}
return true;
}
}
}