/*
* Copyright (C) 2016 Christian Schilling (schillic@informatik.uni-freiburg.de)
* Copyright (C) 2016 University of Freiburg
*
* This file is part of the ULTIMATE Util Library.
*
* The ULTIMATE Util Library is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The ULTIMATE Util Library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with the ULTIMATE Util Library. If not, see .
*
* Additional permission under GNU GPL version 3 section 7:
* If you modify the ULTIMATE Util Library, or any covered work, by linking
* or combining it with Eclipse RCP (or a modified version of Eclipse RCP),
* containing parts covered by the terms of the Eclipse Public License, the
* licensors of the ULTIMATE Util Library grant you additional permission
* to convey the resulting work.
*/
package de.uni_freiburg.informatik.ultimate.util.csv;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
/**
* Aggregates data of an {@link ICsvProvider}.
*
* NOTE: Data contains shallow copies, i.e., modifications affect both the original data and this wrapper. Copy the
* original to avoid such problems.
*
* @author Christian Schilling (schillic@informatik.uni-freiburg.de)
* @param
* CSV provider type
*/
public class CsvProviderAggregator implements ICsvProviderTransformer {
/**
* Aggregation mode.
*
* @author Christian Schilling (schillic@informatik.uni-freiburg.de)
*/
public enum Aggregation {
/**
* Average/mean of numeric columns.
*/
AVERAGE,
/**
* Sum of numeric columns.
*/
SUM,
/**
* Ignored, i.e., removed from the CSV.
*/
IGNORE,
}
private final Map mColumn2aggregation;
private final String mCountColumnTitle;
/**
* @param column2aggregation
* Maps columns to aggregation mode.
*/
public CsvProviderAggregator(final Map column2aggregation) {
this(column2aggregation, null);
}
/**
* @param column2aggregation
* Maps columns to aggregation mode.
* @param countColumnTitle
* column title for a new column counting the elements (null for deactivation)
* NOTE: To this feature, the CSV entries must be of type {@link String}.
*/
public CsvProviderAggregator(final Map column2aggregation, final String countColumnTitle) {
mColumn2aggregation = column2aggregation;
mCountColumnTitle = countColumnTitle;
}
/**
* Aggregates a CSV.
*
* @param csv
* CSV
* @return aggregates CSV
*/
@SuppressWarnings("unchecked")
@Override
public ICsvProvider transform(final ICsvProvider csv) {
final List rowHeaders = csv.getRowHeaders();
final int rows = rowHeaders.size();
final ArrayList columnTitles = new ArrayList<>();
if (rows == 0) {
return new SimpleCsvProvider<>(columnTitles);
}
final int columnsOld = csv.getColumnTitles().size();
final boolean[] useColumn = new boolean[columnsOld];
int index = 0;
for (final String columnTitle : csv.getColumnTitles()) {
final Aggregation aggregation = mColumn2aggregation.get(columnTitle);
if (aggregation == null) {
System.err.println("Ignoring column " + columnTitle + " which was not specified.");
mColumn2aggregation.put(columnTitle, Aggregation.IGNORE);
} else if (aggregation != Aggregation.IGNORE) {
columnTitles.add(columnTitle);
useColumn[index] = true;
}
++index;
}
columnTitles.trimToSize();
isCountColumnFresh(columnTitles);
final List aggRow = filter(csv.getRow(0), useColumn, columnTitles.size());
for (int i = 1; i < rows; ++i) {
final List row = csv.getRow(i);
final List filteredRow = filter(row, useColumn, columnTitles.size());
aggregateRows(aggRow, filteredRow, columnTitles, i);
}
List columnTitlesEnhanced;
if (mCountColumnTitle != null) {
// insert count
columnTitlesEnhanced = new ArrayList<>(columnTitles.size() + 1);
for (int i = 0; i < columnTitles.size(); ++i) {
columnTitlesEnhanced.add(columnTitles.get(i));
}
columnTitlesEnhanced.add(mCountColumnTitle);
if (!csv.getRow(0).isEmpty() && csv.getRow(0).get(0) instanceof String) {
aggRow.add((T) Integer.toString(rowHeaders.size()));
} else {
throw new IllegalArgumentException(
"Aggregation with adding a count column only works if the CSV has type String.");
}
} else {
columnTitlesEnhanced = columnTitles;
}
final ICsvProvider result = new SimpleCsvProvider<>(columnTitlesEnhanced);
final String rowHeader = rowHeaders.get(0);
result.addRow(rowHeader, aggRow);
return result;
}
private void aggregateRows(final List aggregatedRow, final List singleRow, final List columnTitles,
final int numberOfAggregationsSoFar) {
final ListIterator aggIt = aggregatedRow.listIterator();
final ListIterator singleIt = singleRow.listIterator();
final ListIterator columnTitlesIt = columnTitles.listIterator();
for (int i = 0; i < aggregatedRow.size(); ++i) {
final T aggEntry = aggIt.next();
final T singleEntry = singleIt.next();
final String columnTitle = columnTitlesIt.next();
final Aggregation agg = mColumn2aggregation.get(columnTitle);
assert agg != null;
switch (agg) {
case AVERAGE:
aggIt.set(getAverage(aggEntry, singleEntry, numberOfAggregationsSoFar));
break;
case SUM:
aggIt.set(getSum(aggEntry, singleEntry, numberOfAggregationsSoFar));
break;
case IGNORE:
assert false;
break;
default:
throw new IllegalArgumentException("Unknown aggregation mode: " + agg);
}
}
}
private List filter(final List row, final boolean[] useColumn, final int length) {
int i = 0;
final List result = new ArrayList<>(length);
for (final T entry : row) {
assert i < useColumn.length;
if (useColumn[i]) {
result.add(entry);
}
++i;
}
return result;
}
/**
* The mean of k+1 samples can be computed given the mean of k samples and one more sample as follows:
* {@code m(k+1) = m(k) + 1/(k+1) * (x - m(k))}
*/
private T getAverage(final T aggEntryRaw, final T singleEntryRaw, final int numberOfSamples) {
final double aggEntry = Double.parseDouble(aggEntryRaw.toString());
final double singleEntry = Double.parseDouble(singleEntryRaw.toString());
final double result = aggEntry + 1.0 / (numberOfSamples + 1) * (singleEntry - aggEntry);
return getTypeFromDouble(result, aggEntryRaw);
}
private T getSum(final T aggEntryRaw, final T singleEntryRaw, final int numberOfSamples) {
final double aggEntry = Double.parseDouble(aggEntryRaw.toString());
final double singleEntry = Double.parseDouble(singleEntryRaw.toString());
final double result = aggEntry + singleEntry;
return getTypeFromDouble(result, aggEntryRaw);
}
@SuppressWarnings("unchecked")
private T getTypeFromDouble(final Double d, final T typeSample) {
if (typeSample instanceof Double) {
return (T) d;
}
if (typeSample instanceof String) {
return (T) BigDecimal.valueOf(d).toPlainString();
}
throw new IllegalArgumentException(
"Received data not of type Double but of type " + typeSample.getClass().toGenericString());
}
private void isCountColumnFresh(final ArrayList columnTitles) {
if (mCountColumnTitle != null && columnTitles.contains(mCountColumnTitle)) {
throw new IllegalArgumentException();
}
}
}