PentahoDataTransmuter


1   /*
2    * Copyright 2006 Pentaho Corporation.  All rights reserved. 
3    * This software was developed by Pentaho Corporation and is provided under the terms 
4    * of the Mozilla Public License, Version 1.1, or any later version. You may not use 
5    * this file except in compliance with the license. If you need a copy of the license, 
6    * please go to http://www.mozilla.org/MPL/MPL-1.1.txt. The Original Code is the Pentaho 
7    * BI Platform.  The Initial Developer is Pentaho Corporation.
8    *
9    * Software distributed under the Mozilla Public License is distributed on an "AS IS" 
10   * basis, WITHOUT WARRANTY OF ANY KIND, either express or  implied. Please refer to 
11   * the license for the specific language governing your rights and limitations.
12   *
13   * Created Nov 10, 2005 
14   * @author wseyler
15   */
16  
17  package org.pentaho.core.connection;
18  
19  import java.text.Format  ;
20  import java.text.MessageFormat  ;
21  import java.util.ArrayList  ;
22  import java.util.Collection  ;
23  import java.util.HashMap  ;
24  import java.util.Iterator  ;
25  import java.util.LinkedList  ;
26  import java.util.List  ;
27  import java.util.Map  ;
28  import java.util.TreeMap  ;
29  import org.apache.commons.collections.map.ListOrderedMap;
30  import org.pentaho.core.connection.memory.MemoryMetaData;
31  import org.pentaho.core.connection.memory.MemoryResultSet;
32  import org.pentaho.messages.Messages;
33  
34  /**
35   * @author wseyler
36   * 
37   * Provides various methods to transmutes an IPentahoResultSet such that the
38   * resulting IPentahoResultSet dimensionality can be used for different purposes
39   * (ie creating a pie or bar chart)
40   * 
41   */
42  public class PentahoDataTransmuter extends DataUtilities {
43      protected final IPentahoResultSet sourceResultSet;
44  
45      public PentahoDataTransmuter(IPentahoResultSet resultSet) {
46          super();
47          sourceResultSet = resultSet;
48      }
49  
50      /**
51       * @return Returns the sourceResultSet.
52       */
53      public IPentahoResultSet getSourceResultSet() {
54          return sourceResultSet;
55      }
56  
57      public static IPentahoResultSet transmute(IPentahoResultSet source, boolean pivot) {
58          return transmute(source, (Integer  ) null, null, null, null, pivot);
59      }
60  
61      public static IPentahoResultSet transmute(IPentahoResultSet source, Integer   columnForRowHeaders, Integer   rowForColumnHeaders, boolean pivot) {
62          return transmute(source, columnForRowHeaders, rowForColumnHeaders, null, null, pivot);
63      }
64  
65      public IPentahoResultSet transmute(Integer   columnForRowHeaders, Integer   rowForColumnHeaders, boolean pivot) {
66          return transmute(sourceResultSet, columnForRowHeaders, rowForColumnHeaders, null, null, pivot);
67      }
68  
69      public static IPentahoResultSet transmute(IPentahoResultSet source, String  [] columnForRowHeaders, String  [] rowForColumnHeaders, boolean pivot) {
70          return transmute(source, columnForRowHeaders, rowForColumnHeaders, null, null, pivot);
71      }
72  
73      public IPentahoResultSet transmute(String  [] columnForRowHeaders, String  [] rowForColumnHeaders, boolean pivot) {
74          return transmute(sourceResultSet, columnForRowHeaders, rowForColumnHeaders, null, null, pivot);
75      }
76  
77      public static IPentahoResultSet transmute(IPentahoResultSet source, Integer  [] rowsToInclude, Integer  [] columnsToInclude, boolean pivot) {
78          return transmute(source, null, null, rowsToInclude, columnsToInclude, pivot);
79      }
80  
81      public IPentahoResultSet transmute(Integer  [] rowsToInclude, Integer  [] columnsToInclude, boolean pivot) {
82          return transmute(sourceResultSet, null, null, rowsToInclude, columnsToInclude, pivot);
83      }
84  
85      public static IPentahoResultSet transmute(IPentahoResultSet source, String  [][] rowsToInclude, String  [][] columnsToInclude, boolean pivot) {
86          return transmute(source, null, null, rowsToInclude, columnsToInclude, pivot);
87      }
88  
89      public IPentahoResultSet transmute(String  [][] rowsToInclude, String  [][] columnsToInclude, boolean pivot) {
90          return transmute(sourceResultSet, null, null, rowsToInclude, columnsToInclude, pivot);
91      }
92  
93      public IPentahoResultSet transmute(String  [] columnForRowHeaders, String  [] rowForColumnHeaders, String  [][] rowsToInclude, String  [][] columnsToInclude, boolean pivot) {
94          return transmute(sourceResultSet, columnForRowHeaders, rowForColumnHeaders, rowsToInclude, columnsToInclude, pivot);
95      }
96  
97      /**
98       * Returns a memory result set that represents a grid of data and it's
99       * associated headers.
100      * 
101      * @param source -
102      *            The source IPentahoResultSet
103      * @param columnForRowHeaders -
104      *            a 0 based column number whose data will be used as the row
105      *            headers. If null and rowHeaders exist in the source then the
106      *            rowHeaders will be returned otherwise the first column (column
107      *            0) will be used for row headers
108      * @param rowForColumnHeaders -
109      *            a 0 based row number whose data will be used as the
110      *            columnHeaders. If null and columnHeaders exist in the source
111      *            then the columnHeaders will be returned. Otherwise the first
112      *            row (row 0) will be used for the columnHeaders
113      * @param columnsToInclude -
114      *            An 2D String array where each row is a String[] representing a
115      *            column header[] of a column to include in the result.
116      * @param rowsToInclude -
117      *            An 2D String array where each row is a String[] representing a
118      *            row header[] of a row to include in the result.
119      * @param pivot -
120      *            pivot the entire IPentahoResultSet - NOTE this occurs last in
121      *            the process. Other params referencing rows and colums are
122      *            based on the source before it is pivoted.
123      * @return - a copy of the result set containing the requested data.
124      */
125     public static IPentahoResultSet transmute(IPentahoResultSet source, String  [] columnForRowHeaders, String  [] rowForColumnHeaders, String  [][] rowsToInclude, String  [][] columnsToInclude, boolean pivot) {
126         Integer   cfrh = null;
127         if (columnForRowHeaders != null) {
128             String  [][] searchStrings = new String  [1][];
129             searchStrings[0] = columnForRowHeaders;
130             Integer  [] indexes = columnNamesToIndexes(source, searchStrings);
131             if (indexes.length > 0) {
132                 cfrh = indexes[0];
133             }
134         }
135         Integer   rfch = null;
136         if (rowForColumnHeaders != null) {
137             String  [][] searchStrings = new String  [1][];
138             searchStrings[0] = rowForColumnHeaders;
139             Integer  [] indexes = rowNamesToIndexes(source, searchStrings);
140             if (indexes.length > 0) {
141                 rfch = indexes[0];
142             }
143         }
144         Integer  [] rti = null;
145         if (rowsToInclude != null) {
146             rti = rowNamesToIndexes(source, rowsToInclude);
147         }
148         Integer  [] cti = null;
149         if (columnsToInclude != null) {
150             cti = columnNamesToIndexes(source, columnsToInclude);
151         }
152         return transmute(source, cfrh, rfch, rti, cti, pivot);
153     }
154 
155     public IPentahoResultSet transmute(Integer   columnForRowHeaders, Integer   rowForColumnHeaders, Integer  [] rowsToInclude, Integer  [] columnsToInclude, boolean pivot) {
156         return transmute(sourceResultSet, columnForRowHeaders, rowForColumnHeaders, rowsToInclude, columnsToInclude, pivot);
157     }
158 
159     /**
160      * Returns a memory result set that represents a grid of data and it's
161      * associated headers.
162      * 
163      * @param source -
164      *            The source IPentahoResultSet
165      * @param columnForRowHeaders -
166      *            a 0 based column number whose data will be used as the row
167      *            headers. If null and rowHeaders exist in the source then the
168      *            rowHeaders will be returned otherwise the first column (column
169      *            0) will be used for row headers
170      * @param rowForColumnHeaders -
171      *            a 0 based row number whose data will be used as the
172      *            columnHeaders. If null and columnHeaders exist in the source
173      *            then the columnHeaders will be returned. Otherwise the first
174      *            row (row 0) will be used for the columnHeaders
175      * @param columnsToInclude -
176      *            An integer array of columns to include in the result.
177      * @param rowToInclude -
178      *            An integer array of rows to include in the result
179      * @param pivot -
180      *            pivot the entire IPentahoResultSet - NOTE this occurs last in
181      *            the process. Other params referencing rows and colums are
182      *            based on the source before it is pivoted.
183      * @return - a copy of the result set containing the requested data.
184      */
185     public static IPentahoResultSet transmute(IPentahoResultSet source, Integer   columnForRowHeaders, Integer   rowForColumnHeaders, Integer  [] rowsToInclude, Integer  [] columnsToInclude, boolean pivot) {
186         Object  [][] rowHeaders = null;
187         Object  [][] columnHeaders = null;
188 
189         // construct the headers
190         rowHeaders = constructRowHeaders(source, columnForRowHeaders);
191         columnHeaders = constructColumnHeaders(source, rowForColumnHeaders);
192         // select the headers
193         rowHeaders = DataUtilities.filterDataByRows(rowHeaders, rowsToInclude);
194         columnHeaders = DataUtilities.filterDataByColumns(columnHeaders, columnsToInclude);
195         // create a 2D array of the data
196         source.beforeFirst(); // make sure we start at the beginning
197         List   dataList = new ArrayList  ();
198         Object  [] rowData = source.next();
199         while (rowData != null) {
200             dataList.add(rowData);
201             rowData = source.next();
202         }
203         Object  [][] data = null;
204         if (columnHeaders != null) {
205             data = new Object  [dataList.size()][columnHeaders[0].length];
206             for (int row = 0; row < data.length; row++) {
207                 data[row] = (Object  []) dataList.get(row);
208             }
209         }
210         // now filter the data
211         data = DataUtilities.filterData(data, rowsToInclude, columnsToInclude);
212         // create the MemoryResultSet with the processed headers
213         IPentahoResultSet result = new MemoryResultSet(new MemoryMetaData(columnHeaders, rowHeaders)); // Create
214         // the
215         // resultSet
216         // now copy the data to the MemoryResultSet
217         if (data != null) {
218             for (int row = 0; row < data.length; row++) {
219                 ((MemoryResultSet) result).addRow(data[row]);
220             }
221         }
222         // finally pivot this if requested
223         if (pivot) {
224             result = pivot(result);
225         }
226         return result;
227     }
228 
229     public IPentahoResultSet pivot() {
230         return pivot(sourceResultSet);
231     }
232 
233     /**
234      * This rotates a IPentahoResultSet such that the row and column data are
235      * reversed and the data headers are also rotated. ie source.
236      * 
237      * |Cheader1,0|CHeader1,1|CHeader1,2|Cheader1,3|
238      * |CHeader0,0|CHeader0,1|CHeader0,2|CHeader0,3|
239      * ---------------------------------------------
240      * |RHeader0,2|RHeader0,1|RHeader0,0 |Data0,0 |Data0,1 |Data0,2 |Data 0,3 |
241      * |RHeader1,2|RHeader1,1|RHeader1,0 |Data1,0 |Data1,1 |Data1,2 |Data 1,3 |
242      * |RHeader2,2|RHeader2,1|RHeader2,0 |Data2,0 |Data2,1 |Data2,2 |Data 2,3 |
243      * |RHeader3,2|RHeader3,1|RHeader3,0 |Data3,0 |Data3,1 |Data3,2 |Data 3,3 |
244      * |RHeader4,2|RHeader4,1|RHeader4,0 |Data4,0 |Data4,1 |Data4,2 |Data 4,3 |
245      * |RHeader5,2|RHeader5,1|RHeader5,0 |Data5,0 |Data5,1 |Data5,2 |Data 5,3 |
246      * 
247      * becomes:
248      * 
249      * |RHeader0,2|RHeader1,2|RHeader2,2|RHeader3,2|RHeader4,2|RHeader5,2|
250      * |RHeader0,1|RHeader1,1|RHeader2,1|RHeader3,1|RHeader4,1|RHeader5,1|
251      * |RHeader0,0|RHeader1,0|RHeader2,0|RHeader3,0|RHeader4,0|RHeader5,0|
252      * -------------------------------------------------------------------
253      * |CHeader1,0|CHeader0,0 |Data0,0 |Data1,0 |Data2,0 |Data3,0 |Data4,0
254      * |Data5,0 | |CHeader1,1|CHeader0,1 |Data0,1 |Data1,1 |Data2,1 |Data3,1
255      * |Data4,1 |Data5,1 | |CHeader1,2|CHeader0,2 |Data0,2 |Data1,2 |Data2,2
256      * |Data3,2 |Data4,2 |Data5,2 | |CHeader1,3|CHeader0,3 |Data0,3 |Data1,3
257      * |Data2,3 |Data3,3 |Data4,3 |Data5,3 |
258      * 
259      * @param resultSet
260      *            and IPentahoResultSet on which to operate. Note that this
261      *            parameter remains unchaged after this call. The original data
262      *            is preserved
263      * 
264      * @return an IPentahoResultSet that represents the rotated
265      *         IPentahoResultSet argumnent
266      */
267     public static IPentahoResultSet pivot(IPentahoResultSet resultSet) {
268         MemoryResultSet result = new MemoryResultSet(swapAndPivotRowAndColumnHeaders(resultSet.getMetaData()));
269 
270         // Make a copy of the current data
271         Object  [][] dataValues = new Object  [resultSet.getRowCount()][resultSet.getColumnCount()];
272         for (int row = 0; row < resultSet.getRowCount(); row++) {
273             Object  [] dataRow = resultSet.next();
274             for (int column = 0; column < resultSet.getColumnCount(); column++) {
275                 dataValues[row][column] = dataRow[column];
276             }
277         }
278         // Swap the dimensionality of the data
279         dataValues = DataUtilities.pivotDimensions(dataValues);
280         for (int row = 0; row < dataValues.length; row++) {
281             result.addRow(dataValues[row]);
282         }
283         return result;
284     }
285 
286     public Integer  [] rowNamesToIndexes(String  [][] names) {
287         return rowNamesToIndexes(sourceResultSet, names);
288     }
289 
290     /**
291      * Returns an array of row Indexes taht match the names parameter
292      * 
293      * @param source
294      *            IPentahoResultSet to operate agains
295      * @param names
296      *            a 2D string array[rows][columns] where each row is a string
297      *            array to compare against the row headers
298      * @return an array of Integers that represes the selected rows
299      */
300     public static Integer  [] rowNamesToIndexes(IPentahoResultSet source, String  [][] names) {
301         List   result = new ArrayList  ();
302         for (int row = 0; row < names.length; row++) {
303             int found = source.getMetaData().getRowIndex(names[row]);
304             if (found != -1) {
305                 result.add(new Integer  (found));
306             }
307         }
308         Integer  [] resultArray = new Integer  [result.size()];
309         for (int i = 0; i < resultArray.length; i++) {
310             resultArray[i] = (Integer  ) result.get(i);
311         }
312         return resultArray;
313     }
314 
315     public Integer  [] columnNamesToIndexes(String  [][] names) {
316         return columnNamesToIndexes(sourceResultSet, names);
317     }
318 
319     /**
320      * Returns an array of column Indexes that match the names parameter
321      * 
322      * @param source
323      *            IPentahoResultSet to operate against
324      * @param names
325      *            a 2D string array[rows][columns] where each row is a string
326      *            array to compare against the column headers
327      * @return an array of Integers that represents the selected columns
328      */
329     public static Integer  [] columnNamesToIndexes(IPentahoResultSet source, String  [][] names) {
330         List   result = new ArrayList  ();
331         for (int row = 0; row < names.length; row++) {
332             int found = source.getMetaData().getColumnIndex(names[row]);
333             if (found != -1) {
334                 result.add(new Integer  (found));
335             }
336         }
337         Integer  [] resultArray = new Integer  [result.size()];
338         for (int i = 0; i < resultArray.length; i++) {
339             resultArray[i] = (Integer  ) result.get(i);
340         }
341         return resultArray;
342     }
343 
344     /**
345      * @param source
346      * @param rowForColumnHeaders
347      * @return
348      */
349     protected static Object  [][] constructColumnHeaders(IPentahoResultSet source, Integer   rowForColumnHeaders) {
350         Object  [][] result = null;
351         if (rowForColumnHeaders == null) { // Nothing specified by user so
352             // we'll apply the rules
353             if (source.getMetaData().getColumnHeaders() == null) { // If we
354                 // don't
355                 // have
356                 // columnHeaders
357                 Object  [] dataRow = source.getDataRow(0); // use the first row
358                 if (dataRow != null) {
359                     result = new Object  [1][];
360                     result[0] = dataRow;
361                 }
362             } else { // We have Headers so let's just use them
363                 result = source.getMetaData().getColumnHeaders();
364             }
365         } else { // A rowForColumnHeaders was supplied so lets use it
366             Object  [] dataRow = source.getDataRow(rowForColumnHeaders.intValue());
367             if (dataRow != null) {
368                 result = new Object  [1][];
369                 result[0] = dataRow;
370             }
371         }
372         return result;
373     }
374 
375     /**
376      * Constructs the headers based on the rule as stated in the
377      * columnForRowHeaders param.
378      * 
379      * @param source -
380      *            The source IPentahoResultSet
381      * @param columnForRowHeaders -
382      *            a 0 based column number whose data will be used as the row
383      *            headers. If null and rowHeaders exist in the source then the
384      *            rowHeaders will be flattened and returned otherwise the first
385      *            column (column 0) will be used for row headers
386      * @return 2D array representing the manufactured rowHeaders
387      */
388     protected static Object  [][] constructRowHeaders(IPentahoResultSet source, Integer   columnForRowHeaders) {
389         Object  [][] result = null;
390         if (columnForRowHeaders == null) { // Nothing specified by user so
391             // we'll apply the rules
392             if (source.getMetaData().getRowHeaders() == null) { // If we don't
393                 // have
394                 // rowHeaders
395                 Object  [] dataColumn = source.getDataColumn(0); // use the first
396                 // column
397                 result = new Object  [dataColumn.length][1];
398                 for (int row = 0; row < result.length; row++) {
399                     result[row][0] = dataColumn[row];
400                 }
401             } else { // We have Headers so let's just use them
402                 result = source.getMetaData().getRowHeaders();
403             }
404         } else { // A columnForRowHeaders was supplied so lets use it
405             Object  [] dataColumn = source.getDataColumn(columnForRowHeaders.intValue()); // use
406             // the
407             // param
408             // column
409             result = new Object  [dataColumn.length][1];
410             for (int row = 0; row < result.length; row++) {
411                 result[row][0] = dataColumn[row];
412             }
413         }
414         return result;
415     }
416 
417     /**
418      * @param metaData
419      *            source
420      * @return IPentahoMetaData that contain the swapped and pivoted Row and
421      *         ColumnHeaders
422      */
423     protected static IPentahoMetaData swapAndPivotRowAndColumnHeaders(IPentahoMetaData metaData) {
424         Object  [][] sourceColumnHeaders = metaData.getColumnHeaders();
425         Object  [][] sourceRowHeaders = metaData.getRowHeaders();
426         boolean hasColumnHeaders = sourceColumnHeaders != null;
427         boolean hasRowHeaders = sourceRowHeaders != null;
428 
429         Object  [][] columnHeaders = null;
430         Object  [][] rowHeaders = null;
431 
432         if (hasColumnHeaders) {
433             rowHeaders = DataUtilities.pivotDimensions(sourceColumnHeaders);
434         }
435 
436         if (hasRowHeaders) {
437             columnHeaders = DataUtilities.pivotDimensions(sourceRowHeaders);
438         }
439 
440         MemoryMetaData result = new MemoryMetaData(columnHeaders, rowHeaders);
441         return result;
442     }
443 
444     private static String  [] getCollapsedRowHeaders(IPentahoResultSet resultSet, char seperator) {
445         Object  [][] rowHeaders = resultSet.getMetaData().getRowHeaders();
446         if (rowHeaders != null) {
447             StringBuffer  [] resultBuffer = new StringBuffer  [rowHeaders.length];
448             for (int i = 0; i < resultBuffer.length; i++) {
449                 resultBuffer[i] = new StringBuffer  ();
450             }
451             for (int row = 0; row < rowHeaders.length; row++) {
452                 for (int col = 0; col < rowHeaders[row].length; col++) {
453                     if (col == 0) { // Don't use seperator
454                         resultBuffer[row].append(rowHeaders[row][col].toString());
455                     } else { // use seperator
456                         resultBuffer[row].append(seperator + rowHeaders[row][col].toString());
457                     }
458                 }
459             }
460             String  [] result = new String  [resultBuffer.length];
461             for (int i = 0; i < resultBuffer.length; i++) {
462                 result[i] = resultBuffer[i].toString();
463             }
464             return result;
465         }
466         return null;
467     }
468 
469     private static String  [] getCollapsedColumnHeaders(IPentahoResultSet resultSet, char seperator) {
470         Object  [][] columnHeaders = resultSet.getMetaData().getColumnHeaders();
471         if (columnHeaders != null) {
472             StringBuffer  [] resultBuffer = new StringBuffer  [columnHeaders[0].length];
473             for (int i = 0; i < resultBuffer.length; i++) {
474                 resultBuffer[i] = new StringBuffer  ();
475             }
476             for (int col = 0; col < columnHeaders[0].length; col++) { // for
477                 // each
478                 // column
479                 for (int row = 0; row < columnHeaders.length; row++) { // for
480                     // each
481                     // row
482                     // except
483                     // the
484                     // first
485                     if (row == 0) { // don't use seperator for first row
486                         resultBuffer[col].append(columnHeaders[row][col].toString());
487                     } else { // use the seperator;
488                         resultBuffer[col].append(seperator + columnHeaders[row][col].toString());
489                     }
490                 }
491             }
492             String  [] result = new String  [resultBuffer.length];
493             for (int i = 0; i < resultBuffer.length; i++) {
494                 result[i] = resultBuffer[i].toString();
495             }
496             return result;
497         }
498         return null;
499     }
500 
501     public String  [] getCollapsedHeaders(int axis, char seperator) throws Exception   {
502         return getCollapsedHeaders(axis, sourceResultSet, seperator);
503     }
504 
505     /**
506      * Returns a string array where each element represents the concatenations
507      * of the headers for a sing column. Each concatenation is seperated by the
508      * parameter "seperator"
509      * 
510      * @param axis
511      *            row or column headers
512      * @param resultSet
513      *            to headers from
514      * @param a
515      *            character that represents the seprator between entities of a
516      *            column header
517      * @return a String array that represents a fully qualified column headers
518      * @throws Exception
519      */
520     public static String  [] getCollapsedHeaders(int axis, IPentahoResultSet resultSet, char seperator) throws Exception   {
521         if (axis != AXIS_COLUMN && axis != AXIS_ROW) {
522             throw new IllegalArgumentException  (Messages.getString("PentahoDataTransmuter.ERROR_0001_INVALID_AXIS")); //$NON-NLS-1$
523         }
524         if (resultSet == null) {
525             throw new IllegalArgumentException  (Messages.getString("PentahoDataTransmuter.ERROR_0002_NULL_DATASET")); //$NON-NLS-1$
526         }
527 
528         switch (axis) {
529         case AXIS_COLUMN:
530             return getCollapsedColumnHeaders(resultSet, seperator);
531 
532         case AXIS_ROW:
533             return getCollapsedRowHeaders(resultSet, seperator);
534 
535         }
536         return null;
537     }
538 
539     /**
540      * 
541      * Flatten a resultset based on a particular column, new values for each row
542      * trigger a new row in the flattened resultset.
543      * 
544      * @return IPentahoResultSet
545      * 
546      */
547     public static IPentahoResultSet flattenResultSet(IPentahoResultSet resultSet, int squashColumn) {
548         IPentahoResultSet flattenedResultSet = resultSet.memoryCopy();
549         if (flattenedResultSet instanceof MemoryResultSet) {
550             MemoryResultSet memoryResultSet = (MemoryResultSet) flattenedResultSet;
551             // go through the resultset looking at the 'squashColumn'
552             // for all values that are the same, create a new row
553             // all the data should be missing/null/empty except for 1 row in
554             // each
555             // column
556             // the new row should be fully populated
557             Object   colHeads[][] = memoryResultSet.getMetaData().getColumnHeaders();
558 
559             int rowCount = memoryResultSet.getRowCount();
560             int colCount = memoryResultSet.getColumnCount();
561             Object   squashColumnValue = memoryResultSet.getValueAt(0, squashColumn);
562             Object   squashedRow[] = new Object  [colCount];
563             List   rowsList = new LinkedList  ();
564             rowsList.add(squashedRow);
565             for (int row = 0; row < rowCount; row++) {
566                 Object   newSquashColumnValue = memoryResultSet.getValueAt(row, squashColumn);
567                 if (newSquashColumnValue.equals(squashColumnValue)) {
568                     // continue row
569                 } else {
570                     // start a new row
571                     squashedRow = new Object  [colCount];
572                     rowsList.add(squashedRow);
573                 }
574                 squashColumnValue = newSquashColumnValue;
575                 for (int col = 0; col < colCount; col++) {
576                     // try to find a legit value in the row
577                     Object   potentialValue = memoryResultSet.getValueAt(row, col);
578                     if (potentialValue != null && !potentialValue.toString().equals("")) { //$NON-NLS-1$
579                         // legit value
580                         squashedRow[col] = potentialValue;
581                     }
582                 }
583             }
584             Object   rows[][] = new Object  [rowsList.size()][colCount];
585             for (int i = 0; i < rowsList.size(); i++) {
586                 rows[i] = (Object  []) rowsList.get(i);
587             }
588             return MemoryResultSet.createFromArrays(colHeads, rows);
589         }
590         return flattenedResultSet;
591     }
592 
593     public static String   dump(IPentahoResultSet source) {
594         return dump(source, true);
595     }
596 
597     public static String   dump(IPentahoResultSet source, boolean useOrBar) {
598             StringBuffer   sb = new StringBuffer  ();
599             String   orBar = ""; //$NON-NLS-1$
600         if (useOrBar) {
601             orBar = "|"; //$NON-NLS-1$
602         }
603         source.beforeFirst();
604         Object  [][] columnHeaders = source.getMetaData().getColumnHeaders();
605         Object  [][] rowHeaders = source.getMetaData().getRowHeaders();
606         String   formatString = ""; //$NON-NLS-1$
607         if (rowHeaders != null) {
608             for (int columns = 0; columns < rowHeaders[0].length; columns++) {
609                 formatString = formatString + "\t\t"; //$NON-NLS-1$
610             }
611         }
612         if (columnHeaders != null) {
613             for (int row = columnHeaders.length - 1; row >= 0; row--) {
614                         sb.append(formatString);
615                 for (int column = 0; column < columnHeaders[row].length; column++) {
616                         sb.append(columnHeaders[row][column] + orBar + "\t"); //$NON-NLS-1$
617                 }
618                 sb.append('\n');
619             }
620         }
621         if (rowHeaders != null) {
622             for (int row = 0; row < rowHeaders.length; row++) {
623                 for (int column = rowHeaders[row].length - 1; column >= 0; column--) {
624                         sb.append(rowHeaders[row][column] + orBar + "\t"); //$NON-NLS-1$
625                 }
626                 Object  [] dataRow = source.next();
627                 for (int column = 0; column < dataRow.length; column++) {
628                         sb.append(dataRow[column] + orBar + "\t"); //$NON-NLS-1$
629                 }
630                 sb.append('\n');
631             }
632         } else {
633             Object  [] dataRow = source.next();
634             while (dataRow != null) {
635                 for (int column = 0; column < dataRow.length; column++) {
636                     sb.append(dataRow[column] + orBar + "\t"); //$NON-NLS-1$
637                 }
638                 sb.append('\n');
639                 dataRow = source.next();
640             }
641         }
642         sb.append('\n');
643 
644         source.beforeFirst();
645         return sb.toString();
646     }
647 
648     public static IPentahoResultSet crossTab(IPentahoResultSet source, int columnToPivot, int measureColumn, Format   pivotDataFormatter) {
649         return crossTab(source, columnToPivot, measureColumn, pivotDataFormatter, true);
650     }
651 
652     
653     /**
654      * This method takes a column of data, and turns it into multiple columns
655      * based on the values within the column. The measure column specified is
656      * then distributed among the newly created columns. Sparse data is handled
657      * by populating missing cells with nulls.
658      * 
659      * @param source
660      *            The starting IPentahoResultSet
661      * @param columnToPivot
662      *            The column that becomes multiple columns
663      * @param measureColumn
664      *            The measures column to distribute to the new columns created
665      * @param pivotDataFormatter
666      *            If the column to pivot requires formatting, this is the
667      *            formatter to use
668      * @param orderedMaps
669      *            If true, will sort the new column names alphabetically. If
670      *            false, the colums will be created in the order of appearance
671      *            in the rows
672      * @return IPentahoResultSet containing crosstabbed data.
673      * 
674      * @author mbatchelor
675      * 
676      * Assumptions/Limitations: a- This only works with one dimension going
677      * across. This won't do multi-level crosstabbing.
678      * 
679      * b- All column numbers given are assumed to be ZERO based. So, the first
680      * column is 0.
681      * 
682      * Example:
683      * 
684      * Starting Resultset ================== SaleDate|Vendor|Product|Total_Units
685      * 1-1-05 |A |Cola |12.0 1-1-05 |A |Diet |7.0 1-1-05 |B |RootBr |16.0 1-1-05
686      * |B |Diet |14.0 2-1-05 |A |Diet |8.0 2-1-05 |B |Ginger |17.0
687      * 
688      * Could Become ============ Parameters: (startingResultSet, 0, 3,
689      * simpleDateFormatObject, true)
690      * 
691      * Vendor|Product|Jan 2005|Feb 2005 A |Cola |12.0 |null A |Diet |7.0 |8.0 B
692      * |RootBr |16.0 |null B |Diet |14.0 |null B |Ginger |null |17.0
693      * 
694      * Or could become =============== Parameters: (startingResultSet, 2, 3,
695      * null, false)
696      * 
697      * SaleDate|Vendor|Cola|Diet|RootBr|Ginger 1-1-05 |A |12.0|7.0 |null |null
698      * 1-1-05 |B |null|14.0|16.0 |null 2-1-05 |A |null|8.0 |null |null 2-1-05 |B
699      * |null|null|null |17.0
700      * 
701      * 
702      */
703     public static IPentahoResultSet crossTab(IPentahoResultSet source, int columnToPivot, int measureColumn, Format   pivotDataFormatter, boolean orderedMaps) {
704         return crossTab(source, columnToPivot, measureColumn, -1, pivotDataFormatter, null, orderedMaps);
705     }
706 
707     /**
708      * This method takes a column of data, and turns it into multiple columns
709      * based on the values within the column. The measure column specified is
710      * then distributed among the newly created columns. Sparse data is handled
711      * by populating missing cells with nulls. This version of the method also
712      * takes two additional parameters - the column to sort the new columns by,
713      * and a formatter for that column.
714      * 
715      * @param source
716      *            The starting IPentahoResultSet
717      * @param columnToPivot
718      *            The column that becomes multiple columns
719      * @param measureColumn
720      *            The measures column to distribute to the new columns created
721      * @param columnToSortColumnsBy
722      *            The column to use to sort the newly created columns by
723      * @param pivotDataFormatter
724      *            If the column to pivot requires formatting, this is the
725      *            formatter to use
726      * @param sortDataFormatter
727      *            The formatter to use to convert the sort column to a string
728      * @param orderedMaps
729      *            If true, will sort the new column names alphabetically. If
730      *            false, the colums will be created in the order of appearance
731      *            in the rows.
732      * @return IPentahoResultSet containing crosstabbed data.
733      * 
734      * @author mbatchelor
735      * 
736      * Assumptions: a- This only works with one dimension going across. This
737      * won't do multi-level crosstabbing.
738      * 
739      * b- All column numbers given are assumed to be ZERO based. So, the first
740      * column is 0.
741      * 
742      * c- If a columnToSortColumnsBy column is specified (>=0), the orderedMaps
743      * flag will be set to true regardless of the passed in value.
744      * 
745      * d- For now, we assume that the column to sort by is removed from the
746      * dataset. In the future, this will not be an assumption.
747      * 
748      * TODO: Update method to make removal of sort-by column optional.
749      * 
750      * Example:
751      * 
752      * Starting Resultset
753      * ================== 
754      * Month|Vendor|Rank|Counts
755      * Jan |A-A-A |2 |92
756      * Jan |Acme  |3 |200
757      * Jan |Ajax  |4 |163
758      * Feb |Acme |3 |27
759      * Feb |Ajax |4 |102
760      * Mar |Donn |1 |427
761      * Mar |A-A-A|2 |301
762      * Mar |Acme |3 |82
763      * 
764      * Could Become 
765      * ============ 
766      * parameters: (startingResultSet, 1, 3, 2, null, decimalFormatObject, true)
767      * 
768      * Month|Donn|A-A-A|Acme|Ajax
769      * Jan |null|92 |200 |163
770      * Feb |null|null |27 |102
771      * Mar |427 |301 |82 |null
772      * 
773      * 
774      */
775     public static IPentahoResultSet crossTab(IPentahoResultSet source, int columnToPivot, int measureColumn, int columnToSortColumnsBy, Format   pivotDataFormatter, Format   sortDataFormatter, boolean orderedMaps) {
776 
777 // System.out.println("*********************Before********************");
778 // System.out.println(dump(source));
779       
780         // First, do some error checking...
781         if (source == null) {
782             throw new IllegalArgumentException  (Messages.getErrorString("PentahoDataTransmuter.ERROR_0002_NULL_DATASET")); //$NON-NLS-1$
783         }
784         int sourceColumnCount = source.getColumnCount();
785         if (columnToPivot > sourceColumnCount) {
786             throw new IllegalArgumentException  (Messages.getErrorString("PentahoDataTransmuter.ERROR_0003_INVALID_PIVOT_COLUMN")); //$NON-NLS-1$
787         }
788         if (measureColumn > sourceColumnCount) {
789             throw new IllegalArgumentException  (Messages.getErrorString("PentahoDataTransmuter.ERROR_0004_INVALID_MEASURE_COLUMN")); //$NON-NLS-1$
790         }
791         if (columnToSortColumnsBy > sourceColumnCount) {
792             throw new IllegalArgumentException  (Messages.getErrorString("PentahoDataTransmuter.ERROR_0004_INVALID_SORT_COLUMN")); //$NON-NLS-1$
793         }
794         // Now, setup so variables and such
795         final String   sortPrefixSeparator = "\t"; //$NON-NLS-1$
796         Map   rowMap = null, newHeadersMap = null;
797 
798         // Force orderedMaps to true if we're sorting using a column in the
799         // input.
800         // See assumption 'c' in the comment-block above.
801         if (columnToSortColumnsBy >= 0) {
802             orderedMaps = true;
803         }
804 
805         if (orderedMaps) {
806             // If we're using ordered maps, then our maps become TreeMaps.
807             rowMap = new TreeMap  (); // Map of the current row
808             newHeadersMap = new TreeMap  (); // New header columns map
809         } else {
810             // Use Apache ListOrderedMap so that columns become ordered by their
811             // position in the data.
812             rowMap = ListOrderedMap.decorate(new HashMap  ());
813             newHeadersMap = ListOrderedMap.decorate(new HashMap  ());
814         }
815         List   columnHeaders = new ArrayList  (); // All column headers
816         // Create column headers of the known columns
817         IPentahoMetaData origMetaData = source.getMetaData();
818         Object  [][] origColHeaders = origMetaData.getColumnHeaders();
819         for (int i = 0; i < origColHeaders[0].length; i++) {
820             if ((i != columnToPivot) && (i != measureColumn)) {
821                 columnHeaders.add(origColHeaders[0][i].toString());
822             }
823         }
824         // Now, we have the starting column headers. Time to start iterating
825         // over the data.
826         Object   colPivotData, colMeasureData, cellData, colToSortByRaw;
827         Object  [] rowData = source.next();
828         String   columnPrefix = null;
829         Map   currentMap = null;
830         while (rowData != null) {
831             colPivotData = rowData[columnToPivot]; // The data we're pivoting to columns
832             if (colPivotData == null) {
833               throw new IllegalArgumentException  (Messages.getString("PentahoDataTransmuter.ERROR_0006_CANNOT_PIVOT_NULL_DATA")); //$NON-NLS-1$
834             }
835             colMeasureData = rowData[measureColumn]; // The value data we're
836             // using as the final set.
837             if (columnToSortColumnsBy >= 0) {
838                 colToSortByRaw = rowData[columnToSortColumnsBy];
839                 if (colToSortByRaw == null) {
840                     throw new IllegalArgumentException  (Messages.getString("PentahoDataTransmuter.ERROR_0007_CANNOT_SORT_NULL_DATA")); //$NON-NLS-1$
841                 }
842                 if (sortDataFormatter != null) {
843                     columnPrefix = sortDataFormatter.format(colToSortByRaw);
844                 } else {
845                     columnPrefix = colToSortByRaw.toString();
846                 }
847             }
848             currentMap = rowMap; // Start at the top...
849             for (int i = 0; i < rowData.length; i++) {
850                 if ((i != columnToPivot) && (i != measureColumn) && (i != columnToSortColumnsBy)) {
851                     // I'm on a data row (like a column header). Find it in the
852                     // row map.
853                     cellData = currentMap.get(rowData[i]);
854                     if (cellData == null) {
855                         // Add the column to the current map of maps
856                         Map   newColumnMap = null;
857                         if (orderedMaps) {
858                             newColumnMap = new TreeMap  ();
859                         } else {
860                             newColumnMap = ListOrderedMap.decorate(new HashMap  ());
861                         }
862 
863                         currentMap.put(rowData[i], newColumnMap);
864                         currentMap = newColumnMap;
865                     } else {
866                         // Found something here - it should be a map.
867                         currentMap = (Map  ) cellData;
868                     }
869                 }
870             }
871             // Done iterating over columns creating other columns. Now, create
872             // (or locate) pivoted data as a column
873             String   formattedPivotData = null;
874             if (pivotDataFormatter != null) {
875                 if (pivotDataFormatter instanceof MessageFormat  ) {
876                     formattedPivotData = pivotDataFormatter.format(new Object  [] { colPivotData });
877                 } else {
878                     formattedPivotData = pivotDataFormatter.format(colPivotData);
879                 }
880             } else {
881                 formattedPivotData = colPivotData.toString();
882             }
883             // Do column sorting based on another input column.
884             if (columnToSortColumnsBy >= 0) {
885                 formattedPivotData = columnPrefix + sortPrefixSeparator + formattedPivotData;
886             }
887             // For this row, look for the pivoted data in newHeaders.
888             Object   header = newHeadersMap.get(formattedPivotData);
889             if (header == null) {
890                 // Create a map containing just the new column headers
891                 newHeadersMap.put(formattedPivotData, ""); //$NON-NLS-1$
892             }
893             // Put the measure data in the final spot in the map
894             currentMap.put(formattedPivotData, colMeasureData);
895             // Get next row
896             rowData = source.next();
897         }
898         // Add the new headers to the columnHeaders list
899         Iterator   hIt = newHeadersMap.keySet().iterator();
900         while (hIt.hasNext()) {
901             columnHeaders.add(hIt.next().toString());
902         }
903 
904         // Create each individual row
905         ArrayList   rows = new ArrayList  ();
906         // The uniqueItems collections allows me to handle null/missing values
907         Collection   uniqueItems = rowMap.keySet();
908         // For each unique item outer-column, iterate and create the rows
909         // recursively
910         Iterator   it = uniqueItems.iterator();
911         List   newCurRow = new ArrayList  ();
912         while (it.hasNext()) {
913             // Iterate over each unique value in the outermost map
914             recurseCreateRow(it.next(), rowMap, rows, newCurRow, newHeadersMap);
915             newCurRow.clear();
916         }
917 
918         // Now, if there was a sort-column specified, we need to remove the
919         // prefix from the
920         // column header before creating the final set of headers.
921         if (columnToSortColumnsBy >= 0) {
922             String   aHeader;
923             int tabIdx;
924             for (int i = 0; i < columnHeaders.size(); i++) {
925                 aHeader = (String  ) columnHeaders.get(i);
926                 tabIdx = aHeader.indexOf(sortPrefixSeparator);
927                 if (tabIdx >= 0) {
928                     columnHeaders.set(i, aHeader.substring(tabIdx + 1));
929                 }
930             }
931         }
932         // Create the final resultset.
933         IPentahoResultSet result = MemoryResultSet.createFromLists(columnHeaders, rows);
934 // System.out.println("*************************After***********************");
935 // System.out.println(dump(result));
936         return result;
937     }
938 
939     private static List   recurseCreateRow(Object   lookup, Map   mapToLookupIn, List   rows, List   curRow, Map   newColumnsMap) {
940       // Recursive method used to create each row.
941       if (curRow == null) {
942           // Starting off with no row, so create it.
943           curRow = new ArrayList  ();
944       }
945       // Look for the unique value in the current map.
946       Object   value = mapToLookupIn.get(lookup);
947 
948       if (value == null) {
949           // If value is null, we're at the end of the chain, and this is a
950           // missing value.
951           curRow.add(null);
952           return null;
953       } else {
954           // Value wasn't null - it must be another map to traverse, or the
955           // measure
956           if (value instanceof Map  ) {
957               // We've got a map here. Add the lookup value to the row, and
958               // iterate over the map
959               curRow.add(lookup);
960               Map   newLkupMap = (Map  ) value; // The newLkupMap is the new map
961               // to iterate over
962               Collection   uniqueItems = newLkupMap.keySet(); // Get the keys
963               // from the
964               // lookup map
965               // Is this a new column? If so, make sure to iterate over the
966               // newColumnsMap to make sure to detect null values.
967               Iterator   it = uniqueItems.iterator();
968               // Check to see if this is a new column. If so, iterate over the
969               // newColumnsMap.
970               // Otherwise, iterate over the map values.
971               Object   obj = it.next();
972               if (newColumnsMap.get(obj.toString()) != null) {
973                   it = newColumnsMap.keySet().iterator();
974               } else {
975                   it = uniqueItems.iterator();
976               }
977               // When going to a new row, set the row up to contain all the
978               // prior columns before the one we're processing.
979               List   beforeValues = new ArrayList  (curRow);
980               beforeValues.remove(beforeValues.size() - 1);
981               // When a new row is added, this is the that gets returned
982               List   addedValues = null;
983               // Iterate over either the new columns, or the existing map and
984               // repeat recursively
985               while (it.hasNext()) {
986                   // Recursively call this method to traverse all the columns
987                   addedValues = recurseCreateRow(it.next(), newLkupMap, rows, curRow, newColumnsMap);
988                   if (addedValues != null) {
989                       // We have a new current row!
990                       curRow.clear();
991                       curRow.addAll(addedValues);
992                   }
993               }
994               // If we added the values (i.e. measures), then add this row to
995               // the list, and return a
996               // new row that had all the preceeding columns in it.
997               if (addedValues == null) {
998                   if (curRow.size() > newColumnsMap.size()) {
999                       rows.add(new ArrayList  (curRow));
1000                  }
1001                  return beforeValues;
1002              } else {
1003                  // No values added - unwind the stack.
1004                  curRow.clear();
1005                  curRow.addAll(beforeValues);
1006                  return null;
1007              }
1008          } else {
1009              // We have an actual value (measure) - add it to the row, and
1010              // iterate.
1011              curRow.add(value);
1012              return null;
1013          }
1014      }
1015    }
1016    
1017    private static boolean isEqual(Object   a, Object   b) {
1018     if ( (a==null) && (b == null)) {
1019       return true;
1020     } else if ( (a == null) || (b == null) ) {
1021       return false;
1022     } else {
1023       return a.equals(b);
1024     }
1025    }
1026    
1027    private static boolean isNewRow(Object  [] thisRow, Object  [] lastRow) {
1028     for(int i=0; i<thisRow.length; i++) {
1029       if (!isEqual(thisRow[i], lastRow[i])) {
1030         return true;
1031       }
1032     }
1033     return false;
1034    }
1035    
1036    private static String   formatPivotData(Object   colPivotData, Format   pivotDataFormatter) {
1037     String   formattedPivotData = null;
1038     if (pivotDataFormatter != null) {
1039         if (pivotDataFormatter instanceof MessageFormat  ) {
1040             formattedPivotData = pivotDataFormatter.format(new Object  [] { colPivotData });
1041         } else {
1042             formattedPivotData = pivotDataFormatter.format(colPivotData);
1043         }
1044     } else {
1045         formattedPivotData = colPivotData.toString();
1046     }
1047     return formattedPivotData;
1048    }
1049
1050    /**
1051     * Marc Batchelor Diatribe
1052     * It's 6am, I've been up all night, but I gotta right this down. So, here goes.
1053     * 
1054     * This crosstab function is similar to the "old" version exception that it works in
1055     * many more cases than the old one did. The only requirement is that either the data 
1056     * is ordered by the left-over columns that will indicate that the row is a continuation
1057     * of the previous row, or the method is handed a column that will be used to find unique
1058     * rows in the result set.
1059     * 
1060     * The old crosstab function used maps for each data element of a row. This only works in
1061     * very certain conditions, but is extremely fragile. But, that approach allowed
1062     * the same row of data to be spread out all over the result set. So, with better capability
1063     * and performance comes a limitation. This can best be seen by attempting to crosstab a 
1064     * simple query from the sample data (and crosstabbing on REGION): 
1065     * 
1066     * select REGION, DEPARTMENT, SUM(ACTUAL) as ACTUAL
1067     * from quadrant_actuals
1068     * group by REGION, DEPARTMENT
1069     * order by REGION, DEPARTMENT
1070
1071     *       
1072     * REGION   DEPARTMENT            ACTUAL
1073     * Central  Executive Management  1776282
1074     * Central  Finance               3106680
1075     *        ... more rows ...
1076     * Eastern  Executive Management  1507580
1077     * Eastern  Finance               3039180
1078     *        ... more rows ...
1079     * Southern Executive Management  1507580
1080     * Southern Finance               3039180
1081     *        ... more rows ...
1082     * 
1083     * Note that when the data is ordered this way, we would have to keep
1084     * coming back to the Executive Management row multiple times to
1085     * fill in the empty holes. In the old version of the crossTab utility, it would
1086     * do this by using a complex series of maps to hold the data not being crosstabbed.
1087     * This worked OK on very simple result sets, but absolutely fell apart once
1088     * the same values appeared in the same row (like a 2 in one column and a 2 in
1089     * another column), or when null values appeared in the data.
1090     * 
1091     * Due to customer requirements, I needed to re-create the crosstab functionality,
1092     * and I chose to do it a little more sensibly. Now, instead of maps of maps, I simply
1093     * traverse the dataset twice. Once to gather all the new column headers, and then
1094     * again to fill out the data. The down side to this approach is that the above data
1095     * set could not be crosstabbed in that format. Crosstabbing the above format
1096     * would result in data that looks like this:
1097
1098     * DEPARTMENT                Central       Eastern     ....
1099     * Executive Management      1776282       ---
1100     * Finance                   3106680       ---
1101     *        ... more rows ...
1102     * Executive Management      ---           1507580
1103     * Finance                   ---           3039180
1104     *        ... more rows ...
1105     * 
1106     * A completely useless crosstab. So, with this new function, a simple change
1107     * needs to be made to the query as follows:
1108     *       
1109     * select REGION, DEPARTMENT, SUM(ACTUAL) as ACTUAL
1110     * from quadrant_actuals
1111     * group by DEPARTMENT, REGION -- Switched the order
1112     * order by DEPARTMENT, REGION -- Switched the order
1113     * 
1114     * By simply making sure that the row to be crosstabbed is not the primary
1115     * sort, the data comes out more sensibly for crosstabbing:
1116     * 
1117     * REGION   DEPARTMENT            ACTUAL
1118     * Central  Executive Management  1776282
1119     * Eastern  Executive Management  1507580
1120     *        ... more rows ...
1121     * Central  Finance               3106680
1122     * Eastern  Finance               3039180
1123     *        ... more rows ...
1124     * 
1125     * In other words, the remaining data that identifies a single row is grouped
1126     * together.
1127     * 
1128     * The other alternative is to pass in a uniqueRowIdentifierColumn. This column
1129     * will be used to determine whether the row has been seen before. If it has,
1130     * then it will grab the already written row, and update it.
1131     * 
1132     * So, in the above example, you would specify the uniqueRowIdentifierColumn as
1133     * column 1 (DEPARTMENT). Note that this doesn't perform as well since every row will 
1134     * result in a map lookup for this column. However, for XML resultsets, there may be
1135     * no other way to accomplish what you need.
1136     *       
1137     * @param source
1138     * @param columnToPivot
1139     * @param measureColumn
1140     * @param columnToSortColumnsBy
1141     * @param pivotDataFormatter
1142     * @param sortDataFormatter
1143     * @param orderedMaps
1144     * @param uniqueRowIdentifierColumn
1145     * @return
1146     */
1147    
1148    public static IPentahoResultSet crossTabOrdered(IPentahoResultSet source, int columnToPivot, int measureColumn, Format   pivotDataFormatter) {
1149      return crossTabOrdered(source, columnToPivot, measureColumn, pivotDataFormatter, true);
1150    }
1151
1152    public static IPentahoResultSet crossTabOrdered(IPentahoResultSet source, int columnToPivot, int measureColumn, Format   pivotDataFormatter, boolean orderedMaps) {
1153      return crossTabOrdered(source, columnToPivot, measureColumn, -1, pivotDataFormatter, null, orderedMaps, -1);
1154    }
1155    
1156    public static IPentahoResultSet crossTabOrdered(IPentahoResultSet source, 
1157        int columnToPivot, 
1158        int measureColumn, 
1159        int columnToSortColumnsBy, 
1160        Format   pivotDataFormatter, 
1161        Format   sortDataFormatter, 
1162        boolean orderedMaps,
1163        int uniqueRowIdentifierColumn) {
1164
1165      // System.out.println("*********************Before********************");
1166     //System.out.println(dump(source));
1167    
1168      // First, do some error checking...
1169      if (source == null) {
1170          throw new IllegalArgumentException  (Messages.getErrorString("PentahoDataTransmuter.ERROR_0002_NULL_DATASET")); //$NON-NLS-1$
1171      }
1172      int sourceColumnCount = source.getColumnCount();
1173      if (columnToPivot > sourceColumnCount) {
1174          throw new IllegalArgumentException  (Messages.getErrorString("PentahoDataTransmuter.ERROR_0003_INVALID_PIVOT_COLUMN")); //$NON-NLS-1$
1175      }
1176      if (measureColumn > sourceColumnCount) {
1177          throw new IllegalArgumentException  (Messages.getErrorString("PentahoDataTransmuter.ERROR_0004_INVALID_MEASURE_COLUMN")); //$NON-NLS-1$
1178      }
1179      if (columnToSortColumnsBy > sourceColumnCount) {
1180          throw new IllegalArgumentException  (Messages.getErrorString("PentahoDataTransmuter.ERROR_0004_INVALID_SORT_COLUMN")); //$NON-NLS-1$
1181      }
1182      if (uniqueRowIdentifierColumn > sourceColumnCount) {
1183        throw new IllegalArgumentException  (Messages.getErrorString("PentahoDataTransmuter.ERROR_0008_INVALID_UNIQUE_COLUMN")); //$NON-NLS-1$
1184      }
1185      
1186      // Now, setup so variables and such
1187      final String   sortPrefixSeparator = "\t"; //$NON-NLS-1$
1188      Map   newHeadersMap = null;
1189      
1190      Map   uniqueColumnIdentifierMap = null;
1191      
1192      if (uniqueRowIdentifierColumn >=0) {
1193        uniqueColumnIdentifierMap = new HashMap  ();
1194      }
1195
1196      int uniqueRowIdentifierColumnPostShift = -1;
1197      
1198      // Force orderedMaps to true if we're sorting using a column in the
1199      // input.
1200      // See assumption 'c' in the comment-block above.
1201      if (columnToSortColumnsBy >= 0) {
1202          orderedMaps = true;
1203      }
1204
1205      if (orderedMaps) {
1206          // If we're using ordered maps, then our maps become TreeMaps.
1207          newHeadersMap = new TreeMap  (); // New header columns map
1208      } else {
1209          // Use Apache ListOrderedMap so that columns become ordered by their
1210          // position in the data.
1211          newHeadersMap = ListOrderedMap.decorate(new HashMap  ());
1212      }
1213      List   columnHeaders = new ArrayList  (); // All column headers
1214      // Create column headers of the known columns
1215      IPentahoMetaData origMetaData = source.getMetaData();
1216      Object  [][] origColHeaders = origMetaData.getColumnHeaders();
1217
1218      for (int i = 0; i < origColHeaders[0].length; i++) {
1219          if ((i != columnToPivot) && (i != measureColumn) && ((i != columnToSortColumnsBy)) ) {
1220              columnHeaders.add(origColHeaders[0][i].toString());
1221              if (i == uniqueRowIdentifierColumn) {
1222                uniqueRowIdentifierColumnPostShift = columnHeaders.size()-1;
1223              }
1224          }
1225      }
1226      int baseColumnsCount = columnHeaders.size();
1227      // Now, we have the starting column headers. Time to start iterating
1228      // over the data.
1229      Object   colPivotData, colMeasureData, colToSortByRaw;
1230      Object  [] rowData = source.next();
1231      String   columnPrefix = null;
1232      
1233      
1234      /*
1235       * First, find out what the new columns will be - this will traverse 
1236       * the dataset gathering the unique values for the column containing
1237       * the values that will become the new columns.
1238       * 
1239       */
1240      Map   newColumnHeadersRaw = new HashMap  ();
1241      Integer   placeHolder = new Integer  (0);
1242      while (rowData != null) {
1243        colPivotData = rowData[columnToPivot]; // The data we're pivoting to columns
1244        if (colPivotData == null) {
1245          throw new IllegalArgumentException  (Messages.getString("PentahoDataTransmuter.ERROR_0006_CANNOT_PIVOT_NULL_DATA")); //$NON-NLS-1$
1246        }
1247        
1248        // newColumnHeadersRaw.add(colPivotData);
1249        if (columnToSortColumnsBy >= 0) {
1250          colToSortByRaw = rowData[columnToSortColumnsBy];
1251          if (colToSortByRaw == null) {
1252              throw new IllegalArgumentException  (Messages.getString("PentahoDataTransmuter.ERROR_0007_CANNOT_SORT_NULL_DATA")); //$NON-NLS-1$
1253          }
1254          if (sortDataFormatter != null) {
1255              columnPrefix = sortDataFormatter.format(colToSortByRaw);
1256          } else {
1257              columnPrefix = colToSortByRaw.toString();
1258          }
1259        }
1260        
1261        if (!newColumnHeadersRaw.containsKey(colPivotData)) {
1262          newColumnHeadersRaw.put(colPivotData, placeHolder);
1263          // Do column sorting based on another input column.
1264          String   formattedPivotData = formatPivotData(colPivotData, pivotDataFormatter);
1265          if (columnToSortColumnsBy >= 0) {
1266              formattedPivotData = columnPrefix + sortPrefixSeparator + formattedPivotData;
1267          }
1268          newHeadersMap.put(formattedPivotData, colPivotData );
1269        }
1270        
1271        rowData = source.next();
1272      }
1273      source.beforeFirst();
1274
1275      // Now, we have all the new headers. Next, update the rawHeaders with the 
1276      // target column number.
1277
1278      Iterator   it = newHeadersMap.entrySet().iterator();
1279      int columnIndex = columnHeaders.size(); // start adding columns where the fixed columns leave off.
1280      while (it.hasNext()) {
1281        Map.Entry   me = (Map.Entry  )it.next();
1282        newColumnHeadersRaw.put(me.getValue(), new Integer  (columnIndex));
1283        columnHeaders.add(formatPivotData(me.getValue(), pivotDataFormatter));
1284        columnIndex++;
1285      }
1286      
1287      // OK - we now know the new column headers, and the place they'll
1288      // appear in all the rows. Now, it's time to construct each row.
1289      
1290      int columnCount = columnHeaders.size();
1291      int rowPos;
1292      MemoryResultSet mrs = new MemoryResultSet();
1293      MemoryMetaData md = new MemoryMetaData(columnHeaders);
1294      mrs.setMetaData(md);
1295      Object  [] thisRow = new Object  [baseColumnsCount];
1296
1297      Object  [] currentRow = new Object  [columnCount];
1298      rowData = source.next();
1299      boolean isFirstRow = true;
1300      while (rowData != null) {
1301        colMeasureData = rowData[measureColumn]; // The value data we're
1302        rowPos = 0;
1303        for (int i = 0; i < rowData.length; i++) {
1304          if ((i != columnToPivot) && (i != measureColumn) && (i != columnToSortColumnsBy)) {
1305            // This is data - put it in the correct spot in the row
1306            thisRow[rowPos] = rowData[i];
1307            rowPos++;
1308          }
1309        }
1310        // OK - we got the base data. Is this a new row, or a continuation
1311        // of the previous row.
1312        
1313        boolean newRow = true;
1314        
1315        Object   uniqueRowIdentifierValue = null;
1316        Integer   previousRowNumber = null;
1317        
1318        // First, did they provide us with a hint.
1319        if (uniqueRowIdentifierColumn>=0) {
1320          uniqueRowIdentifierValue = rowData[uniqueRowIdentifierColumn]!=null?rowData[uniqueRowIdentifierColumn]:"_NULL_VALUE_"; //$NON-NLS-1$
1321          previousRowNumber = (Integer  )uniqueColumnIdentifierMap.get(uniqueRowIdentifierValue); 
1322          if (previousRowNumber != null) {
1323            addIfNeeded(currentRow, mrs, uniqueColumnIdentifierMap, uniqueRowIdentifierColumnPostShift);
1324            currentRow = mrs.getDataRow(previousRowNumber.intValue());
1325            newRow = false;
1326          }
1327        }
1328        
1329        newRow = (newRow && !isFirstRow && isNewRow(thisRow, currentRow));
1330        if (newRow) {
1331          addIfNeeded(currentRow, mrs, uniqueColumnIdentifierMap, uniqueRowIdentifierColumnPostShift);
1332          // Create new current row - the row inprogress.
1333          currentRow = new Object  [columnCount];
1334          // Now, copy thisRow to currentRow.
1335          System.arraycopy(thisRow, 0, currentRow, 0, thisRow.length);
1336        } else if (isFirstRow) {
1337          System.arraycopy(thisRow, 0, currentRow, 0, thisRow.length);
1338        }
1339        isFirstRow = false;
1340        colPivotData = rowData[columnToPivot]; // The data we're pivoting to columns
1341        Integer   targetColumn = (Integer  )newColumnHeadersRaw.get(colPivotData);
1342        currentRow[targetColumn.intValue()] = colMeasureData;
1343        
1344        // Get next row
1345        rowData = source.next();
1346      }
1347      addIfNeeded(currentRow, mrs, uniqueColumnIdentifierMap, uniqueRowIdentifierColumnPostShift);
1348      // System.out.println("*************************After***********************");
1349      // System.out.println(dump(mrs));
1350      return mrs;
1351    }
1352    
1353    private static void addIfNeeded(Object  [] currentRow, MemoryResultSet mrs, Map   uniqueColumnIdentifierMap, int uniqueRowIdentifierColumnPostShift) {
1354      if (uniqueRowIdentifierColumnPostShift >=0) {
1355        Object   tmpValue = currentRow[uniqueRowIdentifierColumnPostShift]!=null?currentRow[uniqueRowIdentifierColumnPostShift]:"_NULL_VALUE_"; //$NON-NLS-1$
1356        if (!uniqueColumnIdentifierMap.containsKey(tmpValue)) {
1357          int addedRow = mrs.addRow(currentRow);
1358          uniqueColumnIdentifierMap.put(tmpValue, new Integer  (addedRow));
1359        }
1360      } else {
1361        mrs.addRow(currentRow);
1362      }
1363    }
1364
1365}
1366
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags