001/*****************************************************************************
002 * Copyright by The HDF Group.                                               *
003 * Copyright by the Board of Trustees of the University of Illinois.         *
004 * All rights reserved.                                                      *
005 *                                                                           *
006 * This file is part of the HDF Java Products distribution.                  *
007 * The full copyright notice, including terms governing use, modification,   *
008 * and redistribution, is contained in the file COPYING.                     *
009 * COPYING can be found at the root of the source code distribution tree.    *
010 * If you do not have access to this file, you may request a copy from       *
011 * help@hdfgroup.org.                                                        *
012 ****************************************************************************/
013
014package hdf.object;
015
016import java.lang.reflect.Array;
017import java.util.Vector;
018
019/**
020 * The abstract class provides general APIs to create and manipulate dataset
021 * objects, and retrieve dataset properties, datatype and dimension sizes.
022 * <p>
023 * This class provides two convenient functions, read()/write(), to read/write
024 * data values. Reading/writing data may take many library calls if we use the
 * library APIs directly. The read() and write() functions hide all the details of
026 * these calls from users.
027 * <p>
028 * For more details on dataset,
029 * see <b> <a href="https://www.hdfgroup.org/HDF5/doc/UG/HDF5_Users_Guide-Responsive%20HTML5/index.html">HDF5 User's Guide</a> </b>
030 * <p>
031 *
032 * @see hdf.object.ScalarDS
033 * @see hdf.object.CompoundDS
034 *
035 * @version 1.1 9/4/2007
036 * @author Peter X. Cao
037 */
038public abstract class Dataset extends HObject {
    /** Version identifier used by Java serialization. */
    private static final long serialVersionUID    = -3360885430038261178L;

    /** SLF4J logger for this class; getData() writes trace messages to it. */
    private final static org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(Dataset.class);
042
    /**
     * The memory buffer that holds the raw data of the dataset.
     * <p>
     * getData() stores the result of read() here; clear() releases it.
     */
    protected Object          data;

    /**
     * The number of dimensions of the dataset.
     * <p>
     * The constructor of this class sets it to 0. The getters of this class
     * call init() before answering whenever the value is negative.
     */
    protected int             rank;

    /**
     * The current dimension sizes of the dataset.
     */
    protected long[]          dims;

    /**
     * The max dimension sizes of the dataset.
     * <p>
     * When this array is null, getMaxDims() returns {@link #dims} instead.
     */
    protected long[]          maxDims;

    /**
     * Array that contains the number of data points selected (for read/write)
     * in each dimension.
     * <p>
     * The selected size must be less than or equal to the current dimension size.
     * A subset of a rectangle selection is defined by the starting position and
     * selected sizes.
     * <p>
     * For example, if a 4 X 5 dataset is as follows:
     *
     * <pre>
     *     0,  1,  2,  3,  4
     *    10, 11, 12, 13, 14
     *    20, 21, 22, 23, 24
     *    30, 31, 32, 33, 34
     * long[] dims = {4, 5};
     * long[] startDims = {1, 2};
     * long[] selectedDims = {3, 3};
     * then the following subset is selected by the startDims and selectedDims above:
     *     12, 13, 14
     *     22, 23, 24
     *     32, 33, 34
     * </pre>
     * <p>
     * getData() multiplies the entries of this array to compute the number of
     * data points in the memory buffer.
     */
    protected long[]          selectedDims;

    /**
     * The starting position of each dimension of a selected subset. With both
     * the starting position and selected sizes, the subset of a rectangle
     * selection is fully defined.
     */
    protected long[]          startDims;

    /**
     * Array that contains the indices of the dimensions selected for display.
     * <p>
     * <B>selectedIndex[] is provided for two purposes:</B>
     * <OL>
     * <LI>
     * selectedIndex[] is used to indicate the order of dimensions for display,
     * i.e. selectedIndex[0] = row, selectedIndex[1] = column and
     * selectedIndex[2] = depth. For example, for a four dimension dataset, if
     * selectedIndex[] is {1, 2, 3}, then dim[1] is selected as row index,
     * dim[2] is selected as column index and dim[3] is selected as depth index.
     * <LI>
     * selectedIndex[] is also used to select dimensions for display for
     * datasets with three or more dimensions. We assume that applications such
     * as HDFView can only display data up to three dimensions (a 2D
     * spreadsheet/image with a third dimension that the 2D spreadsheet/image is
     * cut from). For datasets with more than three dimensions, we need
     * selectedIndex[] to store which three dimensions are chosen for display.
     * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3},
     * then dim[1] is selected as row index, dim[2] is selected as column index
     * and dim[3] is selected as depth index. dim[0] is not selected. Its
     * location is fixed at 0 by default.
     * </OL>
     * <p>
     * The constructor of this class allocates three entries and initializes
     * them to {0, 1, 2}. The reference is final; the entries are not.
     */
    protected final int[]     selectedIndex;

    /**
     * The number of elements to move from the start location in each dimension.
     * For example, if selectedStride[0] = 2, every other data point is selected
     * along dim[0].
     * <p>
     * getStride() creates this array on demand, with every entry set to 1,
     * when it is still null and the rank is positive.
     */
    protected long[]          selectedStride;

    /**
     * The array of dimension sizes for a chunk.
     */
    protected long[]          chunkSize;
133
    /** The compression information. The constructor of this class sets it to "NONE". */
    protected String          compression;

    /**
     * Text prefix for a GZIP compression description.
     * <p>
     * NOTE(review): presumably the compression level is appended to this
     * prefix when {@link #compression} is built; the usage is not visible in
     * this part of the class.
     */
    public final static String          compression_gzip_txt = "GZIP: level = ";

    /** The filters information. The constructor of this class sets it to "NONE". */
    protected String          filters;

    /**
     * The storage layout information. It is not assigned by the constructor of
     * this class, so it starts out as null.
     */
    protected String          storage_layout;

    /** The storage information. The constructor of this class sets it to "NONE". */
    protected String          storage;

    /** The datatype object of the dataset. */
    protected Datatype        datatype;

    /**
     * Array of strings that represent the dimension names. It is null if
     * dimension names do not exist.
     */
    protected String[]        dimNames;

    /** Flag to indicate if the byte[] array is converted to strings. */
    protected boolean         convertByteToString = true;

    /**
     * Flag to indicate if data values are loaded into memory. It is set by
     * getData() and reset by clear() and clearData().
     */
    protected boolean         isDataLoaded        = false;

    /**
     * The number of data points in the memory buffer. getData() recomputes it
     * as the product of the selected dimension sizes.
     */
    protected long            nPoints             = 1;

    /**
     * The data buffer that contains the raw data directly reading from file
     * (before any data conversion). getData() sets it to the buffer returned
     * by read().
     */
    protected Object          originalBuf         = null;

    /**
     * The array that holds the converted data of unsigned C-type integers.
     * <p>
     * For example, suppose that the original data is an array of unsigned
     * 16-bit short integers. Since Java does not support unsigned integers, the
     * data is converted to an array of 32-bit signed integers. In that case, the
     * converted buffer is the array of 32-bit signed integers.
     */
    protected Object          convertedBuf        = null;

    /**
     * Flag to indicate if the enum data is converted to strings.
     */
    protected boolean         enumConverted       = false;
185
186    /**
187     * Constructs a Dataset object with a given file, name and path.
188     *
189     * @param theFile
190     *            the file that contains the dataset.
191     * @param name
192     *            the name of the Dataset, e.g. "dset1".
193     * @param path
194     *            the full group path of this Dataset, e.g. "/arrays/".
195     */
    public Dataset(FileFormat theFile, String name, String path) {
        // Delegate to the four-argument constructor, passing null as the oid.
        this(theFile, name, path, null);
    }
199
200    /**
     * @deprecated Not for public use in the future. <br>
     *             Use {@link #Dataset(FileFormat, String, String)} instead.
203     *
204     * @param theFile
205     *            the file that contains the dataset.
206     * @param name
207     *            the name of the Dataset, e.g. "dset1".
208     * @param path
209     *            the full group path of this Dataset, e.g. "/arrays/".
210     * @param oid
211     *            the oid of this Dataset.
212     */
213    @Deprecated
214    public Dataset(FileFormat theFile, String name, String path, long[] oid) {
215        super(theFile, name, path, oid);
216
217        rank = 0;
218        data = null;
219        dims = null;
220        maxDims = null;
221        selectedDims = null;
222        startDims = null;
223        selectedStride = null;
224        chunkSize = null;
225        compression = "NONE";
226        filters = "NONE";
227        storage = "NONE";
228        dimNames = null;
229
230        selectedIndex = new int[3];
231        selectedIndex[0] = 0;
232        selectedIndex[1] = 1;
233        selectedIndex[2] = 2;
234    }
235
236    /**
237     * Clears memory held by the dataset, such as the data buffer.
238     */
239    @SuppressWarnings("rawtypes")
240    public void clear() {
241        if (data != null) {
242            if (data instanceof Vector) {
243                ((Vector) data).setSize(0);
244            }
245            data = null;
246            originalBuf = null;
247            convertedBuf = null;
248        }
249        isDataLoaded = false;
250    }
251
252    /**
253     * Retrieves datatype and dataspace information from file and sets the
254     * dataset in memory.
255     * <p>
256     * The init() is designed to support lazy operation in a dataset object. When
257     * a data object is retrieved from file, the datatype, dataspace and raw
258     * data are not loaded into memory. When it is asked to read the raw data
259     * from file, init() is first called to get the datatype and dataspace
260     * information, then load the raw data from file.
261     * <p>
262     * init() is also used to reset the selection of a dataset (start, stride and
263     * count) to the default, which is the entire dataset for 1D or 2D datasets.
     * In the following example, init() at step 1) retrieves datatype and
     * dataspace information from file. getData() at step 3) reads only one data
     * point. init() at step 4) resets the selection to the whole dataset.
     * clearData() at step 5) marks the memory buffer as stale, so that getData()
     * at step 6) reads the values of the whole dataset into memory.
268     *
269     * <pre>
270     * dset = (Dataset) file.get(NAME_DATASET);
271     *
272     * // 1) get datatype and dataspace information from file
273     * dset.init();
274     * rank = dset.getRank(); // rank = 2, a 2D dataset
275     * count = dset.getSelectedDims();
276     * start = dset.getStartDims();
277     * dims = dset.getDims();
278     *
279     * // 2) select only one data point
280     * for (int i = 0; i &lt; rank; i++) {
     *     start[i] = 0;
282     *     count[i] = 1;
283     * }
284     *
285     * // 3) read one data point
286     * data = dset.getData();
287     *
288     * // 4) reset selection to the whole dataset
289     * dset.init();
290     *
291     * // 5) clean the memory data buffer
292     * dset.clearData();
293     *
294     * // 6) Read the whole dataset
295     * data = dset.getData();
296     * </pre>
297     */
    // Abstract hook: the getters of this class call it before answering whenever rank is negative.
    public abstract void init();
299
300    /**
301     * Returns the rank (number of dimensions) of the dataset.
302     *
303     * @return the number of dimensions of the dataset.
304     */
305    public final int getRank() {
306        if (rank < 0) init();
307
308        return rank;
309    }
310
311    /**
312     * Returns the array that contains the dimension sizes of the dataset.
313     *
314     * @return the dimension sizes of the dataset.
315     */
316    public final long[] getDims() {
317        if (rank < 0) init();
318
319        return dims;
320    }
321
322    /**
323     * Returns the array that contains the max dimension sizes of the dataset.
324     *
325     * @return the max dimension sizes of the dataset.
326     */
327    public final long[] getMaxDims() {
328        if (rank < 0) init();
329
330        if (maxDims == null) return dims;
331
332        return maxDims;
333    }
334
335    /**
336     * Returns the dimension sizes of the selected subset.
337     * <p>
338     * The SelectedDims is the number of data points of the selected subset.
339     * Applications can use this array to change the size of selected subset.
340     *
341     * The selected size must be less than or equal to the current dimension size.
342     * Combined with the starting position, selected sizes and stride, the
343     * subset of a rectangle selection is fully defined.
344     * <p>
345     * For example, if a 4 X 5 dataset is as follows:
346     *
347     * <pre>
348     *     0,  1,  2,  3,  4
349     *    10, 11, 12, 13, 14
350     *    20, 21, 22, 23, 24
351     *    30, 31, 32, 33, 34
352     * long[] dims = {4, 5};
353     * long[] startDims = {1, 2};
354     * long[] selectedDims = {3, 3};
355     * long[] selectedStride = {1, 1};
356     * then the following subset is selected by the startDims and selectedDims
357     *     12, 13, 14
358     *     22, 23, 24
359     *     32, 33, 34
360     * </pre>
361     *
362     * @return the dimension sizes of the selected subset.
363     */
364    public final long[] getSelectedDims() {
365        if (rank < 0) init();
366
367        return selectedDims;
368    }
369
370    /**
371     * Returns the starting position of a selected subset.
372     * <p>
373     * Applications can use this array to change the starting position of a
374     * selection. Combined with the selected dimensions, selected sizes and
375     * stride, the subset of a rectangle selection is fully defined.
376     * <p>
377     * For example, if a 4 X 5 dataset is as follows:
378     *
379     * <pre>
380     *     0,  1,  2,  3,  4
381     *    10, 11, 12, 13, 14
382     *    20, 21, 22, 23, 24
383     *    30, 31, 32, 33, 34
384     * long[] dims = {4, 5};
385     * long[] startDims = {1, 2};
386     * long[] selectedDims = {3, 3};
387     * long[] selectedStride = {1, 1};
388     * then the following subset is selected by the startDims and selectedDims
389     *     12, 13, 14
390     *     22, 23, 24
391     *     32, 33, 34
392     * </pre>
393     *
394     * @return the starting position of a selected subset.
395     */
396    public final long[] getStartDims() {
397        if (rank < 0) init();
398
399        return startDims;
400    }
401
402    /**
403     * Returns the selectedStride of the selected dataset.
404     * <p>
405     * Applications can use this array to change how many elements to move in
406     * each dimension.
407     *
408     * Combined with the starting position and selected sizes, the subset of a
409     * rectangle selection is defined.
410     * <p>
411     * For example, if a 4 X 5 dataset is as follows:
412     *
413     * <pre>
414     *     0,  1,  2,  3,  4
415     *    10, 11, 12, 13, 14
416     *    20, 21, 22, 23, 24
417     *    30, 31, 32, 33, 34
418     * long[] dims = {4, 5};
419     * long[] startDims = {0, 0};
420     * long[] selectedDims = {2, 2};
421     * long[] selectedStride = {2, 3};
422     * then the following subset is selected by the startDims and selectedDims
423     *     0,   3
424     *     20, 23
425     * </pre>
426     *
427     * @return the selectedStride of the selected dataset.
428     */
429    public final long[] getStride() {
430        if (rank < 0) init();
431
432        if (rank <= 0) {
433            return null;
434        }
435
436        if (selectedStride == null) {
437            selectedStride = new long[rank];
438            for (int i = 0; i < rank; i++) {
439                selectedStride[i] = 1;
440            }
441        }
442
443        return selectedStride;
444    }
445
446    /**
447     * Sets the flag that indicates if a byte array is converted to a string
448     * array.
449     * <p>
450     * In a string dataset, the raw data from file is stored in a byte array. By
451     * default, this byte array is converted to an array of strings. For a large
452     * dataset (e.g. more than one million strings), the conversion takes a long
453     * time and requires a lot of memory space to store the strings. In some
454     * applications, such a conversion can be delayed. For example, A GUI
455     * application may convert only the part of the strings that is visible to the
456     * users, not the entire data array.
457     * <p>
458     * setConvertByteToString(boolean b) allows users to set the flag so that
459     * applications can choose to perform the byte-to-string conversion or not.
460     * If the flag is set to false, the getData() returns an array of byte
461     * instead of an array of strings.
462     *
463     * @param b
464     *            convert bytes to strings if b is true; otherwise, if false, do
465     *            not convert bytes to strings.
466     */
467    public final void setConvertByteToString(boolean b) {
468        convertByteToString = b;
469    }
470
471    /**
472     * Returns the flag that indicates if a byte array is converted to a string
473     * array.
474     *
475     * @return true if byte array is converted to string; otherwise, returns
476     *         false if there is no conversion.
477     */
478    public final boolean getConvertByteToString() {
479        return convertByteToString;
480    }
481
482    /**
483     * Reads the data from file.
484     * <p>
485     * read() reads the data from file to a memory buffer and returns the memory
486     * buffer. The dataset object does not hold the memory buffer. To store the
487     * memory buffer in the dataset object, one must call getData().
488     * <p>
489     * By default, the whole dataset is read into memory. Users can also select
490     * a subset to read. Subsetting is done in an implicit way.
491     * <p>
492     * <b>How to Select a Subset</b>
493     * <p>
494     * A selection is specified by three arrays: start, stride and count.
495     * <ol>
496     * <li>start: offset of a selection
497     * <li>stride: determines how many elements to move in each dimension
498     * <li>count: number of elements to select in each dimension
499     * </ol>
500     * getStartDims(), getStride() and getSelectedDims() returns the start,
501     * stride and count arrays respectively. Applications can make a selection
502     * by changing the values of the arrays.
503     * <p>
504     * The following example shows how to make a subset. In the example, the
505     * dataset is a 4-dimensional array of [200][100][50][10], i.e. dims[0]=200;
506     * dims[1]=100; dims[2]=50; dims[3]=10; <br>
507     * We want to select every other data point in dims[1] and dims[2]
508     *
509     * <pre>
510     * int rank = dataset.getRank(); // number of dimensions of the dataset
511     * long[] dims = dataset.getDims(); // the dimension sizes of the dataset
512     * long[] selected = dataset.getSelectedDims(); // the selected size of the dataset
513     * long[] start = dataset.getStartDims(); // the offset of the selection
514     * long[] stride = dataset.getStride(); // the stride of the dataset
515     * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for display
516     *
517     * // select dim1 and dim2 as 2D data for display, and slice through dim0
518     * selectedIndex[0] = 1;
519     * selectedIndex[1] = 2;
     * selectedIndex[2] = 0;
521     *
522     * // reset the selection arrays
523     * for (int i = 0; i &lt; rank; i++) {
524     *     start[i] = 0;
525     *     selected[i] = 1;
526     *     stride[i] = 1;
527     * }
528     *
529     * // set stride to 2 on dim1 and dim2 so that every other data point is
530     * // selected.
531     * stride[1] = 2;
532     * stride[2] = 2;
533     *
534     * // set the selection size of dim1 and dim2
535     * selected[1] = dims[1] / stride[1];
     * selected[2] = dims[2] / stride[2];
537     *
538     * // when dataset.getData() is called, the selection above will be used since
539     * // the dimension arrays are passed by reference. Changes of these arrays
540     * // outside the dataset object directly change the values of these array
541     * // in the dataset object.
542     * </pre>
543     * <p>
544     * For ScalarDS, the memory data buffer is a one-dimensional array of byte,
545     * short, int, float, double or String type based on the datatype of the
546     * dataset.
547     * <p>
     * For CompoundDS, the memory data object is a java.util.List object. Each
549     * element of the list is a data array that corresponds to a compound field.
550     * <p>
551     * For example, if compound dataset "comp" has the following nested
552     * structure, and member datatypes
553     *
554     * <pre>
555     * comp --&gt; m01 (int)
556     * comp --&gt; m02 (float)
557     * comp --&gt; nest1 --&gt; m11 (char)
558     * comp --&gt; nest1 --&gt; m12 (String)
559     * comp --&gt; nest1 --&gt; nest2 --&gt; m21 (long)
560     * comp --&gt; nest1 --&gt; nest2 --&gt; m22 (double)
561     * </pre>
562     *
563     * getData() returns a list of six arrays: {int[], float[], char[],
564     * String[], long[] and double[]}.
565     *
566     * @return the data read from file.
567     *
568     * @see #getData()
569     *
570     * @throws Exception if object can not be read
571     * @throws OutOfMemoryError if memory is exhausted
572     */
    // Called by getData() in this class when the data has not been loaded yet.
    public abstract Object read() throws Exception, OutOfMemoryError;
574
575    /**
576     * Reads the raw data of the dataset from file to a byte array.
577     * <p>
578     * readBytes() reads raw data to an array of bytes instead of array of its
579     * datatype. For example, for a one-dimension 32-bit integer dataset of
580     * size 5, readBytes() returns a byte array of size 20 instead of an
581     * int array of 5.
582     * <p>
583     * readBytes() can be used to copy data from one dataset to another
584     * efficiently because the raw data is not converted to its native type, it
585     * saves memory space and CPU time.
586     *
587     * @return the byte array of the raw data.
588     *
589     * @throws Exception if data can not be read
590     */
    // Abstract: nothing in the visible part of this class calls readBytes() or caches its result.
    public abstract byte[] readBytes() throws Exception;
592
593    /**
594     * Writes a memory buffer to the dataset in file.
595     *
596     * @param buf
597     *            the data to write
598     *
599     * @throws Exception if data can not be written
600     */
    // The no-argument write() of this class forwards the in-memory buffer to this method.
    public abstract void write(Object buf) throws Exception;
602
603    /**
604     * Writes the memory buffer of this dataset to file.
605     *
606     * @throws Exception if buffer can not be written
607     */
608    public final void write() throws Exception {
609        if (data != null) {
610            write(data);
611        }
612    }
613
614    /**
615     * Creates a new dataset and writes the data buffer to the new dataset.
616     * <p>
617     * This function allows applications to create a new dataset for a given
618     * data buffer. For example, users can select a specific interesting part
619     * from a large image and create a new image with the selection.
620     * <p>
621     * The new dataset retains the datatype and dataset creation properties of
622     * this dataset.
623     *
624     * @param pgroup
625     *            the group which the dataset is copied to.
626     * @param name
627     *            the name of the new dataset.
628     * @param dims
     *            the dimension sizes of the new dataset.
630     * @param data
631     *            the data values of the subset to be copied.
632     *
633     * @return the new dataset.
634     *
635     * @throws Exception if dataset can not be copied
636     */
    // Note: the parameters dims and data shadow the fields of the same name in this class.
    public abstract Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception;
638
639    /**
640     * Returns the datatype object of the dataset.
641     *
642     * @return the datatype object of the dataset.
643     */
    // Abstract although a datatype field is declared in this class; whether implementations
    // store their result in that field is not visible here.
    public abstract Datatype getDatatype();
645
646    /**
647     * Returns the data buffer of the dataset in memory.
648     * <p>
649     * If data is already loaded into memory, returns the data; otherwise, calls
650     * read() to read data from file into a memory buffer and returns the memory
651     * buffer.
652     * <p>
653     * By default, the whole dataset is read into memory. Users can also select
654     * a subset to read. Subsetting is done in an implicit way.
655     * <p>
656     * <b>How to Select a Subset</b>
657     * <p>
658     * A selection is specified by three arrays: start, stride and count.
659     * <ol>
660     * <li>start: offset of a selection
661     * <li>stride: determines how many elements to move in each dimension
662     * <li>count: number of elements to select in each dimension
663     * </ol>
664     * getStartDims(), getStride() and getSelectedDims() returns the start,
665     * stride and count arrays respectively. Applications can make a selection
666     * by changing the values of the arrays.
667     * <p>
668     * The following example shows how to make a subset. In the example, the
669     * dataset is a 4-dimensional array of [200][100][50][10], i.e. dims[0]=200;
670     * dims[1]=100; dims[2]=50; dims[3]=10; <br>
671     * We want to select every other data point in dims[1] and dims[2]
672     *
673     * <pre>
674     * int rank = dataset.getRank(); // number of dimensions of the dataset
675     * long[] dims = dataset.getDims(); // the dimension sizes of the dataset
     * long[] selected = dataset.getSelectedDims(); // the selected size of the dataset
677     * long[] start = dataset.getStartDims(); // the offset of the selection
678     * long[] stride = dataset.getStride(); // the stride of the dataset
679     * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for display
680     *
681     * // select dim1 and dim2 as 2D data for display,and slice through dim0
682     * selectedIndex[0] = 1;
683     * selectedIndex[1] = 2;
     * selectedIndex[2] = 0;
685     *
686     * // reset the selection arrays
687     * for (int i = 0; i &lt; rank; i++) {
688     *     start[i] = 0;
689     *     selected[i] = 1;
690     *     stride[i] = 1;
691     * }
692     *
693     * // set stride to 2 on dim1 and dim2 so that every other data point is
694     * // selected.
695     * stride[1] = 2;
696     * stride[2] = 2;
697     *
698     * // set the selection size of dim1 and dim2
699     * selected[1] = dims[1] / stride[1];
     * selected[2] = dims[2] / stride[2];
701     *
702     * // when dataset.getData() is called, the selection above will be used since
703     * // the dimension arrays are passed by reference. Changes of these arrays
704     * // outside the dataset object directly change the values of these array
705     * // in the dataset object.
706     * </pre>
707     * <p>
708     * For ScalarDS, the memory data buffer is a one-dimensional array of byte,
709     * short, int, float, double or String type based on the datatype of the
710     * dataset.
711     * <p>
     * For CompoundDS, the memory data object is a java.util.List object. Each
713     * element of the list is a data array that corresponds to a compound field.
714     * <p>
715     * For example, if compound dataset "comp" has the following nested
716     * structure, and member datatypes
717     *
718     * <pre>
719     * comp --&gt; m01 (int)
720     * comp --&gt; m02 (float)
721     * comp --&gt; nest1 --&gt; m11 (char)
722     * comp --&gt; nest1 --&gt; m12 (String)
723     * comp --&gt; nest1 --&gt; nest2 --&gt; m21 (long)
724     * comp --&gt; nest1 --&gt; nest2 --&gt; m22 (double)
725     * </pre>
726     *
727     * getData() returns a list of six arrays: {int[], float[], char[],
728     * String[], long[] and double[]}.
729     *
730     * @return the memory buffer of the dataset.
731     *
732     * @throws Exception if object can not be read
733     * @throws OutOfMemoryError if memory is exhausted
734     */
735    public final Object getData() throws Exception, OutOfMemoryError {
736        if (!isDataLoaded) {
737            log.trace("getData: read");
738            data = read(); // load the data;
739            originalBuf = data;
740            isDataLoaded = true;
741            nPoints = 1;
742            log.trace("getData: selectedDims length={}",selectedDims.length);
743            for (int j = 0; j < selectedDims.length; j++) {
744                nPoints *= selectedDims[j];
745            }
746            log.trace("getData: read {}", nPoints);
747        }
748
749        return data;
750    }
751
752    /**
753     * @deprecated Not for public use in the future.
754     *             <p>
755     *             setData() is not safe to use because it changes memory buffer
756     *             of the dataset object. Dataset operations such as write/read
757     *             will fail if the buffer type or size is changed.
758     *
759     * @param d  the object data
760     */
761    @Deprecated
762    public final void setData(Object d) {
763        data = d;
764    }
765
766    /**
     * Marks the current data buffer in memory as stale and forces the next
     * getData() to load the data from file.
     * <p>
     * The function getData() loads data from file into memory only if the data
     * is not loaded yet. If data is already in memory, getData() just returns the
     * memory buffer. Sometimes we want to force getData() to re-read data from
     * file. For example, when the selection is changed, we need to re-read the
     * data.
774     *
775     * @see #getData()
776     * @see #read()
777     */
778    public void clearData() {
779        isDataLoaded = false;
780    }
781
782    /**
783     * Returns the dimension size of the vertical axis.
784     *
785     * <p>
786     * This function is used by GUI applications such as HDFView. GUI
787     * applications display a dataset in a 2D table or 2D image. The display
788     * order is specified by the index array of selectedIndex as follow:
789     * <dl>
790     * <dt>selectedIndex[0] -- height</dt>
791     * <dd>The vertical axis</dd>
792     * <dt>selectedIndex[1] -- width</dt>
793     * <dd>The horizontal axis</dd>
794     * <dt>selectedIndex[2] -- depth</dt>
795     * <dd>The depth axis is used for 3 or more dimensional datasets.</dd>
796     * </dl>
797     * Applications can use getSelectedIndex() to access and change the display
798     * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the
799     * following code will set the height=200 and width=50.
800     *
801     * <pre>
802     * int[] selectedIndex = dataset.getSelectedIndex();
803     * selectedIndex[0] = 0;
804     * selectedIndex[1] = 1;
805     * </pre>
806     *
807     * @see #getSelectedIndex()
808     * @see #getWidth()
809     *
810     * @return the size of dimension of the vertical axis.
811     */
812    public final int getHeight() {
813        if (rank < 0) init();
814
815        if ((selectedDims == null) || (selectedIndex == null)) {
816            return 0;
817        }
818
819        return (int) selectedDims[selectedIndex[0]];
820    }
821
822    /**
823     * Returns the dimension size of the horizontal axis.
824     *
825     * <p>
826     * This function is used by GUI applications such as HDFView. GUI
827     * applications display a dataset in 2D Table or 2D Image. The display order is
828     * specified by the index array of selectedIndex as follow:
829     * <dl>
830     * <dt>selectedIndex[0] -- height</dt>
831     * <dd>The vertical axis</dd>
832     * <dt>selectedIndex[1] -- width</dt>
833     * <dd>The horizontal axis</dd>
834     * <dt>selectedIndex[2] -- depth</dt>
835     * <dd>The depth axis, which is used for 3 or more dimension datasets.</dd>
836     * </dl>
837     * Applications can use getSelectedIndex() to access and change the display
838     * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the
839     * following code will set the height=200 and width=100.
840     *
841     * <pre>
842     * int[] selectedIndex = dataset.getSelectedIndex();
843     * selectedIndex[0] = 0;
844     * selectedIndex[1] = 1;
845     * </pre>
846     *
847     * @see #getSelectedIndex()
848     * @see #getHeight()
849     *
850     * @return the size of dimension of the horizontal axis.
851     */
852    public final int getWidth() {
853        if (rank < 0) init();
854
855        if ((selectedDims == null) || (selectedIndex == null)) {
856            return 0;
857        }
858
859        if ((selectedDims.length < 2) || (selectedIndex.length < 2)) {
860            return 1;
861        }
862
863        return (int) selectedDims[selectedIndex[1]];
864    }
865
866    /**
867     * Returns the indices of display order.
868     * <p>
869     *
870     * selectedIndex[] is provided for two purposes:
871     * <OL>
872     * <LI>
873     * selectedIndex[] is used to indicate the order of dimensions for display.
874     * selectedIndex[0] is for the row, selectedIndex[1] is for the column and
875     * selectedIndex[2] for the depth.
876     * <p>
877     * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3},
878     * then dim[1] is selected as row index, dim[2] is selected as column index
879     * and dim[3] is selected as depth index.
880     * <LI>
881     * selectedIndex[] is also used to select dimensions for display for
882     * datasets with three or more dimensions. We assume that applications such
883     * as HDFView can only display data values up to three dimensions (2D
884     * spreadsheet/image with a third dimension which the 2D spreadsheet/image
885     * is selected from). For datasets with more than three dimensions, we need
886     * selectedIndex[] to tell applications which three dimensions are chosen
887     * for display. <br>
888     * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3},
889     * then dim[1] is selected as row index, dim[2] is selected as column index
890     * and dim[3] is selected as depth index. dim[0] is not selected. Its
891     * location is fixed at 0 by default.
892     * </OL>
893     *
894     * @return the array of the indices of display order.
895     */
896    public final int[] getSelectedIndex() {
897        if (rank < 0) init();
898
899        return selectedIndex;
900    }
901
902    /**
903     * Returns the string representation of compression information.
904     * <p>
905     * For example,
906     * "SZIP: Pixels per block = 8: H5Z_FILTER_CONFIG_DECODE_ENABLED".
907     *
908     * @return the string representation of compression information.
909     */
910    public final String getCompression() {
911        if (rank < 0) init();
912
913        return compression;
914    }
915
916    /**
917     * Returns the string representation of filter information.
918     *
919     * @return the string representation of filter information.
920     */
921    public final String getFilters() {
922        if (rank < 0) init();
923
924        return filters;
925    }
926
927    /**
928     * Returns the string representation of storage layout information.
929     *
930     * @return the string representation of storage layout information.
931     */
932    public final String getStorageLayout() {
933        if (rank < 0) init();
934
935        return storage_layout;
936    }
937
938    /**
939     * Returns the string representation of storage information.
940     *
941     * @return the string representation of storage information.
942     */
943    public final String getStorage() {
944        if (rank < 0) init();
945
946        return storage;
947    }
948
949    /**
950     * Returns the array that contains the dimension sizes of the chunk of the
951     * dataset. Returns null if the dataset is not chunked.
952     *
953     * @return the array of chunk sizes or returns null if the dataset is not
954     *         chunked.
955     */
956    public final long[] getChunkSize() {
957        if (rank < 0) init();
958
959        return chunkSize;
960    }
961
962    /**
963     * @deprecated Not for public use in the future. <br>
964     *             Using {@link #convertFromUnsignedC(Object, Object)}
965     *
966     * @param data_in  the object data
967     *
968     * @return the converted object
969     */
970    @Deprecated
971    public static Object convertFromUnsignedC(Object data_in) {
972        return Dataset.convertFromUnsignedC(data_in, null);
973    }
974
975    /**
976     * Converts one-dimension array of unsigned C-type integers to a new array
977     * of appropriate Java integer in memory.
978     * <p>
979     * Since Java does not support unsigned integer, values of unsigned C-type
980     * integers must be converted into its appropriate Java integer. Otherwise,
981     * the data value will not displayed correctly. For example, if an unsigned
982     * C byte, x = 200, is stored into an Java byte y, y will be -56 instead of
983     * the correct value of 200.
984     * <p>
985     * Unsigned C integers are upgrade to Java integers according to the
986     * following table:
987     * <TABLE CELLSPACING=0 BORDER=1 CELLPADDING=5 WIDTH=400>
988     * <caption><b>Mapping Unsigned C Integers to Java Integers</b></caption>
989     * <TR>
990     * <TD><B>Unsigned C Integer</B></TD>
991     * <TD><B>JAVA Intege</B>r</TD>
992     * </TR>
993     * <TR>
994     * <TD>unsigned byte</TD>
995     * <TD>signed short</TD>
996     * </TR>
997     * <TR>
998     * <TD>unsigned short</TD>
999     * <TD>signed int</TD>
1000     * </TR>
1001     * <TR>
1002     * <TD>unsigned int</TD>
1003     * <TD>signed long</TD>
1004     * </TR>
1005     * <TR>
1006     * <TD>unsigned long</TD>
1007     * <TD>signed long</TD>
1008     * </TR>
1009     * </TABLE>
1010     * <strong>NOTE: this conversion cannot deal with unsigned 64-bit integers.
1011     * Therefore, the values of unsigned 64-bit datasets may be wrong in Java
1012     * applications</strong>.
1013     * <p>
1014     * If memory data of unsigned integers is converted by
1015     * convertFromUnsignedC(), convertToUnsignedC() must be called to convert
1016     * the data back to unsigned C before data is written into file.
1017     *
1018     * @see #convertToUnsignedC(Object, Object)
1019     *
1020     * @param data_in
1021     *            the input 1D array of the unsigned C-type integers.
1022     * @param data_out
1023     *            the output converted (or upgraded) 1D array of Java integers.
1024     *
1025     * @return the upgraded 1D array of Java integers.
1026     */
1027    @SuppressWarnings("rawtypes")
1028    public static Object convertFromUnsignedC(Object data_in, Object data_out) {
1029        log.trace("convertFromUnsignedC(): start");
1030
1031        if (data_in == null) {
1032            log.debug("convertFromUnsignedC(): data_in is null");
1033            log.trace("convertFromUnsignedC(): finish");
1034            return null;
1035        }
1036
1037        Class data_class = data_in.getClass();
1038        if (!data_class.isArray()) {
1039            log.debug("convertFromUnsignedC(): data_in not an array");
1040            log.trace("convertFromUnsignedC(): finish");
1041            return null;
1042        }
1043
1044        if (data_out != null) {
1045            Class data_class_out = data_out.getClass();
1046            if (!data_class_out.isArray() || (Array.getLength(data_in) != Array.getLength(data_out))) {
1047                log.debug("convertFromUnsignedC(): data_out not an array or does not match data_in size");
1048                data_out = null;
1049            }
1050        }
1051
1052        String cname = data_class.getName();
1053        char dname = cname.charAt(cname.lastIndexOf("[") + 1);
1054        int size = Array.getLength(data_in);
1055        log.trace("convertFromUnsignedC(): cname={} dname={} size={}", cname, dname, size);
1056
1057        if (dname == 'B') {
1058            short[] sdata = null;
1059            if (data_out == null) {
1060                sdata = new short[size];
1061            }
1062            else {
1063                sdata = (short[]) data_out;
1064            }
1065
1066            byte[] bdata = (byte[]) data_in;
1067            for (int i = 0; i < size; i++) {
1068                sdata[i] = (short) ((bdata[i] + 256) & 0xFF);
1069            }
1070
1071            data_out = sdata;
1072        }
1073        else if (dname == 'S') {
1074            int[] idata = null;
1075            if (data_out == null) {
1076                idata = new int[size];
1077            }
1078            else {
1079                idata = (int[]) data_out;
1080            }
1081
1082            short[] sdata = (short[]) data_in;
1083            for (int i = 0; i < size; i++) {
1084                idata[i] = (sdata[i] + 65536) & 0xFFFF;
1085            }
1086
1087            data_out = idata;
1088        }
1089        else if (dname == 'I') {
1090            long[] ldata = null;
1091            if (data_out == null) {
1092                ldata = new long[size];
1093            }
1094            else {
1095                ldata = (long[]) data_out;
1096            }
1097
1098            int[] idata = (int[]) data_in;
1099            for (int i = 0; i < size; i++) {
1100                ldata[i] = (idata[i] + 4294967296L) & 0xFFFFFFFFL;
1101            }
1102
1103            data_out = ldata;
1104        }
1105        else {
1106            data_out = data_in;
1107            log.debug("convertFromUnsignedC(): Java does not support unsigned long");
1108        }
1109
1110        return data_out;
1111    }
1112
1113    /**
1114     * @deprecated Not for public use in the future. <br>
1115     *             Using {@link #convertToUnsignedC(Object, Object)}
1116     *
1117     * @param data_in
1118     *            the input 1D array of the unsigned C-type integers.
1119     *
1120     * @return the upgraded 1D array of Java integers.
1121     */
1122    @Deprecated
1123    public static Object convertToUnsignedC(Object data_in) {
1124        return Dataset.convertToUnsignedC(data_in, null);
1125    }
1126
1127    /**
1128     * Converts the array of converted unsigned integers back to unsigned C-type
1129     * integer data in memory.
1130     * <p>
1131     * If memory data of unsigned integers is converted by
1132     * convertFromUnsignedC(), convertToUnsignedC() must be called to convert
1133     * the data back to unsigned C before data is written into file.
1134     *
1135     * @see #convertFromUnsignedC(Object, Object)
1136     *
1137     * @param data_in
1138     *            the input array of the Java integer.
1139     * @param data_out
1140     *            the output array of the unsigned C-type integer.
1141     *
1142     * @return the converted data of unsigned C-type integer array.
1143     */
1144    @SuppressWarnings("rawtypes")
1145    public static Object convertToUnsignedC(Object data_in, Object data_out) {
1146        log.trace("convertToUnsignedC(): start");
1147
1148        if (data_in == null) {
1149            log.debug("convertToUnsignedC(): data_in is null");
1150            log.trace("convertToUnsignedC(): finish");
1151            return null;
1152        }
1153
1154        Class data_class = data_in.getClass();
1155        if (!data_class.isArray()) {
1156            log.debug("convertToUnsignedC(): data_in not an array");
1157            log.trace("convertToUnsignedC(): finish");
1158            return null;
1159        }
1160
1161        if (data_out != null) {
1162            Class data_class_out = data_out.getClass();
1163            if (!data_class_out.isArray() || (Array.getLength(data_in) != Array.getLength(data_out))) {
1164                log.debug("convertToUnsignedC(): data_out not an array or does not match data_in size");
1165                data_out = null;
1166            }
1167        }
1168
1169        String cname = data_class.getName();
1170        char dname = cname.charAt(cname.lastIndexOf("[") + 1);
1171        int size = Array.getLength(data_in);
1172        log.trace("convertToUnsignedC(): cname={} dname={} size={}", cname, dname, size);
1173
1174        if (dname == 'S') {
1175            byte[] bdata = null;
1176            if (data_out == null) {
1177                bdata = new byte[size];
1178            }
1179            else {
1180                bdata = (byte[]) data_out;
1181            }
1182            short[] sdata = (short[]) data_in;
1183            for (int i = 0; i < size; i++) {
1184                bdata[i] = (byte) sdata[i];
1185            }
1186            data_out = bdata;
1187        }
1188        else if (dname == 'I') {
1189            short[] sdata = null;
1190            if (data_out == null) {
1191                sdata = new short[size];
1192            }
1193            else {
1194                sdata = (short[]) data_out;
1195            }
1196            int[] idata = (int[]) data_in;
1197            for (int i = 0; i < size; i++) {
1198                sdata[i] = (short) idata[i];
1199            }
1200            data_out = sdata;
1201        }
1202        else if (dname == 'J') {
1203            int[] idata = null;
1204            if (data_out == null) {
1205                idata = new int[size];
1206            }
1207            else {
1208                idata = (int[]) data_out;
1209            }
1210            long[] ldata = (long[]) data_in;
1211            for (int i = 0; i < size; i++) {
1212                idata[i] = (int) ldata[i];
1213            }
1214            data_out = idata;
1215        }
1216        else {
1217            data_out = data_in;
1218            log.debug("convertToUnsignedC(): Java does not support unsigned long");
1219        }
1220
1221        return data_out;
1222    }
1223
1224    /**
1225     * Converts an array of bytes into an array of Strings for a fixed string
1226     * dataset.
1227     * <p>
1228     * A C-string is an array of chars while an Java String is an object. When a
1229     * string dataset is read into a Java application, the data is stored in an
1230     * array of Java bytes. byteToString() is used to convert the array of bytes
1231     * into an array of Java strings so that applications can display and modify
1232     * the data content.
1233     * <p>
1234     * For example, the content of a two element C string dataset is {"ABC",
1235     * "abc"}. Java applications will read the data into a byte array of {65,
1236     * 66, 67, 97, 98, 99). byteToString(bytes, 3) returns an array of Java
1237     * String of strs[0]="ABC", and strs[1]="abc".
1238     * <p>
1239     * If memory data of strings is converted to Java Strings, stringToByte()
1240     * must be called to convert the memory data back to byte array before data
1241     * is written to file.
1242     *
1243     * @see #stringToByte(String[], int)
1244     *
1245     * @param bytes
1246     *            the array of bytes to convert.
1247     * @param length
1248     *            the length of string.
1249     *
1250     * @return the array of Java String.
1251     */
1252    public static final String[] byteToString(byte[] bytes, int length) {
1253        log.trace("byteToString(): start");
1254
1255        if (bytes == null) {
1256            log.debug("byteToString(): input is null");
1257            log.trace("byteToString(): finish");
1258            return null;
1259        }
1260
1261        int n = bytes.length / length;
1262        log.trace("byteToString(): n={} from length of {}", n, length);
1263        // String bigstr = new String(bytes);
1264        String[] strArray = new String[n];
1265        String str = null;
1266        int idx = 0;
1267        for (int i = 0; i < n; i++) {
1268            str = new String(bytes, i * length, length);
1269            // bigstr.substring uses less memory space
1270            // NOTE: bigstr does not work on linux if bytes.length is very large
1271            // see bug 1091
1272            // offset = i*length;
1273            // str = bigstr.substring(offset, offset+length);
1274
1275            idx = str.indexOf('\0');
1276            if (idx > 0) {
1277                str = str.substring(0, idx);
1278            }
1279
1280            // trim only the end
1281            int end = str.length();
1282            while (end > 0 && str.charAt(end - 1) <= '\u0020')
1283                end--;
1284
1285            strArray[i] = (end <= 0) ? "" : str.substring(0, end);
1286
1287            // trim both start and end
1288            // strArray[i] = str.trim();
1289        }
1290
1291        log.trace("byteToString(): finish");
1292        return strArray;
1293    }
1294
1295    /**
1296     * Converts a string array into an array of bytes for a fixed string
1297     * dataset.
1298     * <p>
1299     * If memory data of strings is converted to Java Strings, stringToByte()
1300     * must be called to convert the memory data back to byte array before data
1301     * is written to file.
1302     *
1303     * @see #byteToString(byte[] bytes, int length)
1304     *
1305     * @param strings
1306     *            the array of string.
1307     * @param length
1308     *            the length of string.
1309     *
1310     * @return the array of bytes.
1311     */
1312    public static final byte[] stringToByte(String[] strings, int length) {
1313        log.trace("stringToByte(): start");
1314
1315        if (strings == null) {
1316            log.debug("stringToByte(): input is null");
1317            log.trace("stringToByte(): finish");
1318            return null;
1319        }
1320
1321        int size = strings.length;
1322        byte[] bytes = new byte[size * length];
1323        log.trace("stringToByte(): size={} length={}", size, length);
1324        StringBuffer strBuff = new StringBuffer(length);
1325        for (int i = 0; i < size; i++) {
1326            // initialize the string with spaces
1327            strBuff.replace(0, length, " ");
1328
1329            if (strings[i] != null) {
1330                if (strings[i].length() > length) {
1331                    strings[i] = strings[i].substring(0, length);
1332                }
1333                strBuff.replace(0, length, strings[i]);
1334            }
1335
1336            strBuff.setLength(length);
1337            System.arraycopy(strBuff.toString().getBytes(), 0, bytes, length * i, length);
1338        }
1339
1340        log.trace("stringToByte(): finish");
1341
1342        return bytes;
1343    }
1344
1345    /**
1346     * Returns the array of strings that represent the dimension names. Returns
1347     * null if there is no dimension name.
1348     * <p>
1349     * Some datasets have pre-defined names for each dimension such as
1350     * "Latitude" and "Longitude". getDimNames() returns these pre-defined
1351     * names.
1352     *
1353     * @return the names of dimensions, or null if there is no dimension name.
1354     */
1355    public final String[] getDimNames() {
1356        if (rank < 0) init();
1357
1358        return dimNames;
1359    }
1360
1361    /**
1362     * Checks if a given datatype is a string. Sub-classes must replace this
1363     * default implementation.
1364     *
1365     * @param tid
1366     *            The data type identifier.
1367     *
1368     * @return true if the datatype is a string; otherwise returns false.
1369     */
1370    public boolean isString(int tid) {
1371        return false;
1372    }
1373
1374    /**
1375     * Returns the size in bytes of a given datatype. Sub-classes must replace
1376     * this default implementation.
1377     *
1378     * @param tid
1379     *            The data type identifier.
1380     *
1381     * @return The size of the datatype
1382     */
1383    public int getSize(int tid) {
1384        return -1;
1385    }
1386
1387    /**
1388     * Get Class of the original data buffer if converted.
1389     *
1390     * @return the Class of originalBuf
1391     */
1392    @SuppressWarnings("rawtypes")
1393    public final Class getOriginalClass() {
1394        return originalBuf.getClass();
1395    }
1396
1397    /**
1398     * Get flag that indicate if enum data is converted to strings.
1399     *
1400     * @return the enumConverted
1401     */
1402    public boolean isEnumConverted() {
1403        return enumConverted;
1404    }
1405
1406    /**
1407     * Set flag that indicate if enum data is converted to strings.
1408     *
1409     * @param b
1410     *            the enumConverted to set
1411     */
1412    public void setEnumConverted(boolean b) {
1413        if (enumConverted != b) {
1414            originalBuf = convertedBuf = null;
1415            this.clearData();
1416        }
1417
1418        enumConverted = b;
1419    }
1420}