001/*****************************************************************************
002 * Copyright by The HDF Group.                                               *
003 * Copyright by the Board of Trustees of the University of Illinois.         *
004 * All rights reserved.                                                      *
005 *                                                                           *
006 * This file is part of the HDF Java Products distribution.                  *
007 * The full copyright notice, including terms governing use, modification,   *
008 * and redistribution, is contained in the files COPYING and Copyright.html. *
009 * COPYING can be found at the root of the source code distribution tree.    *
010 * Or, see https://support.hdfgroup.org/products/licenses.html               *
011 * If you do not have access to either file, you may request a copy from     *
012 * help@hdfgroup.org.                                                        *
013 ****************************************************************************/
014
015package hdf.object;
016
017import java.lang.reflect.Array;
018import java.math.BigDecimal;
019import java.math.BigInteger;
020import java.text.DecimalFormat;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Iterator;
024import java.util.List;
025import java.util.Vector;
026
027import hdf.hdf5lib.HDFNativeData;
028
029import hdf.object.Attribute;
030import hdf.object.CompoundDS;
031import hdf.object.Dataset;
032import hdf.object.Datatype;
033import hdf.object.FileFormat;
034import hdf.object.Group;
035import hdf.object.HObject;
036import hdf.object.Utils;
037
038/**
039 * A CompoundDS is a dataset with compound datatype.
040 *
041 * A compound datatype is an aggregation of one or more datatypes. Each member
042 * of a compound type has a name which is unique within that type, and a
043 * datatype of that member in a compound datum. Compound datatypes can be nested,
044 * i.e. members of a compound datatype can be some other compound datatype.
045 *
046 * For more details on compound datatypes,
047 * see <b> <a href="https://support.hdfgroup.org/HDF5/doc/UG/HDF5_Users_Guide-Responsive%20HTML5/index.html">HDF5 User's Guide</a> </b>
048 *
 * Since Java cannot handle C-structured compound data, data in a compound dataset
 * is loaded into a Java List. Each element of the list is a data array that
 * corresponds to a compound field. The data is read/written by compound field.
052 *
053 * For example, if compound dataset "comp" has the following nested structure,
054 * and member datatypes
055 *
056 * <pre>
057 * comp --&gt; m01 (int)
058 * comp --&gt; m02 (float)
059 * comp --&gt; nest1 --&gt; m11 (char)
060 * comp --&gt; nest1 --&gt; m12 (String)
061 * comp --&gt; nest1 --&gt; nest2 --&gt; m21 (long)
062 * comp --&gt; nest1 --&gt; nest2 --&gt; m22 (double)
063 * </pre>
064 *
 * The data object is a Java list of six arrays: {int[], float[], char[],
 * String[], long[] and double[]}.
067 *
068 *
069 * @version 1.1 9/4/2007
070 * @author Peter X. Cao
071 */
072public abstract class CompoundDS extends Dataset implements CompoundDataFormat
073{
074    private static final long serialVersionUID = -4880399929644095662L;
075
076    private static final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(CompoundDS.class);
077
078    /**
079     * A single character to separate the names of nested compound fields. An
080     * extended ASCII character, 0x95, is used to avoid common characters in
081     * compound names.
082     */
083    public static final String SEPARATOR = "\u0095";
084
085    /**
086     * The number of members of the compound dataset.
087     */
088    protected int numberOfMembers;
089
090    /**
091     * The names of members of the compound dataset.
092     */
093    protected String[] memberNames;
094
095    /**
096     * Returns array containing the total number of elements of the members of
097     * this compound dataset.
098     *
099     * For example, a compound dataset COMP has members of A, B and C as
100     *
101     * <pre>
102     *     COMP {
103     *         int A;
104     *         float B[5];
105     *         double C[2][3];
106     *     }
107     * </pre>
108     *
109     * memberOrders is an integer array of {1, 5, 6} to indicate that member A
110     * has one element, member B has 5 elements, and member C has 6 elements.
111     */
112    protected int[] memberOrders;
113
114    /**
115     * The dimension sizes of each member.
116     *
117     * The i-th element of the Object[] is an integer array (int[]) that
118     * contains the dimension sizes of the i-th member.
119     */
120    protected transient Object[] memberDims;
121
122    /**
123     * The datatypes of compound members.
124     */
125    protected Datatype[] memberTypes;
126
127    /**
128     * The array to store flags to indicate if a member of this compound
129     * dataset is selected for read/write.
130     *
131     * If a member is selected, the read/write will perform on the member.
132     * Applications such as HDFView will only display the selected members of
133     * the compound dataset.
134     *
135     * <pre>
136     * For example, if a compound dataset has four members
137     *     String[] memberNames = {"X", "Y", "Z", "TIME"};
138     * and
139     *     boolean[] isMemberSelected = {true, false, false, true};
140     * members "X" and "TIME" are selected for read and write.
141     * </pre>
142     */
143    protected boolean[] isMemberSelected;
144
145    /**
146     * A list of names of all fields including nested fields.
147     *
148     * The nested names are separated by CompoundDS.SEPARATOR. For example, if compound dataset "A" has
149     * the following nested structure,
150     *
151     * <pre>
152     * A --&gt; m01
153     * A --&gt; m02
154     * A --&gt; nest1 --&gt; m11
155     * A --&gt; nest1 --&gt; m12
156     * A --&gt; nest1 --&gt; nest2 --&gt; m21
157     * A --&gt; nest1 --&gt; nest2 --&gt; m22
158     * i.e.
159     * A = { m01, m02, nest1{m11, m12, nest2{ m21, m22}}}
160     * </pre>
161     *
162     * The flatNameList of compound dataset "A" will be {m01, m02, nest1[m11, nest1[m12,
163     * nest1[nest2[m21, nest1[nest2[m22}
164     *
165     */
166    protected List<String> flatNameList;
167
168    /**
169     * A list of datatypes of all fields including nested fields.
170     */
171    protected List<Datatype> flatTypeList;
172
173    /**
174     * Constructs a CompoundDS object with the given file, dataset name and path.
175     *
176     * The dataset object represents an existing dataset in the file. For
177     * example, new CompoundDS(file, "dset1", "/g0/") constructs a dataset
178     * object that corresponds to the dataset, "dset1", at group "/g0/".
179     *
180     * This object is usually constructed at FileFormat.open(), which loads the
181     * file structure and object information into memory. It is rarely used
182     * elsewhere.
183     *
184     * @param theFile
185     *            the file that contains the data object.
186     * @param theName
187     *            the name of the data object, e.g. "dset".
188     * @param thePath
189     *            the full path of the data object, e.g. "/arrays/".
190     */
191    public CompoundDS(FileFormat theFile, String theName, String thePath) {
192        this(theFile, theName, thePath, null);
193    }
194
195    /**
196     * @deprecated Not for public use in the future.<br>
197     *             Using {@link #CompoundDS(FileFormat, String, String)}
198     *
199     * @param theFile
200     *            the file that contains the data object.
201     * @param dsName
202     *            the name of the data object, e.g. "dset".
203     * @param dsPath
204     *            the full path of the data object, e.g. "/arrays/".
205     * @param oid
206     *            the oid of the data object.
207     */
208    @Deprecated
209    public CompoundDS(FileFormat theFile, String dsName, String dsPath, long[] oid) {
210        super(theFile, dsName, dsPath, oid);
211
212        numberOfMembers = 0;
213        memberNames = null;
214        isMemberSelected = null;
215        memberTypes = null;
216    }
217
218    /**
219     * Resets selection of dataspace
220     */
221    @Override
222    protected void resetSelection() {
223        super.resetSelection();
224        setAllMemberSelection(true);
225    }
226
227    /**
228     * Returns the number of members of the compound dataset.
229     *
230     * @return the number of members of the compound dataset.
231     */
232    @Override
233    public final int getMemberCount() {
234        return numberOfMembers;
235    }
236
237    /**
238     * Returns the number of selected members of the compound dataset.
239     *
240     * Selected members are the compound fields which are selected for
241     * read/write.
242     *
243     * For example, in a compound datatype of {int A, float B, char[] C},
244     * users can choose to retrieve only {A, C} from the dataset. In this
245     * case, getSelectedMemberCount() returns two.
246     *
247     * @return the number of selected members.
248     */
249    @Override
250    public final int getSelectedMemberCount() {
251        int count = 0;
252
253        if (isMemberSelected != null) {
254            for (int i = 0; i < isMemberSelected.length; i++) {
255                if (isMemberSelected[i])
256                    count++;
257            }
258        }
259        log.trace("count of selected members={}", count);
260
261        return count;
262    }
263
264    /**
265     * Returns the names of the members of the compound dataset. The names of
266     * compound members are stored in an array of Strings.
267     *
268     * For example, for a compound datatype of {int A, float B, char[] C}
269     * getMemberNames() returns ["A", "B", "C"}.
270     *
271     * @return the names of compound members.
272     */
273    @Override
274    public final String[] getMemberNames() {
275        return memberNames;
276    }
277
278    /**
279     * Returns an array of the names of the selected members of the compound dataset.
280     *
281     * @return an array of the names of the selected members of the compound dataset.
282     */
283    public final String[] getSelectedMemberNames() {
284        if (isMemberSelected == null) {
285            log.debug("getSelectedMemberNames(): isMemberSelected array is null");
286            return memberNames;
287        }
288
289        int idx = 0;
290        String[] names = new String[getSelectedMemberCount()];
291        for (int i = 0; i < isMemberSelected.length; i++) {
292            if (isMemberSelected[i])
293                names[idx++] = memberNames[i];
294        }
295
296        return names;
297    }
298
299    /**
300     * Checks if a member of the compound dataset is selected for read/write.
301     *
302     * @param idx
303     *            the index of compound member.
304     *
305     * @return true if the i-th memeber is selected; otherwise returns false.
306     */
307    @Override
308    public final boolean isMemberSelected(int idx) {
309        if ((isMemberSelected != null) && (isMemberSelected.length > idx))
310            return isMemberSelected[idx];
311        else
312            return false;
313    }
314
315    /**
316     * Selects the i-th member for read/write.
317     *
318     * @param idx
319     *            the index of compound member.
320     */
321    @Override
322    public final void selectMember(int idx) {
323        if ((isMemberSelected != null) && (isMemberSelected.length > idx))
324            isMemberSelected[idx] = true;
325    }
326
327    /**
328     * Selects/deselects all members.
329     *
330     * @param selectAll
331     *            The indicator to select or deselect all members. If true, all
332     *            members are selected for read/write. If false, no member is
333     *            selected for read/write.
334     */
335    @Override
336    public final void setAllMemberSelection(boolean selectAll) {
337        if (isMemberSelected == null)
338            return;
339
340        for (int i = 0; i < isMemberSelected.length; i++)
341            isMemberSelected[i] = selectAll;
342    }
343
344    /**
345     * Returns array containing the total number of elements of the members of
346     * the compound dataset.
347     *
348     * For example, a compound dataset COMP has members of A, B and C as
349     *
350     * <pre>
351     *     COMP {
352     *         int A;
353     *         float B[5];
354     *         double C[2][3];
355     *     }
356     * </pre>
357     *
358     * getMemberOrders() will return an integer array of {1, 5, 6} to indicate
359     * that member A has one element, member B has 5 elements, and member C has
360     * 6 elements.
361     *
362     * @return the array containing the total number of elements of the members
363     *         of compound.
364     */
365    @Override
366    public final int[] getMemberOrders() {
367        return memberOrders;
368    }
369
370    /**
371     * Returns array containing the total number of elements of the selected
372     * members of the compound dataset.
373     *
374     * For example, a compound dataset COMP has members of A, B and C as
375     *
376     * <pre>
377     *     COMP {
378     *         int A;
379     *         float B[5];
380     *         double C[2][3];
381     *     }
382     * </pre>
383     *
384     * If A and B are selected, getSelectedMemberOrders() returns an array of
385     * {1, 5}
386     *
387     * @return array containing the total number of elements of the selected
388     *         members of compound.
389     */
390    @Override
391    public final int[] getSelectedMemberOrders() {
392        if (isMemberSelected == null) {
393            log.debug("getSelectedMemberOrders(): isMemberSelected array is null");
394            return memberOrders;
395        }
396
397        int idx = 0;
398        int[] orders = new int[getSelectedMemberCount()];
399        for (int i = 0; i < isMemberSelected.length; i++) {
400            if (isMemberSelected[i])
401                orders[idx++] = memberOrders[i];
402        }
403
404        return orders;
405    }
406
407    /**
408     * Returns the dimension sizes of the i-th member.
409     *
410     * For example, a compound dataset COMP has members of A, B and C as
411     *
412     * <pre>
413     *     COMP {
414     *         int A;
415     *         float B[5];
416     *         double C[2][3];
417     *     }
418     * </pre>
419     *
420     * getMemberDims(2) returns an array of {2, 3}, while getMemberDims(1)
421     * returns an array of {5}, and getMemberDims(0) returns null.
422     *
423     * @param i  the i-th member
424     *
425     * @return the dimension sizes of the i-th member, null if the compound
426     *         member is not an array.
427     */
428    @Override
429    public final int[] getMemberDims(int i) {
430        if (memberDims == null) {
431            return null;
432        }
433        return (int[]) memberDims[i];
434    }
435
436    /**
437     * Returns an array of datatype objects of compound members.
438     *
439     * Each member of a compound dataset has its own datatype. The datatype of a
440     * member can be atomic or other compound datatype (nested compound).
441     * Sub-classes set up the datatype objects at init().
442     *
443     * @return the array of datatype objects of the compound members.
444     */
445    @Override
446    public final Datatype[] getMemberTypes() {
447        return memberTypes;
448    }
449
450    /**
451     * Returns an array of datatype objects of selected compound members.
452     *
453     * @return an array of datatype objects of selected compound members.
454     */
455    @Override
456    public final Datatype[] getSelectedMemberTypes() {
457        if (isMemberSelected == null) {
458            log.debug("getSelectedMemberTypes(): isMemberSelected array is null");
459            return memberTypes;
460        }
461
462        int idx = 0;
463        Datatype[] types = new Datatype[getSelectedMemberCount()];
464        for (int i = 0; i < isMemberSelected.length; i++) {
465            if (isMemberSelected[i])
466                types[idx++] = memberTypes[i];
467        }
468
469        return types;
470    }
471
472    /**
473     * Returns the fill values for the data object.
474     *
475     * @return the fill values for the data object.
476     */
477    @Override
478    public Object getFillValue() {
479        return null;
480    }
481
482    /**
483     * @deprecated Not implemented for compound dataset.
484     */
485    @Deprecated
486    @Override
487    public Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception {
488        throw new UnsupportedOperationException(
489                "Writing a subset of a compound dataset to a new dataset is not implemented.");
490    }
491
492    /**
493     * Routine to convert datatypes that are read in as byte arrays to
494     * regular types.
495     *
496     * @param dtype
497     *        the datatype to convert to
498     * @param byteData
499     *        the bytes to convert
500     *
501     * @return the converted object
502     */
503    protected Object convertByteMember(final Datatype dtype, byte[] byteData) {
504        Object theObj = null;
505
506        if (dtype.getDatatypeSize() == 1) {
507            /*
508             * Normal byte[] type, such as an integer datatype of size 1.
509             */
510            theObj = byteData;
511        }
512        else if (dtype.isString() && !dtype.isVarStr() && convertByteToString) {
513            log.trace("convertByteMember(): converting byte array to string array");
514
515            theObj = byteToString(byteData, (int) dtype.getDatatypeSize());
516        }
517        else if (dtype.isInteger()) {
518            log.trace("convertByteMember(): converting byte array to integer array");
519
520            switch ((int)dtype.getDatatypeSize()) {
521            case 1:
522                /*
523                 * Normal byte[] type, such as an integer datatype of size 1.
524                 */
525                theObj = byteData;
526                break;
527            case 2:
528                theObj = HDFNativeData.byteToShort(byteData);
529                break;
530            case 4:
531                theObj = HDFNativeData.byteToInt(byteData);
532                break;
533            case 8:
534                theObj = HDFNativeData.byteToLong(byteData);
535                break;
536            default:
537                log.debug("convertByteMember(): invalid datatype size");
538                theObj = new String("*ERROR*");
539                break;
540            }
541        }
542        else if (dtype.isFloat()) {
543            log.trace("convertByteMember(): converting byte array to float array");
544
545            if (dtype.getDatatypeSize() == 8)
546                theObj = HDFNativeData.byteToDouble(byteData);
547            else
548                theObj = HDFNativeData.byteToFloat(byteData);
549        }
550        else if (dtype.isArray()) {
551            Datatype baseType = dtype.getDatatypeBase();
552
553            /*
554             * Retrieve the real base datatype in the case of ARRAY of ARRAY datatypes.
555             */
556            while (baseType.isArray())
557                baseType = baseType.getDatatypeBase();
558
559            /*
560             * Optimize for the common cases of Arrays.
561             */
562            switch (baseType.getDatatypeClass()) {
563                case Datatype.CLASS_INTEGER:
564                case Datatype.CLASS_FLOAT:
565                case Datatype.CLASS_CHAR:
566                case Datatype.CLASS_STRING:
567                case Datatype.CLASS_BITFIELD:
568                case Datatype.CLASS_OPAQUE:
569                case Datatype.CLASS_COMPOUND:
570                case Datatype.CLASS_REFERENCE:
571                case Datatype.CLASS_ENUM:
572                case Datatype.CLASS_VLEN:
573                case Datatype.CLASS_TIME:
574                    theObj = convertByteMember(baseType, byteData);
575                    break;
576
577                case Datatype.CLASS_ARRAY:
578                {
579                    Datatype arrayType = dtype.getDatatypeBase();
580
581                    long[] arrayDims = dtype.getArrayDims();
582                    int arrSize = 1;
583                    for (int i = 0; i < arrayDims.length; i++)
584                        arrSize *= arrayDims[i];
585
586                    theObj = new Object[arrSize];
587
588                    for (int i = 0; i < arrSize; i++) {
589                        byte[] indexedBytes = Arrays.copyOfRange(byteData, (int) (i * arrayType.getDatatypeSize()),
590                                (int) ((i + 1) * arrayType.getDatatypeSize()));
591                        ((Object[]) theObj)[i] = convertByteMember(arrayType, indexedBytes);
592                    }
593
594                    break;
595                }
596
597                case Datatype.CLASS_NO_CLASS:
598                default:
599                    log.debug("convertByteMember(): invalid datatype class");
600                    theObj = new String("*ERROR*");
601            }
602        }
603        else if (dtype.isCompound()) {
604            /*
605             * TODO: still valid after reading change?
606             */
607            theObj = convertCompoundByteMembers(dtype, byteData);
608        }
609        else {
610            theObj = byteData;
611        }
612
613        return theObj;
614    }
615
616    /**
617     * Given an array of bytes representing a compound Datatype, converts each of
618     * its members into Objects and returns the results.
619     *
620     * @param dtype
621     *            The compound datatype to convert
622     * @param data
623     *            The byte array representing the data of the compound Datatype
624     * @return The converted types of the bytes
625     */
626    protected Object convertCompoundByteMembers(final Datatype dtype, byte[] data) {
627        List<Object> theData = null;
628
629        List<Datatype> allSelectedTypes = Arrays.asList(this.getSelectedMemberTypes());
630        List<Datatype> localTypes = new ArrayList<>(dtype.getCompoundMemberTypes());
631        Iterator<Datatype> localIt = localTypes.iterator();
632        while (localIt.hasNext()) {
633            Datatype curType = localIt.next();
634
635            if (curType.isCompound())
636                continue;
637
638            if (!allSelectedTypes.contains(curType))
639                localIt.remove();
640        }
641
642        theData = new ArrayList<>(localTypes.size());
643        for (int i = 0, index = 0; i < localTypes.size(); i++) {
644            Datatype curType = localTypes.get(i);
645
646            if (curType.isCompound())
647                theData.add(convertCompoundByteMembers(curType,
648                        Arrays.copyOfRange(data, index, index + (int) curType.getDatatypeSize())));
649            else
650                theData.add(convertByteMember(curType,
651                        Arrays.copyOfRange(data, index, index + (int) curType.getDatatypeSize())));
652
653            index += curType.getDatatypeSize();
654        }
655
656        return theData;
657    }
658}