001/*****************************************************************************
002 * Copyright by The HDF Group.                                               *
003 * Copyright by the Board of Trustees of the University of Illinois.         *
004 * All rights reserved.                                                      *
005 *                                                                           *
006 * This file is part of the HDF Java Products distribution.                  *
007 * The full copyright notice, including terms governing use, modification,   *
008 * and redistribution, is contained in the COPYING file, which can be found  *
009 * at the root of the source code distribution tree,                         *
010 * or in https://www.hdfgroup.org/licenses.                                  *
011 * If you do not have access to either file, you may request a copy from     *
012 * help@hdfgroup.org.                                                        *
013 ****************************************************************************/
014
015package hdf.object;
016
017import java.lang.reflect.Array;
018import java.math.BigDecimal;
019import java.math.BigInteger;
020import java.text.DecimalFormat;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Iterator;
024import java.util.List;
025import java.util.Vector;
026
027import hdf.object.Attribute;
028import hdf.object.CompoundDS;
029import hdf.object.Dataset;
030import hdf.object.Datatype;
031import hdf.object.FileFormat;
032import hdf.object.Group;
033import hdf.object.HObject;
034import hdf.object.Utils;
035
036import hdf.hdf5lib.HDFNativeData;
037
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040
041/**
042 * A CompoundDS is a dataset with compound datatype.
043 *
044 * A compound datatype is an aggregation of one or more datatypes. Each member of a compound type has a name
045 * which is unique within that type, and a datatype of that member in a compound datum. Compound datatypes can
046 * be nested, i.e. members of a compound datatype can be some other compound datatype.
047 *
048 * For more details on compound datatypes, See
049 * <a href="https://hdfgroup.github.io/hdf5/_h5_t__u_g.html#sec_datatype">HDF5 Datatypes in HDF5 User
050 * Guide</a>
051 *
052 * Since Java cannot handle C-structured compound data, data in a compound dataset is loaded into a Java
053 * List. Each element of the list is a data array that corresponds to a compound field. The data is
054 * read/written by compound field.
055 *
056 * For example, if compound dataset "comp" has the following nested structure, and member datatypes
057 *
058 * <pre>
059 * comp --&gt; m01 (int)
060 * comp --&gt; m02 (float)
061 * comp --&gt; nest1 --&gt; m11 (char)
062 * comp --&gt; nest1 --&gt; m12 (String)
063 * comp --&gt; nest1 --&gt; nest2 --&gt; m21 (long)
064 * comp --&gt; nest1 --&gt; nest2 --&gt; m22 (double)
065 * </pre>
066 *
067 * The data object is a Java list of six arrays: {int[], float[], char[], String[], long[] and double[]}.
068 *
069 *
070 * @version 1.1 9/4/2007
071 * @author Peter X. Cao
072 */
073public abstract class CompoundDS extends Dataset implements CompoundDataFormat {
074    private static final long serialVersionUID = -4880399929644095662L;
075
076    private static final Logger log = LoggerFactory.getLogger(CompoundDS.class);
077
078    /**
079     * A single character to separate the names of nested compound fields. An
080     * extended ASCII character, 0x95, is used to avoid common characters in
081     * compound names.
082     */
083    public static final String SEPARATOR = "\u0095";
084
085    /**
086     * The number of members of the compound dataset.
087     */
088    protected int numberOfMembers;
089
090    /**
091     * The names of members of the compound dataset.
092     */
093    protected String[] memberNames;
094
095    /**
096     * Returns array containing the total number of elements of the members of
097     * this compound dataset.
098     *
099     * For example, a compound dataset COMP has members of A, B and C as
100     *
101     * <pre>
102     *     COMP {
103     *         int A;
104     *         float B[5];
105     *         double C[2][3];
106     *     }
107     * </pre>
108     *
109     * memberOrders is an integer array of {1, 5, 6} to indicate that member A
110     * has one element, member B has 5 elements, and member C has 6 elements.
111     */
112    protected int[] memberOrders;
113
114    /**
115     * The dimension sizes of each member.
116     *
117     * The i-th element of the Object[] is an integer array (int[]) that
118     * contains the dimension sizes of the i-th member.
119     */
120    protected transient Object[] memberDims;
121
122    /**
123     * The datatypes of compound members.
124     */
125    protected Datatype[] memberTypes;
126
127    /**
128     * The array to store flags to indicate if a member of this compound
129     * dataset is selected for read/write.
130     *
131     * If a member is selected, the read/write will perform on the member.
132     * Applications such as HDFView will only display the selected members of
133     * the compound dataset.
134     *
135     * <pre>
136     * For example, if a compound dataset has four members
137     *     String[] memberNames = {"X", "Y", "Z", "TIME"};
138     * and
139     *     boolean[] isMemberSelected = {true, false, false, true};
140     * members "X" and "TIME" are selected for read and write.
141     * </pre>
142     */
143    protected boolean[] isMemberSelected;
144
145    /**
146     * A list of names of all fields including nested fields.
147     *
148     * The nested names are separated by CompoundDS.SEPARATOR. For example, if compound dataset "A" has
149     * the following nested structure,
150     *
151     * <pre>
152     * A --&gt; m01
153     * A --&gt; m02
154     * A --&gt; nest1 --&gt; m11
155     * A --&gt; nest1 --&gt; m12
156     * A --&gt; nest1 --&gt; nest2 --&gt; m21
157     * A --&gt; nest1 --&gt; nest2 --&gt; m22
158     * i.e.
159     * A = { m01, m02, nest1{m11, m12, nest2{ m21, m22}}}
160     * </pre>
161     *
162     * The flatNameList of compound dataset "A" will be {m01, m02, nest1[m11, nest1[m12,
163     * nest1[nest2[m21, nest1[nest2[m22}
164     *
165     */
166    protected List<String> flatNameList;
167
168    /**
169     * A list of datatypes of all fields including nested fields.
170     */
171    protected List<Datatype> flatTypeList;
172
173    /**
174     * Constructs a CompoundDS object with the given file, dataset name and path.
175     *
176     * The dataset object represents an existing dataset in the file. For
177     * example, new CompoundDS(file, "dset1", "/g0/") constructs a dataset
178     * object that corresponds to the dataset, "dset1", at group "/g0/".
179     *
180     * This object is usually constructed at FileFormat.open(), which loads the
181     * file structure and object information into memory. It is rarely used
182     * elsewhere.
183     *
184     * @param theFile
185     *            the file that contains the data object.
186     * @param theName
187     *            the name of the data object, e.g. "dset".
188     * @param thePath
189     *            the full path of the data object, e.g. "/arrays/".
190     */
191    public CompoundDS(FileFormat theFile, String theName, String thePath)
192    {
193        this(theFile, theName, thePath, null);
194    }
195
196    /**
197     * @deprecated Not for public use in the future.<br>
198     *             Using {@link #CompoundDS(FileFormat, String, String)}
199     *
200     * @param theFile
201     *            the file that contains the data object.
202     * @param dsName
203     *            the name of the data object, e.g. "dset".
204     * @param dsPath
205     *            the full path of the data object, e.g. "/arrays/".
206     * @param oid
207     *            the oid of the data object.
208     */
209    @Deprecated
210    public CompoundDS(FileFormat theFile, String dsName, String dsPath, long[] oid)
211    {
212        super(theFile, dsName, dsPath, oid);
213
214        numberOfMembers  = 0;
215        memberNames      = null;
216        isMemberSelected = null;
217        memberTypes      = null;
218    }
219
220    /**
221     * Resets selection of dataspace
222     */
223    @Override
224    protected void resetSelection()
225    {
226        super.resetSelection();
227        setAllMemberSelection(true);
228    }
229
230    /**
231     * Returns the number of members of the compound dataset.
232     *
233     * @return the number of members of the compound dataset.
234     */
235    @Override
236    public final int getMemberCount()
237    {
238        return numberOfMembers;
239    }
240
241    /**
242     * Returns the number of selected members of the compound dataset.
243     *
244     * Selected members are the compound fields which are selected for
245     * read/write.
246     *
247     * For example, in a compound datatype of {int A, float B, char[] C},
248     * users can choose to retrieve only {A, C} from the dataset. In this
249     * case, getSelectedMemberCount() returns two.
250     *
251     * @return the number of selected members.
252     */
253    @Override
254    public final int getSelectedMemberCount()
255    {
256        int count = 0;
257
258        if (isMemberSelected != null) {
259            for (int i = 0; i < isMemberSelected.length; i++) {
260                if (isMemberSelected[i])
261                    count++;
262            }
263        }
264        log.trace("count of selected members={}", count);
265
266        return count;
267    }
268
269    /**
270     * Returns the names of the members of the compound dataset. The names of
271     * compound members are stored in an array of Strings.
272     *
273     * For example, for a compound datatype of {int A, float B, char[] C}
274     * getMemberNames() returns ["A", "B", "C"}.
275     *
276     * @return the names of compound members.
277     */
278    @Override
279    public final String[] getMemberNames()
280    {
281        return memberNames;
282    }
283
284    /**
285     * Returns an array of the names of the selected members of the compound dataset.
286     *
287     * @return an array of the names of the selected members of the compound dataset.
288     */
289    @Override
290    public final String[] getSelectedMemberNames()
291    {
292        if (isMemberSelected == null) {
293            log.debug("getSelectedMemberNames(): isMemberSelected array is null");
294            return memberNames;
295        }
296
297        int idx        = 0;
298        String[] names = new String[getSelectedMemberCount()];
299        for (int i = 0; i < isMemberSelected.length; i++) {
300            if (isMemberSelected[i])
301                names[idx++] = memberNames[i];
302        }
303
304        return names;
305    }
306
307    /**
308     * Checks if a member of the compound dataset is selected for read/write.
309     *
310     * @param idx
311     *            the index of compound member.
312     *
313     * @return true if the i-th memeber is selected; otherwise returns false.
314     */
315    @Override
316    public final boolean isMemberSelected(int idx)
317    {
318        if ((isMemberSelected != null) && (isMemberSelected.length > idx))
319            return isMemberSelected[idx];
320        else
321            return false;
322    }
323
324    /**
325     * Selects the i-th member for read/write.
326     *
327     * @param idx
328     *            the index of compound member.
329     */
330    @Override
331    public final void selectMember(int idx)
332    {
333        if ((isMemberSelected != null) && (isMemberSelected.length > idx))
334            isMemberSelected[idx] = true;
335    }
336
337    /**
338     * Selects/deselects all members.
339     *
340     * @param selectAll
341     *            The indicator to select or deselect all members. If true, all
342     *            members are selected for read/write. If false, no member is
343     *            selected for read/write.
344     */
345    @Override
346    public final void setAllMemberSelection(boolean selectAll)
347    {
348        if (isMemberSelected == null)
349            return;
350
351        for (int i = 0; i < isMemberSelected.length; i++)
352            isMemberSelected[i] = selectAll;
353    }
354
355    /**
356     * Returns array containing the total number of elements of the members of
357     * the compound dataset.
358     *
359     * For example, a compound dataset COMP has members of A, B and C as
360     *
361     * <pre>
362     *     COMP {
363     *         int A;
364     *         float B[5];
365     *         double C[2][3];
366     *     }
367     * </pre>
368     *
369     * getMemberOrders() will return an integer array of {1, 5, 6} to indicate
370     * that member A has one element, member B has 5 elements, and member C has
371     * 6 elements.
372     *
373     * @return the array containing the total number of elements of the members
374     *         of compound.
375     */
376    @Override
377    public final int[] getMemberOrders()
378    {
379        return memberOrders;
380    }
381
382    /**
383     * Returns array containing the total number of elements of the selected
384     * members of the compound dataset.
385     *
386     * For example, a compound dataset COMP has members of A, B and C as
387     *
388     * <pre>
389     *     COMP {
390     *         int A;
391     *         float B[5];
392     *         double C[2][3];
393     *     }
394     * </pre>
395     *
396     * If A and B are selected, getSelectedMemberOrders() returns an array of
397     * {1, 5}
398     *
399     * @return array containing the total number of elements of the selected
400     *         members of compound.
401     */
402    @Override
403    public final int[] getSelectedMemberOrders()
404    {
405        if (isMemberSelected == null) {
406            log.debug("getSelectedMemberOrders(): isMemberSelected array is null");
407            return memberOrders;
408        }
409
410        int idx      = 0;
411        int[] orders = new int[getSelectedMemberCount()];
412        for (int i = 0; i < isMemberSelected.length; i++) {
413            if (isMemberSelected[i])
414                orders[idx++] = memberOrders[i];
415        }
416
417        return orders;
418    }
419
420    /**
421     * Returns the dimension sizes of the i-th member.
422     *
423     * For example, a compound dataset COMP has members of A, B and C as
424     *
425     * <pre>
426     *     COMP {
427     *         int A;
428     *         float B[5];
429     *         double C[2][3];
430     *     }
431     * </pre>
432     *
433     * getMemberDims(2) returns an array of {2, 3}, while getMemberDims(1)
434     * returns an array of {5}, and getMemberDims(0) returns null.
435     *
436     * @param i  the i-th member
437     *
438     * @return the dimension sizes of the i-th member, null if the compound
439     *         member is not an array.
440     */
441    @Override
442    public final int[] getMemberDims(int i)
443    {
444        if (memberDims == null) {
445            return null;
446        }
447        return (int[])memberDims[i];
448    }
449
450    /**
451     * Returns an array of datatype objects of compound members.
452     *
453     * Each member of a compound dataset has its own datatype. The datatype of a
454     * member can be atomic or other compound datatype (nested compound).
455     * Sub-classes set up the datatype objects at init().
456     *
457     * @return the array of datatype objects of the compound members.
458     */
459    @Override
460    public final Datatype[] getMemberTypes()
461    {
462        return memberTypes;
463    }
464
465    /**
466     * Returns an array of datatype objects of selected compound members.
467     *
468     * @return an array of datatype objects of selected compound members.
469     */
470    @Override
471    public final Datatype[] getSelectedMemberTypes()
472    {
473        if (isMemberSelected == null) {
474            log.debug("getSelectedMemberTypes(): isMemberSelected array is null");
475            return memberTypes;
476        }
477
478        int idx          = 0;
479        Datatype[] types = new Datatype[getSelectedMemberCount()];
480        for (int i = 0; i < isMemberSelected.length; i++) {
481            if (isMemberSelected[i])
482                types[idx++] = memberTypes[i];
483        }
484
485        return types;
486    }
487
488    /**
489     * Returns the fill values for the data object.
490     *
491     * @return the fill values for the data object.
492     */
493    @Override
494    public Object getFillValue()
495    {
496        return null;
497    }
498
499    /**
500     * @deprecated Not implemented for compound dataset.
501     */
502    @Deprecated
503    @Override
504    public Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception
505    {
506        throw new UnsupportedOperationException(
507            "Writing a subset of a compound dataset to a new dataset is not implemented.");
508    }
509
510    /**
511     * Routine to convert datatypes that are read in as byte arrays to
512     * regular types.
513     *
514     * @param dtype
515     *        the datatype to convert to
516     * @param byteData
517     *        the bytes to convert
518     *
519     * @return the converted object
520     */
521    protected Object convertByteMember(final Datatype dtype, byte[] byteData)
522    {
523        Object theObj = null;
524        log.trace("convertByteMember(): byteData={} start", byteData);
525
526        if (dtype.getDatatypeSize() == 1) {
527            /*
528             * Normal byte[] type, such as an integer datatype of size 1.
529             */
530            theObj = byteData;
531        }
532        else if (dtype.isString() && !dtype.isVarStr() && convertByteToString &&
533                 (byteData instanceof byte[])) {
534            log.trace("convertByteMember(): converting byte array to string array");
535
536            theObj = byteToString(byteData, (int)dtype.getDatatypeSize());
537        }
538        else if (dtype.isInteger()) {
539            log.trace("convertByteMember(): converting byte array to integer array");
540
541            switch ((int)dtype.getDatatypeSize()) {
542            case 1:
543                /*
544                 * Normal byte[] type, such as an integer datatype of size 1.
545                 */
546                theObj = byteData;
547                break;
548            case 2:
549                theObj = HDFNativeData.byteToShort(byteData);
550                break;
551            case 4:
552                theObj = HDFNativeData.byteToInt(byteData);
553                break;
554            case 8:
555                theObj = HDFNativeData.byteToLong(byteData);
556                break;
557            default:
558                log.debug("convertByteMember(): invalid datatype size");
559                theObj = new String("*ERROR*");
560                break;
561            }
562        }
563        else if (dtype.isFloat()) {
564            log.trace("convertByteMember(): converting byte array to float array");
565
566            if (dtype.getDatatypeSize() == 8)
567                theObj = HDFNativeData.byteToDouble(byteData);
568            else
569                theObj = HDFNativeData.byteToFloat(byteData);
570        }
571        else if (dtype.isArray()) {
572            Datatype baseType = dtype.getDatatypeBase();
573            log.trace("convertByteMember(): converting byte array to baseType array");
574
575            /*
576             * Retrieve the real base datatype in the case of ARRAY of ARRAY datatypes.
577             */
578            while (baseType.isArray())
579                baseType = baseType.getDatatypeBase();
580
581            /*
582             * Optimize for the common cases of Arrays.
583             */
584            switch (baseType.getDatatypeClass()) {
585            case Datatype.CLASS_INTEGER:
586            case Datatype.CLASS_FLOAT:
587            case Datatype.CLASS_CHAR:
588            case Datatype.CLASS_STRING:
589            case Datatype.CLASS_BITFIELD:
590            case Datatype.CLASS_OPAQUE:
591            case Datatype.CLASS_COMPOUND:
592            case Datatype.CLASS_REFERENCE:
593            case Datatype.CLASS_ENUM:
594            case Datatype.CLASS_VLEN:
595            case Datatype.CLASS_TIME:
596                theObj = convertByteMember(baseType, byteData);
597                break;
598
599            case Datatype.CLASS_ARRAY: {
600                Datatype arrayType = dtype.getDatatypeBase();
601
602                long[] arrayDims = dtype.getArrayDims();
603                int arrSize      = 1;
604                for (int i = 0; i < arrayDims.length; i++)
605                    arrSize *= arrayDims[i];
606                log.trace("convertByteMember(): no CLASS_ARRAY arrayType={} arrSize={}", arrayType, arrSize);
607
608                theObj = new Object[arrSize];
609
610                for (int i = 0; i < arrSize; i++) {
611                    byte[] indexedBytes = Arrays.copyOfRange(byteData, (int)(i * arrayType.getDatatypeSize()),
612                                                             (int)((i + 1) * arrayType.getDatatypeSize()));
613                    ((Object[])theObj)[i] = convertByteMember(arrayType, indexedBytes);
614                }
615
616                break;
617            }
618
619            case Datatype.CLASS_NO_CLASS:
620            default:
621                log.debug("convertByteMember(): invalid datatype class");
622                theObj = new String("*ERROR*");
623            }
624        }
625        else if (dtype.isCompound()) {
626            log.debug("convertByteMember(): compound datatype class");
627            /*
628             * TODO: still valid after reading change?
629             */
630            theObj = convertCompoundByteMembers(dtype, byteData);
631        }
632        else {
633            log.debug("convertByteMember(): byteData={}", byteData);
634            theObj = byteData;
635        }
636
637        return theObj;
638    }
639
640    /**
641     * Given an array of bytes representing a compound Datatype, converts each of
642     * its members into Objects and returns the results.
643     *
644     * @param dtype
645     *            The compound datatype to convert
646     * @param data
647     *            The byte array representing the data of the compound Datatype
648     * @return The converted types of the bytes
649     */
650    protected Object convertCompoundByteMembers(final Datatype dtype, byte[] data)
651    {
652        List<Object> theData = null;
653
654        List<Datatype> allSelectedTypes = Arrays.asList(this.getSelectedMemberTypes());
655        List<Datatype> localTypes       = new ArrayList<>(dtype.getCompoundMemberTypes());
656        Iterator<Datatype> localIt      = localTypes.iterator();
657        while (localIt.hasNext()) {
658            Datatype curType = localIt.next();
659
660            if (curType.isCompound())
661                continue;
662
663            if (!allSelectedTypes.contains(curType))
664                localIt.remove();
665        }
666
667        theData = new ArrayList<>(localTypes.size());
668        for (int i = 0, index = 0; i < localTypes.size(); i++) {
669            Datatype curType = localTypes.get(i);
670
671            if (curType.isCompound())
672                theData.add(convertCompoundByteMembers(
673                    curType, Arrays.copyOfRange(data, index, index + (int)curType.getDatatypeSize())));
674            else
675                theData.add(convertByteMember(
676                    curType, Arrays.copyOfRange(data, index, index + (int)curType.getDatatypeSize())));
677
678            index += curType.getDatatypeSize();
679        }
680
681        return theData;
682    }
683}