001/***************************************************************************** 002 * Copyright by The HDF Group. * 003 * Copyright by the Board of Trustees of the University of Illinois. * 004 * All rights reserved. * 005 * * 006 * This file is part of the HDF Java Products distribution. * 007 * The full copyright notice, including terms governing use, modification, * 008 * and redistribution, is contained in the files COPYING and Copyright.html. * 009 * COPYING can be found at the root of the source code distribution tree. * 010 * Or, see https://support.hdfgroup.org/products/licenses.html * 011 * If you do not have access to either file, you may request a copy from * 012 * help@hdfgroup.org. * 013 ****************************************************************************/ 014 015package hdf.object; 016 017import java.lang.reflect.Array; 018import java.util.List; 019 020/** 021 * The abstract class provides general APIs to create and manipulate dataset 022 * objects, and retrieve dataset properties, datatype and dimension sizes. 023 * <p> 024 * This class provides two convenient functions, read()/write(), to read/write 025 * data values. Reading/writing data may take many library calls if we use the 026 * library APIs directly. The read() and write functions hide all the details of 027 * these calls from users. 028 * <p> 029 * For more details on dataset, 030 * see <b> <a href="https://support.hdfgroup.org/HDF5/doc/UG/HDF5_Users_Guide-Responsive%20HTML5/index.html">HDF5 User's Guide</a> </b> 031 * <p> 032 * 033 * @see hdf.object.ScalarDS 034 * @see hdf.object.CompoundDS 035 * 036 * @version 1.1 9/4/2007 037 * @author Peter X. 
Cao 038 */ 039public abstract class Dataset extends HObject implements MetaDataContainer, DataFormat { 040 private static final long serialVersionUID = -3360885430038261178L; 041 042 private static final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(Dataset.class); 043 044 /** 045 * The memory buffer that holds the raw data array of the dataset. 046 */ 047 protected transient Object data; 048 049 /** 050 * The number of dimensions of the dataset. 051 */ 052 protected int rank; 053 054 /** 055 * The current dimension sizes of the dataset 056 */ 057 protected long[] dims; 058 059 /** 060 * The max dimension sizes of the dataset 061 */ 062 protected long[] maxDims; 063 064 /** 065 * Array that contains the number of data points selected (for read/write) 066 * in each dimension. 067 * <p> 068 * The selected size must be less than or equal to the current dimension size. 069 * A subset of a rectangle selection is defined by the starting position and 070 * selected sizes. 071 * <p> 072 * For example, if a 4 X 5 dataset is as follows: 073 * 074 * <pre> 075 * 0, 1, 2, 3, 4 076 * 10, 11, 12, 13, 14 077 * 20, 21, 22, 23, 24 078 * 30, 31, 32, 33, 34 079 * long[] dims = {4, 5}; 080 * long[] startDims = {1, 2}; 081 * long[] selectedDims = {3, 3}; 082 * then the following subset is selected by the startDims and selectedDims above: 083 * 12, 13, 14 084 * 22, 23, 24 085 * 32, 33, 34 086 * </pre> 087 */ 088 protected long[] selectedDims; 089 090 /** 091 * The starting position of each dimension of a selected subset. With both 092 * the starting position and selected sizes, the subset of a rectangle 093 * selection is fully defined. 094 */ 095 protected long[] startDims; 096 097 /** 098 * Array that contains the indices of the dimensions selected for display. 099 * <p> 100 * <B>selectedIndex[] is provided for two purposes:</B> 101 * <OL> 102 * <LI> 103 * selectedIndex[] is used to indicate the order of dimensions for display, 104 * i.e. 
selectedIndex[0] = row, selectedIndex[1] = column and 105 * selectedIndex[2] = depth. For example, for a four dimension dataset, if 106 * selectedIndex[] is {1, 2, 3}, then dim[1] is selected as row index, 107 * dim[2] is selected as column index and dim[3] is selected as depth index. 108 * <LI> 109 * selectedIndex[] is also used to select dimensions for display for 110 * datasets with three or more dimensions. We assume that applications such 111 * as HDFView can only display data up to three dimensions (a 2D 112 * spreadsheet/image with a third dimension that the 2D spreadsheet/image is 113 * cut from). For datasets with more than three dimensions, we need 114 * selectedIndex[] to store which three dimensions are chosen for display. 115 * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3}, 116 * then dim[1] is selected as row index, dim[2] is selected as column index 117 * and dim[3] is selected as depth index. dim[0] is not selected. Its 118 * location is fixed at 0 by default. 119 * </OL> 120 */ 121 protected final int[] selectedIndex; 122 123 /** 124 * The number of elements to move from the start location in each dimension. 125 * For example, if selectedStride[0] = 2, every other data point is selected 126 * along dim[0]. 127 */ 128 protected long[] selectedStride; 129 130 /** 131 * The array of dimension sizes for a chunk. 132 */ 133 protected long[] chunkSize; 134 135 /** The compression information. */ 136 protected StringBuilder compression; 137 public static final String COMPRESSION_GZIP_TXT = "GZIP: level = "; 138 139 /** The filters information. */ 140 protected StringBuilder filters; 141 142 /** The storage layout information. */ 143 protected StringBuilder storageLayout; 144 145 /** The storage information. */ 146 protected StringBuilder storage; 147 148 /** The datatype object of the dataset. */ 149 protected Datatype datatype; 150 151 /** 152 * Array of strings that represent the dimension names. 
It is null if dimension names do not exist. 153 */ 154 protected String[] dimNames; 155 156 /** Flag to indicate if the byte[] array is converted to strings */ 157 protected boolean convertByteToString = true; 158 159 /** Flag to indicate if data values are loaded into memory. */ 160 protected boolean isDataLoaded = false; 161 162 /** Flag to indicate if this dataset has been initialized */ 163 protected boolean inited = false; 164 165 /** The number of data points in the memory buffer. */ 166 protected long nPoints = 1; 167 168 /** 169 * The data buffer that contains the raw data read directly from file 170 * (before any data conversion). 171 */ 172 protected transient Object originalBuf = null; 173 174 /** 175 * The array that holds the converted data of unsigned C-type integers. 176 * <p> 177 * For example, suppose that the original data is an array of unsigned 178 * 16-bit short integers. Since Java does not support unsigned integer, the 179 * data is converted to an array of 32-bit signed integer. In that case, the 180 * converted buffer is the array of 32-bit signed integer. 181 */ 182 protected transient Object convertedBuf = null; 183 184 /** 185 * Constructs a Dataset object with a given file, name and path. 186 * 187 * @param theFile 188 * the file that contains the dataset. 189 * @param dsName 190 * the name of the Dataset, e.g. "dset1". 191 * @param dsPath 192 * the full group path of this Dataset, e.g. "/arrays/". 193 */ 194 public Dataset(FileFormat theFile, String dsName, String dsPath) { 195 this(theFile, dsName, dsPath, null); 196 } 197 198 /** 199 * @deprecated Not for public use in the future. <br> 200 * Use {@link #Dataset(FileFormat, String, String)} 201 * 202 * @param theFile 203 * the file that contains the dataset. 204 * @param dsName 205 * the name of the Dataset, e.g. "dset1". 206 * @param dsPath 207 * the full group path of this Dataset, e.g. "/arrays/". 208 * @param oid 209 * the oid of this Dataset. 
210 */ 211 @Deprecated 212 public Dataset(FileFormat theFile, String dsName, String dsPath, long[] oid) { 213 super(theFile, dsName, dsPath, oid); 214 log.trace("Dataset: start {}", dsName); 215 216 datatype = null; 217 rank = -1; 218 data = null; 219 dims = null; 220 maxDims = null; 221 selectedDims = null; 222 startDims = null; 223 selectedStride = null; 224 chunkSize = null; 225 compression = new StringBuilder("NONE"); 226 filters = new StringBuilder("NONE"); 227 storageLayout = new StringBuilder("NONE"); 228 storage = new StringBuilder("NONE"); 229 dimNames = null; 230 231 selectedIndex = new int[3]; 232 selectedIndex[0] = 0; 233 selectedIndex[1] = 1; 234 selectedIndex[2] = 2; 235 } 236 237 /** 238 * Clears memory held by the dataset, such as the data buffer. 239 */ 240 @SuppressWarnings("rawtypes") 241 public void clear() { 242 if (data != null) { 243 if (data instanceof List) { 244 ((List) data).clear(); 245 } 246 data = null; 247 originalBuf = null; 248 convertedBuf = null; 249 } 250 isDataLoaded = false; 251 } 252 253 /** 254 * Returns the rank (number of dimensions) of the dataset. 255 * 256 * @return the number of dimensions of the dataset. 257 */ 258 @Override 259 public final int getRank() { 260 if (!inited) 261 init(); 262 263 return rank; 264 } 265 266 /** 267 * Returns the array that contains the dimension sizes of the dataset. 268 * 269 * @return the dimension sizes of the dataset. 270 */ 271 @Override 272 public final long[] getDims() { 273 if (!inited) 274 init(); 275 276 return dims; 277 } 278 279 /** 280 * Returns the array that contains the max dimension sizes of the dataset. 281 * 282 * @return the max dimension sizes of the dataset. 283 */ 284 public final long[] getMaxDims() { 285 if (!inited) init(); 286 287 if (maxDims == null) return dims; 288 289 return maxDims; 290 } 291 292 /** 293 * Returns the dimension sizes of the selected subset. 294 * <p> 295 * The SelectedDims is the number of data points of the selected subset. 
296 * Applications can use this array to change the size of selected subset. 297 * 298 * The selected size must be less than or equal to the current dimension size. 299 * Combined with the starting position, selected sizes and stride, the 300 * subset of a rectangle selection is fully defined. 301 * <p> 302 * For example, if a 4 X 5 dataset is as follows: 303 * 304 * <pre> 305 * 0, 1, 2, 3, 4 306 * 10, 11, 12, 13, 14 307 * 20, 21, 22, 23, 24 308 * 30, 31, 32, 33, 34 309 * long[] dims = {4, 5}; 310 * long[] startDims = {1, 2}; 311 * long[] selectedDims = {3, 3}; 312 * long[] selectedStride = {1, 1}; 313 * then the following subset is selected by the startDims and selectedDims 314 * 12, 13, 14 315 * 22, 23, 24 316 * 32, 33, 34 317 * </pre> 318 * 319 * @return the dimension sizes of the selected subset. 320 */ 321 @Override 322 public final long[] getSelectedDims() { 323 if (!inited) init(); 324 325 return selectedDims; 326 } 327 328 /** 329 * Returns the starting position of a selected subset. 330 * <p> 331 * Applications can use this array to change the starting position of a 332 * selection. Combined with the selected dimensions, selected sizes and 333 * stride, the subset of a rectangle selection is fully defined. 334 * <p> 335 * For example, if a 4 X 5 dataset is as follows: 336 * 337 * <pre> 338 * 0, 1, 2, 3, 4 339 * 10, 11, 12, 13, 14 340 * 20, 21, 22, 23, 24 341 * 30, 31, 32, 33, 34 342 * long[] dims = {4, 5}; 343 * long[] startDims = {1, 2}; 344 * long[] selectedDims = {3, 3}; 345 * long[] selectedStride = {1, 1}; 346 * then the following subset is selected by the startDims and selectedDims 347 * 12, 13, 14 348 * 22, 23, 24 349 * 32, 33, 34 350 * </pre> 351 * 352 * @return the starting position of a selected subset. 353 */ 354 @Override 355 public final long[] getStartDims() { 356 if (!inited) init(); 357 358 return startDims; 359 } 360 361 /** 362 * Returns the selectedStride of the selected dataset. 
363 * <p> 364 * Applications can use this array to change how many elements to move in 365 * each dimension. 366 * 367 * Combined with the starting position and selected sizes, the subset of a 368 * rectangle selection is defined. 369 * <p> 370 * For example, if a 4 X 5 dataset is as follows: 371 * 372 * <pre> 373 * 0, 1, 2, 3, 4 374 * 10, 11, 12, 13, 14 375 * 20, 21, 22, 23, 24 376 * 30, 31, 32, 33, 34 377 * long[] dims = {4, 5}; 378 * long[] startDims = {0, 0}; 379 * long[] selectedDims = {2, 2}; 380 * long[] selectedStride = {2, 3}; 381 * then the following subset is selected by the startDims and selectedDims 382 * 0, 3 383 * 20, 23 384 * </pre> 385 * 386 * @return the selectedStride of the selected dataset. 387 */ 388 @Override 389 public final long[] getStride() { 390 if (!inited) init(); 391 392 if (rank <= 0) { 393 return null; 394 } 395 396 if (selectedStride == null) { 397 selectedStride = new long[rank]; 398 for (int i = 0; i < rank; i++) { 399 selectedStride[i] = 1; 400 } 401 } 402 403 return selectedStride; 404 } 405 406 /** 407 * Sets the flag that indicates if a byte array is converted to a string 408 * array. 409 * <p> 410 * In a string dataset, the raw data from file is stored in a byte array. By 411 * default, this byte array is converted to an array of strings. For a large 412 * dataset (e.g. more than one million strings), the conversion takes a long 413 * time and requires a lot of memory space to store the strings. In some 414 * applications, such a conversion can be delayed. For example, A GUI 415 * application may convert only the part of the strings that is visible to the 416 * users, not the entire data array. 417 * <p> 418 * setConvertByteToString(boolean b) allows users to set the flag so that 419 * applications can choose to perform the byte-to-string conversion or not. 420 * If the flag is set to false, the getData() returns an array of byte 421 * instead of an array of strings. 
422 * 423 * @param b 424 * convert bytes to strings if b is true; otherwise, if false, do 425 * not convert bytes to strings. 426 */ 427 public final void setConvertByteToString(boolean b) { 428 convertByteToString = b; 429 } 430 431 /** 432 * Returns the flag that indicates if a byte array is converted to a string 433 * array. 434 * 435 * @return true if byte array is converted to string; otherwise, returns 436 * false if there is no conversion. 437 */ 438 public final boolean getConvertByteToString() { 439 return convertByteToString; 440 } 441 442 /** 443 * Reads the raw data of the dataset from file to a byte array. 444 * <p> 445 * readBytes() reads raw data to an array of bytes instead of array of its 446 * datatype. For example, for a one-dimension 32-bit integer dataset of 447 * size 5, readBytes() returns a byte array of size 20 instead of an 448 * int array of 5. 449 * <p> 450 * readBytes() can be used to copy data from one dataset to another 451 * efficiently because the raw data is not converted to its native type, it 452 * saves memory space and CPU time. 453 * 454 * @return the byte array of the raw data. 455 * 456 * @throws Exception if data can not be read 457 */ 458 public abstract byte[] readBytes() throws Exception; 459 460 /** 461 * Writes the memory buffer of this dataset to file. 462 * 463 * @throws Exception if buffer can not be written 464 */ 465 @Override 466 public final void write() throws Exception { 467 if (data != null) { 468 write(data); 469 } 470 } 471 472 /** 473 * Creates a new dataset and writes the data buffer to the new dataset. 474 * <p> 475 * This function allows applications to create a new dataset for a given 476 * data buffer. For example, users can select a specific interesting part 477 * from a large image and create a new image with the selection. 478 * <p> 479 * The new dataset retains the datatype and dataset creation properties of 480 * this dataset. 
481 * 482 * @param pgroup 483 * the group which the dataset is copied to. 484 * @param name 485 * the name of the new dataset. 486 * @param dims 487 * the dimension sizes of the new dataset. 488 * @param data 489 * the data values of the subset to be copied. 490 * 491 * @return the new dataset. 492 * 493 * @throws Exception if dataset can not be copied 494 */ 495 public abstract Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception; 496 /** Returns the flag that indicates whether this dataset has been initialized. */ 497 @Override 498 public final boolean isInited() { 499 return inited; 500 } 501 502 /** 503 * Returns the data buffer of the dataset in memory. 504 * <p> 505 * If data is already loaded into memory, returns the data; otherwise, calls 506 * read() to read data from file into a memory buffer and returns the memory 507 * buffer. 508 * <p> 509 * By default, the whole dataset is read into memory. Users can also select 510 * a subset to read. Subsetting is done in an implicit way. 511 * <p> 512 * <b>How to Select a Subset</b> 513 * <p> 514 * A selection is specified by three arrays: start, stride and count. 515 * <ol> 516 * <li>start: offset of a selection 517 * <li>stride: determines how many elements to move in each dimension 518 * <li>count: number of elements to select in each dimension 519 * </ol> 520 * getStartDims(), getStride() and getSelectedDims() return the start, 521 * stride and count arrays respectively. Applications can make a selection 522 * by changing the values of the arrays. 523 * <p> 524 * The following example shows how to make a subset. In the example, the 525 * dataset is a 4-dimensional array of [200][100][50][10], i.e. 
dims[0]=200; 526 * dims[1]=100; dims[2]=50; dims[3]=10; <br> 527 * We want to select every other data point in dims[1] and dims[2] 528 * 529 * <pre> 530 * int rank = dataset.getRank(); // number of dimensions of the dataset 531 * long[] dims = dataset.getDims(); // the dimension sizes of the dataset 532 * long[] selected = dataset.getSelectedDims(); // the selected size of the dataset 533 * long[] start = dataset.getStartDims(); // the offset of the selection 534 * long[] stride = dataset.getStride(); // the stride of the dataset 535 * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for display 536 * 537 * // select dim1 and dim2 as 2D data for display, and slice through dim0 538 * selectedIndex[0] = 1; 539 * selectedIndex[1] = 2; 540 * selectedIndex[2] = 0; 541 * 542 * // reset the selection arrays 543 * for (int i = 0; i < rank; i++) { 544 * start[i] = 0; 545 * selected[i] = 1; 546 * stride[i] = 1; 547 * } 548 * 549 * // set stride to 2 on dim1 and dim2 so that every other data point is 550 * // selected. 551 * stride[1] = 2; 552 * stride[2] = 2; 553 * 554 * // set the selection size of dim1 and dim2 555 * selected[1] = dims[1] / stride[1]; 556 * selected[2] = dims[2] / stride[2]; 557 * 558 * // when dataset.getData() is called, the selection above will be used since 559 * // the dimension arrays are passed by reference. Changes of these arrays 560 * // outside the dataset object directly change the values of these arrays 561 * // in the dataset object. 562 * </pre> 563 * <p> 564 * For ScalarDS, the memory data buffer is a one-dimensional array of byte, 565 * short, int, float, double or String type based on the datatype of the 566 * dataset. 567 * <p> 568 * For CompoundDS, the memory data object is a java.util.List object. Each 569 * element of the list is a data array that corresponds to a compound field. 
570 * <p> 571 * For example, if compound dataset "comp" has the following nested 572 * structure, and member datatypes 573 * 574 * <pre> 575 * comp --> m01 (int) 576 * comp --> m02 (float) 577 * comp --> nest1 --> m11 (char) 578 * comp --> nest1 --> m12 (String) 579 * comp --> nest1 --> nest2 --> m21 (long) 580 * comp --> nest1 --> nest2 --> m22 (double) 581 * </pre> 582 * 583 * getData() returns a list of six arrays: {int[], float[], char[], 584 * String[], long[] and double[]}. 585 * 586 * @return the memory buffer of the dataset. 587 * 588 * @throws Exception if object can not be read 589 * @throws OutOfMemoryError if memory is exhausted 590 */ 591 @Override 592 public final Object getData() throws Exception, OutOfMemoryError { 593 if (!isDataLoaded) { 594 data = read(); // load the data 595 originalBuf = data; 596 isDataLoaded = true; 597 nPoints = 1; 598 log.trace("getData: selectedDims length={}",selectedDims.length); 599 for (int j = 0; j < selectedDims.length; j++) { 600 nPoints *= selectedDims[j]; 601 } 602 log.trace("getData: read {}", nPoints); 603 } 604 605 return data; 606 } 607 608 /** 609 * Not for public use in the future. 610 * <p> 611 * setData() is not safe to use because it changes memory buffer 612 * of the dataset object. Dataset operations such as write/read 613 * will fail if the buffer type or size is changed. 614 * 615 * @param d the object data -must be an array of Objects 616 */ 617 @Override 618 public final void setData(Object d) { 619 if (!(this instanceof Attribute)) 620 throw new UnsupportedOperationException("setData: unsupported for non-Attribute objects"); 621 622 data = d; 623 } 624 625 /** 626 * Clears the current data buffer in memory and forces the next read() to load 627 * the data from file. 628 * <p> 629 * The function read() loads data from file into memory only if the data is 630 * not read. If data is already in memory, read() just returns the memory 631 * buffer. 
Sometimes we want to force read() to re-read data from file. For 632 * example, when the selection is changed, we need to re-read the data. 633 * 634 * @see #getData() 635 * @see #read() 636 */ 637 @Override 638 public void clearData() { 639 isDataLoaded = false; 640 } 641 642 /** 643 * Returns the dimension size of the vertical axis. 644 * 645 * <p> 646 * This function is used by GUI applications such as HDFView. GUI 647 * applications display a dataset in a 2D table or 2D image. The display 648 * order is specified by the index array of selectedIndex as follow: 649 * <dl> 650 * <dt>selectedIndex[0] -- height</dt> 651 * <dd>The vertical axis</dd> 652 * <dt>selectedIndex[1] -- width</dt> 653 * <dd>The horizontal axis</dd> 654 * <dt>selectedIndex[2] -- depth</dt> 655 * <dd>The depth axis is used for 3 or more dimensional datasets.</dd> 656 * </dl> 657 * Applications can use getSelectedIndex() to access and change the display 658 * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the 659 * following code will set the height=200 and width=50. 660 * 661 * <pre> 662 * int[] selectedIndex = dataset.getSelectedIndex(); 663 * selectedIndex[0] = 0; 664 * selectedIndex[1] = 1; 665 * </pre> 666 * 667 * @see #getSelectedIndex() 668 * @see #getWidth() 669 * 670 * @return the size of dimension of the vertical axis. 671 */ 672 @Override 673 public final long getHeight() { 674 if (!inited) init(); 675 676 if ((selectedDims == null) || (selectedIndex == null)) { 677 return 0; 678 } 679 680 return selectedDims[selectedIndex[0]]; 681 } 682 683 /** 684 * Returns the dimension size of the horizontal axis. 685 * 686 * <p> 687 * This function is used by GUI applications such as HDFView. GUI 688 * applications display a dataset in 2D Table or 2D Image. 
The display order is 689 * specified by the index array of selectedIndex as follow: 690 * <dl> 691 * <dt>selectedIndex[0] -- height</dt> 692 * <dd>The vertical axis</dd> 693 * <dt>selectedIndex[1] -- width</dt> 694 * <dd>The horizontal axis</dd> 695 * <dt>selectedIndex[2] -- depth</dt> 696 * <dd>The depth axis, which is used for 3 or more dimension datasets.</dd> 697 * </dl> 698 * Applications can use getSelectedIndex() to access and change the display 699 * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the 700 * following code will set the height=200 and width=100. 701 * 702 * <pre> 703 * int[] selectedIndex = dataset.getSelectedIndex(); 704 * selectedIndex[0] = 0; 705 * selectedIndex[1] = 1; 706 * </pre> 707 * 708 * @see #getSelectedIndex() 709 * @see #getHeight() 710 * 711 * @return the size of dimension of the horizontal axis. 712 */ 713 @Override 714 public final long getWidth() { 715 if (!inited) init(); 716 717 if ((selectedDims == null) || (selectedIndex == null)) { 718 return 0; 719 } 720 721 if ((selectedDims.length < 2) || (selectedIndex.length < 2)) { 722 return 1; 723 } 724 725 return selectedDims[selectedIndex[1]]; 726 } 727 728 /** 729 * Returns the indices of display order. 730 * <p> 731 * 732 * selectedIndex[] is provided for two purposes: 733 * <OL> 734 * <LI> 735 * selectedIndex[] is used to indicate the order of dimensions for display. 736 * selectedIndex[0] is for the row, selectedIndex[1] is for the column and 737 * selectedIndex[2] for the depth. 738 * <p> 739 * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3}, 740 * then dim[1] is selected as row index, dim[2] is selected as column index 741 * and dim[3] is selected as depth index. 742 * <LI> 743 * selectedIndex[] is also used to select dimensions for display for 744 * datasets with three or more dimensions. 
We assume that applications such 745 * as HDFView can only display data values up to three dimensions (2D 746 * spreadsheet/image with a third dimension which the 2D spreadsheet/image 747 * is selected from). For datasets with more than three dimensions, we need 748 * selectedIndex[] to tell applications which three dimensions are chosen 749 * for display. <br> 750 * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3}, 751 * then dim[1] is selected as row index, dim[2] is selected as column index 752 * and dim[3] is selected as depth index. dim[0] is not selected. Its 753 * location is fixed at 0 by default. 754 * </OL> 755 * 756 * @return the array of the indices of display order. 757 */ 758 @Override 759 public final int[] getSelectedIndex() { 760 if (!inited) init(); 761 762 return selectedIndex; 763 } 764 765 /** 766 * Returns the string representation of compression information. 767 * <p> 768 * For example, 769 * "SZIP: Pixels per block = 8: H5Z_FILTER_CONFIG_DECODE_ENABLED". 770 * 771 * @return the string representation of compression information. 772 */ 773 @Override 774 public final String getCompression() { 775 if (!inited) init(); 776 777 return compression.toString(); 778 } 779 780 /** 781 * Returns the string representation of filter information. 782 * 783 * @return the string representation of filter information. 784 */ 785 public final String getFilters() { 786 if (!inited) init(); 787 788 return filters.toString(); 789 } 790 791 /** 792 * Returns the string representation of storage layout information. 793 * 794 * @return the string representation of storage layout information. 795 */ 796 public final String getStorageLayout() { 797 if (!inited) init(); 798 799 return storageLayout.toString(); 800 } 801 802 /** 803 * Returns the string representation of storage information. 804 * 805 * @return the string representation of storage information. 
806 */ 807 public final String getStorage() { 808 if (!inited) init(); 809 810 return storage.toString(); 811 } 812 813 /** 814 * Returns the array that contains the dimension sizes of the chunk of the 815 * dataset. Returns null if the dataset is not chunked. 816 * 817 * @return the array of chunk sizes or returns null if the dataset is not 818 * chunked. 819 */ 820 public final long[] getChunkSize() { 821 if (!inited) init(); 822 823 return chunkSize; 824 } 825 826 @Override 827 public Datatype getDatatype() { 828 return datatype; 829 } 830 831 /** 832 * @deprecated Not for public use in the future. <br> 833 * Using {@link #convertFromUnsignedC(Object, Object)} 834 * 835 * @param dataIN the object data 836 * 837 * @return the converted object 838 */ 839 @Deprecated 840 public static Object convertFromUnsignedC(Object dataIN) { 841 return Dataset.convertFromUnsignedC(dataIN, null); 842 } 843 844 /** 845 * Converts one-dimension array of unsigned C-type integers to a new array 846 * of appropriate Java integer in memory. 847 * <p> 848 * Since Java does not support unsigned integer, values of unsigned C-type 849 * integers must be converted into its appropriate Java integer. Otherwise, 850 * the data value will not displayed correctly. For example, if an unsigned 851 * C byte, x = 200, is stored into an Java byte y, y will be -56 instead of 852 * the correct value of 200. 
853 * <p> 854 * Unsigned C integers are upgraded to Java integers according to the 855 * following table: 856 * <table border=1> 857 * <caption><b>Mapping Unsigned C Integers to Java Integers</b></caption> 858 * <TR> 859 * <TD><B>Unsigned C Integer</B></TD> 860 * <TD><B>JAVA Integer</B></TD> 861 * </TR> 862 * <TR> 863 * <TD>unsigned byte</TD> 864 * <TD>signed short</TD> 865 * </TR> 866 * <TR> 867 * <TD>unsigned short</TD> 868 * <TD>signed int</TD> 869 * </TR> 870 * <TR> 871 * <TD>unsigned int</TD> 872 * <TD>signed long</TD> 873 * </TR> 874 * <TR> 875 * <TD>unsigned long</TD> 876 * <TD>signed long</TD> 877 * </TR> 878 * </TABLE> 879 * <strong>NOTE: this conversion cannot deal with unsigned 64-bit integers. 880 * Therefore, the values of unsigned 64-bit datasets may be wrong in Java 881 * applications</strong>. 882 * <p> 883 * If memory data of unsigned integers is converted by 884 * convertFromUnsignedC(), convertToUnsignedC() must be called to convert 885 * the data back to unsigned C before data is written into file. 886 * 887 * @see #convertToUnsignedC(Object, Object) 888 * 889 * @param dataIN 890 * the input 1D array of the unsigned C-type integers. 891 * @param dataOUT 892 * the output converted (or upgraded) 1D array of Java integers. 893 * 894 * @return the upgraded 1D array of Java integers. 
895 */ 896 @SuppressWarnings("rawtypes") 897 public static Object convertFromUnsignedC(Object dataIN, Object dataOUT) { 898 if (dataIN == null) { 899 log.debug("convertFromUnsignedC(): data_in is null"); 900 return null; 901 } 902 903 Class dataClass = dataIN.getClass(); 904 if (!dataClass.isArray()) { 905 log.debug("convertFromUnsignedC(): data_in not an array"); 906 return null; 907 } 908 909 if (dataOUT != null) { 910 Class dataClassOut = dataOUT.getClass(); 911 if (!dataClassOut.isArray() || (Array.getLength(dataIN) != Array.getLength(dataOUT))) { 912 log.debug("convertFromUnsignedC(): data_out not an array or does not match data_in size"); 913 dataOUT = null; 914 } 915 } 916 917 String cname = dataClass.getName(); 918 char dname = cname.charAt(cname.lastIndexOf('[') + 1); 919 int size = Array.getLength(dataIN); 920 log.trace("convertFromUnsignedC(): cname={} dname={} size={}", cname, dname, size); 921 922 if (dname == 'B') { 923 log.debug("convertFromUnsignedC(): Java convert byte to short"); 924 short[] sdata = null; 925 if (dataOUT == null) { 926 sdata = new short[size]; 927 } 928 else { 929 sdata = (short[]) dataOUT; 930 } 931 932 byte[] bdata = (byte[]) dataIN; 933 for (int i = 0; i < size; i++) { 934 sdata[i] = (short) ((bdata[i] + 256) & 0xFF); 935 } 936 937 dataOUT = sdata; 938 } 939 else if (dname == 'S') { 940 log.debug("convertFromUnsignedC(): Java convert short to int"); 941 int[] idata = null; 942 if (dataOUT == null) { 943 idata = new int[size]; 944 } 945 else { 946 idata = (int[]) dataOUT; 947 } 948 949 short[] sdata = (short[]) dataIN; 950 for (int i = 0; i < size; i++) { 951 idata[i] = (sdata[i] + 65536) & 0xFFFF; 952 } 953 954 dataOUT = idata; 955 } 956 else if (dname == 'I') { 957 log.debug("convertFromUnsignedC(): Java convert int to long"); 958 long[] ldata = null; 959 if (dataOUT == null) { 960 ldata = new long[size]; 961 } 962 else { 963 ldata = (long[]) dataOUT; 964 } 965 966 int[] idata = (int[]) dataIN; 967 for (int i = 0; i < size; 
i++) { 968 ldata[i] = (idata[i] + 4294967296L) & 0xFFFFFFFFL; 969 } 970 971 dataOUT = ldata; 972 } 973 else { 974 dataOUT = dataIN; 975 log.debug("convertFromUnsignedC(): Java does not support unsigned long"); 976 } 977 978 return dataOUT; 979 } 980 981 /** 982 * @deprecated Not for public use in the future. <br> 983 * Using {@link #convertToUnsignedC(Object, Object)} 984 * 985 * @param dataIN 986 * the input 1D array of the unsigned C-type integers. 987 * 988 * @return the upgraded 1D array of Java integers. 989 */ 990 @Deprecated 991 public static Object convertToUnsignedC(Object dataIN) { 992 return Dataset.convertToUnsignedC(dataIN, null); 993 } 994 995 /** 996 * Converts the array of converted unsigned integers back to unsigned C-type 997 * integer data in memory. 998 * <p> 999 * If memory data of unsigned integers is converted by 1000 * convertFromUnsignedC(), convertToUnsignedC() must be called to convert 1001 * the data back to unsigned C before data is written into file. 1002 * 1003 * @see #convertFromUnsignedC(Object, Object) 1004 * 1005 * @param dataIN 1006 * the input array of the Java integer. 1007 * @param dataOUT 1008 * the output array of the unsigned C-type integer. 1009 * 1010 * @return the converted data of unsigned C-type integer array. 
1011 */ 1012 @SuppressWarnings("rawtypes") 1013 public static Object convertToUnsignedC(Object dataIN, Object dataOUT) { 1014 if (dataIN == null) { 1015 log.debug("convertToUnsignedC(): data_in is null"); 1016 return null; 1017 } 1018 1019 Class dataClass = dataIN.getClass(); 1020 if (!dataClass.isArray()) { 1021 log.debug("convertToUnsignedC(): data_in not an array"); 1022 return null; 1023 } 1024 1025 if (dataOUT != null) { 1026 Class dataClassOut = dataOUT.getClass(); 1027 if (!dataClassOut.isArray() || (Array.getLength(dataIN) != Array.getLength(dataOUT))) { 1028 log.debug("convertToUnsignedC(): data_out not an array or does not match data_in size"); 1029 dataOUT = null; 1030 } 1031 } 1032 1033 String cname = dataClass.getName(); 1034 char dname = cname.charAt(cname.lastIndexOf('[') + 1); 1035 int size = Array.getLength(dataIN); 1036 log.trace("convertToUnsignedC(): cname={} dname={} size={}", cname, dname, size); 1037 1038 if (dname == 'S') { 1039 log.debug("convertToUnsignedC(): Java convert short to byte"); 1040 byte[] bdata = null; 1041 if (dataOUT == null) { 1042 bdata = new byte[size]; 1043 } 1044 else { 1045 bdata = (byte[]) dataOUT; 1046 } 1047 short[] sdata = (short[]) dataIN; 1048 for (int i = 0; i < size; i++) { 1049 bdata[i] = (byte) sdata[i]; 1050 } 1051 dataOUT = bdata; 1052 } 1053 else if (dname == 'I') { 1054 log.debug("convertToUnsignedC(): Java convert int to short"); 1055 short[] sdata = null; 1056 if (dataOUT == null) { 1057 sdata = new short[size]; 1058 } 1059 else { 1060 sdata = (short[]) dataOUT; 1061 } 1062 int[] idata = (int[]) dataIN; 1063 for (int i = 0; i < size; i++) { 1064 sdata[i] = (short) idata[i]; 1065 } 1066 dataOUT = sdata; 1067 } 1068 else if (dname == 'J') { 1069 log.debug("convertToUnsignedC(): Java convert long to int"); 1070 int[] idata = null; 1071 if (dataOUT == null) { 1072 idata = new int[size]; 1073 } 1074 else { 1075 idata = (int[]) dataOUT; 1076 } 1077 long[] ldata = (long[]) dataIN; 1078 for (int i = 0; i < size; 
i++) { 1079 idata[i] = (int) ldata[i]; 1080 } 1081 dataOUT = idata; 1082 } 1083 else { 1084 dataOUT = dataIN; 1085 log.debug("convertToUnsignedC(): Java does not support unsigned long"); 1086 } 1087 1088 return dataOUT; 1089 } 1090 1091 /** 1092 * Converts an array of bytes into an array of Strings for a fixed string 1093 * dataset. 1094 * <p> 1095 * A C-string is an array of chars while an Java String is an object. When a 1096 * string dataset is read into a Java application, the data is stored in an 1097 * array of Java bytes. byteToString() is used to convert the array of bytes 1098 * into an array of Java strings so that applications can display and modify 1099 * the data content. 1100 * <p> 1101 * For example, the content of a two element C string dataset is {"ABC", 1102 * "abc"}. Java applications will read the data into a byte array of {65, 1103 * 66, 67, 97, 98, 99). byteToString(bytes, 3) returns an array of Java 1104 * String of strs[0]="ABC", and strs[1]="abc". 1105 * <p> 1106 * If memory data of strings is converted to Java Strings, stringToByte() 1107 * must be called to convert the memory data back to byte array before data 1108 * is written to file. 1109 * 1110 * @see #stringToByte(String[], int) 1111 * 1112 * @param bytes 1113 * the array of bytes to convert. 1114 * @param length 1115 * the length of string. 1116 * 1117 * @return the array of Java String. 
1118 */ 1119 public static final String[] byteToString(byte[] bytes, int length) { 1120 if (bytes == null) { 1121 log.debug("byteToString(): input is null"); 1122 return null; 1123 } 1124 1125 int n = bytes.length / length; 1126 log.trace("byteToString(): n={} from length of {}", n, length); 1127 String[] strArray = new String[n]; 1128 String str = null; 1129 int idx = 0; 1130 for (int i = 0; i < n; i++) { 1131 str = new String(bytes, i * length, length); 1132 idx = str.indexOf('\0'); 1133 if (idx >= 0) { 1134 str = str.substring(0, idx); 1135 } 1136 1137 // trim only the end 1138 int end = str.length(); 1139 while (end > 0 && str.charAt(end - 1) <= '\u0020') 1140 end--; 1141 1142 strArray[i] = (end <= 0) ? "" : str.substring(0, end); 1143 } 1144 1145 return strArray; 1146 } 1147 1148 /** 1149 * Converts a string array into an array of bytes for a fixed string 1150 * dataset. 1151 * <p> 1152 * If memory data of strings is converted to Java Strings, stringToByte() 1153 * must be called to convert the memory data back to byte array before data 1154 * is written to file. 1155 * 1156 * @see #byteToString(byte[] bytes, int length) 1157 * 1158 * @param strings 1159 * the array of string. 1160 * @param length 1161 * the length of string. 1162 * 1163 * @return the array of bytes. 
1164 */ 1165 public static final byte[] stringToByte(String[] strings, int length) { 1166 if (strings == null) { 1167 log.debug("stringToByte(): input is null"); 1168 return null; 1169 } 1170 1171 int size = strings.length; 1172 byte[] bytes = new byte[size * length]; 1173 log.trace("stringToByte(): size={} length={}", size, length); 1174 StringBuilder strBuff = new StringBuilder(length); 1175 for (int i = 0; i < size; i++) { 1176 // initialize the string with spaces 1177 strBuff.replace(0, length, " "); 1178 1179 if (strings[i] != null) { 1180 if (strings[i].length() > length) { 1181 strings[i] = strings[i].substring(0, length); 1182 } 1183 strBuff.replace(0, length, strings[i]); 1184 } 1185 1186 strBuff.setLength(length); 1187 System.arraycopy(strBuff.toString().getBytes(), 0, bytes, length * i, length); 1188 } 1189 1190 return bytes; 1191 } 1192 1193 /** 1194 * Returns the array of strings that represent the dimension names. Returns 1195 * null if there is no dimension name. 1196 * <p> 1197 * Some datasets have pre-defined names for each dimension such as 1198 * "Latitude" and "Longitude". getDimNames() returns these pre-defined 1199 * names. 1200 * 1201 * @return the names of dimensions, or null if there is no dimension name. 1202 */ 1203 public final String[] getDimNames() { 1204 if (!inited) init(); 1205 1206 return dimNames; 1207 } 1208 1209 /** 1210 * Checks if a given datatype is a string. Sub-classes must replace this 1211 * default implementation. 1212 * 1213 * @param tid 1214 * The data type identifier. 1215 * 1216 * @return true if the datatype is a string; otherwise returns false. 1217 */ 1218 public boolean isString(long tid) { 1219 return false; 1220 } 1221 1222 /** 1223 * Returns the size in bytes of a given datatype. Sub-classes must replace 1224 * this default implementation. 1225 * 1226 * @param tid 1227 * The data type identifier. 
*
     * @return The size of the datatype. This default implementation always
     *         returns -1.
     */
    public long getSize(long tid) {
        return -1;
    }

    /**
     * Get Class of the original data buffer if converted.
     *
     * @return the Class of originalBuf, or null if there is no original
     *         buffer.
     */
    @Override
    @SuppressWarnings("rawtypes")
    public final Class getOriginalClass() {
        // Without an original buffer there is no class to report; return null instead of
        // failing with a NullPointerException.
        if (originalBuf == null) {
            return null;
        }

        return originalBuf.getClass();
    }

    /**
     * Checks if dataset is virtual. Sub-classes must replace
     * this default implementation.
     *
     * @return true if the dataset is virtual; otherwise returns false. This
     *         default implementation always returns false.
     */
    public boolean isVirtual() {
        return false;
    }

    /**
     * Gets the source file name at index if dataset is virtual. Sub-classes must replace
     * this default implementation.
     *
     * @param index
     *            the index of the source file name to get.
     *
     * @return filename if the dataset is virtual; otherwise returns null. This
     *         default implementation always returns null.
     */
    public String getVirtualFilename(int index) {
        return null;
    }

    /**
     * Gets the number of source files if dataset is virtual. Sub-classes must replace
     * this default implementation.
     *
     * @return the list size if the dataset is virtual; otherwise returns negative.
     *         This default implementation always returns -1.
     */
    public int getVirtualMaps() {
        return -1;
    }
}