001/***************************************************************************** 002 * Copyright by The HDF Group. * 003 * Copyright by the Board of Trustees of the University of Illinois. * 004 * All rights reserved. * 005 * * 006 * This file is part of the HDF Java Products distribution. * 007 * The full copyright notice, including terms governing use, modification, * 008 * and redistribution, is contained in the files COPYING and Copyright.html. * 009 * COPYING can be found at the root of the source code distribution tree. * 010 * Or, see https://support.hdfgroup.org/products/licenses.html * 011 * If you do not have access to either file, you may request a copy from * 012 * help@hdfgroup.org. * 013 ****************************************************************************/ 014 015package hdf.object; 016 017import java.lang.reflect.Array; 018import java.util.List; 019 020/** 021 * The abstract class provides general APIs to create and manipulate dataset 022 * objects, and retrieve dataset properties, datatype and dimension sizes. 023 * <p> 024 * This class provides two convenient functions, read()/write(), to read/write 025 * data values. Reading/writing data may take many library calls if we use the 026 * library APIs directly. The read() and write functions hide all the details of 027 * these calls from users. 028 * <p> 029 * For more details on dataset, 030 * see <b> <a href="https://support.hdfgroup.org/HDF5/doc/UG/HDF5_Users_Guide-Responsive%20HTML5/index.html">HDF5 User's Guide</a> </b> 031 * <p> 032 * 033 * @see hdf.object.ScalarDS 034 * @see hdf.object.CompoundDS 035 * 036 * @version 1.1 9/4/2007 037 * @author Peter X. 
Cao
 */
public abstract class Dataset extends HObject implements MetaDataContainer, DataFormat {
    /** Serialization version identifier of this class. */
    private static final long serialVersionUID = -3360885430038261178L;

    /** SLF4J logger shared by all instances of this class. */
    private static final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(Dataset.class);

    /**
     * The memory buffer that holds the raw data array of the dataset.
     */
    protected transient Object data;

    /**
     * The number of dimensions of the dataset.
     */
    protected int rank;

    /**
     * The current dimension sizes of the dataset.
     */
    protected long[] dims;

    /**
     * The maximum dimension sizes of the dataset.
     */
    protected long[] maxDims;

    /**
     * Array that contains the number of data points selected (for read/write)
     * in each dimension.
     * <p>
     * The selected size must be less than or equal to the current dimension size.
     * A subset of a rectangle selection is defined by the starting position and
     * selected sizes.
     * <p>
     * For example, if a 4 X 5 dataset is as follows:
     *
     * <pre>
     *     0,  1,  2,  3,  4
     *    10, 11, 12, 13, 14
     *    20, 21, 22, 23, 24
     *    30, 31, 32, 33, 34
     * long[] dims = {4, 5};
     * long[] startDims = {1, 2};
     * long[] selectedDims = {3, 3};
     * then the following subset is selected by the startDims and selectedDims above:
     *    12, 13, 14
     *    22, 23, 24
     *    32, 33, 34
     * </pre>
     */
    protected long[] selectedDims;

    /**
     * The starting position of each dimension of a selected subset. With both
     * the starting position and selected sizes, the subset of a rectangle
     * selection is fully defined.
     */
    protected long[] startDims;

    /**
     * Array that contains the indices of the dimensions selected for display.
     * <p>
     * <B>selectedIndex[] is provided for two purposes:</B>
     * <OL>
     * <LI>
     * selectedIndex[] is used to indicate the order of dimensions for display,
     * i.e.
selectedIndex[0] = row, selectedIndex[1] = column and
     * selectedIndex[2] = depth. For example, for a four dimension dataset, if
     * selectedIndex[] is {1, 2, 3}, then dim[1] is selected as row index,
     * dim[2] is selected as column index and dim[3] is selected as depth index.
     * <LI>
     * selectedIndex[] is also used to select dimensions for display for
     * datasets with three or more dimensions. We assume that applications such
     * as HDFView can only display data up to three dimensions (a 2D
     * spreadsheet/image with a third dimension that the 2D spreadsheet/image is
     * cut from). For datasets with more than three dimensions, we need
     * selectedIndex[] to store which three dimensions are chosen for display.
     * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3},
     * then dim[1] is selected as row index, dim[2] is selected as column index
     * and dim[3] is selected as depth index. dim[0] is not selected. Its
     * location is fixed at 0 by default.
     * </OL>
     */
    protected final int[] selectedIndex;

    /**
     * The number of elements to move from the start location in each dimension.
     * For example, if selectedStride[0] = 2, every other data point is selected
     * along dim[0].
     */
    protected long[] selectedStride;

    /**
     * The array of dimension sizes for a chunk.
     */
    protected long[] chunkSize;

    /** The compression information. */
    protected StringBuilder compression;

    /**
     * Label text for a GZIP compression description. The literal ends with
     * "level = ", so the level value is presumably appended by the code that
     * uses it (no use is visible in this part of the file).
     */
    public static final String COMPRESSION_GZIP_TXT = "GZIP: level = ";

    /** The filters information. */
    protected StringBuilder filters;

    /** The storage layout information. */
    protected StringBuilder storageLayout;

    /** The storage information. */
    protected StringBuilder storage;

    /** The datatype object of the dataset. */
    protected Datatype datatype;

    /**
     * Array of strings that represent the dimension names.
It is null if dimension names do not exist.
     */
    protected String[] dimNames;

    /** Flag to indicate if the byte[] array is converted to strings. */
    protected boolean convertByteToString = true;

    /** Flag to indicate if data values are loaded into memory. */
    protected boolean isDataLoaded = false;

    /** Flag to indicate if this dataset has been initialized. */
    protected boolean inited = false;

    /** The number of data points in the memory buffer. */
    protected long nPoints = 1;

    /**
     * The data buffer that contains the raw data read directly from the file
     * (before any data conversion).
     */
    protected transient Object originalBuf = null;

    /**
     * The array that holds the converted data of unsigned C-type integers.
     * <p>
     * For example, suppose that the original data is an array of unsigned
     * 16-bit short integers. Since Java does not support unsigned integers, the
     * data is converted to an array of 32-bit signed integers. In that case, the
     * converted buffer is the array of 32-bit signed integers.
     */
    protected transient Object convertedBuf = null;

    /**
     * Constructs a Dataset object with a given file, name and path.
     * <p>
     * Delegates to the four-argument constructor with a null oid.
     *
     * @param theFile
     *            the file that contains the dataset.
     * @param dsName
     *            the name of the Dataset, e.g. "dset1".
     * @param dsPath
     *            the full group path of this Dataset, e.g. "/arrays/".
     */
    public Dataset(FileFormat theFile, String dsName, String dsPath) {
        this(theFile, dsName, dsPath, null);
    }

    /**
     * @deprecated Not for public use in the future. <br>
     *             Use {@link #Dataset(FileFormat, String, String)}
     *
     * @param theFile
     *            the file that contains the dataset.
     * @param dsName
     *            the name of the Dataset, e.g. "dset1".
     * @param dsPath
     *            the full group path of this Dataset, e.g. "/arrays/".
     * @param oid
     *            the oid of this Dataset.
210 */ 211 @Deprecated 212 public Dataset(FileFormat theFile, String dsName, String dsPath, long[] oid) { 213 super(theFile, dsName, dsPath, oid); 214 log.trace("Dataset: start {}", dsName); 215 216 datatype = null; 217 rank = -1; 218 data = null; 219 dims = null; 220 maxDims = null; 221 selectedDims = null; 222 startDims = null; 223 selectedStride = null; 224 chunkSize = null; 225 compression = new StringBuilder("NONE"); 226 filters = new StringBuilder("NONE"); 227 storageLayout = new StringBuilder("NONE"); 228 storage = new StringBuilder("NONE"); 229 dimNames = null; 230 231 selectedIndex = new int[3]; 232 selectedIndex[0] = 0; 233 selectedIndex[1] = 1; 234 selectedIndex[2] = 2; 235 } 236 237 /** 238 * Clears memory held by the dataset, such as the data buffer. 239 */ 240 @SuppressWarnings("rawtypes") 241 public void clear() { 242 if (data != null) { 243 if (data instanceof List) { 244 ((List) data).clear(); 245 } 246 data = null; 247 originalBuf = null; 248 convertedBuf = null; 249 } 250 isDataLoaded = false; 251 } 252 253 /** 254 * Returns the rank (number of dimensions) of the dataset. 255 * 256 * @return the number of dimensions of the dataset. 257 */ 258 @Override 259 public final int getRank() { 260 if (!inited) 261 init(); 262 263 return rank; 264 } 265 266 /** 267 * Returns the array that contains the dimension sizes of the dataset. 268 * 269 * @return the dimension sizes of the dataset. 270 */ 271 @Override 272 public final long[] getDims() { 273 if (!inited) 274 init(); 275 276 return dims; 277 } 278 279 /** 280 * Returns the array that contains the max dimension sizes of the dataset. 281 * 282 * @return the max dimension sizes of the dataset. 283 */ 284 public final long[] getMaxDims() { 285 if (!inited) init(); 286 287 if (maxDims == null) return dims; 288 289 return maxDims; 290 } 291 292 /** 293 * Returns the dimension sizes of the selected subset. 294 * <p> 295 * The SelectedDims is the number of data points of the selected subset. 
296 * Applications can use this array to change the size of selected subset. 297 * 298 * The selected size must be less than or equal to the current dimension size. 299 * Combined with the starting position, selected sizes and stride, the 300 * subset of a rectangle selection is fully defined. 301 * <p> 302 * For example, if a 4 X 5 dataset is as follows: 303 * 304 * <pre> 305 * 0, 1, 2, 3, 4 306 * 10, 11, 12, 13, 14 307 * 20, 21, 22, 23, 24 308 * 30, 31, 32, 33, 34 309 * long[] dims = {4, 5}; 310 * long[] startDims = {1, 2}; 311 * long[] selectedDims = {3, 3}; 312 * long[] selectedStride = {1, 1}; 313 * then the following subset is selected by the startDims and selectedDims 314 * 12, 13, 14 315 * 22, 23, 24 316 * 32, 33, 34 317 * </pre> 318 * 319 * @return the dimension sizes of the selected subset. 320 */ 321 @Override 322 public final long[] getSelectedDims() { 323 if (!inited) init(); 324 325 return selectedDims; 326 } 327 328 /** 329 * Returns the starting position of a selected subset. 330 * <p> 331 * Applications can use this array to change the starting position of a 332 * selection. Combined with the selected dimensions, selected sizes and 333 * stride, the subset of a rectangle selection is fully defined. 334 * <p> 335 * For example, if a 4 X 5 dataset is as follows: 336 * 337 * <pre> 338 * 0, 1, 2, 3, 4 339 * 10, 11, 12, 13, 14 340 * 20, 21, 22, 23, 24 341 * 30, 31, 32, 33, 34 342 * long[] dims = {4, 5}; 343 * long[] startDims = {1, 2}; 344 * long[] selectedDims = {3, 3}; 345 * long[] selectedStride = {1, 1}; 346 * then the following subset is selected by the startDims and selectedDims 347 * 12, 13, 14 348 * 22, 23, 24 349 * 32, 33, 34 350 * </pre> 351 * 352 * @return the starting position of a selected subset. 353 */ 354 @Override 355 public final long[] getStartDims() { 356 if (!inited) init(); 357 358 return startDims; 359 } 360 361 /** 362 * Returns the selectedStride of the selected dataset. 
363 * <p> 364 * Applications can use this array to change how many elements to move in 365 * each dimension. 366 * 367 * Combined with the starting position and selected sizes, the subset of a 368 * rectangle selection is defined. 369 * <p> 370 * For example, if a 4 X 5 dataset is as follows: 371 * 372 * <pre> 373 * 0, 1, 2, 3, 4 374 * 10, 11, 12, 13, 14 375 * 20, 21, 22, 23, 24 376 * 30, 31, 32, 33, 34 377 * long[] dims = {4, 5}; 378 * long[] startDims = {0, 0}; 379 * long[] selectedDims = {2, 2}; 380 * long[] selectedStride = {2, 3}; 381 * then the following subset is selected by the startDims and selectedDims 382 * 0, 3 383 * 20, 23 384 * </pre> 385 * 386 * @return the selectedStride of the selected dataset. 387 */ 388 @Override 389 public final long[] getStride() { 390 if (!inited) init(); 391 392 if (rank <= 0) { 393 return null; 394 } 395 396 if (selectedStride == null) { 397 selectedStride = new long[rank]; 398 for (int i = 0; i < rank; i++) { 399 selectedStride[i] = 1; 400 } 401 } 402 403 return selectedStride; 404 } 405 406 /** 407 * Sets the flag that indicates if a byte array is converted to a string 408 * array. 409 * <p> 410 * In a string dataset, the raw data from file is stored in a byte array. By 411 * default, this byte array is converted to an array of strings. For a large 412 * dataset (e.g. more than one million strings), the conversion takes a long 413 * time and requires a lot of memory space to store the strings. In some 414 * applications, such a conversion can be delayed. For example, A GUI 415 * application may convert only the part of the strings that is visible to the 416 * users, not the entire data array. 417 * <p> 418 * setConvertByteToString(boolean b) allows users to set the flag so that 419 * applications can choose to perform the byte-to-string conversion or not. 420 * If the flag is set to false, the getData() returns an array of byte 421 * instead of an array of strings. 
422 * 423 * @param b 424 * convert bytes to strings if b is true; otherwise, if false, do 425 * not convert bytes to strings. 426 */ 427 public final void setConvertByteToString(boolean b) { 428 convertByteToString = b; 429 } 430 431 /** 432 * Returns the flag that indicates if a byte array is converted to a string 433 * array. 434 * 435 * @return true if byte array is converted to string; otherwise, returns 436 * false if there is no conversion. 437 */ 438 public final boolean getConvertByteToString() { 439 return convertByteToString; 440 } 441 442 /** 443 * Reads the raw data of the dataset from file to a byte array. 444 * <p> 445 * readBytes() reads raw data to an array of bytes instead of array of its 446 * datatype. For example, for a one-dimension 32-bit integer dataset of 447 * size 5, readBytes() returns a byte array of size 20 instead of an 448 * int array of 5. 449 * <p> 450 * readBytes() can be used to copy data from one dataset to another 451 * efficiently because the raw data is not converted to its native type, it 452 * saves memory space and CPU time. 453 * 454 * @return the byte array of the raw data. 455 * 456 * @throws Exception if data can not be read 457 */ 458 public abstract byte[] readBytes() throws Exception; 459 460 /** 461 * Writes the memory buffer of this dataset to file. 462 * 463 * @throws Exception if buffer can not be written 464 */ 465 @Override 466 public final void write() throws Exception { 467 if (data != null) { 468 write(data); 469 } 470 } 471 472 /** 473 * Creates a new dataset and writes the data buffer to the new dataset. 474 * <p> 475 * This function allows applications to create a new dataset for a given 476 * data buffer. For example, users can select a specific interesting part 477 * from a large image and create a new image with the selection. 478 * <p> 479 * The new dataset retains the datatype and dataset creation properties of 480 * this dataset. 
*
     * @param pgroup
     *            the group which the dataset is copied to.
     * @param name
     *            the name of the new dataset.
     * @param dims
     *            the dimension sizes of the new dataset.
     * @param data
     *            the data values of the subset to be copied.
     *
     * @return the new dataset.
     *
     * @throws Exception if dataset can not be copied
     */
    public abstract Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception;

    /**
     * Returns the initialization flag of this dataset.
     *
     * @return the current value of the {@code inited} flag.
     */
    @Override
    public final boolean isInited() {
        return inited;
    }

    /**
     * Returns the data buffer of the dataset in memory.
     * <p>
     * If data is already loaded into memory, returns the data; otherwise, calls
     * read() to read data from file into a memory buffer and returns the memory
     * buffer.
     * <p>
     * By default, the whole dataset is read into memory. Users can also select
     * a subset to read. Subsetting is done in an implicit way.
     * <p>
     * <b>How to Select a Subset</b>
     * <p>
     * A selection is specified by three arrays: start, stride and count.
     * <ol>
     * <li>start: offset of a selection
     * <li>stride: determines how many elements to move in each dimension
     * <li>count: number of elements to select in each dimension
     * </ol>
     * getStartDims(), getStride() and getSelectedDims() return the start,
     * stride and count arrays respectively. Applications can make a selection
     * by changing the values of the arrays.
     * <p>
     * The following example shows how to make a subset. In the example, the
     * dataset is a 4-dimensional array of [200][100][50][10], i.e.
dims[0]=200;
     * dims[1]=100; dims[2]=50; dims[3]=10; <br>
     * We want to select every other data point in dims[1] and dims[2]
     *
     * <pre>
     * int rank = dataset.getRank(); // number of dimensions of the dataset
     * long[] dims = dataset.getDims(); // the dimension sizes of the dataset
     * long[] selected = dataset.getSelectedDims(); // the selected size of the dataset
     * long[] start = dataset.getStartDims(); // the offset of the selection
     * long[] stride = dataset.getStride(); // the stride of the dataset
     * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for display
     *
     * // select dim1 and dim2 as 2D data for display, and slice through dim0
     * selectedIndex[0] = 1;
     * selectedIndex[1] = 2;
     * selectedIndex[2] = 0;
     *
     * // reset the selection arrays
     * for (int i = 0; i &lt; rank; i++) {
     *     start[i] = 0;
     *     selected[i] = 1;
     *     stride[i] = 1;
     * }
     *
     * // set stride to 2 on dim1 and dim2 so that every other data point is
     * // selected.
     * stride[1] = 2;
     * stride[2] = 2;
     *
     * // set the selection size of dim1 and dim2
     * selected[1] = dims[1] / stride[1];
     * selected[2] = dims[2] / stride[2];
     *
     * // when dataset.getData() is called, the selection above will be used since
     * // the dimension arrays are passed by reference. Changes of these arrays
     * // outside the dataset object directly change the values of these arrays
     * // in the dataset object.
     * </pre>
     * <p>
     * For ScalarDS, the memory data buffer is a one-dimensional array of byte,
     * short, int, float, double or String type based on the datatype of the
     * dataset.
     * <p>
     * For CompoundDS, the memory data object is a java.util.List object. Each
     * element of the list is a data array that corresponds to a compound field.
570 * <p> 571 * For example, if compound dataset "comp" has the following nested 572 * structure, and member datatypes 573 * 574 * <pre> 575 * comp --> m01 (int) 576 * comp --> m02 (float) 577 * comp --> nest1 --> m11 (char) 578 * comp --> nest1 --> m12 (String) 579 * comp --> nest1 --> nest2 --> m21 (long) 580 * comp --> nest1 --> nest2 --> m22 (double) 581 * </pre> 582 * 583 * getData() returns a list of six arrays: {int[], float[], char[], 584 * String[], long[] and double[]}. 585 * 586 * @return the memory buffer of the dataset. 587 * 588 * @throws Exception if object can not be read 589 * @throws OutOfMemoryError if memory is exhausted 590 */ 591 @Override 592 public final Object getData() throws Exception, OutOfMemoryError { 593 if (!isDataLoaded) { 594 data = read(); // load the data 595 if (data != null) { 596 originalBuf = data; 597 isDataLoaded = true; 598 nPoints = 1; 599 log.trace("getData: selectedDims length={}",selectedDims.length); 600 for (int j = 0; j < selectedDims.length; j++) { 601 nPoints *= selectedDims[j]; 602 } 603 } 604 log.trace("getData: read {}", nPoints); 605 } 606 607 return data; 608 } 609 610 /** 611 * Not for public use in the future. 612 * <p> 613 * setData() is not safe to use because it changes memory buffer 614 * of the dataset object. Dataset operations such as write/read 615 * will fail if the buffer type or size is changed. 616 * 617 * @param d the object data -must be an array of Objects 618 */ 619 @Override 620 public final void setData(Object d) { 621 if (!(this instanceof Attribute)) 622 throw new UnsupportedOperationException("setData: unsupported for non-Attribute objects"); 623 624 data = d; 625 } 626 627 /** 628 * Clears the current data buffer in memory and forces the next read() to load 629 * the data from file. 630 * <p> 631 * The function read() loads data from file into memory only if the data is 632 * not read. If data is already in memory, read() just returns the memory 633 * buffer. 
Sometimes we want to force read() to re-read data from file. For 634 * example, when the selection is changed, we need to re-read the data. 635 * 636 * @see #getData() 637 * @see #read() 638 */ 639 @Override 640 public void clearData() { 641 isDataLoaded = false; 642 } 643 644 /** 645 * Returns the dimension size of the vertical axis. 646 * 647 * <p> 648 * This function is used by GUI applications such as HDFView. GUI 649 * applications display a dataset in a 2D table or 2D image. The display 650 * order is specified by the index array of selectedIndex as follow: 651 * <dl> 652 * <dt>selectedIndex[0] -- height</dt> 653 * <dd>The vertical axis</dd> 654 * <dt>selectedIndex[1] -- width</dt> 655 * <dd>The horizontal axis</dd> 656 * <dt>selectedIndex[2] -- depth</dt> 657 * <dd>The depth axis is used for 3 or more dimensional datasets.</dd> 658 * </dl> 659 * Applications can use getSelectedIndex() to access and change the display 660 * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the 661 * following code will set the height=200 and width=50. 662 * 663 * <pre> 664 * int[] selectedIndex = dataset.getSelectedIndex(); 665 * selectedIndex[0] = 0; 666 * selectedIndex[1] = 1; 667 * </pre> 668 * 669 * @see #getSelectedIndex() 670 * @see #getWidth() 671 * 672 * @return the size of dimension of the vertical axis. 673 */ 674 @Override 675 public final long getHeight() { 676 if (!inited) init(); 677 678 if ((selectedDims == null) || (selectedIndex == null)) { 679 return 0; 680 } 681 682 return selectedDims[selectedIndex[0]]; 683 } 684 685 /** 686 * Returns the dimension size of the horizontal axis. 687 * 688 * <p> 689 * This function is used by GUI applications such as HDFView. GUI 690 * applications display a dataset in 2D Table or 2D Image. 
The display order is 691 * specified by the index array of selectedIndex as follow: 692 * <dl> 693 * <dt>selectedIndex[0] -- height</dt> 694 * <dd>The vertical axis</dd> 695 * <dt>selectedIndex[1] -- width</dt> 696 * <dd>The horizontal axis</dd> 697 * <dt>selectedIndex[2] -- depth</dt> 698 * <dd>The depth axis, which is used for 3 or more dimension datasets.</dd> 699 * </dl> 700 * Applications can use getSelectedIndex() to access and change the display 701 * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the 702 * following code will set the height=200 and width=100. 703 * 704 * <pre> 705 * int[] selectedIndex = dataset.getSelectedIndex(); 706 * selectedIndex[0] = 0; 707 * selectedIndex[1] = 1; 708 * </pre> 709 * 710 * @see #getSelectedIndex() 711 * @see #getHeight() 712 * 713 * @return the size of dimension of the horizontal axis. 714 */ 715 @Override 716 public final long getWidth() { 717 if (!inited) init(); 718 719 if ((selectedDims == null) || (selectedIndex == null)) { 720 return 0; 721 } 722 723 if ((selectedDims.length < 2) || (selectedIndex.length < 2)) { 724 return 1; 725 } 726 727 return selectedDims[selectedIndex[1]]; 728 } 729 730 /** 731 * Returns the indices of display order. 732 * <p> 733 * 734 * selectedIndex[] is provided for two purposes: 735 * <OL> 736 * <LI> 737 * selectedIndex[] is used to indicate the order of dimensions for display. 738 * selectedIndex[0] is for the row, selectedIndex[1] is for the column and 739 * selectedIndex[2] for the depth. 740 * <p> 741 * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3}, 742 * then dim[1] is selected as row index, dim[2] is selected as column index 743 * and dim[3] is selected as depth index. 744 * <LI> 745 * selectedIndex[] is also used to select dimensions for display for 746 * datasets with three or more dimensions. 
We assume that applications such 747 * as HDFView can only display data values up to three dimensions (2D 748 * spreadsheet/image with a third dimension which the 2D spreadsheet/image 749 * is selected from). For datasets with more than three dimensions, we need 750 * selectedIndex[] to tell applications which three dimensions are chosen 751 * for display. <br> 752 * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3}, 753 * then dim[1] is selected as row index, dim[2] is selected as column index 754 * and dim[3] is selected as depth index. dim[0] is not selected. Its 755 * location is fixed at 0 by default. 756 * </OL> 757 * 758 * @return the array of the indices of display order. 759 */ 760 @Override 761 public final int[] getSelectedIndex() { 762 if (!inited) init(); 763 764 return selectedIndex; 765 } 766 767 /** 768 * Returns the string representation of compression information. 769 * <p> 770 * For example, 771 * "SZIP: Pixels per block = 8: H5Z_FILTER_CONFIG_DECODE_ENABLED". 772 * 773 * @return the string representation of compression information. 774 */ 775 @Override 776 public final String getCompression() { 777 if (!inited) init(); 778 779 return compression.toString(); 780 } 781 782 /** 783 * Returns the string representation of filter information. 784 * 785 * @return the string representation of filter information. 786 */ 787 public final String getFilters() { 788 if (!inited) init(); 789 790 return filters.toString(); 791 } 792 793 /** 794 * Returns the string representation of storage layout information. 795 * 796 * @return the string representation of storage layout information. 797 */ 798 public final String getStorageLayout() { 799 if (!inited) init(); 800 801 return storageLayout.toString(); 802 } 803 804 /** 805 * Returns the string representation of storage information. 806 * 807 * @return the string representation of storage information. 
808 */ 809 public final String getStorage() { 810 if (!inited) init(); 811 812 return storage.toString(); 813 } 814 815 /** 816 * Returns the array that contains the dimension sizes of the chunk of the 817 * dataset. Returns null if the dataset is not chunked. 818 * 819 * @return the array of chunk sizes or returns null if the dataset is not 820 * chunked. 821 */ 822 public final long[] getChunkSize() { 823 if (!inited) init(); 824 825 return chunkSize; 826 } 827 828 @Override 829 public Datatype getDatatype() { 830 return datatype; 831 } 832 833 /** 834 * @deprecated Not for public use in the future. <br> 835 * Using {@link #convertFromUnsignedC(Object, Object)} 836 * 837 * @param dataIN the object data 838 * 839 * @return the converted object 840 */ 841 @Deprecated 842 public static Object convertFromUnsignedC(Object dataIN) { 843 return Dataset.convertFromUnsignedC(dataIN, null); 844 } 845 846 /** 847 * Converts one-dimension array of unsigned C-type integers to a new array 848 * of appropriate Java integer in memory. 849 * <p> 850 * Since Java does not support unsigned integer, values of unsigned C-type 851 * integers must be converted into its appropriate Java integer. Otherwise, 852 * the data value will not displayed correctly. For example, if an unsigned 853 * C byte, x = 200, is stored into an Java byte y, y will be -56 instead of 854 * the correct value of 200. 
855 * <p> 856 * Unsigned C integers are upgrade to Java integers according to the 857 * following table: 858 * <table border=1> 859 * <caption><b>Mapping Unsigned C Integers to Java Integers</b></caption> 860 * <TR> 861 * <TD><B>Unsigned C Integer</B></TD> 862 * <TD><B>JAVA Intege</B>r</TD> 863 * </TR> 864 * <TR> 865 * <TD>unsigned byte</TD> 866 * <TD>signed short</TD> 867 * </TR> 868 * <TR> 869 * <TD>unsigned short</TD> 870 * <TD>signed int</TD> 871 * </TR> 872 * <TR> 873 * <TD>unsigned int</TD> 874 * <TD>signed long</TD> 875 * </TR> 876 * <TR> 877 * <TD>unsigned long</TD> 878 * <TD>signed long</TD> 879 * </TR> 880 * </TABLE> 881 * <strong>NOTE: this conversion cannot deal with unsigned 64-bit integers. 882 * Therefore, the values of unsigned 64-bit datasets may be wrong in Java 883 * applications</strong>. 884 * <p> 885 * If memory data of unsigned integers is converted by 886 * convertFromUnsignedC(), convertToUnsignedC() must be called to convert 887 * the data back to unsigned C before data is written into file. 888 * 889 * @see #convertToUnsignedC(Object, Object) 890 * 891 * @param dataIN 892 * the input 1D array of the unsigned C-type integers. 893 * @param dataOUT 894 * the output converted (or upgraded) 1D array of Java integers. 895 * 896 * @return the upgraded 1D array of Java integers. 
897 */ 898 @SuppressWarnings("rawtypes") 899 public static Object convertFromUnsignedC(Object dataIN, Object dataOUT) { 900 if (dataIN == null) { 901 log.debug("convertFromUnsignedC(): data_in is null"); 902 return null; 903 } 904 905 Class dataClass = dataIN.getClass(); 906 if (!dataClass.isArray()) { 907 log.debug("convertFromUnsignedC(): data_in not an array"); 908 return null; 909 } 910 911 if (dataOUT != null) { 912 Class dataClassOut = dataOUT.getClass(); 913 if (!dataClassOut.isArray() || (Array.getLength(dataIN) != Array.getLength(dataOUT))) { 914 log.debug("convertFromUnsignedC(): data_out not an array or does not match data_in size"); 915 dataOUT = null; 916 } 917 } 918 919 String cname = dataClass.getName(); 920 char dname = cname.charAt(cname.lastIndexOf('[') + 1); 921 int size = Array.getLength(dataIN); 922 log.trace("convertFromUnsignedC(): cname={} dname={} size={}", cname, dname, size); 923 924 if (dname == 'B') { 925 log.debug("convertFromUnsignedC(): Java convert byte to short"); 926 short[] sdata = null; 927 if (dataOUT == null) { 928 sdata = new short[size]; 929 } 930 else { 931 sdata = (short[]) dataOUT; 932 } 933 934 byte[] bdata = (byte[]) dataIN; 935 for (int i = 0; i < size; i++) { 936 sdata[i] = (short) ((bdata[i] + 256) & 0xFF); 937 } 938 939 dataOUT = sdata; 940 } 941 else if (dname == 'S') { 942 log.debug("convertFromUnsignedC(): Java convert short to int"); 943 int[] idata = null; 944 if (dataOUT == null) { 945 idata = new int[size]; 946 } 947 else { 948 idata = (int[]) dataOUT; 949 } 950 951 short[] sdata = (short[]) dataIN; 952 for (int i = 0; i < size; i++) { 953 idata[i] = (sdata[i] + 65536) & 0xFFFF; 954 } 955 956 dataOUT = idata; 957 } 958 else if (dname == 'I') { 959 log.debug("convertFromUnsignedC(): Java convert int to long"); 960 long[] ldata = null; 961 if (dataOUT == null) { 962 ldata = new long[size]; 963 } 964 else { 965 ldata = (long[]) dataOUT; 966 } 967 968 int[] idata = (int[]) dataIN; 969 for (int i = 0; i < size; 
i++) { 970 ldata[i] = (idata[i] + 4294967296L) & 0xFFFFFFFFL; 971 } 972 973 dataOUT = ldata; 974 } 975 else { 976 dataOUT = dataIN; 977 log.debug("convertFromUnsignedC(): Java does not support unsigned long"); 978 } 979 980 return dataOUT; 981 } 982 983 /** 984 * @deprecated Not for public use in the future. <br> 985 * Using {@link #convertToUnsignedC(Object, Object)} 986 * 987 * @param dataIN 988 * the input 1D array of the unsigned C-type integers. 989 * 990 * @return the upgraded 1D array of Java integers. 991 */ 992 @Deprecated 993 public static Object convertToUnsignedC(Object dataIN) { 994 return Dataset.convertToUnsignedC(dataIN, null); 995 } 996 997 /** 998 * Converts the array of converted unsigned integers back to unsigned C-type 999 * integer data in memory. 1000 * <p> 1001 * If memory data of unsigned integers is converted by 1002 * convertFromUnsignedC(), convertToUnsignedC() must be called to convert 1003 * the data back to unsigned C before data is written into file. 1004 * 1005 * @see #convertFromUnsignedC(Object, Object) 1006 * 1007 * @param dataIN 1008 * the input array of the Java integer. 1009 * @param dataOUT 1010 * the output array of the unsigned C-type integer. 1011 * 1012 * @return the converted data of unsigned C-type integer array. 
1013 */ 1014 @SuppressWarnings("rawtypes") 1015 public static Object convertToUnsignedC(Object dataIN, Object dataOUT) { 1016 if (dataIN == null) { 1017 log.debug("convertToUnsignedC(): data_in is null"); 1018 return null; 1019 } 1020 1021 Class dataClass = dataIN.getClass(); 1022 if (!dataClass.isArray()) { 1023 log.debug("convertToUnsignedC(): data_in not an array"); 1024 return null; 1025 } 1026 1027 if (dataOUT != null) { 1028 Class dataClassOut = dataOUT.getClass(); 1029 if (!dataClassOut.isArray() || (Array.getLength(dataIN) != Array.getLength(dataOUT))) { 1030 log.debug("convertToUnsignedC(): data_out not an array or does not match data_in size"); 1031 dataOUT = null; 1032 } 1033 } 1034 1035 String cname = dataClass.getName(); 1036 char dname = cname.charAt(cname.lastIndexOf('[') + 1); 1037 int size = Array.getLength(dataIN); 1038 log.trace("convertToUnsignedC(): cname={} dname={} size={}", cname, dname, size); 1039 1040 if (dname == 'S') { 1041 log.debug("convertToUnsignedC(): Java convert short to byte"); 1042 byte[] bdata = null; 1043 if (dataOUT == null) { 1044 bdata = new byte[size]; 1045 } 1046 else { 1047 bdata = (byte[]) dataOUT; 1048 } 1049 short[] sdata = (short[]) dataIN; 1050 for (int i = 0; i < size; i++) { 1051 bdata[i] = (byte) sdata[i]; 1052 } 1053 dataOUT = bdata; 1054 } 1055 else if (dname == 'I') { 1056 log.debug("convertToUnsignedC(): Java convert int to short"); 1057 short[] sdata = null; 1058 if (dataOUT == null) { 1059 sdata = new short[size]; 1060 } 1061 else { 1062 sdata = (short[]) dataOUT; 1063 } 1064 int[] idata = (int[]) dataIN; 1065 for (int i = 0; i < size; i++) { 1066 sdata[i] = (short) idata[i]; 1067 } 1068 dataOUT = sdata; 1069 } 1070 else if (dname == 'J') { 1071 log.debug("convertToUnsignedC(): Java convert long to int"); 1072 int[] idata = null; 1073 if (dataOUT == null) { 1074 idata = new int[size]; 1075 } 1076 else { 1077 idata = (int[]) dataOUT; 1078 } 1079 long[] ldata = (long[]) dataIN; 1080 for (int i = 0; i < size; 
i++) { 1081 idata[i] = (int) ldata[i]; 1082 } 1083 dataOUT = idata; 1084 } 1085 else { 1086 dataOUT = dataIN; 1087 log.debug("convertToUnsignedC(): Java does not support unsigned long"); 1088 } 1089 1090 return dataOUT; 1091 } 1092 1093 /** 1094 * Converts an array of bytes into an array of Strings for a fixed string 1095 * dataset. 1096 * <p> 1097 * A C-string is an array of chars while an Java String is an object. When a 1098 * string dataset is read into a Java application, the data is stored in an 1099 * array of Java bytes. byteToString() is used to convert the array of bytes 1100 * into an array of Java strings so that applications can display and modify 1101 * the data content. 1102 * <p> 1103 * For example, the content of a two element C string dataset is {"ABC", 1104 * "abc"}. Java applications will read the data into a byte array of {65, 1105 * 66, 67, 97, 98, 99). byteToString(bytes, 3) returns an array of Java 1106 * String of strs[0]="ABC", and strs[1]="abc". 1107 * <p> 1108 * If memory data of strings is converted to Java Strings, stringToByte() 1109 * must be called to convert the memory data back to byte array before data 1110 * is written to file. 1111 * 1112 * @see #stringToByte(String[], int) 1113 * 1114 * @param bytes 1115 * the array of bytes to convert. 1116 * @param length 1117 * the length of string. 1118 * 1119 * @return the array of Java String. 
1120 */ 1121 public static final String[] byteToString(byte[] bytes, int length) { 1122 if (bytes == null) { 1123 log.debug("byteToString(): input is null"); 1124 return null; 1125 } 1126 1127 int n = bytes.length / length; 1128 log.trace("byteToString(): n={} from length of {}", n, length); 1129 String[] strArray = new String[n]; 1130 String str = null; 1131 int idx = 0; 1132 for (int i = 0; i < n; i++) { 1133 str = new String(bytes, i * length, length); 1134 idx = str.indexOf('\0'); 1135 if (idx >= 0) { 1136 str = str.substring(0, idx); 1137 } 1138 1139 // trim only the end 1140 int end = str.length(); 1141 while (end > 0 && str.charAt(end - 1) <= '\u0020') 1142 end--; 1143 1144 strArray[i] = (end <= 0) ? "" : str.substring(0, end); 1145 } 1146 1147 return strArray; 1148 } 1149 1150 /** 1151 * Converts a string array into an array of bytes for a fixed string 1152 * dataset. 1153 * <p> 1154 * If memory data of strings is converted to Java Strings, stringToByte() 1155 * must be called to convert the memory data back to byte array before data 1156 * is written to file. 1157 * 1158 * @see #byteToString(byte[] bytes, int length) 1159 * 1160 * @param strings 1161 * the array of string. 1162 * @param length 1163 * the length of string. 1164 * 1165 * @return the array of bytes. 
1166 */ 1167 public static final byte[] stringToByte(String[] strings, int length) { 1168 if (strings == null) { 1169 log.debug("stringToByte(): input is null"); 1170 return null; 1171 } 1172 1173 int size = strings.length; 1174 byte[] bytes = new byte[size * length]; 1175 log.trace("stringToByte(): size={} length={}", size, length); 1176 StringBuilder strBuff = new StringBuilder(length); 1177 for (int i = 0; i < size; i++) { 1178 // initialize the string with spaces 1179 strBuff.replace(0, length, " "); 1180 1181 if (strings[i] != null) { 1182 if (strings[i].length() > length) { 1183 strings[i] = strings[i].substring(0, length); 1184 } 1185 strBuff.replace(0, length, strings[i]); 1186 } 1187 1188 strBuff.setLength(length); 1189 System.arraycopy(strBuff.toString().getBytes(), 0, bytes, length * i, length); 1190 } 1191 1192 return bytes; 1193 } 1194 1195 /** 1196 * Returns the array of strings that represent the dimension names. Returns 1197 * null if there is no dimension name. 1198 * <p> 1199 * Some datasets have pre-defined names for each dimension such as 1200 * "Latitude" and "Longitude". getDimNames() returns these pre-defined 1201 * names. 1202 * 1203 * @return the names of dimensions, or null if there is no dimension name. 1204 */ 1205 public final String[] getDimNames() { 1206 if (!inited) init(); 1207 1208 return dimNames; 1209 } 1210 1211 /** 1212 * Checks if a given datatype is a string. Sub-classes must replace this 1213 * default implementation. 1214 * 1215 * @param tid 1216 * The data type identifier. 1217 * 1218 * @return true if the datatype is a string; otherwise returns false. 1219 */ 1220 public boolean isString(long tid) { 1221 return false; 1222 } 1223 1224 /** 1225 * Returns the size in bytes of a given datatype. Sub-classes must replace 1226 * this default implementation. 1227 * 1228 * @param tid 1229 * The data type identifier. 
*
     * @return The size of the datatype
     */
    public long getSize(long tid) {
        return -1;
    }

    /**
     * Get Class of the original data buffer if converted.
     *
     * @return the Class of originalBuf, or null when originalBuf is null
     */
    @Override
    @SuppressWarnings("rawtypes")
    public final Class getOriginalClass() {
        // Without an original buffer there is no class to report; answer null
        // instead of failing with a NullPointerException.
        if (originalBuf == null) {
            return null;
        }

        return originalBuf.getClass();
    }

    /**
     * Checks if dataset is virtual. Sub-classes must replace
     * this default implementation.
     *
     * @return true if the dataset is virtual; otherwise returns false.
     */
    public boolean isVirtual() {
        return false;
    }

    /**
     * Gets the source file name at index if dataset is virtual. Sub-classes must replace
     * this default implementation.
     *
     * @param index
     *            the index of the source file name to get.
     *
     * @return filename if the dataset is virtual; otherwise returns null.
     */
    public String getVirtualFilename(int index) {
        return null;
    }

    /**
     * Gets the number of source files if dataset is virtual. Sub-classes must replace
     * this default implementation.
     *
     * @return the list size if the dataset is virtual; otherwise returns negative.
     */
    public int getVirtualMaps() {
        return -1;
    }
}