001/***************************************************************************** 002 * Copyright by The HDF Group. * 003 * Copyright by the Board of Trustees of the University of Illinois. * 004 * All rights reserved. * 005 * * 006 * This file is part of the HDF Java Products distribution. * 007 * The full copyright notice, including terms governing use, modification, * 008 * and redistribution, is contained in the file COPYING. * 009 * COPYING can be found at the root of the source code distribution tree. * 010 * If you do not have access to this file, you may request a copy from * 011 * help@hdfgroup.org. * 012 ****************************************************************************/ 013 014package hdf.object; 015 016import java.lang.reflect.Array; 017import java.util.Vector; 018 019/** 020 * The abstract class provides general APIs to create and manipulate dataset 021 * objects, and retrieve dataset properties, datatype and dimension sizes. 022 * <p> 023 * This class provides two convenient functions, read()/write(), to read/write 024 * data values. Reading/writing data may take many library calls if we use the 025 * library APIs directly. The read() and write functions hide all the details of 026 * these calls from users. 027 * <p> 028 * For more details on dataset, 029 * see <b> <a href="https://www.hdfgroup.org/HDF5/doc/UG/HDF5_Users_Guide-Responsive%20HTML5/index.html">HDF5 User's Guide</a> </b> 030 * <p> 031 * 032 * @see hdf.object.ScalarDS 033 * @see hdf.object.CompoundDS 034 * 035 * @version 1.1 9/4/2007 036 * @author Peter X. Cao 037 */ 038public abstract class Dataset extends HObject { 039 private static final long serialVersionUID = -3360885430038261178L; 040 041 private final static org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(Dataset.class); 042 043 /** 044 * The memory buffer that holds the raw data of the dataset. 045 */ 046 protected Object data; 047 048 /** 049 * The number of dimensions of the dataset. 
050 */ 051 protected int rank; 052 053 /** 054 * The current dimension sizes of the dataset 055 */ 056 protected long[] dims; 057 058 /** 059 * The max dimension sizes of the dataset 060 */ 061 protected long[] maxDims; 062 063 /** 064 * Array that contains the number of data points selected (for read/write) 065 * in each dimension. 066 * <p> 067 * The selected size must be less than or equal to the current dimension size. 068 * A subset of a rectangle selection is defined by the starting position and 069 * selected sizes. 070 * <p> 071 * For example, if a 4 X 5 dataset is as follows: 072 * 073 * <pre> 074 * 0, 1, 2, 3, 4 075 * 10, 11, 12, 13, 14 076 * 20, 21, 22, 23, 24 077 * 30, 31, 32, 33, 34 078 * long[] dims = {4, 5}; 079 * long[] startDims = {1, 2}; 080 * long[] selectedDims = {3, 3}; 081 * then the following subset is selected by the startDims and selectedDims above: 082 * 12, 13, 14 083 * 22, 23, 24 084 * 32, 33, 34 085 * </pre> 086 */ 087 protected long[] selectedDims; 088 089 /** 090 * The starting position of each dimension of a selected subset. With both 091 * the starting position and selected sizes, the subset of a rectangle 092 * selection is fully defined. 093 */ 094 protected long[] startDims; 095 096 /** 097 * Array that contains the indices of the dimensions selected for display. 098 * <p> 099 * <B>selectedIndex[] is provided for two purposes:</B> 100 * <OL> 101 * <LI> 102 * selectedIndex[] is used to indicate the order of dimensions for display, 103 * i.e. selectedIndex[0] = row, selectedIndex[1] = column and 104 * selectedIndex[2] = depth. For example, for a four dimension dataset, if 105 * selectedIndex[] is {1, 2, 3}, then dim[1] is selected as row index, 106 * dim[2] is selected as column index and dim[3] is selected as depth index. 107 * <LI> 108 * selectedIndex[] is also used to select dimensions for display for 109 * datasets with three or more dimensions. 
We assume that applications such 110 * as HDFView can only display data up to three dimensions (a 2D 111 * spreadsheet/image with a third dimension that the 2D spreadsheet/image is 112 * cut from). For datasets with more than three dimensions, we need 113 * selectedIndex[] to store which three dimensions are chosen for display. 114 * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3}, 115 * then dim[1] is selected as row index, dim[2] is selected as column index 116 * and dim[3] is selected as depth index. dim[0] is not selected. Its 117 * location is fixed at 0 by default. 118 * </OL> 119 */ 120 protected final int[] selectedIndex; 121 122 /** 123 * The number of elements to move from the start location in each dimension. 124 * For example, if selectedStride[0] = 2, every other data point is selected 125 * along dim[0]. 126 */ 127 protected long[] selectedStride; 128 129 /** 130 * The array of dimension sizes for a chunk. 131 */ 132 protected long[] chunkSize; 133 134 /** The compression information. */ 135 protected String compression; 136 public final static String compression_gzip_txt = "GZIP: level = "; 137 138 /** The filters information. */ 139 protected String filters; 140 141 /** The storage layout information. */ 142 protected String storage_layout; 143 144 /** The storage information. */ 145 protected String storage; 146 147 /** The datatype object of the dataset. */ 148 protected Datatype datatype; 149 150 /** 151 * Array of strings that represent the dimension names. It is null if 152 * dimension names do not exist. 153 */ 154 protected String[] dimNames; 155 156 /** Flag to indicate if the byte[] array is converted to strings */ 157 protected boolean convertByteToString = true; 158 159 /** Flag to indicate if data values are loaded into memory. */ 160 protected boolean isDataLoaded = false; 161 162 /** The number of data points in the memory buffer. 
*/ 163 protected long nPoints = 1; 164 165 /** 166 * The data buffer that contains the raw data read directly from file 167 * (before any data conversion). 168 */ 169 protected Object originalBuf = null; 170 171 /** 172 * The array that holds the converted data of unsigned C-type integers. 173 * <p> 174 * For example, suppose that the original data is an array of unsigned 175 * 16-bit short integers. Since Java does not support unsigned integers, the 176 * data is converted to an array of 32-bit signed integers. In that case, the 177 * converted buffer is the array of 32-bit signed integers. 178 */ 179 protected Object convertedBuf = null; 180 181 /** 182 * Flag to indicate if the enum data is converted to strings. 183 */ 184 protected boolean enumConverted = false; 185 186 /** 187 * Constructs a Dataset object with a given file, name and path. 188 * 189 * @param theFile 190 * the file that contains the dataset. 191 * @param name 192 * the name of the Dataset, e.g. "dset1". 193 * @param path 194 * the full group path of this Dataset, e.g. "/arrays/". 195 */ 196 public Dataset(FileFormat theFile, String name, String path) { 197 this(theFile, name, path, null); 198 } 199 200 /** 201 * @deprecated Not for public use in the future. <br> 202 * Using {@link #Dataset(FileFormat, String, String)} 203 * 204 * @param theFile 205 * the file that contains the dataset. 206 * @param name 207 * the name of the Dataset, e.g. "dset1". 208 * @param path 209 * the full group path of this Dataset, e.g. "/arrays/". 210 * @param oid 211 * the oid of this Dataset. 
212 */ 213 @Deprecated 214 public Dataset(FileFormat theFile, String name, String path, long[] oid) { 215 super(theFile, name, path, oid); 216 217 rank = 0; 218 data = null; 219 dims = null; 220 maxDims = null; 221 selectedDims = null; 222 startDims = null; 223 selectedStride = null; 224 chunkSize = null; 225 compression = "NONE"; 226 filters = "NONE"; 227 storage = "NONE"; 228 dimNames = null; 229 230 selectedIndex = new int[3]; 231 selectedIndex[0] = 0; 232 selectedIndex[1] = 1; 233 selectedIndex[2] = 2; 234 } 235 236 /** 237 * Clears memory held by the dataset, such as the data buffer. 238 */ 239 @SuppressWarnings("rawtypes") 240 public void clear() { 241 if (data != null) { 242 if (data instanceof Vector) { 243 ((Vector) data).setSize(0); 244 } 245 data = null; 246 originalBuf = null; 247 convertedBuf = null; 248 } 249 isDataLoaded = false; 250 } 251 252 /** 253 * Retrieves datatype and dataspace information from file and sets the 254 * dataset in memory. 255 * <p> 256 * The init() is designed to support lazy operation in a dataset object. When 257 * a data object is retrieved from file, the datatype, dataspace and raw 258 * data are not loaded into memory. When it is asked to read the raw data 259 * from file, init() is first called to get the datatype and dataspace 260 * information, then load the raw data from file. 261 * <p> 262 * init() is also used to reset the selection of a dataset (start, stride and 263 * count) to the default, which is the entire dataset for 1D or 2D datasets. 264 * In the following example, init() at step 1) retrieves datatype and 265 * dataspace information from file. getData() at step 3) reads only one data 266 * point. init() at step 4) resets the selection to the whole dataset. 267 * getData() at step 4) reads the values of whole dataset into memory. 
268 * 269 * <pre> 270 * dset = (Dataset) file.get(NAME_DATASET); 271 * 272 * // 1) get datatype and dataspace information from file 273 * dset.init(); 274 * rank = dset.getRank(); // rank = 2, a 2D dataset 275 * count = dset.getSelectedDims(); 276 * start = dset.getStartDims(); 277 * dims = dset.getDims(); 278 * 279 * // 2) select only one data point 280 * for (int i = 0; i < rank; i++) { 281 * start[0] = 0; 282 * count[i] = 1; 283 * } 284 * 285 * // 3) read one data point 286 * data = dset.getData(); 287 * 288 * // 4) reset selection to the whole dataset 289 * dset.init(); 290 * 291 * // 5) clean the memory data buffer 292 * dset.clearData(); 293 * 294 * // 6) Read the whole dataset 295 * data = dset.getData(); 296 * </pre> 297 */ 298 public abstract void init(); 299 300 /** 301 * Returns the rank (number of dimensions) of the dataset. 302 * 303 * @return the number of dimensions of the dataset. 304 */ 305 public final int getRank() { 306 if (rank < 0) init(); 307 308 return rank; 309 } 310 311 /** 312 * Returns the array that contains the dimension sizes of the dataset. 313 * 314 * @return the dimension sizes of the dataset. 315 */ 316 public final long[] getDims() { 317 if (rank < 0) init(); 318 319 return dims; 320 } 321 322 /** 323 * Returns the array that contains the max dimension sizes of the dataset. 324 * 325 * @return the max dimension sizes of the dataset. 326 */ 327 public final long[] getMaxDims() { 328 if (rank < 0) init(); 329 330 if (maxDims == null) return dims; 331 332 return maxDims; 333 } 334 335 /** 336 * Returns the dimension sizes of the selected subset. 337 * <p> 338 * The SelectedDims is the number of data points of the selected subset. 339 * Applications can use this array to change the size of selected subset. 340 * 341 * The selected size must be less than or equal to the current dimension size. 342 * Combined with the starting position, selected sizes and stride, the 343 * subset of a rectangle selection is fully defined. 
344 * <p> 345 * For example, if a 4 X 5 dataset is as follows: 346 * 347 * <pre> 348 * 0, 1, 2, 3, 4 349 * 10, 11, 12, 13, 14 350 * 20, 21, 22, 23, 24 351 * 30, 31, 32, 33, 34 352 * long[] dims = {4, 5}; 353 * long[] startDims = {1, 2}; 354 * long[] selectedDims = {3, 3}; 355 * long[] selectedStride = {1, 1}; 356 * then the following subset is selected by the startDims and selectedDims 357 * 12, 13, 14 358 * 22, 23, 24 359 * 32, 33, 34 360 * </pre> 361 * 362 * @return the dimension sizes of the selected subset. 363 */ 364 public final long[] getSelectedDims() { 365 if (rank < 0) init(); 366 367 return selectedDims; 368 } 369 370 /** 371 * Returns the starting position of a selected subset. 372 * <p> 373 * Applications can use this array to change the starting position of a 374 * selection. Combined with the selected dimensions, selected sizes and 375 * stride, the subset of a rectangle selection is fully defined. 376 * <p> 377 * For example, if a 4 X 5 dataset is as follows: 378 * 379 * <pre> 380 * 0, 1, 2, 3, 4 381 * 10, 11, 12, 13, 14 382 * 20, 21, 22, 23, 24 383 * 30, 31, 32, 33, 34 384 * long[] dims = {4, 5}; 385 * long[] startDims = {1, 2}; 386 * long[] selectedDims = {3, 3}; 387 * long[] selectedStride = {1, 1}; 388 * then the following subset is selected by the startDims and selectedDims 389 * 12, 13, 14 390 * 22, 23, 24 391 * 32, 33, 34 392 * </pre> 393 * 394 * @return the starting position of a selected subset. 395 */ 396 public final long[] getStartDims() { 397 if (rank < 0) init(); 398 399 return startDims; 400 } 401 402 /** 403 * Returns the selectedStride of the selected dataset. 404 * <p> 405 * Applications can use this array to change how many elements to move in 406 * each dimension. 407 * 408 * Combined with the starting position and selected sizes, the subset of a 409 * rectangle selection is defined. 
410 * <p> 411 * For example, if a 4 X 5 dataset is as follows: 412 * 413 * <pre> 414 * 0, 1, 2, 3, 4 415 * 10, 11, 12, 13, 14 416 * 20, 21, 22, 23, 24 417 * 30, 31, 32, 33, 34 418 * long[] dims = {4, 5}; 419 * long[] startDims = {0, 0}; 420 * long[] selectedDims = {2, 2}; 421 * long[] selectedStride = {2, 3}; 422 * then the following subset is selected by the startDims and selectedDims 423 * 0, 3 424 * 20, 23 425 * </pre> 426 * 427 * @return the selectedStride of the selected dataset. 428 */ 429 public final long[] getStride() { 430 if (rank < 0) init(); 431 432 if (rank <= 0) { 433 return null; 434 } 435 436 if (selectedStride == null) { 437 selectedStride = new long[rank]; 438 for (int i = 0; i < rank; i++) { 439 selectedStride[i] = 1; 440 } 441 } 442 443 return selectedStride; 444 } 445 446 /** 447 * Sets the flag that indicates if a byte array is converted to a string 448 * array. 449 * <p> 450 * In a string dataset, the raw data from file is stored in a byte array. By 451 * default, this byte array is converted to an array of strings. For a large 452 * dataset (e.g. more than one million strings), the conversion takes a long 453 * time and requires a lot of memory space to store the strings. In some 454 * applications, such a conversion can be delayed. For example, A GUI 455 * application may convert only the part of the strings that is visible to the 456 * users, not the entire data array. 457 * <p> 458 * setConvertByteToString(boolean b) allows users to set the flag so that 459 * applications can choose to perform the byte-to-string conversion or not. 460 * If the flag is set to false, the getData() returns an array of byte 461 * instead of an array of strings. 462 * 463 * @param b 464 * convert bytes to strings if b is true; otherwise, if false, do 465 * not convert bytes to strings. 
466 */ 467 public final void setConvertByteToString(boolean b) { 468 convertByteToString = b; 469 } 470 471 /** 472 * Returns the flag that indicates if a byte array is converted to a string 473 * array. 474 * 475 * @return true if byte array is converted to string; otherwise, returns 476 * false if there is no conversion. 477 */ 478 public final boolean getConvertByteToString() { 479 return convertByteToString; 480 } 481 482 /** 483 * Reads the data from file. 484 * <p> 485 * read() reads the data from file to a memory buffer and returns the memory 486 * buffer. The dataset object does not hold the memory buffer. To store the 487 * memory buffer in the dataset object, one must call getData(). 488 * <p> 489 * By default, the whole dataset is read into memory. Users can also select 490 * a subset to read. Subsetting is done in an implicit way. 491 * <p> 492 * <b>How to Select a Subset</b> 493 * <p> 494 * A selection is specified by three arrays: start, stride and count. 495 * <ol> 496 * <li>start: offset of a selection 497 * <li>stride: determines how many elements to move in each dimension 498 * <li>count: number of elements to select in each dimension 499 * </ol> 500 * getStartDims(), getStride() and getSelectedDims() returns the start, 501 * stride and count arrays respectively. Applications can make a selection 502 * by changing the values of the arrays. 503 * <p> 504 * The following example shows how to make a subset. In the example, the 505 * dataset is a 4-dimensional array of [200][100][50][10], i.e. 
* dims[0]=200;
     * dims[1]=100; dims[2]=50; dims[3]=10; <br>
     * We want to select every other data point in dims[1] and dims[2]
     *
     * <pre>
     * int rank = dataset.getRank(); // number of dimensions of the dataset
     * long[] dims = dataset.getDims(); // the dimension sizes of the dataset
     * long[] selected = dataset.getSelectedDims(); // the selected size of the dataset
     * long[] start = dataset.getStartDims(); // the offset of the selection
     * long[] stride = dataset.getStride(); // the stride of the dataset
     * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for display
     *
     * // select dim1 and dim2 as 2D data for display, and slice through dim0
     * selectedIndex[0] = 1;
     * selectedIndex[1] = 2;
     * selectedIndex[2] = 0;
     *
     * // reset the selection arrays
     * for (int i = 0; i < rank; i++) {
     *     start[i] = 0;
     *     selected[i] = 1;
     *     stride[i] = 1;
     * }
     *
     * // set stride to 2 on dim1 and dim2 so that every other data point is
     * // selected.
     * stride[1] = 2;
     * stride[2] = 2;
     *
     * // set the selection size of dim1 and dim2
     * selected[1] = dims[1] / stride[1];
     * selected[2] = dims[2] / stride[2];
     *
     * // when dataset.getData() is called, the selection above will be used since
     * // the dimension arrays are passed by reference. Changes of these arrays
     * // outside the dataset object directly change the values of these array
     * // in the dataset object.
     * </pre>
     * <p>
     * For ScalarDS, the memory data buffer is a one-dimensional array of byte,
     * short, int, float, double or String type based on the datatype of the
     * dataset.
     * <p>
     * For CompoundDS, the memory data object is a java.util.List object. Each
     * element of the list is a data array that corresponds to a compound field.
550 * <p> 551 * For example, if compound dataset "comp" has the following nested 552 * structure, and member datatypes 553 * 554 * <pre> 555 * comp --> m01 (int) 556 * comp --> m02 (float) 557 * comp --> nest1 --> m11 (char) 558 * comp --> nest1 --> m12 (String) 559 * comp --> nest1 --> nest2 --> m21 (long) 560 * comp --> nest1 --> nest2 --> m22 (double) 561 * </pre> 562 * 563 * getData() returns a list of six arrays: {int[], float[], char[], 564 * String[], long[] and double[]}. 565 * 566 * @return the data read from file. 567 * 568 * @see #getData() 569 * 570 * @throws Exception if object can not be read 571 * @throws OutOfMemoryError if memory is exhausted 572 */ 573 public abstract Object read() throws Exception, OutOfMemoryError; 574 575 /** 576 * Reads the raw data of the dataset from file to a byte array. 577 * <p> 578 * readBytes() reads raw data to an array of bytes instead of an array of its 579 * datatype. For example, for a one-dimensional 32-bit integer dataset of 580 * size 5, readBytes() returns a byte array of size 20 instead of an 581 * int array of 5. 582 * <p> 583 * readBytes() can be used to copy data from one dataset to another 584 * efficiently: because the raw data is not converted to its native type, it 585 * saves memory space and CPU time. 586 * 587 * @return the byte array of the raw data. 588 * 589 * @throws Exception if data can not be read 590 */ 591 public abstract byte[] readBytes() throws Exception; 592 593 /** 594 * Writes a memory buffer to the dataset in file. 595 * 596 * @param buf 597 * the data to write 598 * 599 * @throws Exception if data can not be written 600 */ 601 public abstract void write(Object buf) throws Exception; 602 603 /** 604 * Writes the memory buffer of this dataset to file. 
605 * 606 * @throws Exception if buffer can not be written 607 */ 608 public final void write() throws Exception { 609 if (data != null) { 610 write(data); 611 } 612 } 613 614 /** 615 * Creates a new dataset and writes the data buffer to the new dataset. 616 * <p> 617 * This function allows applications to create a new dataset for a given 618 * data buffer. For example, users can select a specific interesting part 619 * from a large image and create a new image with the selection. 620 * <p> 621 * The new dataset retains the datatype and dataset creation properties of 622 * this dataset. 623 * 624 * @param pgroup 625 * the group which the dataset is copied to. 626 * @param name 627 * the name of the new dataset. 628 * @param dims 629 * the dimension sizes of the the new dataset. 630 * @param data 631 * the data values of the subset to be copied. 632 * 633 * @return the new dataset. 634 * 635 * @throws Exception if dataset can not be copied 636 */ 637 public abstract Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception; 638 639 /** 640 * Returns the datatype object of the dataset. 641 * 642 * @return the datatype object of the dataset. 643 */ 644 public abstract Datatype getDatatype(); 645 646 /** 647 * Returns the data buffer of the dataset in memory. 648 * <p> 649 * If data is already loaded into memory, returns the data; otherwise, calls 650 * read() to read data from file into a memory buffer and returns the memory 651 * buffer. 652 * <p> 653 * By default, the whole dataset is read into memory. Users can also select 654 * a subset to read. Subsetting is done in an implicit way. 655 * <p> 656 * <b>How to Select a Subset</b> 657 * <p> 658 * A selection is specified by three arrays: start, stride and count. 
* <ol>
     * <li>start: offset of a selection
     * <li>stride: determines how many elements to move in each dimension
     * <li>count: number of elements to select in each dimension
     * </ol>
     * getStartDims(), getStride() and getSelectedDims() return the start,
     * stride and count arrays respectively. Applications can make a selection
     * by changing the values of the arrays.
     * <p>
     * The following example shows how to make a subset. In the example, the
     * dataset is a 4-dimensional array of [200][100][50][10], i.e. dims[0]=200;
     * dims[1]=100; dims[2]=50; dims[3]=10; <br>
     * We want to select every other data point in dims[1] and dims[2]
     *
     * <pre>
     * int rank = dataset.getRank(); // number of dimensions of the dataset
     * long[] dims = dataset.getDims(); // the dimension sizes of the dataset
     * long[] selected = dataset.getSelectedDims(); // the selected size of the dataset
     * long[] start = dataset.getStartDims(); // the offset of the selection
     * long[] stride = dataset.getStride(); // the stride of the dataset
     * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for display
     *
     * // select dim1 and dim2 as 2D data for display, and slice through dim0
     * selectedIndex[0] = 1;
     * selectedIndex[1] = 2;
     * selectedIndex[2] = 0;
     *
     * // reset the selection arrays
     * for (int i = 0; i < rank; i++) {
     *     start[i] = 0;
     *     selected[i] = 1;
     *     stride[i] = 1;
     * }
     *
     * // set stride to 2 on dim1 and dim2 so that every other data point is
     * // selected.
     * stride[1] = 2;
     * stride[2] = 2;
     *
     * // set the selection size of dim1 and dim2
     * selected[1] = dims[1] / stride[1];
     * selected[2] = dims[2] / stride[2];
     *
     * // when dataset.getData() is called, the selection above will be used since
     * // the dimension arrays are passed by reference.
Changes of these arrays 704 * // outside the dataset object directly change the values of these array 705 * // in the dataset object. 706 * </pre> 707 * <p> 708 * For ScalarDS, the memory data buffer is a one-dimensional array of byte, 709 * short, int, float, double or String type based on the datatype of the 710 * dataset. 711 * <p> 712 * For CompoundDS, the memory data object is an java.util.List object. Each 713 * element of the list is a data array that corresponds to a compound field. 714 * <p> 715 * For example, if compound dataset "comp" has the following nested 716 * structure, and member datatypes 717 * 718 * <pre> 719 * comp --> m01 (int) 720 * comp --> m02 (float) 721 * comp --> nest1 --> m11 (char) 722 * comp --> nest1 --> m12 (String) 723 * comp --> nest1 --> nest2 --> m21 (long) 724 * comp --> nest1 --> nest2 --> m22 (double) 725 * </pre> 726 * 727 * getData() returns a list of six arrays: {int[], float[], char[], 728 * String[], long[] and double[]}. 729 * 730 * @return the memory buffer of the dataset. 731 * 732 * @throws Exception if object can not be read 733 * @throws OutOfMemoryError if memory is exhausted 734 */ 735 public final Object getData() throws Exception, OutOfMemoryError { 736 if (!isDataLoaded) { 737 log.trace("getData: read"); 738 data = read(); // load the data; 739 originalBuf = data; 740 isDataLoaded = true; 741 nPoints = 1; 742 log.trace("getData: selectedDims length={}",selectedDims.length); 743 for (int j = 0; j < selectedDims.length; j++) { 744 nPoints *= selectedDims[j]; 745 } 746 log.trace("getData: read {}", nPoints); 747 } 748 749 return data; 750 } 751 752 /** 753 * @deprecated Not for public use in the future. 754 * <p> 755 * setData() is not safe to use because it changes memory buffer 756 * of the dataset object. Dataset operations such as write/read 757 * will fail if the buffer type or size is changed. 
758 * 759 * @param d the object data 760 */ 761 @Deprecated 762 public final void setData(Object d) { 763 data = d; 764 } 765 766 /** 767 * Marks the data buffer as not loaded so that the next getData() loads 768 * the data from file again. 769 * <p> 770 * The function getData() loads data from file into memory only if the data is 771 * not loaded. If data is already in memory, getData() just returns the memory 772 * buffer. Sometimes we want to force getData() to re-read data from file. For 773 * example, when the selection is changed, we need to re-read the data. 774 * 775 * @see #getData() 776 * @see #read() 777 */ 778 public void clearData() { 779 isDataLoaded = false; 780 } 781 782 /** 783 * Returns the dimension size of the vertical axis. 784 * 785 * <p> 786 * This function is used by GUI applications such as HDFView. GUI 787 * applications display a dataset in a 2D table or 2D image. The display 788 * order is specified by the index array of selectedIndex as follow: 789 * <dl> 790 * <dt>selectedIndex[0] -- height</dt> 791 * <dd>The vertical axis</dd> 792 * <dt>selectedIndex[1] -- width</dt> 793 * <dd>The horizontal axis</dd> 794 * <dt>selectedIndex[2] -- depth</dt> 795 * <dd>The depth axis is used for 3 or more dimensional datasets.</dd> 796 * </dl> 797 * Applications can use getSelectedIndex() to access and change the display 798 * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the 799 * following code will set the height=200 and width=50. 800 * 801 * <pre> 802 * int[] selectedIndex = dataset.getSelectedIndex(); 803 * selectedIndex[0] = 0; 804 * selectedIndex[1] = 1; 805 * </pre> 806 * 807 * @see #getSelectedIndex() 808 * @see #getWidth() 809 * 810 * @return the size of dimension of the vertical axis. 
811 */ 812 public final int getHeight() { 813 if (rank < 0) init(); 814 815 if ((selectedDims == null) || (selectedIndex == null)) { 816 return 0; 817 } 818 819 return (int) selectedDims[selectedIndex[0]]; 820 } 821 822 /** 823 * Returns the dimension size of the horizontal axis. 824 * 825 * <p> 826 * This function is used by GUI applications such as HDFView. GUI 827 * applications display a dataset in 2D Table or 2D Image. The display order is 828 * specified by the index array of selectedIndex as follow: 829 * <dl> 830 * <dt>selectedIndex[0] -- height</dt> 831 * <dd>The vertical axis</dd> 832 * <dt>selectedIndex[1] -- width</dt> 833 * <dd>The horizontal axis</dd> 834 * <dt>selectedIndex[2] -- depth</dt> 835 * <dd>The depth axis, which is used for 3 or more dimension datasets.</dd> 836 * </dl> 837 * Applications can use getSelectedIndex() to access and change the display 838 * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the 839 * following code will set the height=200 and width=100. 840 * 841 * <pre> 842 * int[] selectedIndex = dataset.getSelectedIndex(); 843 * selectedIndex[0] = 0; 844 * selectedIndex[1] = 1; 845 * </pre> 846 * 847 * @see #getSelectedIndex() 848 * @see #getHeight() 849 * 850 * @return the size of dimension of the horizontal axis. 851 */ 852 public final int getWidth() { 853 if (rank < 0) init(); 854 855 if ((selectedDims == null) || (selectedIndex == null)) { 856 return 0; 857 } 858 859 if ((selectedDims.length < 2) || (selectedIndex.length < 2)) { 860 return 1; 861 } 862 863 return (int) selectedDims[selectedIndex[1]]; 864 } 865 866 /** 867 * Returns the indices of display order. 868 * <p> 869 * 870 * selectedIndex[] is provided for two purposes: 871 * <OL> 872 * <LI> 873 * selectedIndex[] is used to indicate the order of dimensions for display. 874 * selectedIndex[0] is for the row, selectedIndex[1] is for the column and 875 * selectedIndex[2] for the depth. 
876 * <p> 877 * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3}, 878 * then dim[1] is selected as row index, dim[2] is selected as column index 879 * and dim[3] is selected as depth index. 880 * <LI> 881 * selectedIndex[] is also used to select dimensions for display for 882 * datasets with three or more dimensions. We assume that applications such 883 * as HDFView can only display data values up to three dimensions (2D 884 * spreadsheet/image with a third dimension which the 2D spreadsheet/image 885 * is selected from). For datasets with more than three dimensions, we need 886 * selectedIndex[] to tell applications which three dimensions are chosen 887 * for display. <br> 888 * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3}, 889 * then dim[1] is selected as row index, dim[2] is selected as column index 890 * and dim[3] is selected as depth index. dim[0] is not selected. Its 891 * location is fixed at 0 by default. 892 * </OL> 893 * 894 * @return the array of the indices of display order. 895 */ 896 public final int[] getSelectedIndex() { 897 if (rank < 0) init(); 898 899 return selectedIndex; 900 } 901 902 /** 903 * Returns the string representation of compression information. 904 * <p> 905 * For example, 906 * "SZIP: Pixels per block = 8: H5Z_FILTER_CONFIG_DECODE_ENABLED". 907 * 908 * @return the string representation of compression information. 909 */ 910 public final String getCompression() { 911 if (rank < 0) init(); 912 913 return compression; 914 } 915 916 /** 917 * Returns the string representation of filter information. 918 * 919 * @return the string representation of filter information. 920 */ 921 public final String getFilters() { 922 if (rank < 0) init(); 923 924 return filters; 925 } 926 927 /** 928 * Returns the string representation of storage layout information. 929 * 930 * @return the string representation of storage layout information. 
931 */ 932 public final String getStorageLayout() { 933 if (rank < 0) init(); 934 935 return storage_layout; 936 } 937 938 /** 939 * Returns the string representation of storage information. 940 * 941 * @return the string representation of storage information. 942 */ 943 public final String getStorage() { 944 if (rank < 0) init(); 945 946 return storage; 947 } 948 949 /** 950 * Returns the array that contains the dimension sizes of the chunk of the 951 * dataset. Returns null if the dataset is not chunked. 952 * 953 * @return the array of chunk sizes or returns null if the dataset is not 954 * chunked. 955 */ 956 public final long[] getChunkSize() { 957 if (rank < 0) init(); 958 959 return chunkSize; 960 } 961 962 /** 963 * @deprecated Not for public use in the future. <br> 964 * Using {@link #convertFromUnsignedC(Object, Object)} 965 * 966 * @param data_in the object data 967 * 968 * @return the converted object 969 */ 970 @Deprecated 971 public static Object convertFromUnsignedC(Object data_in) { 972 return Dataset.convertFromUnsignedC(data_in, null); 973 } 974 975 /** 976 * Converts one-dimension array of unsigned C-type integers to a new array 977 * of appropriate Java integer in memory. 978 * <p> 979 * Since Java does not support unsigned integer, values of unsigned C-type 980 * integers must be converted into its appropriate Java integer. Otherwise, 981 * the data value will not displayed correctly. For example, if an unsigned 982 * C byte, x = 200, is stored into an Java byte y, y will be -56 instead of 983 * the correct value of 200. 
* <p>
 * Unsigned C integers are upgraded to Java integers according to the
 * following table:
 * <TABLE CELLSPACING=0 BORDER=1 CELLPADDING=5 WIDTH=400>
 * <caption><b>Mapping Unsigned C Integers to Java Integers</b></caption>
 * <TR>
 * <TD><B>Unsigned C Integer</B></TD>
 * <TD><B>Java Integer</B></TD>
 * </TR>
 * <TR>
 * <TD>unsigned byte</TD>
 * <TD>signed short</TD>
 * </TR>
 * <TR>
 * <TD>unsigned short</TD>
 * <TD>signed int</TD>
 * </TR>
 * <TR>
 * <TD>unsigned int</TD>
 * <TD>signed long</TD>
 * </TR>
 * <TR>
 * <TD>unsigned long</TD>
 * <TD>signed long</TD>
 * </TR>
 * </TABLE>
 * <strong>NOTE: this conversion cannot deal with unsigned 64-bit integers.
 * Therefore, the values of unsigned 64-bit datasets may be wrong in Java
 * applications</strong>.
 * <p>
 * If memory data of unsigned integers is converted by
 * convertFromUnsignedC(), convertToUnsignedC() must be called to convert
 * the data back to unsigned C before data is written into file.
 *
 * @see #convertToUnsignedC(Object, Object)
 *
 * @param data_in
 *            the input 1D array of the unsigned C-type integers.
 * @param data_out
 *            the output converted (or upgraded) 1D array of Java integers.
 *
 * @return the upgraded 1D array of Java integers.
1026 */ 1027 @SuppressWarnings("rawtypes") 1028 public static Object convertFromUnsignedC(Object data_in, Object data_out) { 1029 log.trace("convertFromUnsignedC(): start"); 1030 1031 if (data_in == null) { 1032 log.debug("convertFromUnsignedC(): data_in is null"); 1033 log.trace("convertFromUnsignedC(): finish"); 1034 return null; 1035 } 1036 1037 Class data_class = data_in.getClass(); 1038 if (!data_class.isArray()) { 1039 log.debug("convertFromUnsignedC(): data_in not an array"); 1040 log.trace("convertFromUnsignedC(): finish"); 1041 return null; 1042 } 1043 1044 if (data_out != null) { 1045 Class data_class_out = data_out.getClass(); 1046 if (!data_class_out.isArray() || (Array.getLength(data_in) != Array.getLength(data_out))) { 1047 log.debug("convertFromUnsignedC(): data_out not an array or does not match data_in size"); 1048 data_out = null; 1049 } 1050 } 1051 1052 String cname = data_class.getName(); 1053 char dname = cname.charAt(cname.lastIndexOf("[") + 1); 1054 int size = Array.getLength(data_in); 1055 log.trace("convertFromUnsignedC(): cname={} dname={} size={}", cname, dname, size); 1056 1057 if (dname == 'B') { 1058 short[] sdata = null; 1059 if (data_out == null) { 1060 sdata = new short[size]; 1061 } 1062 else { 1063 sdata = (short[]) data_out; 1064 } 1065 1066 byte[] bdata = (byte[]) data_in; 1067 for (int i = 0; i < size; i++) { 1068 sdata[i] = (short) ((bdata[i] + 256) & 0xFF); 1069 } 1070 1071 data_out = sdata; 1072 } 1073 else if (dname == 'S') { 1074 int[] idata = null; 1075 if (data_out == null) { 1076 idata = new int[size]; 1077 } 1078 else { 1079 idata = (int[]) data_out; 1080 } 1081 1082 short[] sdata = (short[]) data_in; 1083 for (int i = 0; i < size; i++) { 1084 idata[i] = (sdata[i] + 65536) & 0xFFFF; 1085 } 1086 1087 data_out = idata; 1088 } 1089 else if (dname == 'I') { 1090 long[] ldata = null; 1091 if (data_out == null) { 1092 ldata = new long[size]; 1093 } 1094 else { 1095 ldata = (long[]) data_out; 1096 } 1097 1098 int[] idata = 
(int[]) data_in; 1099 for (int i = 0; i < size; i++) { 1100 ldata[i] = (idata[i] + 4294967296L) & 0xFFFFFFFFL; 1101 } 1102 1103 data_out = ldata; 1104 } 1105 else { 1106 data_out = data_in; 1107 log.debug("convertFromUnsignedC(): Java does not support unsigned long"); 1108 } 1109 1110 return data_out; 1111 } 1112 1113 /** 1114 * @deprecated Not for public use in the future. <br> 1115 * Using {@link #convertToUnsignedC(Object, Object)} 1116 * 1117 * @param data_in 1118 * the input 1D array of the unsigned C-type integers. 1119 * 1120 * @return the upgraded 1D array of Java integers. 1121 */ 1122 @Deprecated 1123 public static Object convertToUnsignedC(Object data_in) { 1124 return Dataset.convertToUnsignedC(data_in, null); 1125 } 1126 1127 /** 1128 * Converts the array of converted unsigned integers back to unsigned C-type 1129 * integer data in memory. 1130 * <p> 1131 * If memory data of unsigned integers is converted by 1132 * convertFromUnsignedC(), convertToUnsignedC() must be called to convert 1133 * the data back to unsigned C before data is written into file. 1134 * 1135 * @see #convertFromUnsignedC(Object, Object) 1136 * 1137 * @param data_in 1138 * the input array of the Java integer. 1139 * @param data_out 1140 * the output array of the unsigned C-type integer. 1141 * 1142 * @return the converted data of unsigned C-type integer array. 
1143 */ 1144 @SuppressWarnings("rawtypes") 1145 public static Object convertToUnsignedC(Object data_in, Object data_out) { 1146 log.trace("convertToUnsignedC(): start"); 1147 1148 if (data_in == null) { 1149 log.debug("convertToUnsignedC(): data_in is null"); 1150 log.trace("convertToUnsignedC(): finish"); 1151 return null; 1152 } 1153 1154 Class data_class = data_in.getClass(); 1155 if (!data_class.isArray()) { 1156 log.debug("convertToUnsignedC(): data_in not an array"); 1157 log.trace("convertToUnsignedC(): finish"); 1158 return null; 1159 } 1160 1161 if (data_out != null) { 1162 Class data_class_out = data_out.getClass(); 1163 if (!data_class_out.isArray() || (Array.getLength(data_in) != Array.getLength(data_out))) { 1164 log.debug("convertToUnsignedC(): data_out not an array or does not match data_in size"); 1165 data_out = null; 1166 } 1167 } 1168 1169 String cname = data_class.getName(); 1170 char dname = cname.charAt(cname.lastIndexOf("[") + 1); 1171 int size = Array.getLength(data_in); 1172 log.trace("convertToUnsignedC(): cname={} dname={} size={}", cname, dname, size); 1173 1174 if (dname == 'S') { 1175 byte[] bdata = null; 1176 if (data_out == null) { 1177 bdata = new byte[size]; 1178 } 1179 else { 1180 bdata = (byte[]) data_out; 1181 } 1182 short[] sdata = (short[]) data_in; 1183 for (int i = 0; i < size; i++) { 1184 bdata[i] = (byte) sdata[i]; 1185 } 1186 data_out = bdata; 1187 } 1188 else if (dname == 'I') { 1189 short[] sdata = null; 1190 if (data_out == null) { 1191 sdata = new short[size]; 1192 } 1193 else { 1194 sdata = (short[]) data_out; 1195 } 1196 int[] idata = (int[]) data_in; 1197 for (int i = 0; i < size; i++) { 1198 sdata[i] = (short) idata[i]; 1199 } 1200 data_out = sdata; 1201 } 1202 else if (dname == 'J') { 1203 int[] idata = null; 1204 if (data_out == null) { 1205 idata = new int[size]; 1206 } 1207 else { 1208 idata = (int[]) data_out; 1209 } 1210 long[] ldata = (long[]) data_in; 1211 for (int i = 0; i < size; i++) { 1212 idata[i] = 
(int) ldata[i]; 1213 } 1214 data_out = idata; 1215 } 1216 else { 1217 data_out = data_in; 1218 log.debug("convertToUnsignedC(): Java does not support unsigned long"); 1219 } 1220 1221 return data_out; 1222 } 1223 1224 /** 1225 * Converts an array of bytes into an array of Strings for a fixed string 1226 * dataset. 1227 * <p> 1228 * A C-string is an array of chars while an Java String is an object. When a 1229 * string dataset is read into a Java application, the data is stored in an 1230 * array of Java bytes. byteToString() is used to convert the array of bytes 1231 * into an array of Java strings so that applications can display and modify 1232 * the data content. 1233 * <p> 1234 * For example, the content of a two element C string dataset is {"ABC", 1235 * "abc"}. Java applications will read the data into a byte array of {65, 1236 * 66, 67, 97, 98, 99). byteToString(bytes, 3) returns an array of Java 1237 * String of strs[0]="ABC", and strs[1]="abc". 1238 * <p> 1239 * If memory data of strings is converted to Java Strings, stringToByte() 1240 * must be called to convert the memory data back to byte array before data 1241 * is written to file. 1242 * 1243 * @see #stringToByte(String[], int) 1244 * 1245 * @param bytes 1246 * the array of bytes to convert. 1247 * @param length 1248 * the length of string. 1249 * 1250 * @return the array of Java String. 
1251 */ 1252 public static final String[] byteToString(byte[] bytes, int length) { 1253 log.trace("byteToString(): start"); 1254 1255 if (bytes == null) { 1256 log.debug("byteToString(): input is null"); 1257 log.trace("byteToString(): finish"); 1258 return null; 1259 } 1260 1261 int n = bytes.length / length; 1262 log.trace("byteToString(): n={} from length of {}", n, length); 1263 // String bigstr = new String(bytes); 1264 String[] strArray = new String[n]; 1265 String str = null; 1266 int idx = 0; 1267 for (int i = 0; i < n; i++) { 1268 str = new String(bytes, i * length, length); 1269 // bigstr.substring uses less memory space 1270 // NOTE: bigstr does not work on linux if bytes.length is very large 1271 // see bug 1091 1272 // offset = i*length; 1273 // str = bigstr.substring(offset, offset+length); 1274 1275 idx = str.indexOf('\0'); 1276 if (idx > 0) { 1277 str = str.substring(0, idx); 1278 } 1279 1280 // trim only the end 1281 int end = str.length(); 1282 while (end > 0 && str.charAt(end - 1) <= '\u0020') 1283 end--; 1284 1285 strArray[i] = (end <= 0) ? "" : str.substring(0, end); 1286 1287 // trim both start and end 1288 // strArray[i] = str.trim(); 1289 } 1290 1291 log.trace("byteToString(): finish"); 1292 return strArray; 1293 } 1294 1295 /** 1296 * Converts a string array into an array of bytes for a fixed string 1297 * dataset. 1298 * <p> 1299 * If memory data of strings is converted to Java Strings, stringToByte() 1300 * must be called to convert the memory data back to byte array before data 1301 * is written to file. 1302 * 1303 * @see #byteToString(byte[] bytes, int length) 1304 * 1305 * @param strings 1306 * the array of string. 1307 * @param length 1308 * the length of string. 1309 * 1310 * @return the array of bytes. 
1311 */ 1312 public static final byte[] stringToByte(String[] strings, int length) { 1313 log.trace("stringToByte(): start"); 1314 1315 if (strings == null) { 1316 log.debug("stringToByte(): input is null"); 1317 log.trace("stringToByte(): finish"); 1318 return null; 1319 } 1320 1321 int size = strings.length; 1322 byte[] bytes = new byte[size * length]; 1323 log.trace("stringToByte(): size={} length={}", size, length); 1324 StringBuffer strBuff = new StringBuffer(length); 1325 for (int i = 0; i < size; i++) { 1326 // initialize the string with spaces 1327 strBuff.replace(0, length, " "); 1328 1329 if (strings[i] != null) { 1330 if (strings[i].length() > length) { 1331 strings[i] = strings[i].substring(0, length); 1332 } 1333 strBuff.replace(0, length, strings[i]); 1334 } 1335 1336 strBuff.setLength(length); 1337 System.arraycopy(strBuff.toString().getBytes(), 0, bytes, length * i, length); 1338 } 1339 1340 log.trace("stringToByte(): finish"); 1341 1342 return bytes; 1343 } 1344 1345 /** 1346 * Returns the array of strings that represent the dimension names. Returns 1347 * null if there is no dimension name. 1348 * <p> 1349 * Some datasets have pre-defined names for each dimension such as 1350 * "Latitude" and "Longitude". getDimNames() returns these pre-defined 1351 * names. 1352 * 1353 * @return the names of dimensions, or null if there is no dimension name. 1354 */ 1355 public final String[] getDimNames() { 1356 if (rank < 0) init(); 1357 1358 return dimNames; 1359 } 1360 1361 /** 1362 * Checks if a given datatype is a string. Sub-classes must replace this 1363 * default implementation. 1364 * 1365 * @param tid 1366 * The data type identifier. 1367 * 1368 * @return true if the datatype is a string; otherwise returns false. 1369 */ 1370 public boolean isString(int tid) { 1371 return false; 1372 } 1373 1374 /** 1375 * Returns the size in bytes of a given datatype. Sub-classes must replace 1376 * this default implementation. 
1377 * 1378 * @param tid 1379 * The data type identifier. 1380 * 1381 * @return The size of the datatype 1382 */ 1383 public int getSize(int tid) { 1384 return -1; 1385 } 1386 1387 /** 1388 * Get Class of the original data buffer if converted. 1389 * 1390 * @return the Class of originalBuf 1391 */ 1392 @SuppressWarnings("rawtypes") 1393 public final Class getOriginalClass() { 1394 return originalBuf.getClass(); 1395 } 1396 1397 /** 1398 * Get flag that indicate if enum data is converted to strings. 1399 * 1400 * @return the enumConverted 1401 */ 1402 public boolean isEnumConverted() { 1403 return enumConverted; 1404 } 1405 1406 /** 1407 * Set flag that indicate if enum data is converted to strings. 1408 * 1409 * @param b 1410 * the enumConverted to set 1411 */ 1412 public void setEnumConverted(boolean b) { 1413 if (enumConverted != b) { 1414 originalBuf = convertedBuf = null; 1415 this.clearData(); 1416 } 1417 1418 enumConverted = b; 1419 } 1420}