AmosCloud

Library

Have a Question?

If you have any questions, you can ask below or enter what you are looking for!

2020年5月26日_day09_Hadoop


  // 切片大小的计算

  /**
   * Computes the size of a single input split by clamping the block size
   * between the minimum and maximum split sizes.
   *
   * <p>The result is {@code max(minSize, min(maxSize, blockSize))}, so when the
   * bounds conflict ({@code minSize > maxSize}) the minimum wins.
   *
   * @param blockSize block size of the file in bytes (the tutorial's example
   *                  uses 134217728, i.e. 128 MB)
   * @param minSize   smallest allowed split size in bytes (1 in the tutorial's
   *                  example)
   * @param maxSize   largest allowed split size in bytes
   *                  ({@code Long.MAX_VALUE} in the tutorial's example)
   * @return the split size in bytes
   */
  protected long computeSplitSize(long blockSize, long minSize, long maxSize) {
    // Upper bound first: never exceed maxSize.
    long cappedAtMax = (maxSize <= blockSize) ? maxSize : blockSize;
    // Lower bound second, so minSize takes precedence over maxSize.
    if (minSize >= cappedAtMax) {
      return minSize;
    }
    return cappedAtMax;
  }

  //怎么切片的
  // generate splits

    // Split generation: walks every input file of the job and appends the
    // resulting input splits to `splits`.
    // NOTE(review): `job`, `minSize`, `maxSize` and `SPLIT_SLOP` are defined
    // outside this excerpt.

    // List that accumulates the generated splits.
    List<InputSplit> splits = new ArrayList<InputSplit>();
    // Status entries of the input files to process, obtained from the job.
    List<FileStatus> files = listStatus(job);
    for (FileStatus file: files) {
      // Path of the current file.
      Path path = file.getPath();
      // Length of the current file.
      long length = file.getLen();

      if (length != 0) {
        // Array describing where the file's blocks are located.
        BlockLocation[] blkLocations;

        // If the status is already a LocatedFileStatus, cast it and read the
        // block locations straight from it; otherwise ask the file system
        // for the block locations covering the range [0, length).
        if (file instanceof LocatedFileStatus) {
          blkLocations = ((LocatedFileStatus) file).getBlockLocations();
        } else {
          FileSystem fs = path.getFileSystem(job.getConfiguration());
          blkLocations = fs.getFileBlockLocations(file, 0, length);
        }

        if (isSplitable(job, path)) {
          // Block size of the current file (128 MB in the tutorial's example).
          long blockSize = file.getBlockSize();
          // Split size derived from the block size (128 MB in the example).
          long splitSize = computeSplitSize(blockSize, minSize, maxSize);
          // Nothing has been split off yet, so the whole file remains.
          long bytesRemaining = length;

          // Worked example from the original notes (sizes in MB, threshold
          // given there as 1.1), for a 300 MB file with 128 MB splits:
          //   remaining / splitSize  vs. threshold
          //   300       / 128        >  1.1  -> emit a full split
          //   172       / 128        >  1.1  -> emit a full split

          // Keep emitting full-size splits while the remainder is larger than
          // SPLIT_SLOP times the split size.
          while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
            // Block index for the split's start offset (length - bytesRemaining).
            int blkIndex = getBlockIndex(blkLocations, length-bytesRemaining);
            // Build a full-size split at that offset, using the hosts and
            // cached hosts of that block, and add it to the result list.
            splits.add(makeSplit(path, length-bytesRemaining, splitSize,
                        blkLocations[blkIndex].getHosts(),
                        blkLocations[blkIndex].getCachedHosts()));
            // One split's worth of bytes has been consumed.
            bytesRemaining -= splitSize;
          }
          // In the worked example 44 MB are left at this point; whatever
          // remains becomes one final split.
          if (bytesRemaining != 0) {
            int blkIndex = getBlockIndex(blkLocations, length-bytesRemaining);
            splits.add(makeSplit(path, length-bytesRemaining, bytesRemaining,
                       blkLocations[blkIndex].getHosts(),
                       blkLocations[blkIndex].getCachedHosts()));
          }
        } else { // not splitable
          // The whole file becomes a single split that uses the hosts and
          // cached hosts of the first block location.
          splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts(),
                      blkLocations[0].getCachedHosts()));
        }
      } else { // zero-length file
        // Create empty hosts array for zero length files: an empty file still
        // gets one split, with an empty host list.
        splits.add(makeSplit(path, 0, length, new String[0]));
      }
    }

You must be logged in to post a comment.