第2回 Hadoop 輪読会
Upcoming SlideShare
Loading in...5
×
 

第2回 Hadoop 輪読会

on

  • 2,506 views

第2回 Hadoop 輪読会の発表資料

第2回 Hadoop 輪読会の発表資料

Statistics

Views

Total Views
2,506
Views on SlideShare
2,029
Embed Views
477

Actions

Likes
3
Downloads
55
Comments
0

5 Embeds 477

http://d.hatena.ne.jp 332
http://brfrn169.hatenablog.com 129
http://tchiba12.appspot.com 7
http://webcache.googleusercontent.com 5
http://www.slideshare.net 4

Accessibility

Categories

Upload Details

Uploaded via as Adobe PDF

Usage Rights

© All Rights Reserved

Report content

Flagged as inappropriate Flag as inappropriate
Flag as inappropriate

Select your reason for flagging this presentation as inappropriate.

Cancel
  • Full Name Full Name Comment goes here.
    Are you sure you want to
    Your message goes here
    Processing…
Post Comment
Edit your comment

第2回 Hadoop 輪読会 第2回 Hadoop 輪読会 Presentation Transcript

  • Hadoop 3 Hadoop
  • Hadoop • - - - • HDFS(Hadoop Distributed Filesystem)
  • HDFS • - ‣ MB, GB, TB - ‣ - ‣ ‣ View slide
  • HDFS • - ‣ - ‣ - ‣ ‣ View slide
  • HDFS • - 64MB - ‣ ‣ ‣
  • HDFS • - / - ( ) - ( )
  • HDFS • - ( ) - ( )
  • HDFS • - - -
  • HDFS • - ( , ) -
  • HDFS • - NameNode SecondaryNameNode
  • HDFS • - open() append() write() NameNode SecondaryNameNode
  • HDFS • - open() append() write() NameNode SecondaryNameNode
  • HDFS • - open() append() write() NameNode SecondaryNameNode
  • HDFS • - NameNode SecondaryNameNode
  • HDFS • - NameNode SecondaryNameNode
  • HDFS • - NameNode SecondaryNameNode
  • HDFS • - NameNode SecondaryNameNode
  • • hadoop fs -copyFromLocal <localsrc> ... <dst> • hadoop fs -copyToLocal <src> <localdst> • hadoop fs -ls <path> • hadoop fs -mkdir <path> • hadoop fs -help
  • Hadoop •hadoop fs -ls file:/// •hadoop fs -ls hdfs:/// •hadoop fs -ls hftp:/// URI java local file org.apache.hadoop.fs.LocalFileSystem HDFS hdfs org.apache.hadoop.hdfs.DistributedFileSystem HFTP hftp org.apache.hadoop.hdfs.HftpFileSystem HSFTP hsftp org.apache.hadoop.hdfs.HsftpFileSystem HAR har org.apache.hadoop.fs.HarFileSystem KFS kfs org.apache.hadoop.fs.kfs.KosmosFileSystem FTP ftp org.apache.hadoop.fs.ftp.FTPFileSystem S3 s3n org.apache.hadoop.fs.s3native.NativeS3FileSystem ( ) S3 s3 org.apache.hadoop.fs.s3.S3FileSystem ( )
  • • Thrift • C - libhdfs • FUSE(FileSystem in Userspace) • WebDAV • - HTTP, FTP( )
  • Java • Hadoop URL public class URLCat { static { URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory()); } public static void main(String[] args) throws Exception { InputStream in = null; try { in = new URL(args[0]).openStream(); IOUtils.copyBytes(in, System.out, 4096, false); } finally { IOUtils.closeStream(in); } } }
  • Java • FileSystem API public class FileSystemCat { public static void main(String[] args) throws Exception { String uri = args[0]; Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(uri), conf); InputStream in = null; try { in = fs.open(new Path(uri)); IOUtils.copyBytes(in, System.out, 4096, false); } finally { IOUtils.closeStream(in); } } }
  • Java • FSDataInputStream public class FSDataInputStream extends DataInputStream implements Seekable, PositionedReadable { // } public interface Seekable { void seek(long pos) throws IOException; long getPos() throws IOException; boolean seekToNewSource(long targetPos) throws IOException; }
  • Java • FSDataInputStream public class FileSystemDoubleCat { public static void main(String[] args) throws Exception { String uri = args[0]; FileSystem fs = FileSystem.get(URI.create(uri), new Configuration()); FSDataInputStream in = null; try { in = fs.open(new Path(uri)); IOUtils.copyBytes(in, System.out, 4096, false); in.seek(0); IOUtils.copyBytes(in, System.out, 4096, false); } finally { IOUtils.closeStream(in); } } }
  • Java • FSDataInputStream public class FSDataInputStream extends DataInputStream implements Seekable, PositionedReadable { // } public interface PositionedReadable { int read(long position, byte buffer[], int offset, int length) throws IOException; void readFully(long position, byte buffer[], int offset, int length) throws IOException; void readFully(long position, byte buffer[]) throws IOException; }
  • Java • - public FSDataOutputStream create(Path f) throws IOException - public FSDataOutputStream append(Path f) throws IOException
  • Java • FSDataOutputStream - FileSystem create(), append() - public class FSDataOutputStream extends DataOutputStream implements Syncable { public long getPos() throws IOException { // } // }
  • Java • - public boolean mkdirs(Path f) throws IOException
  • Java • FileStatus status = fs.getFileStatus(new Path("hdfs://localhost/hogehoge")); status.isDir(); // status.getLen(); // status.getModificationTime(); // status.getReplication(); // status.getBlockSize(); // ( 64MB) status.getOwner(); // status.getGroup(); // status.getPermission().toString(); //
  • Java • - public FileStatus[] listStatus(Path f) throws IOException; - public FileStatus[] listStatus(Path f, PathFilter filter) throws IOException; - public FileStatus[] listStatus(Path[] files) throws IOException; - public FileStatus[] listStatus(Path[] files, PathFilter filter) throws IOException;
  • Java • public class ListStatus { public static void main(String[] args) throws Exception { String uri = args[0]; Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(uri), conf); Path[] paths = new Path[args.length]; for (int i = 0; i < paths.length; i++) { paths[i] = new Path(args[i]); } FileStatus[] status = fs.listStatus(paths); for (FileStatus stat : status) { System.out.println(stat.getPath().toUri().getPath()); } } }
  • Java • - public FileStatus[] globStatus(Path pathPattern) throws IOException - public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException
  • Java • [ab] {a,b} [^ab] {a,b} {a,b} (a b ) [a-b] a b {a,b} (a b ) a b [^a-b] {a,b} a b ¥c c c
  • Java • public interface PathFilter { boolean accept(Path path); }
  • Java • public class RegexExcludePathFilter implements PathFilter { private final String regex; public RegexExcludePathFilter(String regex) { this.regex = regex; } @Override public boolean accept(Path path) { return !path.toString().matches(regex); } } fs.globStatus(new Path("/2007/*/*"), new RegexExcludePathFilter("^.*/2007/12/31$"));
  • Java • - public boolean delete(Path f, boolean recursive) throws IOException;
  • • HDFS DistributedFileSystem NameNode FSDataInputStream DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • open(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode FSDataInputStream DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode FSDataInputStream DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 close() block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • “/aaa.txt” open(new Path(“/aaa.txt”)) ( ) HDFS DistributedFileSystem NameNode block1 : DataNode1, DataNode3 read() block2 : DataNode3, DataNode4 aaa.txt : block1, block2. block3, block4 block1 : DataNode1, DataNode3 block2 : DataNode3, DataNode4 FSDataInputStream block3 : DataNode2, DataNode3 block4 : DataNode1, DataNode2 DateNode1 DateNode2 DateNode3 DateNode4 block1 block3 block1 block2 block4 block4 block2 block3
  • • - - -
  • • - - 9.1.1 - (/d1/r1/n1, /d1/r1/n1) = 0 d1 d2 (/d1/r1/n1, /d1/r1/n2) = 2 (/d1/r1/n1, /d1/r2/n3) = 4 r1 r2 r3 (/d1/r1/n1, /d2/r3/n4) = 6 n1 n2 n3 n4
  • • HDFS DistributedFileSystem NameNode FSDataOutputStream DateNode1 DateNode2 DateNode3
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode FSDataOutputStream DateNode1 DateNode2 DateNode3
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode FSDataOutputStream DateNode1 DateNode2 DateNode3
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode FSDataOutputStream DateNode1 DateNode2 DateNode3
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DateNode1 DateNode2 DateNode3
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream ack DateNode1 DateNode2 DateNode3
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream ack DateNode1 DateNode2 DateNode3
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream ack DateNode1 DateNode2 DateNode3
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1 block1
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1 block1
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1 block1
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1 block1
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1 block1
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1 block1
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() close() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1 block1
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() close() FSDataOutputStream block1 : DataNode1, DataNode2, DataNode3 DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1 block1
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block1
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block1 block1
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block1
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block1
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block1
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block1 block2
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block1 block2
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block1 block2
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block1 block2
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block1 block2
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block2
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block2
  • • create(new Path(“/aaa.txt”)) HDFS DistributedFileSystem NameNode write() FSDataOutputStream DataStreamer ack DateNode1 DateNode2 DateNode3 block2 block1 block2 block2
  • • - dfs.replication.min( 1) - (dfs.replication 3) -
  • • 1. ( ) 2. 3. 4. ( )
  • • - fs.create(new Path("p")); - OutputStream out = fs.create(new Path("p")); out.write("content".getBytes("UTF-8")); out.flush();
  • • - FSDataOutputStream sync() - sync() close() FSDataOutputStream out = fs.create(new Path("p")); out.write("content".getBytes("UTF-8")); out.flush(); out.sync();
  • • - ‣ sync() ‣ sync() ‣ sync()
  • distcp • 2 HDFS - hadoop distcp hdfs://namenode1/foo hdfs://namenode2/bar - hadoop distcp -overwrite hdfs://namenode1/foo hdfs://namenode2/bar/foo - hadoop distcp -update hdfs://namenode1/foo hdfs://namenode2/bar/foo • MapReduce - 256MB (1GB 4 ) - map ( ) - map 1 (tasktracker) 20map
  • Hadoop • • HAR • hadoop archive -archiveName files.har /my/files /my
  • Hadoop • - ( ) - - HAR MapReduce ( 7.2.1.4 CombineFileInputFormat )
  • • HDFS - - - - • distcp • HAR
  • • •