//org.apache.hadoop.hdfs.server.datanode.DataNode#makeInstance/**
* 在确定有一个data目录下,创建datanode实例
* @param dataDirs List of directories, where the new DataNode instance should
* keep its files.
* @param conf Configuration instance to use.
* @param resources Secure resources needed to run under Kerberos
* @return DataNode instance for given list of data dirs and conf, or null if
* no directory from this directory list can be created.
* @throws IOException
*/static DataNode makeInstance(Collection<StorageLocation> dataDirs,
Configuration conf, SecureResources resources)throws IOException {//获取本地文件系统
LocalFileSystem localFS = FileSystem.getLocal(conf);//实例化文件或文件夹的权限管理类
FsPermission permission =newFsPermission(
conf.get(DFS_DATANODE_DATA_DIR_PERMISSION_KEY,
DFS_DATANODE_DATA_DIR_PERMISSION_DEFAULT));
DataNodeDiskChecker dataNodeDiskChecker =newDataNodeDiskChecker(permission);//检查配置文件中给定目录下的文件夹或文件,具体是检查权限,并且要保证文件可读
List<StorageLocation> locations =checkStorageLocations(dataDirs, localFS, dataNodeDiskChecker);
DefaultMetricsSystem.initialize("DataNode");//如果目录检查不过关,报错assert locations.size()>0:"number of data directories should be > 0";returnnewDataNode(conf, locations, resources);}
6.1.1 DataNode 构造方法
//org.apache.hadoop.hdfs.server.datanode.DataNode#DataNodestartDataNode(conf, dataDirs, resources);//大概在428行//org.apache.hadoop.hdfs.server.datanode.DataNode#startDataNodevoidstartDataNode(Configuration conf,
List<StorageLocation> dataDirs,
SecureResources resources
)throws IOException {//.....//Data storage information file.
storage =newDataStorage();// global DN settingsregisterMXBean();initDataXceiver(conf);startInfoServer(conf);//java 虚拟机监控
pauseMonitor =newJvmPauseMonitor(conf);
pauseMonitor.start();// BlockPoolTokenSecretManager is required to create ipc server.this.blockPoolTokenSecretManager =newBlockPoolTokenSecretManager();//....// 初始化IpcServer(RPC通信),DataNode#runDatanodeDaemon()中启动initIpcServer(conf);//...// 按照namespace(nameservice)、namenode的二级结构进行初始化
blockPoolManager =newBlockPoolManager(this);//重点
blockPoolManager.refreshNamenodes(conf);//....}
//org.apache.hadoop.hdfs.server.datanode.BlockPoolManager#createBPOSprotected BPOfferService createBPOS(List<InetSocketAddress> nnAddrs){returnnewBPOfferService(nnAddrs, dn);}//org.apache.hadoop.hdfs.server.datanode.BPOfferService#BPOfferServiceBPOfferService(List<InetSocketAddress> nnAddrs, DataNode dn){
Preconditions.checkArgument(!nnAddrs.isEmpty(),"Must pass at least one NN.");this.dn = dn;for(InetSocketAddress addr : nnAddrs){//每一个bpServices都会绑定一个BPServiceActor//BPOfferService通过bpServices维护同一个namespace下各namenode对应的BPServiceActor。this.bpServices.add(newBPServiceActor(addr,this));//关于BPServiceActor描述:A thread per active or standby namenode to perform://Pre-registration handshake with namenode 握手//Registration with namenode//Send periodic heartbeats to the namenode//Handle commands received from the namenode}}
6.1.1.3 startAll
// org.apache.hadoop.hdfs.server.datanode.BlockPoolManager#startAll (excerpt)
for (BPOfferService bpos : offerServices) {
  bpos.start();
}

// org.apache.hadoop.hdfs.server.datanode.BPServiceActor#start
/**
 * Starts the thread that runs this actor. Does nothing when {@code bpThread}
 * already exists and is alive; otherwise creates a daemon thread around this
 * object, named by formatThreadName(), and starts it.
 */
void start() {
  if ((bpThread != null) && (bpThread.isAlive())) {
    // Thread is started already
    return;
  }
  bpThread = new Thread(this, formatThreadName());
  bpThread.setDaemon(true); // needed for JUnit testing
  bpThread.start();
}
// Next, look directly at run().
6.1.2.1 BPServiceActor
//org.apache.hadoop.hdfs.server.datanode.BPServiceActor#run@Overridepublicvoidrun(){
LOG.info(this+" starting to offer service");try{while(true){// init stufftry{//与namenode 握手connectToNNAndHandshake();break;}catch(IOException ioe){//大部分握手失败的情况都需要重试,除非抛出了非IOException异常或datanode关闭
runningState = RunningState.INIT_FAILED;//......}}}while(shouldRun()){try{// BPServiceActor提供的服务offerService();}catch(Exception ex){//不管抛出任何异常,都持续提供服务(包括心跳、数据块汇报等),直到datanode关闭
LOG.error("Exception in BPOfferService for "+this, ex);sleepAndLogInterrupts(5000,"offering service");}}//.......}}//主要看connectToNNAndHandshake
6.1.2.1 connectToNNAndHandshake
//org.apache.hadoop.hdfs.server.datanode.BPServiceActor#connectToNNAndHandshakeprivatevoidconnectToNNAndHandshake()throws IOException {// get NN proxy
bpNamenode = dn.connectToNN(nnAddr);// First phase of the handshake with NN - get the namespace info.
NamespaceInfo nsInfo =retrieveNamespaceInfo();// 验证并初始化该datanode上的BlockPool
bpos.verifyAndSetNamespaceInfo(nsInfo);// Second phase of the handshake with the NN.register(nsInfo);}//主要看verifyAndSetNamespaceInfo
/**
* 一个Block Pools成功连接到Namenode,该block pool需要在本地存储中初始化,
* 并且检查集群ID的一致性
* 如果这是第一个注册的block pool,那么它也需要为datanode存储区域做初始化
*
* @param bpos Block pool offer service
* @throws IOException if the NN is inconsistent with the local storage.
*/voidinitBlockPool(BPOfferService bpos)throws IOException {//获取getNamespace信息
NamespaceInfo nsInfo = bpos.getNamespaceInfo();//...setClusterId(nsInfo.clusterID, nsInfo.getBlockPoolID());// Register the new block pool with the BP manager.
blockPoolManager.addBlockPool(bpos);// In the case that this is the first block pool to connect, initialize// the dataset, block scanners, etc.initStorage(nsInfo);//在初始化块池之前排除故障磁盘以避免启动失败。checkDiskError();//将blockpool添加到FsDatasetIpml,并继续初始化存储结构
data.addBlockPool(nsInfo.getBlockPoolID(), conf);
blockScanner.enableBlockPoolId(bpos.getBlockPoolId());initDirectoryScanner(conf);}
5.1.2.5 initStorage
//org.apache.hadoop.hdfs.server.datanode.DataNode#initStorageprivatevoidinitStorage(final NamespaceInfo nsInfo)throws IOException {final FsDatasetSpi.Factory<?extendsFsDatasetSpi<?>> factory
= FsDatasetSpi.Factory.getFactory(conf);if(!factory.isSimulated()){...// 构造参数// 初始化DataStorage(每个datanode分别只持有一个)。可能会触发DataStorage级别的状态装换,因此,要在DataNode上加锁synchronized(this){//read storage info, lock data dirs and transition fs state if necessary
storage.recoverTransitionRead(this, bpid, nsInfo, dataDirs, startOpt);}final StorageInfo bpStorage = storage.getBPStorage(bpid);
LOG.info("Setting up storage: nsid="+ bpStorage.getNamespaceID()+";bpid="+ bpid +";lv="+ storage.getLayoutVersion()+";nsInfo="+ nsInfo +";dnuuid="+ storage.getDatanodeUuid());}...// 检查// 初始化FsDatasetImpl(同上,每个datanode分别只持有一个)synchronized(this){if(data == null){
data = factory.newInstance(this, storage, conf);}}}
//org.apache.hadoop.hdfs.server.datanode.DataXceiverServer#DataXceiverServer//关于DataXceiverServer的描述:用于接收/发送数据块的服务器。或侦听来自此客户端的请求publicvoidrun(){
Peer peer = null;//只要是datanode在运行while(datanode.shouldRun &&!datanode.shutdownForUpgrade){try{//socket阻塞
peer = peerServer.accept();// 确保没有超过xceiver计数int curXceiverCount = datanode.getXceiverCount();//启动线程DataXceiver去交互newDaemon(datanode.threadGroup,
DataXceiver.create(peer, datanode,this)).start();}//......}// Close the server to stop reception of more requests.try{
peerServer.close();
closed =true;//...closeAllPeers();}
5.2.2 DataXceiver
//org.apache.hadoop.hdfs.server.datanode.DataXceiver#run/**
* Read/write data from/to the DataXceiverServer.
*/@Overridepublicvoidrun(){int opsProcessed =0;
Op op = null;try{
dataXceiverServer.addPeer(peer, Thread.currentThread(),this);
peer.setWriteTimeout(datanode.getDnConf().socketWriteTimeout);
InputStream input = socketIn;try{
IOStreamPair saslStreams = datanode.saslServer.receive(peer, socketOut,
socketIn, datanode.getXferAddress().getPort(),
datanode.getDatanodeId());
input =newBufferedInputStream(saslStreams.in,
smallBufferSize);
socketOut = saslStreams.out;}catch(InvalidMagicNumberException imne){//....}super.initialize(newDataInputStream(input));// We process requests in a loop, and stay around for a short timeout.// This optimistic behaviour allows the other end to reuse connections.// Setting keepalive timeout to 0 disable this behavior.do{updateCurrentThreadName("Waiting for operation #"+(opsProcessed +1));try{if(opsProcessed !=0){assert dnConf.socketKeepaliveTimeout >0;
peer.setReadTimeout(dnConf.socketKeepaliveTimeout);}else{
peer.setReadTimeout(dnConf.socketTimeout);}
op =readOp();}catch(InterruptedIOException ignored){// Time out while we wait for client rpc}// restore normal timeoutif(opsProcessed !=0){
peer.setReadTimeout(dnConf.socketTimeout);}
opStartTime =monotonicNow();processOp(op);++opsProcessed;}while((peer != null)&&(!peer.isClosed()&& dnConf.socketKeepaliveTimeout >0));}catch(Throwable t){//....}finally{//...}