Saturday, March 2, 2019

Hadoop HDFS - Recovering From Accidental Data Loss

--Trash (enable in core-site.xml; both intervals are in minutes)



sudo vi /etc/hadoop/conf/core-site.xml



<property>

 <name>fs.trash.interval</name>

 <value>60</value>

</property>



<property>

 <name>fs.trash.checkpoint.interval</name>

 <value>45</value>

</property>



sudo service hadoop-hdfs-namenode restart



--Skip Trash (delete permanently, bypassing the trash)



hadoop fs -rm -skipTrash delete-file2



--Snapshots

hadoop fs -mkdir important-files



hadoop fs -copyFromLocal file1 file2 important-files



hdfs dfs -createSnapshot /user/ubuntu/important-files



--Requires admin rights (the createSnapshot above fails until the directory is made snapshottable)

hdfs dfsadmin -allowSnapshot /user/ubuntu/important-files



--Snapshot creation - demonstrate file adds & deletes (snapshots are read-only, so the rm inside .snapshot is expected to fail)

hdfs dfs -createSnapshot /user/ubuntu/important-files snapshot1



hadoop fs -ls /user/ubuntu/important-files/.snapshot



hadoop fs -ls /user/ubuntu/important-files/.snapshot/snapshot1



hadoop fs -rm /user/ubuntu/important-files/.snapshot/snapshot1/file1



hadoop fs -rm /user/ubuntu/important-files/file2



hadoop fs -copyFromLocal file3 /user/ubuntu/important-files



hdfs dfs -createSnapshot /user/ubuntu/important-files snapshot2



hadoop fs -ls /user/ubuntu/important-files/.snapshot/snapshot2



hadoop fs -ls /user/ubuntu/important-files



hadoop fs -ls /user/ubuntu/important-files/.snapshot/snapshot1



hadoop fs -cat /user/ubuntu/important-files/.snapshot/snapshot1/file2



hdfs snapshotDiff /user/ubuntu/important-files snapshot2 snapshot1



--Demonstrate file modifications

cat append-file



hadoop fs -appendToFile append-file /user/ubuntu/important-files/file1



hadoop fs -cat /user/ubuntu/important-files/file1



hdfs dfs -createSnapshot /user/ubuntu/important-files snapshot3



hadoop fs -ls /user/ubuntu/important-files/.snapshot/snapshot3



hdfs snapshotDiff /user/ubuntu/important-files snapshot3 snapshot2



hadoop fs -cat /user/ubuntu/important-files/.snapshot/snapshot2/file1



hadoop fs -cat /user/ubuntu/important-files/.snapshot/snapshot3/file1



hadoop fs -cat /user/ubuntu/important-files/file1



--Delete snapshots

hdfs dfs -deleteSnapshot /user/ubuntu/important-files snapshot1



hadoop fs -ls /user/ubuntu/important-files/.snapshot



--Requires admin rights (disallowSnapshot fails while any snapshot still exists; delete snapshot2 and snapshot3 first)

hdfs dfsadmin -disallowSnapshot /user/ubuntu/important-files