labunix's blog

labunixのラボUnix

OSS-DB Goldの例題をダウンロードして一ファイルにまとめるワンライナー

■OSS-DB Goldの例題をダウンロードして一ファイルにまとめるワンライナー

$ mkdir -p OSS-DB_GOLD && cd OSS-DB_GOLD && {
  # Download every OSS-DB Gold sample-question page linked from the three
  # category index pages, dump each page to plain text with w3m, trim a fixed
  # number of lines from the top and bottom of each dump, and concatenate the
  # results into ../ossdb_gold.log with a 100-dash separator before each page.
  # The intermediate *.shtml / *.txt / *.log files are left in OSS-DB_GOLD.
  #
  # Everything lives inside this brace group so that nothing is downloaded,
  # converted or written if the working directory could not be entered.
  BASE="http://www.oss-db.jp/measures/"

  # Fixed line counts dropped from the top and the bottom of each w3m dump.
  # NOTE(review): presumably the navigation header and footer of the site;
  # re-check both numbers if the page layout changes.
  skip_head=28
  skip_tail=114

  # 1. Fetch each index page, keep the lines containing "<em>Q." (minus the
  #    ones containing "★★"), take the second double-quote-delimited field
  #    as the link target, and download every target.
  for target in \
      sample_gold_management.shtml \
      sample_gold_monitoring.shtml \
      sample_gold_performance.shtml
  do
    wget -O - "${BASE}${target}" |
      grep "<em>Q." | lv -s | grep -v "★★" | awk -F\" '{print $2}' |
      while read -r list; do
        # Lines without a quoted field yield an empty target; skip them.
        [ -n "$list" ] || continue
        # BASE already ends with "/", so no extra separator is added.
        wget "${BASE}${list}"
      done
  done

  # 2. Render each downloaded page as plain text: foo.shtml -> foo.txt.
  for list in *.shtml; do
    [ -e "$list" ] || continue    # glob matched nothing
    w3m -dump "$list" > "${list%.shtml}.txt"
  done

  # 3. Strip the fixed header and footer: foo.txt -> foo.log.
  for list in *.txt; do
    [ -e "$list" ] || continue    # glob matched nothing
    # The outer $(( )) normalises the padded output of some wc versions.
    totalline=$(($(wc -l < "$list")))
    # A dump no longer than header + footer has nothing to keep, and the
    # line arithmetic below would turn zero or negative.
    if [ "$totalline" -le $((skip_head + skip_tail)) ]; then
      echo "skip ${list}: only ${totalline} lines" >&2
      continue
    fi
    tail -n "+$((skip_head + 1))" "$list" |
      head -n "$((totalline - skip_head - skip_tail))" > "${list%.txt}.log"
  done

  # 4. Concatenate all trimmed pages, each preceded by a line of 100 dashes,
  #    dropping the two leftover boilerplate lines.
  separator=$(printf '%100s' '' | tr ' ' '-')
  for list in *.log; do
    [ -e "$list" ] || continue    # glob matched nothing
    printf '%s\n' "$separator"
    grep -v -F -e 'Yahoo!ブックマークに登録' -e '• English' "$list"
  done > ../ossdb_gold.log

  # Do not leave loop temporaries behind in the interactive shell.
  unset target list totalline separator
}

■「ossdb_gold.log」がそれです。

$ wc -l ../ossdb_gold.log
1161 ../ossdb_gold.log

■足りないなと思った場合でもファイルが残っているので安心ですね。
 不要なら削除を。

$ ls OSS-DB_GOLD/ | column -c 80
gold_sample_111115_01.log	gold_sample_130807_01.log
gold_sample_111115_01.shtml	gold_sample_130807_01.shtml
gold_sample_111115_01.txt	gold_sample_130807_01.txt
gold_sample_111130_01.log	gold_sample_130807_02.log
gold_sample_111130_01.shtml	gold_sample_130807_02.shtml
gold_sample_111130_01.txt	gold_sample_130807_02.txt
gold_sample_111227_01.log	gold_sample_130910_01.log
gold_sample_111227_01.shtml	gold_sample_130910_01.shtml
gold_sample_111227_01.txt	gold_sample_130910_01.txt
gold_sample_120210_01.log	gold_sample_140307_01.log
gold_sample_120210_01.shtml	gold_sample_140307_01.shtml
gold_sample_120210_01.txt	gold_sample_140307_01.txt
gold_sample_120210_02.log	gold_sample_140307_02.log
gold_sample_120210_02.shtml	gold_sample_140307_02.shtml
gold_sample_120210_02.txt	gold_sample_140307_02.txt
gold_sample_120314_01.log	gold_sample_140417_01.log
gold_sample_120314_01.shtml	gold_sample_140417_01.shtml
gold_sample_120314_01.txt	gold_sample_140417_01.txt
gold_sample_130401_01.log	gold_sample_140417_02.log
gold_sample_130401_01.shtml	gold_sample_140417_02.shtml
gold_sample_130401_01.txt	gold_sample_140417_02.txt
gold_sample_130402_01.log	gold_sample_140530_01.log
gold_sample_130402_01.shtml	gold_sample_140530_01.shtml
gold_sample_130402_01.txt	gold_sample_140530_01.txt
gold_sample_130510_02.log	gold_sample_140702_01.log
gold_sample_130510_02.shtml	gold_sample_140702_01.shtml
gold_sample_130510_02.txt	gold_sample_140702_01.txt
gold_sample_130619_01.log	gold_sample_140702_02.log
gold_sample_130619_01.shtml	gold_sample_140702_02.shtml
gold_sample_130619_01.txt	gold_sample_140702_02.txt
gold_sample_130704_01.log	gold_sample_140812_03.log
gold_sample_130704_01.shtml	gold_sample_140812_03.shtml
gold_sample_130704_01.txt	gold_sample_140812_03.txt
gold_sample_130704_02.log	gold_sample_140812_04.log
gold_sample_130704_02.shtml	gold_sample_140812_04.shtml
gold_sample_130704_02.txt	gold_sample_140812_04.txt