Browse Source

vandroucli: awk

master
Maxim Likhachev 6 years ago
parent
commit
c17b782290
  1. 47
      vandroucli
  2. 32
      vandroucli.sed

47
vandroucli

@ -1,19 +1,46 @@
#!/bin/bash #!/bin/bash
#Создан: Чт 05 апр 2018 10:26:40 #Создан: Чт 05 апр 2018 10:26:40
#Изменён: Чт 05 апр 2018 10:41:39 #Изменён: Вс 22 сен 2019 08:29:53
# Получение последних записей с сайта vandrouki.ru # Получение последних записей с сайта vandrouki.ru
URL="https://vandrouki.ru" URLS="https://vandrouki.asia https://vandrouki.by https://vandrouki.ru"
curl -s -q "$URL" \ rstc="\033[00m"
yellow="\033[1;33m"
blue="\033[1;36m"
green="\033[1;32m"
for url in $URLS; {
echo -e "$green> $url $rstc\n"
curl -s -q "$url" \
| sed 's/<html.*xmlns.*$//g' \ | sed 's/<html.*xmlns.*$//g' \
| xmllint --html --xpath "//html/body/div[@id='content']/div[@id='primary']/div/*[self::div/span[@class='published'] or self::h2/a]" - \ | xmllint --html --xpath "//html/body/div[@id='content']/div[@id='primary']/div/*[self::div/span[@class='published'] or self::h2/a]" - 2>&- \
| tr '\n' '\r' \ | awk 'BEGIN { RS="" }
| sed -e 's/\r\r/####/g; s/\r/@/g; s/####/\n/g' \ {
| sed 's/href="/@/; s/" rel=[^>]*>/@/; s/published.>/@/' \ gsub(/\n+/, "@");
| awk -F'@' '{print $5 "@" $3 "@" $2}' \ gsub(/<h2 class="entry-title">/, "\n");
| sed -e :a -e 's/<[^>]*>//g;/</N;//ba' \ gsub(/(href=")|(" rel=[^>]*>)|(published.>)/, "@");
| column -t -s @ gsub(/(&#[0-9]+;)|(<[^@>]*>*)/, "");
gsub(/@\s*@*/,"@");
print
}' \
| awk -F'@' -v blue="$blue" -v yellow="$yellow" -v green="$green" -v rstc="$rstc" '
/^\s*$/ {
next
} {
if ($4 ~ /^\s*$/) {
date = $5
} else {
date = $4
}
print blue " # " date "\t" yellow toupper($3) "\n\t\t" green $2 rstc
}'
echo -e "\n--------------------------------\n"
}

32
vandroucli.sed

@ -0,0 +1,32 @@
#!/bin/bash
#
# Print the latest posts from the vandrouki sites as a coloured table,
# using a sed-based text pipeline over the scraped HTML.
#
# Requires: curl, xmllint, GNU sed (uses \s and \r), awk, column.

# Sites to query, in display order.
URLS=(
  "https://vandrouki.ru"
  "https://vandrouki.by"
  "https://vandrouki.asia"
)
# URLS=("https://vandrouki.by")

# ANSI colour codes, deliberately stored as literal backslash sequences:
# they are expanded later by printf's %b and by awk's -v assignment handling.
rstc="\033[00m"
yellow="\033[1;33m"
blue="\033[1;36m"
green="\033[1;32m"

for url in "${URLS[@]}"; do
  # Header line naming the site, followed by an empty line.
  printf '%b> %s %b\n\n' "$green" "$url" "$rstc"

  # Pipeline stages, in order:
  #   1. curl    - download the page silently.
  #                NOTE(review): curl documents -q as effective only when it
  #                is the first option - confirm whether it is needed here.
  #   2. sed     - blank out the <html ... xmlns ...> tag up to end of line
  #                (presumably so the XPath below matches without namespaces).
  #   3. xmllint - keep the children of div#content/div#primary/div that are
  #                either a div holding span.published or an h2 holding a link.
  #                Parser diagnostics are discarded via /dev/null; the stream
  #                is not closed, so xmllint's writes to stderr cannot fail
  #                or end up in an unrelated descriptor.
  #   4. tr      - turn newlines into CRs so sed sees the whole input at once.
  #   5. sed     - empty lines (CR CR) become record breaks, remaining CRs
  #                become '@' field separators, and every entry-title <h2>
  #                starts a new record.
  #   6. sed     - replace the first href=", the first '" rel=...>' and the
  #                'published' attribute tail of each record with '@'.
  #   7. sed     - drop a leading '</div>@' and numeric character references.
  #   8. sed     - delete blank records.
  #   9. awk     - emit two '@'-separated rows per record: field 6 and the
  #                upper-cased field 3 on the first row, field 2 on the second
  #                (presumably date, title and link - verify against the
  #                current page markup).
  #  10. sed     - collapse runs of two or more '@' into exactly '@@', then
  #                strip any remaining HTML tags, including ones that span
  #                several lines.
  #  11. column  - align the result as a table using '@' as the separator.
  curl -s -q "$url" \
    | sed 's/<html.*xmlns.*$//g' \
    | xmllint --html --xpath "//html/body/div[@id='content']/div[@id='primary']/div/*[self::div/span[@class='published'] or self::h2/a]" - 2>/dev/null \
    | tr '\n' '\r' \
    | sed -e 's/\r\r/####/g; s/\r/@/g; s/####/\n/g; s/<h2 class="entry-title">/\n/g' \
    | sed 's/href="/@/; s/" rel=[^>]*>/@/; s/published.>/@/' \
    | sed -E 's|^</div>@||; s/&#[0-9]+;//g' \
    | sed '/^\s*$/d' \
    | awk -F'@' -v blue="$blue" -v yellow="$yellow" -v green="$green" -v rstc="$rstc" \
      '{
        print blue " # " $6 "@" yellow toupper($3) "\n@ " green $2 rstc
      }' \
    | sed 's/@@@*/@@/g' \
    | sed -e :a -e 's/<[^>]*>//g;/</N;//ba' \
    | column -t -s @

  # Blank line between sites.
  printf '\n'
done
Loading…
Cancel
Save