2 changed files with 69 additions and 10 deletions
@ -1,19 +1,46 @@ |
|||||||
#!/bin/bash

# Created:  Thu 05 Apr 2018 10:26:40
# Modified: Sun 22 Sep 2019 08:29:53

# Fetch the latest posts from the vandrouki sites and print them as a
# colourised list: one "# <date>  <TITLE>" line followed by the post link.
#
# Usage: <script> [URL...]
#   With no arguments the default sites listed in URLS are queried in order.
#
# Requires: curl, xmllint.

# Sites to query, in output order.
URLS=(https://vandrouki.asia https://vandrouki.by https://vandrouki.ru)

# Positional arguments, when given, replace the default site list.
if (( $# > 0 )); then
  URLS=("$@")
fi

# ANSI colour sequences. They are stored with a literal backslash and are
# expanded later by printf's %b and by awk's -v assignment.
rstc="\033[00m"
yellow="\033[1;33m"
blue="\033[1;36m"
green="\033[1;32m"

for url in "${URLS[@]}"; do
  printf '%b> %s %b\n\n' "$green" "$url" "$rstc"

  # Pipeline stages:
  #   1. curl    - download the page. -q must be the first argument to make
  #                curl skip ~/.curlrc; -s hides the progress meter while -S
  #                still reports transfer errors on stderr.
  #   2. sed     - blank out the "<html ... xmlns ..." text up to end of line
  #                (presumably so the namespace does not interfere with the
  #                XPath query - TODO confirm).
  #   3. xmllint - print the <div> elements that contain a
  #                <span class="published"> and the <h2> elements that contain
  #                a link. Its stderr is discarded via /dev/null; closing the
  #                descriptor (2>&-) would leave the process without a valid
  #                stderr.
  #   4. awk     - turn the markup into one "@"-separated line per post.
  #   5. awk     - colourise and print each post.
  curl -q -sS "$url" \
    | sed 's/<html.*xmlns.*$//g' \
    | xmllint --html --xpath "//html/body/div[@id='content']/div[@id='primary']/div/*[self::div/span[@class='published'] or self::h2/a]" - 2>/dev/null \
    | awk 'BEGIN { RS = "" }    # paragraph mode: records end at blank lines
      {
        # Newlines inside a record become field separators.
        gsub(/\n+/, "@");
        # Every post heading starts a new output line.
        gsub(/<h2 class="entry-title">/, "\n");
        # Mark the start of the link target, the link text and the date.
        gsub(/(href=")|(" rel=[^>]*>)|(published.>)/, "@");
        # Drop numeric character references and the remaining tags.
        gsub(/(&#[0-9]+;)|(<[^@>]*>*)/, "");
        # Collapse "@", optional whitespace and repeated "@" into one "@".
        # The explicit set replaces the gawk-only \s escape.
        gsub(/@[ \t\n\r\f\v]*@*/, "@");

        print
      }' \
    | awk -F'@' -v blue="$blue" -v yellow="$yellow" -v green="$green" -v rstc="$rstc" '
      # Skip lines that hold nothing but whitespace.
      /^[ \t\n\r\f\v]*$/ {
        next
      }
      {
        # Field 2 is the link target and field 3 the link text. The date is
        # taken from field 4, or from field 5 when field 4 is blank.
        if ($4 ~ /^[ \t\n\r\f\v]*$/) {
          date = $5
        } else {
          date = $4
        }

        print blue " # " date "\t" yellow toupper($3) "\n\t\t" green $2 rstc
      }'

  printf '\n--------------------------------\n\n'
done
||||||
|
|
||||||
|
@ -0,0 +1,32 @@ |
|||||||
|
#!/bin/bash

# Fetch the latest posts from the vandrouki sites and print them as a
# colourised table aligned with column(1).
#
# Usage: <script> [URL...]
#   With no arguments the default sites listed in URLS are queried in order.
#
# Requires: curl, xmllint, column, and GNU sed (the expressions below use
# \r, \s and "\n" in the replacement text).

# Sites to query, in output order.
URLS=(https://vandrouki.ru https://vandrouki.by https://vandrouki.asia)

# Positional arguments, when given, replace the default site list
# (for example a single site while debugging).
if (( $# > 0 )); then
  URLS=("$@")
fi

# ANSI colour sequences. They are stored with a literal backslash and are
# expanded later by printf's %b and by awk's -v assignment.
rstc="\033[00m"
yellow="\033[1;33m"
blue="\033[1;36m"
green="\033[1;32m"

for url in "${URLS[@]}"; do
  printf '%b> %s %b\n\n' "$green" "$url" "$rstc"

  # Pipeline stages (the sed calls stay separate on purpose: each one works
  # on the lines produced by the previous one):
  #   1. curl    - download the page. -q must be the first argument to make
  #                curl skip ~/.curlrc; -s hides the progress meter while -S
  #                still reports transfer errors on stderr.
  #   2. sed     - blank out the "<html ... xmlns ..." text up to end of line.
  #   3. xmllint - print the <div> elements that contain a
  #                <span class="published"> and the <h2> elements that contain
  #                a link. Its stderr is discarded via /dev/null; closing the
  #                descriptor (2>&-) would leave the process without a valid
  #                stderr.
  #   4. tr+sed  - join everything into one line, then split it again: double
  #                line breaks and post headings start a new line, single line
  #                breaks become "@" separators.
  #   5. sed     - per line, mark the first link target, link text and date
  #                with "@".
  #   6. sed     - strip a leading "</div>@" and numeric character references.
  #   7. sed     - delete whitespace-only lines.
  #   8. awk     - emit two "@"-separated rows per post: date and upper-cased
  #                title, then the link.
  #   9. sed     - squeeze runs of "@" to exactly two, then strip the
  #                remaining tags (joining lines while a "<" is still open).
  #  10. column  - align the "@"-separated cells.
  curl -q -sS "$url" \
    | sed 's/<html.*xmlns.*$//g' \
    | xmllint --html --xpath "//html/body/div[@id='content']/div[@id='primary']/div/*[self::div/span[@class='published'] or self::h2/a]" - 2>/dev/null \
    | tr '\n' '\r' \
    | sed -e 's/\r\r/####/g; s/\r/@/g; s/####/\n/g; s/<h2 class="entry-title">/\n/g' \
    | sed 's/href="/@/; s/" rel=[^>]*>/@/; s/published.>/@/' \
    | sed -E 's|^</div>@||; s/&#[0-9]+;//g' \
    | sed '/^\s*$/d' \
    | awk -F'@' -v blue="$blue" -v yellow="$yellow" -v green="$green" -v rstc="$rstc" '
      {
        # Field 2 is the link target, field 3 the link text; field 6 is
        # printed as the date (presumably where the published date lands
        # after the splitting above - TODO confirm against live markup).
        print blue " # " $6 "@" yellow toupper($3) "\n@ " green $2 rstc
      }' \
    | sed 's/@@@*/@@/g' \
    | sed -e :a -e 's/<[^>]*>//g;/</N;//ba' \
    | column -t -s @

  echo
done
||||||
|
|
Loading…
Reference in new issue