2 changed files with 69 additions and 10 deletions
@ -1,19 +1,46 @@
@@ -1,19 +1,46 @@
|
||||
#!/bin/bash

# Created:  Thu 05 Apr 2018 10:26:40
# Modified: Thu 05 Apr 2018 10:41:39
# Modified: Sun 22 Sep 2019 08:29:53

# Fetch the latest posts from the vandrouki sites and print, for every
# mirror, a coloured list: "# <date> <TITLE>" followed by the post link on
# the next line.
#
# Requires: curl, sed, xmllint (libxml2), awk.

# Deliberately no `set -e` / `pipefail`: a mirror that fails to download or
# parse must not stop the remaining mirrors from being processed.
set -u

# Mirrors to query, in output order.
readonly URLS=(
  "https://vandrouki.asia"
  "https://vandrouki.by"
  "https://vandrouki.ru"
)

# ANSI colour sequences. Stored as real ESC bytes ($'...') so the same value
# works both for printf '%s' and for awk -v.
readonly rstc=$'\033[00m'
readonly yellow=$'\033[1;33m'
readonly blue=$'\033[1;36m'
readonly green=$'\033[1;32m'

for url in "${URLS[@]}"; do
  printf '%s> %s %s\n\n' "$green" "$url" "$rstc"

  # Pipeline stages:
  #   1. curl     - download the front page silently.
  #                 NOTE(review): curl honours -q (--disable) only as the
  #                 first argument; kept in its original position - confirm
  #                 what was intended.
  #   2. sed      - blank out the <html ... xmlns ...> line before parsing.
  #   3. xmllint  - keep only the nodes holding the publication date
  #                 (span.published) and the title link (h2/a). Parser noise
  #                 is discarded via /dev/null; stderr is NOT closed, because
  #                 a closed fd 2 makes every diagnostic write fail.
  #   4. awk #1   - paragraph mode (RS=""): turn newlines into '@', start a
  #                 new line at each <h2 class="entry-title">, replace the
  #                 href / rel / published markers with '@', strip numeric
  #                 entities and remaining tags, collapse '@' runs.
  #   5. awk #2   - split on '@' and print the coloured entry; the date is
  #                 field 4, or field 5 when field 4 is blank.
  # [[:space:]] is used instead of \s, which is a gawk-only extension.
  curl -s -q "$url" \
    | sed 's/<html.*xmlns.*$//g' \
    | xmllint --html --xpath "//html/body/div[@id='content']/div[@id='primary']/div/*[self::div/span[@class='published'] or self::h2/a]" - 2>/dev/null \
    | awk 'BEGIN { RS = "" }
        {
          gsub(/\n+/, "@")
          gsub(/<h2 class="entry-title">/, "\n")
          gsub(/(href=")|(" rel=[^>]*>)|(published.>)/, "@")
          gsub(/(&#[0-9]+;)|(<[^@>]*>*)/, "")
          gsub(/@[[:space:]]*@*/, "@")

          print
        }' \
    | awk -F'@' -v blue="$blue" -v yellow="$yellow" -v green="$green" -v rstc="$rstc" '
        /^[[:space:]]*$/ { next }
        {
          date = ($4 ~ /^[[:space:]]*$/) ? $5 : $4

          print blue " # " date "\t" yellow toupper($3) "\n\t\t" green $2 rstc
        }'

  printf '\n--------------------------------\n\n'
done
||||
|
||||
|
@ -0,0 +1,32 @@
@@ -0,0 +1,32 @@
|
||||
#!/bin/bash

# Fetch the front page of every vandrouki mirror and print the latest posts
# as an aligned, coloured table: "# <date>  <TITLE>" with the post link on
# the following row.
#
# Requires: curl, GNU sed, xmllint (libxml2), awk, column.

# Deliberately no `set -e` / `pipefail`: a mirror that fails to download or
# parse must not stop the remaining mirrors from being processed.
set -u

# Mirrors to query, in output order.
# To debug a single mirror, reduce the list, e.g. ("https://vandrouki.by").
readonly URLS=(
  "https://vandrouki.ru"
  "https://vandrouki.by"
  "https://vandrouki.asia"
)

# ANSI colour sequences. Stored as real ESC bytes ($'...') so the same value
# works both for printf '%s' and for awk -v.
readonly rstc=$'\033[00m'
readonly yellow=$'\033[1;33m'
readonly blue=$'\033[1;36m'
readonly green=$'\033[1;32m'

for url in "${URLS[@]}"; do
  printf '%s> %s %s\n\n' "$green" "$url" "$rstc"

  # Pipeline stages:
  #   1. curl    - download the page silently.
  #                NOTE(review): curl honours -q (--disable) only as the
  #                first argument; kept in its original position - confirm
  #                what was intended.
  #   2. sed     - blank out the <html ... xmlns ...> line before parsing.
  #   3. xmllint - keep only the date (span.published) and title-link (h2/a)
  #                nodes. Parser noise goes to /dev/null; stderr is NOT
  #                closed, because a closed fd 2 makes diagnostic writes fail.
  #   4. tr      - newlines -> CR so the next sed sees one single line.
  #   5. sed     - CR CR (blank line) becomes a line break, a single CR
  #                becomes '@', and each <h2 class="entry-title"> starts a
  #                new line.
  #   6. sed     - replace the href / rel / published markers with '@'.
  #   7. sed -E  - drop a leading "</div>@" and numeric HTML entities.
  #   8. sed     - delete blank lines ([[:space:]] instead of GNU-only \s).
  #   9. awk     - emit two '@'-separated rows per entry: date + upper-cased
  #                title, then the link.
  #  10. sed     - squeeze runs of three or more '@' down to two.
  #  11. sed     - strip any remaining tags, joining lines while a '<' is
  #                still open.
  #  12. column  - align the rows using '@' as the column separator.
  curl -s -q "$url" \
    | sed 's/<html.*xmlns.*$//g' \
    | xmllint --html --xpath "//html/body/div[@id='content']/div[@id='primary']/div/*[self::div/span[@class='published'] or self::h2/a]" - 2>/dev/null \
    | tr '\n' '\r' \
    | sed -e 's/\r\r/####/g; s/\r/@/g; s/####/\n/g; s/<h2 class="entry-title">/\n/g' \
    | sed 's/href="/@/; s/" rel=[^>]*>/@/; s/published.>/@/' \
    | sed -E 's|^</div>@||; s/&#[0-9]+;//g' \
    | sed '/^[[:space:]]*$/d' \
    | awk -F'@' -v blue="$blue" -v yellow="$yellow" -v green="$green" -v rstc="$rstc" \
        '{
          print blue " # " $6 "@" yellow toupper($3) "\n@ " green $2 rstc
        }' \
    | sed 's/@@@*/@@/g' \
    | sed -e :a -e 's/<[^>]*>//g;/</N;//ba' \
    | column -t -s @

  printf '\n'
done
||||
|
Loading…
Reference in new issue