#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2019, Maxim Lihachev,
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

import io
import os
import re
import time
import click
import vk_api
import datetime
import urllib.request

from string import Template
from progressbar import *

############################################################################


class Color:
    """ANSI escape sequences used for colored terminal output."""

    GRAY = '\033[1;47m'
    GREEN = '\033[1;32m'
    YELLOW = '\033[1;33m'
    CYAN = '\033[1;36m'
    MAGENTA = '\033[1;35m'
    RESET = '\033[0m'


def printc(color, text, end='\n'):
    """Print ``text`` wrapped in the escape sequence named ``color``.

    ``color`` is the name of an attribute of ``Color`` (for example
    ``'YELLOW'``); an unknown name raises ``AttributeError``. ``end`` is
    forwarded to ``print``.
    """
    print("{}{}{}".format(getattr(Color, color), text, Color.RESET), end=end)


def timestamp_to_date(timestamp, fmt='%Y-%m-%d %H:%M:%S'):
    """Convert a Unix timestamp into a human-readable local date string."""
    return datetime.datetime.fromtimestamp(int(timestamp)).strftime(fmt)


def _size_url(photo, pick):
    """Return the ``'url'`` of the entry of ``photo['sizes']`` chosen by ``pick``.

    ``pick`` is the builtin ``max`` or ``min``; entries are compared by their
    ``'width'`` and the first entry wins on ties.
    """
    return pick(photo['sizes'], key=lambda size: size['width'])['url']


def _save_image(url, file_name, description):
    """Download ``url`` to ``file_name + '.jpg'``.

    A non-empty ``description`` is written next to the image as
    ``file_name + '.txt'``.
    """
    urllib.request.urlretrieve(url, file_name + ".jpg")

    if description:
        # Explicit UTF-8: the locale default encoding may not be able to
        # represent the text, and TMPL writes its files as UTF-8 as well.
        with io.open(file_name + ".txt", "w", encoding="utf-8") as info_file:
            info_file.write(description)


############################################################################


def auth(login, password):
    """Authenticate against vk.com.

    Returns the object produced by ``vk_session.get_api()``. When
    ``vk_api.AuthError`` is raised the error is printed and ``None`` is
    returned.
    """
    vk_session = vk_api.VkApi(login, password)

    try:
        vk_session.auth()
    except vk_api.AuthError as error_msg:
        print(error_msg)
        return None

    return vk_session.get_api()


############################################################################


class Group:
    """A vk.com community.

    Two attributes are not set by ``__init__`` and must be assigned by the
    caller before ``save_info()`` is used:

    * ``template_group`` - path of the template file for the group info;
    * ``info_file`` - path of the file the rendered info is written to.
    """

    def __init__(self, vk, url):
        self.vk = vk
        self.url = url

        self.info = self._get_info(self._get_name(url))

        self.id = self.info['id']
        self.name = self.info['screen_name']
        self.title = self.info['name']
        self.status = self.info['status']
        self.avatar = self.info['photo_100']
        self.avatar_full = self._largest_photo(self.info)

        # Each link is flattened to "url+@+name | desc+@+icon".
        self.links = [
            link['url'] + '+@+' + link['name'] + ' | ' + link.get('desc', '')
            + '+@+' + link.get('photo_50', 'NOICON')
            for link in self.info.get('links', [])
        ]

        # The settings request is only issued when is_admin is non-zero.
        if self.info['is_admin'] != 0:
            self.settings = self._get_settings(self.id)
            self.description = self.settings['description']
        else:
            self.description = ""

        self.wall = self._get_wall()

    @staticmethod
    def _largest_photo(photo):
        """Return the value of the ``photo_<N>`` key with the largest ``N``.

        Keys that start with ``photo_`` but do not end in digits are ignored.
        """
        numbered = [key for key in photo if re.fullmatch(r'photo_\d+', key)]
        return photo[max(numbered, key=lambda key: int(key[6:]))]

    @staticmethod
    def _get_name(url):
        """Return the last path component of ``url`` (the group's HTTP name).

        A trailing slash is ignored so that ``https://vk.com/name/`` still
        yields ``name``.
        """
        return url.rstrip('/').split('/')[-1]

    def _get_info(self, name):
        """Request information about the community.

        Example of the returned structure:

        {
            u'screen_name': u'havelove_willtravel',
            u'name': 'name',
            u'is_member': 0,
            u'is_advertiser': 0,
            u'is_admin': 0,
            u'type': u'group',
            u'id': 91087679,
            u'is_closed': 0,
            u'photo_50': u'https://pp.userapi.com/JGKzwLMZQ9s.jpg?ava=1',
            u'photo_100': u'https://pp.userapi.com/4M20VvCkRV8.jpg?ava=1',
            u'photo_200': u'https://pp.userapi.com/7fucdcg44a4.jpg?ava=1',
            u'status': u'status',
            u'links': [
                {
                    u'url': u'url',
                    u'desc': u'envrm.info',
                    u'id': 84867105,
                    u'name': u'name',
                    u'edit_title': 1
                }
            ],
        }
        """
        return self.vk.groups.getById(
            group_ids=name,
            extended=1,
            fields="status,links"
        )[0]

    def _get_settings(self, id):
        """Request the community information available to administrators.

        Example of the returned structure:

        {
            u'wiki': 2,
            u'city_id': 0,
            u'topics': 0,
            u'video': 2,
            u'sections_list': [[0, u'section']],
            u'obscene_stopwords': 0,
            u'subject': 25,
            u'obscene_filter': 0,
            u'age_limits': 1,
            u'title': u'title',
            u'country_id': 0,
            u'access': 0,
            u'wall': 2,
            u'market': {u'enabled': 0},
            u'website': u'',
            u'description': u'',
            u'docs': 0,
            u'secondary_section': 0,
            u'photos': 2,
            u'address': u'havelove_willtravel',
            u'main_section': 0,
            u'articles': 1,
            u'obscene_words': [],
            u'messages': 0,
            u'action_button': {},
            u'subject_list': [],
        }
        """
        return self.vk.groups.getSettings(group_id=id)

    def _get_wall(self):
        """Fetch the community wall through the module-level ``tools``.

        The owner id passed to ``wall.get`` is the negated group id.
        """
        return tools.get_all('wall.get', 100, {'owner_id': -self.id})

    def save_info(self):
        """Render the group information template and write it to ``info_file``."""
        data = {
            'name': self.name,
            'status': self.status,
            'description': self.description.replace('\n', ' '),
            'logo': self.avatar,
            'logo_full': self.avatar_full,
            'url': self.url,
            # A leading bullet is always emitted, even with no links.
            'links': "\n- " + "\n- ".join(self.links)
        }

        tpl = TMPL(self.template_group, data)
        tpl.write(self.info_file)


class TMPL:
    """A Markdown file rendered from a ``string.Template`` file."""

    def __init__(self, template, data):
        """Read ``template`` as UTF-8 and substitute ``data`` into it.

        ``Template.substitute`` is used, so a placeholder without a matching
        key in ``data`` raises ``KeyError``.
        """
        with io.open(template, "r", encoding="utf-8") as filein:
            src = Template(filein.read())
            self.md = src.substitute(data)

    def write(self, filename):
        """Write the rendered text to ``filename`` as UTF-8."""
        with io.open(filename, "w", encoding="utf-8") as fileout:
            fileout.write(self.md)


class Album:
    """A vk.com photo album."""

    def __init__(self, album):
        self.title = album['title']
        self.description = album['description']
        self.thumbnail = self._largest_photo(album['thumb'])

        # Tags are the comma-separated words after the last " - " in the title.
        if ' - ' in self.title:
            tag_part = re.search(r".* - (.*)", self.title).group(1)
            self.tags = [tag.strip() for tag in tag_part.split(",")]
        else:
            self.tags = []

        # The album's photos are fetched through the module-level ``tools``.
        request = {'owner_id': album['owner_id'], 'album_id': album['id']}
        self.photos = [
            Photo(item)
            for item in tools.get_all('photos.get', 100, request)['items']
        ]

    @staticmethod
    def _largest_photo(photo):
        """Return the URL of the widest entry in ``photo['sizes']``."""
        return _size_url(photo, max)

    def archive(self, directory):
        """Download the cover and all photos into ``directory/<album title>``.

        Photos are saved as ``001.jpg``, ``002.jpg``, ... with an optional
        ``.txt`` description next to each; a progress bar is shown meanwhile.
        """
        img_directory = os.path.join(directory, self.title)
        os.makedirs(img_directory, exist_ok=True)

        urllib.request.urlretrieve(self.thumbnail, os.path.join(img_directory, "cover.jpg"))

        bar_elements = [
            f'\t{Color.MAGENTA}Альбом. Фотографии:{Color.RESET} ',
            Percentage(),
            f' {Color.CYAN}',
            Bar(),
            f'{Color.RESET} ',
            ETA()
        ]
        bar = ProgressBar(widgets=bar_elements, maxval=len(self.photos)).start()

        for index, photo in enumerate(self.photos, 1):
            file_name = os.path.join(img_directory, str(index).zfill(3))
            _save_image(photo.url, file_name, photo.description)
            bar.update(index)

        print("")


class Photo:
    """A vk.com photo.

    Example of the source structure:

    {
        u'id': 456243888,
        u'album_id': 259402486,
        u'date': 1545675847,
        u'text': u'',
        u'height': 1464,
        u'width': 2448,
        u'user_id': 100,
        u'owner_id': -91087679,
        u'sizes': [{
            'type': 'm',
            'url': 'https://sun9-27.userapi.com/c851124/v851124613/1b85ca/UacqE2Zs2tc.jpg',
            'width': 130,
            'height': 87
        }, {
            'type': 'o',
            'url': 'https://sun9-9.userapi.com/c851124/v851124613/1b85cf/59MhcAHsMKg.jpg',
            'width': 130,
            'height': 87
        }, {
            'type': 'p',
            'url': 'https://sun9-57.userapi.com/c851124/v851124613/1b85d0/Ue4FW1UkwFE.jpg',
            'width': 200,
            'height': 133
        }]
    }
    """

    def __init__(self, photo):
        self.description = photo['text']
        self.date = photo['date']
        self.thumbnail = self._get_thumbnail(photo)
        self.url = self._largest_photo(photo)

    @staticmethod
    def _get_thumbnail(photo):
        """Return the URL of the narrowest entry in ``photo['sizes']``."""
        return _size_url(photo, min)

    @staticmethod
    def _largest_photo(photo):
        """Return the URL of the widest entry in ``photo['sizes']``."""
        return _size_url(photo, max)

    def archive(self, directory, date, num):
        """Download the photo into ``directory/<date> - Фотографии``.

        The file is named after ``num`` zero-padded to three digits; a
        non-empty description is stored next to it as a ``.txt`` file.
        """
        img_directory = os.path.join(directory, date + " - Фотографии")
        os.makedirs(img_directory, exist_ok=True)

        file_name = os.path.join(img_directory, str(num).zfill(3))
        _save_image(self.url, file_name, self.description)


class Link:
    """A vk.com link attachment."""

    def __init__(self, link):
        # NOTE(review): title/description are read leniently in case the API
        # omits them - confirm against the VK link object schema.
        self.title = link.get('title', '')
        self.description = link.get('description', '')
        self.url = link['url']

        if 'photo' in link:
            self.thumbnail = self._get_thumbnail(link['photo'])
        else:
            self.thumbnail = ""

    @staticmethod
    def _get_thumbnail(photo):
        """Return the URL of the narrowest entry in ``photo['sizes']``."""
        return _size_url(photo, min)


class Post:
    """A vk.com wall post.

    The ``template`` attribute (path of the post template file) is not set by
    ``__init__`` and must be assigned by the caller before ``archive()``.
    """

    def __init__(self, wall, index):
        """Read post number ``index`` from ``wall['items']`` and print its date."""
        self.post = wall['items'][index]

        # Repost: work with the first entry of the copy history instead.
        if 'copy_history' in self.post:
            self.post = self.post['copy_history'][0]

        post_id = str(self.post['id'])
        wall_id = str(self.post['owner_id'])

        self.url = 'https://vk.com/wall' + wall_id + '_' + post_id
        # The id doubles as the directory and file name of the archived post.
        self.id = timestamp_to_date(self.post['date'], fmt='%Y-%m-%d-%H.%M.%S')
        self.date = timestamp_to_date(self.post['date'])
        self.title = []

        self.link_title = ""
        self.link_description = ""
        self.link_thumbnail = ""
        self.link_url = ""

        self.albums = []
        self.photo_num = 0

        if 'signer_id' in self.post:
            self.author = str(self.post['signer_id'])
        else:
            self.author = ""

        self.text = self.post['text']
        self.attachments = self.post.get('attachments', [])

        # A hashtag ends at any whitespace, so a tag followed by a line break
        # does not swallow the text of the next line.
        self.tags = re.findall(r"#(\S+)", self.text)

        self.post_directory = os.path.join(archive_directory, self.id)

        printc('GRAY', self.date, end='')

    def _mkdir(self):
        """Create the post directory.

        Returns ``True`` when the directory was created and ``False`` when it
        already existed.
        """
        os.makedirs(archive_directory, exist_ok=True)

        if os.path.exists(self.post_directory):
            return False

        os.makedirs(self.post_directory)
        return True

    def archive(self):
        """Save the attachments and render the post into ``<id>.md``.

        A post whose directory already exists is reported as already
        downloaded and skipped.
        """
        if not self._mkdir():
            printc('YELLOW', " | Запись уже загружена.")
            return

        print("")

        if self.attachments:
            self._save_attachments()
            print("")

        data = {
            'layout': 'post',
            'author': self.author,
            'title': " / ".join(self.title),
            'categories': '[' + ", ".join(sorted({w.capitalize() for w in self.tags})) + ']',
            'vk': self.url,
            'link_title': self.link_title,
            'link_description': self.link_description,
            'link_thumbnail': self.link_thumbnail,
            'link_url': self.link_url,
            # A leading bullet is always emitted, even with no albums.
            'albums': "\n- " + "\n- ".join(self.albums),
            'content': self.text
        }

        self.md = TMPL(self.template, data)
        self.md.write(os.path.join(self.post_directory, self.id + '.md'))

    def _save_attachments(self):
        """Process every album, photo and link attachment of the post.

        Attachments of any other type are ignored.
        """
        for attachment in self.attachments:
            attachment_type = attachment['type']

            if attachment_type == 'album':
                self._save_album(attachment['album'])
            elif attachment_type == 'photo':
                self._save_photo(attachment['photo'])
            elif attachment_type == 'link':
                self._save_link(attachment['link'])

    def _save_album(self, source):
        """Download an album attachment and merge its title and tags into the post."""
        album = Album(source)

        if not album.title.strip():
            album.title = "Фотографии"

        printc('MAGENTA', f"\tАльбом. Название: {Color.GREEN}{album.title} {Color.MAGENTA}{album.description}")

        self.title.extend(album.title.split(" - ")[1:])
        self.tags.extend(album.tags)
        self.albums.append(album.title)

        album.archive(self.post_directory)

    def _save_photo(self, source):
        """Download a photo attachment into the post's own photo album."""
        ph = Photo(source)

        self.photo_num += 1

        printc('YELLOW', f"\tФотография: {str(self.photo_num).zfill(3)}.jpg | {ph.description}")

        ph.archive(self.post_directory, self.id, self.photo_num)

        # All loose photos of a post share one album entry.
        base_album = self.id + " - Фотографии"
        if base_album not in self.albums:
            self.albums.append(base_album)

    def _save_link(self, source):
        """Remember the data of a link attachment for the post template."""
        url = Link(source)

        printc('YELLOW', f"\tСсылка: {url.url}")
        printc('YELLOW', f"\tСсылка. Заголовок: {url.title}")

        self.link_title = url.title
        self.link_description = url.description
        self.link_thumbnail = url.thumbnail
        self.link_url = url.url


############################################################################

# Module-level state shared with the classes above; both are set by dump().
tools = ''
archive_directory = ''


# Command-line entry point: authenticate, load the community and archive
# every wall post into the output directory.
# (Deliberately documented with comments rather than a docstring: click would
# show a docstring as the command's help text.)
@click.command()
@click.option('-u', '--user', help='Имя пользователя', prompt='VK User')
@click.option('-p', '--password', help='Пароль', prompt='VK Password', hide_input=True)
@click.option('-g', '--group', 'group_url', help='Сообщество для сохранения', prompt='Group URL')
@click.option('-o', '--out', 'directory', help='Директория для сохранения', default='')
def dump(user, password, group_url, directory):
    global tools
    global archive_directory

    # vk.com account
    vk = auth(user, password)
    if vk is None:
        # auth() has already printed the reason; nothing can be done without
        # an API object.
        raise SystemExit(1)

    tools = vk_api.VkTools(vk)

    # Community to archive
    if 'vk.com' not in group_url:
        group_url = 'http://vk.com/' + group_url

    group = Group(vk, group_url)

    # Directory the posts are saved to; by default a directory named after
    # the group, next to this script.
    if directory == '':
        archive_directory = os.path.join(os.path.dirname(__file__), Group._get_name(group_url))
    else:
        archive_directory = directory

    os.makedirs(archive_directory, exist_ok=True)

    # Template file for the group information
    group.template_group = os.path.abspath(os.path.join(os.path.dirname(__file__), 'tpl', 'config.yml'))
    group.info_file = os.path.abspath(os.path.join(archive_directory, 'config.yml'))

    header_message = f'# Сохранение сообщества {group_url} в директорию {archive_directory} #'

    printc('YELLOW', '#' * len(header_message))
    printc('YELLOW', header_message)
    printc('YELLOW', '#' * len(header_message))
    print("")

    group.save_info()

    # Template file for a post
    post_template_file = os.path.abspath(os.path.join(os.path.dirname(__file__), 'tpl', 'post.md'))

    posts = group.wall['count']

    for i in range(posts):
        printc('CYAN', "#### {:3}/{} ".format(i + 1, posts), end='')
        post = Post(group.wall, i)
        post.template = post_template_file
        post.archive()


if __name__ == '__main__':
    # Dump vk data with Moscow timestamps
    os.environ['TZ'] = 'Europe/Moscow'
    # time.tzset() is not available on every platform.
    if hasattr(time, 'tzset'):
        time.tzset()

    dump()