Skip to content

Instantly share code, notes, and snippets.

@putnik
Created March 14, 2021 01:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save putnik/31c2e2b2e0695caad6d51dc92ae4c690 to your computer and use it in GitHub Desktop.
Save putnik/31c2e2b2e0695caad6d51dc92ae4c690 to your computer and use it in GitHub Desktop.
Скрипт синхронизации списков русского Викигида и Викиданных
# Copyright 2020 Sergey Leschina (mail@putnik.tech)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import mwparserfromhell
import pywikibot
import re
from pywikibot import pagegenerators
# Wiki the script reads from and writes to, and the data repository attached
# to it (used for every ItemPage/Claim created in this file).
site = pywikibot.Site('ru', 'wikivoyage')
repo = site.data_repository()

# Category whose member pages iterate_category() walks.
category_name = 'Категория:Списки культурного наследия России'

# Edit summaries.  The %s in the item summaries is filled with a page title;
# page_update_summary is used when a wiki page itself is saved.
item_create_summary = 'Create item from the data in [[v:ru:%s]]'
item_update_summary = 'Update item from the data in [[v:ru:%s]]'
page_update_summary = 'Update data'

# Property ID -> template parameter.  update_item_claims() builds a claim for
# each key from the value of the template parameter named by the entry.
# NOTE(review): 'P131' maps to a list rather than a bare parameter name;
# presumably [parameter name, value kind] -- confirm the intended meaning.
PARAM_MAPPING = {
    # Disabled entry, kept from the original:
    #'P18': 'image',
    'P131': ['munid', 'string'],
    'P373': 'commonscat',
    'P1483': 'knid',
    'P5381': 'knid-new',
}
def log(string):
    """Emit one progress message on standard output.

    Args:
        string: The message to show; it is handed to ``print`` unchanged.
    """
    print(string)
def get_label_data(title, page):
    """Build an entity-edit payload that sets a single label.

    Args:
        title: The label text.
        page: Object whose ``site.lang`` gives the language code of the label.

    Returns:
        A dict of the form
        ``{'labels': {<lang>: {'language': <lang>, 'value': title}}}``.
    """
    language = page.site.lang
    label_entry = {'language': language, 'value': title}
    return {'labels': {language: label_entry}}
def add_source(claim, page):
    """Attach a two-statement reference to ``claim``.

    The reference consists of a P143 statement pointing at item Q17601812 and
    a P4656 statement holding an ``?oldid=`` URL for the latest revision of
    ``page``.

    Args:
        claim: The ``pywikibot.Claim`` that receives the reference.
        page: The wiki page whose ``latest_revision_id`` goes into the URL.
    """
    # Statement 1: the project the data was taken from.
    project_ref = pywikibot.Claim(repo, 'P143')
    project_ref.setTarget(pywikibot.ItemPage(repo, 'Q17601812'))

    # Statement 2: URL of the exact revision the data was read from.
    url_ref = pywikibot.Claim(repo, 'P4656')
    revision_url = 'https://ru.wikivoyage.org/?oldid=%s' % page.latest_revision_id
    url_ref.setTarget(revision_url)

    claim.addSources([project_ref, url_ref])
def update_item_claims(item, template, page):
    """Copy mapped template parameters onto ``item`` as new, sourced claims.

    For every property in ``PARAM_MAPPING`` the corresponding template
    parameter is read.  A claim is added only when the parameter is present
    and non-empty and the item has no claim for that property yet; each added
    claim then receives a reference via ``add_source``.

    Args:
        item: The ``pywikibot.ItemPage`` to extend.
        template: The parsed template supplying the parameter values.
        page: The wiki page containing ``template``; used for the reference.
    """
    # Properties whose value is another item, so the raw ID must be wrapped
    # in an ItemPage.  A real tuple: ``pid in ('P131')`` would be a substring
    # test against the string 'P131'.
    item_valued_pids = ('P131',)
    existing_claims = item.get().get('claims', {})

    for pid, param in PARAM_MAPPING.items():
        # A mapping value is either a bare parameter name or a list whose
        # first element is the name; the list itself does not name a
        # template parameter.
        if isinstance(param, (list, tuple)):
            param = param[0]
        if not template.has(param):
            continue
        target = template.get(param).value.strip()
        if not target:
            continue
        if pid in existing_claims:
            continue  # TODO: reconcile with the claim(s) already on the item

        if pid in item_valued_pids and not re.match(r'^Q\d+$', target):
            # Same ID shape check_template() applies to ``wdid``; skip
            # instead of failing while constructing the ItemPage.
            log('** (skip) %s: "%s" is not an item ID' % (pid, target))
            continue

        claim = pywikibot.Claim(repo, pid)
        log('** %s = "%s"' % (pid, target))
        if pid in item_valued_pids:
            target = pywikibot.ItemPage(repo, target)
        claim.setTarget(target)
        item.addClaim(claim)
        # The reference is added after the claim has been attached to the item.
        add_source(claim, page)
def create_item(template, page):
    """Create a new item for ``template`` and record its ID in ``wdid``.

    The new item gets a single label in the page's site language.  On
    success the template's ``wdid`` parameter is set to the new item ID, so
    the change shows up when the surrounding wikicode is serialised.

    Args:
        template: The parsed template describing the object.
        page: The wiki page containing ``template``.

    Returns:
        The same ``template`` object (modified in place when an item was
        created).
    """
    # NOTE(review): assumes the display name of the object is stored in the
    # ``name`` parameter of the template -- confirm against the template's
    # documentation.  (``template.title`` is the inherited ``str.title``
    # method, not a label.)
    label = ''
    if template.has('name'):
        label = template.get('name').value.strip_code().strip()
    if not label:
        log('* (skip) no "name" value to use as a label; item not created')
        return template

    item = pywikibot.ItemPage(repo)
    data = get_label_data(label, page)
    item.editEntity(data, summary=item_create_summary % page.title())
    log('* (new) [[d:%s]] = "%s"' % (item.title(), label))
    # Template.add() sets the parameter (adding it if absent);
    # ``template.replace`` would be ``str.replace`` with its result discarded.
    template.add('wdid', item.title())
    # TODO: log
    return template
def update_item(template, page):
    """Update the item referenced by the template's ``wdid`` parameter.

    Args:
        template: The parsed template; must have a ``wdid`` parameter.
        page: The wiki page containing ``template``.

    Returns:
        The same ``template`` object.
    """
    qid = template.get('wdid').value.strip()
    log("* [[d:%s]]" % qid)
    item = pywikibot.ItemPage(repo, qid)
    if not item.exists():
        # Report the dangling ID instead of skipping it silently.
        log('** (error) [[d:%s]] does not exist' % qid)
        return template
    # TODO: when the item has no label, set one from the template using
    # item_update_summary (an earlier draft called get_label_from_template,
    # which is not defined in the visible code).
    update_item_claims(item, template, page)
    # TODO: log
    return template
def check_template(template, page):
    """Dispatch one template: update its item when it carries a valid ID.

    Only templates named ``monument`` are considered.  When the ``wdid``
    parameter holds a value of the form ``Q<digits>``, ``update_item`` is
    called; otherwise nothing happens.

    Args:
        template: A parsed template from the page's wikicode.
        page: The wiki page containing ``template``.
    """
    if template.name.strip() != "monument":
        return

    qid = template.get("wdid").value.strip() if template.has("wdid") else ''
    if re.match("^Q\\d+$", qid):
        update_item(template, page)
    # No valid ID: item creation is currently disabled.
    # create_item(template, page)
    # TODO: save page to cache
    # TODO: log
def process_page(page):
    """Run ``check_template`` over every template on ``page`` and save changes.

    The page text is parsed, each template is handed to ``check_template``
    (which may modify it in place), and the page is saved only when the
    serialised wikicode differs from the text that was read.

    Args:
        page: The ``pywikibot.Page`` to process.

    Returns:
        ``True`` when the page was saved with new text, ``False`` otherwise.
    """
    log('== [[%s]] ==' % page.title())
    text = page.get()
    code = mwparserfromhell.parse(text)
    for template in code.filter_templates():
        check_template(template, page)

    new_text = str(code)
    if new_text == text:
        # TODO: log
        return False

    page.text = new_text
    page.save(page_update_summary)
    # TODO: log diff
    # Was ``return true``, which raises NameError right after the save.
    return True
def iterate_category():
    """Call ``process_page`` for each page generated from ``category_name``."""
    source_category = pywikibot.Category(site, category_name)
    for member in pagegenerators.CategorizedPageGenerator(source_category):
        process_page(member)
# Script entry: process one hard-coded list page.  iterate_category() is
# defined in this file but is not invoked here.
page = pywikibot.Page(site, 'Культурное наследие России/Вологодская область/Вологда')
process_page(page)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment