pukkandan · April 18, 2024 01:12 · pukkandan · Jun 21, 2023 · gosuimbalyndh · Jan 14, 2024
diff --git a/ytdlp_nest_comments.py b/ytdlp_nest_comments.py
 #!/usr/bin/env python3

 """
 SPDX-License-Identifier: MIT https://opensource.org/licenses/MIT
 Copyright © 2021 pukkandan.ytdlp@gmail.com


 * Input file is an info.json (with comments) that yt-dlp (https://github.com/yt-dlp/yt-dlp) wrote
 * Change FIELDS according to your needs

 The output file will be in the format:
 [{
  'text': 'comment 1',
  ...
  'replies': [{
    'text': 'reply 1',
    ...
    'replies': [...],
  }, ...],
 }, ...]
 """

 import os.path
 import json
 import argparse
 from datetime import datetime


 def get_fields(dct):
    """Yield ``(name, value)`` for each FIELDS entry that produces a value for *dct*.

    Every extractor registered in FIELDS is called as ``extractor(dct, name)``.
    Entries whose extractor returns ``None`` are left out entirely.
    """
    for field, extract in FIELDS.items():
        extracted = extract(dct, field)
        if extracted is None:
            continue
        yield field, extracted


 def filter_func(comments):
    """Reduce every comment in *comments* to the fields that ``get_fields`` yields.

    Returns a new list with one plain dict per comment, in the same order.
    """
    return [dict(pairs) for pairs in map(get_fields, comments)]


 def _format_timestamp(dct, name):
    """Return the UNIX timestamp stored under *name* as a ``YYYY/MM/DD`` UTC date.

    Returns ``None`` when the field is missing or null, so the field is dropped.
    """
    timestamp = dct.get(name)
    # Compare against None explicitly: 0 (the epoch) is a valid timestamp and
    # must be formatted like any other value instead of leaking out as a raw int
    if timestamp is None:
        return None
    # NOTE: utcfromtimestamp() is deprecated since Python 3.12; replacing it with
    # datetime.fromtimestamp(ts, timezone.utc) needs `timezone` to be imported
    return datetime.utcfromtimestamp(timestamp).strftime('%Y/%m/%d')


 # Maps each output field name to a callable ``fn(comment_dict, field_name)``.
 # The callable returns the value to write, or None to omit the field.
 # Fields are emitted in the order they are listed here.
 FIELDS = {
    'text': dict.get,
    'author': dict.get,
    'timestamp': _format_timestamp,
    # Add more fields here
    # Replies are filtered recursively; a missing, null or empty list becomes
    # None so that no 'replies' key is written for comments without replies
    'replies': lambda dct, name: filter_func(dct.get(name) or []) or None,
 }


 # Command-line interface: the input and the output path are both mandatory
 parser = argparse.ArgumentParser()
 parser.add_argument(
    '--input-file', '-i', required=True, metavar='FILE', dest='inputfile',
    help='File to read video metadata from (info.json)',
 )
 parser.add_argument(
    '--output-file', '-o', required=True, metavar='FILE', dest='outputfile',
    help='File to write comments to (json / html)',
 )
 args = parser.parse_args()


 # The output format is chosen by the output file's extension (without the dot).
 # Lower-case it so that e.g. "comments.HTML" is accepted as well; the later
 # format checks compare against the lower-case names
 ext = os.path.splitext(args.outputfile)[1][1:].lower()
 if ext == 'html':  # Error early
    # Import the optional dependency before any work is done, so that a missing
    # package is reported before the input file is read
    try:
        from json2html import json2html
    except ImportError:
        raise SystemExit('ERROR: json2html is required for html output. Install it with  pip install json2html')
 elif ext != 'json':
    raise SystemExit(f'ERROR: Only json and html formats are supported, not {ext}')


 print('Reading file')
 with open(args.inputfile, encoding='utf-8') as f:
    info_dict = json.load(f)

 comments = info_dict.get('comments')
 del info_dict  # Only the comments are used from here on
 if comments is None:
    raise SystemExit('ERROR: The input file does not contain any comments')

 # Index the comments by id, oldest first. Dicts keep insertion order, so the
 # replies of every comment end up in chronological order
 comment_data = {c['id']: c for c in sorted(
    comments, key=lambda c: c.get('timestamp') or 0)}
 del comments
 # Count the indexed comments so that the progress total matches the loop
 count = len(comment_data)
 nested_comments = []
 for i, c in enumerate(comment_data.values(), 1):
    print(f'Processing comment {i}/{count}', end='\r')
    # 'root' marks a top-level comment; a missing parent field is treated the same
    parent_id = c.get('parent') or 'root'
    parent = None if parent_id == 'root' else comment_data.get(parent_id)
    # A reply whose parent is not in the file is kept at the top level rather
    # than aborting the whole run
    siblings = nested_comments if parent is None else parent.setdefault('replies', [])
    siblings.append(c)


 # Move past the progress line, which was printed with a trailing '\r'
 print('')
 nested_comments = filter_func(nested_comments)

 # ext was validated earlier, so exactly one of these branches runs
 if ext == 'html':
    print('Converting to html')
    out = json2html.convert(nested_comments)
 elif ext == 'json':
    print('Converting to json')
    out = json.dumps(nested_comments, ensure_ascii=False, indent=4)

 # Only the serialised text is needed from here on
 del nested_comments
 print('Writing file')
 with open(args.outputfile, mode='w', encoding='utf-8') as outfile:
    outfile.write(out)
 print('Done')
	#!/usr/bin/env python3

	"""
	SPDX-License-Identifier: MIT https://opensource.org/licenses/MIT
	Copyright © 2021 pukkandan.ytdlp@gmail.com


	* Input file is an info.json (with comments) that yt-dlp (https://github.com/yt-dlp/yt-dlp) wrote
	* Change FIELDS according to your needs

	The output file will be in the format:
	[{
	  'text': 'comment 1',
	  ...
	  'replies': [{
	    'text': 'reply 1',
	    ...
	    'replies': [...],
	  }, ...],
	}, ...]
	"""

	import os.path
	import json
	import argparse
	from datetime import datetime


	def get_fields(dct):
	    """Yield ``(name, value)`` for each FIELDS entry that produces a value for *dct*.

	    Every extractor registered in FIELDS is called as ``extractor(dct, name)``.
	    Entries whose extractor returns ``None`` are left out entirely.
	    """
	    for name, fn in FIELDS.items():
	        val = fn(dct, name)
	        if val is not None:
	            yield name, val


	def filter_func(comments):
	    """Reduce every comment in *comments* to the fields that ``get_fields`` yields.

	    Returns a new list with one plain dict per comment, in the same order.
	    """
	    return [dict(get_fields(c)) for c in comments]


	def _format_timestamp(dct, name):
	    """Return the UNIX timestamp stored under *name* as a ``YYYY/MM/DD`` UTC date.

	    Returns ``None`` when the field is missing or null, so the field is dropped.
	    """
	    timestamp = dct.get(name)
	    # Compare against None explicitly: 0 (the epoch) is a valid timestamp and
	    # must be formatted like any other value instead of leaking out as a raw int
	    if timestamp is None:
	        return None
	    # NOTE: utcfromtimestamp() is deprecated since Python 3.12; replacing it with
	    # datetime.fromtimestamp(ts, timezone.utc) needs `timezone` to be imported
	    return datetime.utcfromtimestamp(timestamp).strftime('%Y/%m/%d')


	# Maps each output field name to a callable ``fn(comment_dict, field_name)``.
	# The callable returns the value to write, or None to omit the field.
	# Fields are emitted in the order they are listed here.
	FIELDS = {
	    'text': dict.get,
	    'author': dict.get,
	    'timestamp': _format_timestamp,
	    # Add more fields here
	    # Replies are filtered recursively; a missing, null or empty list becomes
	    # None so that no 'replies' key is written for comments without replies
	    'replies': lambda dct, name: filter_func(dct.get(name) or []) or None,
	}


	# Command-line interface: the input and the output path are both mandatory
	parser = argparse.ArgumentParser()
	parser.add_argument(
	    '--input-file', '-i', required=True, metavar='FILE', dest='inputfile',
	    help='File to read video metadata from (info.json)',
	)
	parser.add_argument(
	    '--output-file', '-o', required=True, metavar='FILE', dest='outputfile',
	    help='File to write comments to (json / html)',
	)
	args = parser.parse_args()


	# The output format is chosen by the output file's extension (without the dot).
	# Lower-case it so that e.g. "comments.HTML" is accepted as well; the later
	# format checks compare against the lower-case names
	ext = os.path.splitext(args.outputfile)[1][1:].lower()
	if ext == 'html':  # Error early
	    # Import the optional dependency before any work is done, so that a missing
	    # package is reported before the input file is read
	    try:
	        from json2html import json2html
	    except ImportError:
	        raise SystemExit('ERROR: json2html is required for html output. Install it with pip install json2html')
	elif ext != 'json':
	    raise SystemExit(f'ERROR: Only json and html formats are supported, not {ext}')


	print('Reading file')
	with open(args.inputfile, encoding='utf-8') as f:
	    info_dict = json.load(f)

	comments = info_dict.get('comments')
	del info_dict  # Only the comments are used from here on
	if comments is None:
	    raise SystemExit('ERROR: The input file does not contain any comments')

	# Index the comments by id, oldest first. Dicts keep insertion order, so the
	# replies of every comment end up in chronological order
	comment_data = {c['id']: c for c in sorted(
	    comments, key=lambda c: c.get('timestamp') or 0)}
	del comments
	# Count the indexed comments so that the progress total matches the loop
	count = len(comment_data)
	nested_comments = []
	for i, c in enumerate(comment_data.values(), 1):
	    print(f'Processing comment {i}/{count}', end='\r')
	    # 'root' marks a top-level comment; a missing parent field is treated the same
	    parent_id = c.get('parent') or 'root'
	    parent = None if parent_id == 'root' else comment_data.get(parent_id)
	    # A reply whose parent is not in the file is kept at the top level rather
	    # than aborting the whole run
	    siblings = nested_comments if parent is None else parent.setdefault('replies', [])
	    siblings.append(c)


	# Move past the progress line, which was printed with a trailing '\r'
	print('')
	nested_comments = filter_func(nested_comments)

	# ext was validated earlier, so exactly one of these branches runs
	if ext == 'json':
	    print('Converting to json')
	    out = json.dumps(nested_comments, indent=4, ensure_ascii=False)
	elif ext == 'html':
	    print('Converting to html')
	    out = json2html.convert(nested_comments)

	# Only the serialised text is needed from here on
	del nested_comments
	print('Writing file')
	with open(args.outputfile, 'w', encoding='utf-8') as f:
	    f.write(out)
	print('Done')