Skip to content

Instantly share code, notes, and snippets.

@GoingMyWay
Created January 29, 2021 02:47
Show Gist options
  • Save GoingMyWay/3782c11a541725763fc26082782a5973 to your computer and use it in GitHub Desktop.
Save GoingMyWay/3782c11a541725763fc26082782a5973 to your computer and use it in GitHub Desktop.
Download ICLR 2021 papers
# Download every paper listed in iclr_papers.csv into ./papers/.
# Each CSV line is read as "title,url": the text before the first comma is the
# title and everything after it is the URL. The "forum" part of the URL is
# swapped for "pdf" to obtain the direct PDF link.
secondString="pdf"
# Characters replaced by "_" in the output file name; "/" is included because
# it would otherwise be interpreted as a directory separator by wget -O.
unsafeChars='[:?^/]'

# wget -O does not create missing directories.
mkdir -p ./papers

# "|| [ -n "$title" ]" keeps the last line when the file lacks a trailing newline.
while IFS="," read -r title url || [ -n "$title" ]; do
    # Drop carriage returns left over from CRLF line endings.
    url=${url//$'\r'/}
    # Skip blank or malformed lines instead of launching wget without a URL.
    [ -z "$url" ] && continue
    nurl=${url/forum/$secondString}
    ntitle=${title//$unsafeChars/_}
    # Quoted so that "?" and "&" in the URL are never globbed or word-split.
    wget -O "./papers/21-${ntitle}.pdf" "$nurl" &
done < iclr_papers.csv

# Block until every background download has finished.
wait
@GoingMyWay
Copy link
Author

GoingMyWay commented Nov 11, 2021

The code for downloading the paper metadata (titles, links, ratings, keywords); save it as iclr_list.py:

import argparse
import multiprocessing

import tqdm
import openreview
import pandas as pd


def _parse_rating(raw):
    """Return the integer score at the start of a rating value.

    Assumes ratings look like ``'<score>: <description>'`` (e.g.
    ``'10: Top 5% ...'``) -- TODO confirm against the venue's review form.
    Everything before the first colon is parsed, so multi-digit scores are
    read in full rather than only their first character.
    """
    return int(str(raw).split(':', 1)[0].strip())


def worker(reviews):
    """Collect per-paper statistics for a batch of submission notes.

    For every note in ``reviews`` the notes of its forum are fetched and the
    ones carrying a ``rating`` field are turned into integer scores.

    Args:
        reviews: iterable of notes exposing ``.id`` and ``.content``.

    Returns:
        dict mapping the column names ``title``, ``link``, ``rating``,
        ``avg_rating``, ``keywords`` and ``n_comments`` to lists with one
        entry per note, in input order.
    """
    # Each worker builds its own client; it runs in a separate pool process.
    client = openreview.Client(baseurl='https://api.openreview.net', username='', password='')
    papers = {'title': [], 'link': [], 'rating': [], 'avg_rating': [], 'keywords': [], 'n_comments': []}
    for review in tqdm.tqdm(reviews):
        _id = review.id
        _title = review.content['title']
        _comments = client.get_notes(forum=_id)
        # Tolerate submissions without a keywords field.
        _keywords = review.content.get('keywords') or []
        _ratings = [
            _parse_rating(c.content['rating'])
            for c in _comments
            if 'rating' in c.content
        ]

        papers['title'].append(_title)
        papers['link'].append(f'https://openreview.net/forum?id={_id}')
        papers['rating'].append(_ratings)
        # A paper without any rated note gets NaN instead of raising
        # ZeroDivisionError and aborting the whole batch.
        papers['avg_rating'].append(sum(_ratings) / len(_ratings) if _ratings else float('nan'))
        papers['keywords'].append([v.lower() for v in _keywords])
        papers['n_comments'].append(len(_comments))
    return papers


def main(args):
    """Fetch all notes for an invitation, gather stats in parallel, save a CSV.

    Args:
        args: namespace with ``conf`` (the invitation id passed to
            ``iterget_notes``) and ``n_runner`` (number of pool processes).

    Writes the merged table to ``iclr_2021_list.csv`` in the working directory.
    """
    client = openreview.Client(baseurl='https://api.openreview.net', username='', password='')
    blind_submissions_iterator = openreview.tools.iterget_notes(client, invitation=args.conf)
    all_reviews = list(blind_submissions_iterator)

    # Ceil-divide so that at most ``n_runner`` contiguous chunks cover *every*
    # note. Floor division dropped the tail whenever the remainder exceeded the
    # chunk size, and produced only empty chunks when there were fewer notes
    # than runners.
    total = len(all_reviews)
    chunk = max(1, (total + args.n_runner - 1) // args.n_runner)
    # ``or [[]]`` keeps one (empty) job so the CSV still gets its column header.
    chunks = [all_reviews[start:start + chunk] for start in range(0, total, chunk)] or [[]]

    # ``map`` blocks until all results are in; the context manager then tears
    # the pool down even if a worker raised.
    with multiprocessing.Pool(processes=args.n_runner) as p:
        data = p.map(worker, chunks)
    print(len(data))

    # Concatenate the per-chunk column lists, preserving chunk order.
    all_data = {}
    print('saving data....')
    for d in tqdm.tqdm(data):
        for k, v in d.items():
            all_data.setdefault(k, []).extend(v)

    df = pd.DataFrame.from_dict(all_data)
    df.to_csv('iclr_2021_list.csv')


if __name__ == '__main__':
    # Command-line entry point: parse the options and hand them to main().
    argparser = argparse.ArgumentParser(description='ICLR data parser')
    # The default targets ICLR 2021; pass another invitation id (e.g. a
    # NeurIPS one) to scrape a different venue.
    argparser.add_argument('--conf', type=str, default='ICLR.cc/2021/Conference/-/Blind_Submission', help='conference link')
    # The value sizes a multiprocessing.Pool, i.e. processes rather than threads.
    argparser.add_argument('--n-runner', type=int, default=32, help='number of worker processes')
    cli_args = argparser.parse_args()

    main(cli_args)

python iclr_list.py

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment