Helpers
This module contains helper functions for building the network and storing data.
append_pub_data_to_json(publication_info)
Saves publication data from get_publication_data to data/scraped.json file.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| publication_info | list | Publication data. | required |
Source code in scholar_network/helpers.py
def append_pub_data_to_json(publication_info: list[dict[str, str]]):
    """Saves publication data from
    [get_publication_data][src.scholar_network.scraping.get_publication_data]
    to data/scraped.json file.

    The current file content is loaded, extended with the new records and
    written back with 4-space indentation and sorted keys.

    Args:
        publication_info (list[dict[str, str]]): Publication data.

    Raises:
        FileNotFoundError: If data/scraped.json does not exist.
        TypeError: If the combined data cannot be serialized to JSON. The
            file on disk is left untouched in that case.
    """
    with open("data/scraped.json", "r", encoding=ENCODING) as f:
        data = json.load(f)
    data.extend(publication_info)
    # Serialize *before* reopening the file: mode "w" truncates immediately,
    # so an error raised while dumping straight into the file would leave a
    # partial document behind and lose the previously stored publications.
    serialized = json.dumps(data, indent=4, sort_keys=True)
    with open("data/scraped.json", "w", encoding=ENCODING) as f:
        f.write(serialized)
build_graph(author1=None, author2=None)
This utility function builds the Graph for the network.
Currently the default graph type that is built is undirected.
If no authors are provided, then a network of all the data contained in
data/scraped.json will be built.
If one or two authors are provided, only their networks will be built.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| author1 | Optional[str] | Author name. Defaults to None. | None |
| author2 | Optional[str] | Author name. Defaults to None. | None |
Returns:
| Type | Description |
|---|---|
| Graph | models.Graph: Network graph of authors. |
Source code in scholar_network/helpers.py
def _co_author_names(pub: dict[str, str]) -> set[str]:
    """Returns the distinct, whitespace-stripped author names of a publication."""
    # `or ""` covers a missing or null "authors" entry, which would otherwise
    # fail on `.split`. Blank names (e.g. produced by a trailing comma) are
    # dropped so they never turn into nodes.
    raw_authors = pub.get("authors") or ""
    return {name.strip() for name in raw_authors.split(",") if name.strip()}


def _add_co_author_network(graph: models.Graph, co_authors: set[str]) -> None:
    """Adds each co-author as a node and an edge for every ordered pair."""
    for first in co_authors:
        n1 = models.Node(first)
        graph.add_node(n1)
        for second in co_authors:
            if second == first:
                continue
            n2 = models.Node(second)
            graph.add_node(n2)
            graph.add_edge(models.Edge(n1, n2))


def build_graph(
    author1: Union[str, None] = None, author2: Union[str, None] = None
) -> models.Graph:
    """This utility function builds the Graph for the network.

    Currently the default graph type that is built is undirected.

    If no authors are provided, then a network of all the data contained in
    `data/scraped.json` will be built.

    If one or two authors are provided, only their networks will be built,
    i.e. only publications listing at least one of them as a co-author
    contribute nodes and edges.

    Publications without any author names contribute nothing to the graph.

    Args:
        author1 (Union[str, None], optional): Author name. Defaults to None.
        author2 (Union[str, None], optional): Author name. Defaults to None.

    Returns:
        models.Graph: Network graph of authors.
    """
    # TODO: add support for journal title as attribute of edge
    #       (the publication's "journal_title" field)
    # TODO: want weight to track number of connections on each edge
    # TODO: look at speed enhancements
    publications = load_publications()
    graph = models.Graph()
    # Empty strings and None both mean "no filter", as in the truthiness
    # check this replaces; an empty set therefore selects the whole graph.
    requested = {author for author in (author1, author2) if author}
    for pub in publications:
        co_authors = _co_author_names(pub)
        if requested and requested.isdisjoint(co_authors):
            continue  # none of the requested authors wrote this publication
        _add_co_author_network(graph, co_authors)
    return graph
load_publications()
Utility function to load publication data.
Returns:
| Type | Description |
|---|---|
| list | (list[dict[str, str]]): List of publication data. |
Source code in scholar_network/helpers.py
def load_publications() -> list[dict[str, str]]:
    """Reads the stored publication records from data/scraped.json.

    Returns:
        (list[dict[str, str]]): The parsed JSON content of the file.
    """
    with open("data/scraped.json", "r", encoding=ENCODING) as source:
        return json.load(source)