-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path(Non-)retracted papers in OpenAlex.py
75 lines (54 loc) · 1.95 KB
/
(Non-)retracted papers in OpenAlex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env python
# coding: utf-8
# In[3]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
def categorize_titles(input_file):
    """Group the titles in a CSV export by the retraction keyword they contain.

    The CSV is read with ``pandas.read_csv`` and must have a ``display_name``
    column. Each title is compared case-insensitively against the category
    labels, in the order they are listed below; the first label that occurs
    as a substring of the title wins. Titles matching no label are placed in
    ``"Others"``. Missing cells are skipped and not counted anywhere.

    Args:
        input_file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        dict: Maps each category label to the list of matching title values,
        in the order they appear in the file.

    Raises:
        KeyError: If the CSV has no ``display_name`` column.
    """
    df = pd.read_csv(input_file)

    categories = {
        "Notice of Retraction": [],
        "Retracted": [],
        "Withdrawn": [],
        "Removed": [],
        "Temporary Removal": [],
        "Others": [],
    }

    for title in df['display_name']:
        # Skip missing cells only. Checking for missingness (rather than for
        # the float type) keeps genuine numeric titles and also catches
        # missing markers that are not floats.
        if pd.isna(title):
            continue

        lowered = str(title).lower()
        # First label (in dict order) found inside the title; "Others" when
        # nothing matches.
        matched = next(
            (label for label in categories if label.lower() in lowered),
            "Others",
        )
        categories[matched].append(title)

    return categories
def plot_bar_graph(categories):
    """Show a bar chart with the number of titles in each category.

    Args:
        categories: Mapping from category label to the list of titles in that
            category. It must contain every label listed in ``custom_order``
            below; a missing label raises ``KeyError``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Fixed left-to-right bar order, independent of the mapping's own order.
    custom_order = ["Retracted", "Notice of Retraction", "Withdrawn", "Removed", "Temporary Removal", "Others"]
    counts = [len(categories[key]) for key in custom_order]

    _, ax = plt.subplots(figsize=(12, 8))
    bars = ax.bar(custom_order, counts, color='skyblue')

    # Write each bar's count just above its top edge.
    for rect in bars:
        height = rect.get_height()
        ax.annotate('{}'.format(height),
                    xy=(rect.get_x() + rect.get_width() / 2, height),
                    xytext=(0, 3),
                    textcoords="offset points",
                    ha='center', va='bottom')

    ax.set_xlabel('Categories')
    ax.set_ylabel('Number of Publications')
    ax.set_title('Publications by Category')

    # Counts are whole numbers, so force integer ticks on the count (y) axis.
    # The x axis is categorical and keeps its default category ticks.
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))
    ax.grid(axis='y', linestyle='--')

    plt.tight_layout()
    plt.show()
if __name__ == "__main__":
    # Relative path: resolved against the current working directory.
    input_file = "Downloads/works-2024-03-11T16-37-37.csv"
    categories = categorize_titles(input_file)
    plot_bar_graph(categories)
# In[ ]: