Skip to content

Commit

Permalink
Update main.py
Browse files Browse the repository at this point in the history
  • Loading branch information
wanghaisheng committed Jul 26, 2024
1 parent ed507cd commit 66d0667
Showing 1 changed file with 165 additions and 11 deletions.
176 changes: 165 additions & 11 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,19 +229,173 @@ def __init__(self):
self.storage_path_by_date = os.path.join(SERVER_DIR_STORAGE, self.update_time)
self.storage_path_docs = SERVER_PATH_DOCS
self.storage_path_readme = SERVER_PATH_README
def _generate_yaml_front_matter(self, paper: dict, editor_name: str) -> str:
    """
    Build the YAML front matter block that heads a generated Markdown post.

    @param paper: paper record; the keys ``title``, ``keywords`` and ``authors`` are read.
    @param editor_name: value stored under the ``author`` key of the front matter.
    @return: the dumped YAML mapping wrapped in ``---`` delimiter lines.
    """
    # Format the timestamp explicitly. Splitting ``str(datetime)`` on '.' only
    # strips the fractional part when microseconds are non-zero; at exactly
    # 0 microseconds the UTC offset would leak into ``pubDate``.
    post_pubdate = datetime.now(TIME_ZONE_CN).strftime("%Y-%m-%d %H:%M:%S")

    # The same image is used for both the wide and the square cover slots.
    cover_image = (
        "https://www.apple.com.cn/newsroom/images/product/homepod/standard/"
        "Apple-HomePod-hero-230118_big.jpg.large_2x.jpg"
    )

    front_matter = {
        "layout": "../../layouts/MarkdownPost.astro",
        "title": paper["title"],
        "pubDate": post_pubdate,
        "description": "",
        "author": editor_name,
        "cover": {
            "url": cover_image,
            "square": cover_image,
            "alt": "cover",
        },
        "tags": paper["keywords"],
        "theme": "light",
        "featured": True,
        # NOTE(review): the two keyword strings below look like placeholders
        # rather than values derived from the paper - confirm before relying on them.
        "meta": [
            {"name": "author", "content": paper["authors"]},
            {"name": "keywords", "content": "key3, key4"},
        ],
        "keywords": "key1, key2, key3",
    }

    yaml_front_matter = yaml.safe_dump(front_matter, default_flow_style=False)

    return f"---\n{yaml_front_matter}---\n"
def _generate_markdown_content(self, paper: dict, pdf_link: str) -> str:
    """
    Render the Markdown body of a paper post.

    @param paper: paper record; the keys ``title``, ``publish_time``, ``authors``,
        ``id``, ``abstract`` and ``QA_md_contents`` are read.
    @param pdf_link: text placed under the ``download`` heading.
    @return: the Markdown body, one heading per section, ending with a newline.
    """
    sections = [
        f"# title: {paper['title']} \n",
        f"## publish date: \n{paper['publish_time']} \n",
        f"## authors: \n {paper['authors']} \n",
        f"## paper id\n{paper['id']}\n",
        f"## download\n{pdf_link}\n",
        f"## abstracts:\n{paper['abstract']}\n",
        f"## QA:\n{paper['QA_md_contents']}\n",
    ]
    return "".join(sections)

def _generate_markdown_table_content(self, paper: dict, tags=None):
    """
    Write one paper out as a Markdown post, refresh the tag index, and
    return the paper's Markdown table row.

    Side effects:
      - ``paper`` is mutated in place: ``publish_time`` is wrapped in bold
        markers, ``keywords`` falls back to ``tags`` when empty, and
        ``QA_md_contents`` is filled in.
      - The post is written to ``SERVER_PATH_STORAGE_MD.format(postname)``.
      - The merged tag list is written to ``data.json`` in the working directory.

    @param paper: paper record; the keys ``publish_time``, ``keywords``, ``id``,
        ``paper_url``, ``title`` and ``authors`` are read here.
    @param tags: optional iterable of tags (the caller passes topic and subtopic);
        used as keywords when the paper has none and always merged into the tag index.
    @return: one Markdown table row terminated by a newline.
    """
    # Formatting fields
    paper['publish_time'] = f"**{paper['publish_time']}**"

    # De-duplicate the supplied tags while keeping their order; ``tags`` may be None.
    topic_tags = list(dict.fromkeys(tags or []))
    # Only fall back to the topic tags when the paper has no keywords of its own.
    # (The previous guard was inverted and called ``set(None)``.)
    if not paper.get('keywords'):
        paper['keywords'] = topic_tags
    keywords = paper['keywords']
    # A bare string would otherwise be merged character by character.
    keywords = [keywords] if isinstance(keywords, str) else list(keywords)

    qa_md_link = f"https://github.com/taesiri/ArXivQA/blob/main/papers/{paper['id']}.md"
    paper['QA_md_contents'] = ToolBox.handle_md(qa_md_link)
    if paper['QA_md_contents'] is None:
        print('gen realtime')
        paper['QA_md_contents'] = 'coming soon'
        # https://huggingface.co/spaces/taesiri/ClaudeReadsArxiv
        # https://github.com/Nipun1212/Claude_api
    pdf_link = self._set_markdown_hyperlink(text=paper['id'], link=paper['paper_url'])

    # NOTE(review): ``editor_name`` is not defined inside this function;
    # presumably it is a module-level name - confirm it exists.
    yaml_front_matter = self._generate_yaml_front_matter(paper, editor_name)
    markdown_content = self._generate_markdown_content(paper, pdf_link)
    paper_contents = f"{yaml_front_matter}\n{markdown_content}"

    postname = self._check_for_illegal_char(paper['title']).replace(' ', '_')
    # If the filename starts with "__", the Astro post will 404.
    if postname.startswith('__'):
        postname = postname.replace('__', "")
    paper_path_appleblog = SERVER_PATH_STORAGE_MD.format(postname)

    if not os.path.exists(SERVER_DIR_STORAGE):
        os.makedirs(SERVER_DIR_STORAGE, exist_ok=True)
        print(f"Directory '{SERVER_DIR_STORAGE}' was created.")
    else:
        print(f"Directory '{SERVER_DIR_STORAGE}' already exists.")
    # The post path is built from a different constant than the directory
    # checked above, so make sure its own parent directory exists as well.
    post_dir = os.path.dirname(paper_path_appleblog)
    if post_dir:
        os.makedirs(post_dir, exist_ok=True)

    with open(paper_path_appleblog, "w", encoding="utf8") as f:
        f.write(paper_contents)

    # Merge previously recorded tags (if any) with this paper's tags.
    tags_path = os.path.join(os.path.dirname(SERVER_DIR_STORAGE), 'tags.json')
    try:
        with open(tags_path, encoding='utf8') as f:
            old_tags = json.load(f).get('tags', [])
    except FileNotFoundError:
        old_tags = []
    merged_tags = list(dict.fromkeys(list(old_tags) + keywords + topic_tags))
    data = {'tags': merged_tags}

    # NOTE(review): tags are read from ``tags.json`` but written to ``data.json``;
    # the output path is kept as-is - confirm whether both should be the same file.
    with open('data.json', 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=2)

    # The caller joins the return values into the table body, so a string row
    # must be returned (previously the function fell through and returned None).
    # NOTE(review): the column layout (date, title, authors, PDF, repo) is assumed
    # to match the header fields supplied by the caller - confirm.
    repo = paper.get('repo') or ''
    return f"|{paper['publish_time']}|{paper['title']}|{paper['authors']}|{pdf_link}|{repo}|\n"


def to_markdown(self, context):
    """
    Mock renderer: turn a single-paper context into a hook/content pair.

    NOTE(review): another ``to_markdown`` is defined further down in this file;
    if both live in the same class the later one replaces this one - confirm
    which definition is intended to survive.

    @param context: mapping with ``topic``, ``subtopic`` and ``paper``; from
        ``paper`` the keys ``publish_time``, ``title``, ``authors``,
        ``paper_url``, ``repo`` and ``abstract`` are read.
    @return: dict with ``hook`` (the topic) and ``content`` (the Markdown text).
    """
    # Mock implementation of to_markdown
    paper = context['paper']
    content = (
        f"# {context['topic']} - {context['subtopic']}\n\n"
        f"**Publish Time:** {paper['publish_time']}\n"
        f"**Title:** {paper['title']}\n"
        f"**Authors:** {paper['authors']}\n"
        f"**PDF Link:** {paper['paper_url']}\n"
        f"**Code Repo:** {paper['repo']}\n"
        f"**Abstract:** {paper['abstract']}\n"
    )
    # The original dict literal was never closed, which made the file unparsable.
    return {
        "hook": context["topic"],
        "content": content,
    }

@staticmethod
def _set_style_to(style: str = "center"):
return " :---: " if style == "center" else " --- "

# -------------------
# Public API
# -------------------
def storage(self, template: str, obj_: str = "database"):
    """
    Archive a Markdown template to disk.

    @param template: Markdown text to store; it is iterated and every item is
        written to the target file in order.
    @param obj_: destination selector. ``database`` writes to
        ``self.storage_path_by_date`` and ``docs`` to ``self.storage_path_docs``;
        ``readme`` and any unrecognised value write to ``self.storage_path_readme``.
    @return: None
    """
    destinations = {
        'database': self.storage_path_by_date,
        'readme': self.storage_path_readme,
        'docs': self.storage_path_docs,
    }
    # Unknown selectors fall back to the README path.
    target = destinations.get(obj_, destinations['readme'])
    with open(target, "w", encoding="utf8") as handle:
        handle.writelines(template)

def generate_markdown_template(self, content: str):
    """
    Prefix rendered content with the project header.

    The repository is read from the ``repo`` environment variable. Its last
    path segment (dashes replaced by spaces) is used in the title, and the
    whole value is appended to ``https://github.com/`` to build the links.

    @param content: Markdown body appended after the header.
    @return: header lines followed by ``content``.
    """
    repo_slug = os.getenv('repo')
    repo_name = repo_slug.split('/')[-1].replace('-', ' ')
    print('-====,', repo_slug)
    base_url = "https://github.com/" + repo_slug

    header_lines = [
        f"# arxiv-daily latest papers around {repo_name}\n",
        f"Automated deployment @ {self.update_time} Asia/Shanghai\n",
        f"> Welcome to contribute! Add your topics and keywords in "
        f"[`topic.yml`]({base_url}/blob/main/database/topic.yml).\n",
        f"> You can also view historical data through the "
        f"[storage]({base_url}/blob/main/database/storage).\n",
    ]

    return "".join(header_lines) + content

def to_markdown(self, context: dict) -> dict:
    """
    Render one topic/subtopic section as a Markdown table.

    @param context: mapping with ``fields`` (column headers), ``topic``,
        ``subtopic`` and ``paper`` (a mapping whose values are paper records).
    @return: dict with ``hook`` (the topic heading) and ``content`` (subtopic
        heading, table header, alignment row and one row per paper).
    """
    fields = context["fields"]
    topic = context["topic"]
    subtopic = context["subtopic"]
    papers = context["paper"]

    topic_heading = f"\n## {topic}\n"
    subtopic_heading = f"\n### {subtopic}\n"
    header_row = "|" + "|".join(fields) + "|\n"
    # One centered alignment cell per column.
    alignment_cells = [self._set_style_to('center') for _ in fields]
    alignment_row = "|" + "|".join(alignment_cells) + "|\n"

    rows = []
    for paper in papers.values():
        # A fresh tag list is passed for every paper.
        rows.append(self._generate_markdown_table_content(paper, tags=[topic, subtopic]))

    section = subtopic_heading + header_row + alignment_row + "".join(rows)

    return {"hook": topic_heading, "content": section}

def generate_markdown_template(self, content):
# Mock implementation of generate_markdown_template
Expand Down

0 comments on commit 66d0667

Please sign in to comment.