From 66d06675651f9f9c725b884da7d711a41139a108 Mon Sep 17 00:00:00 2001
From: HeisenBerg?
Date: Sat, 27 Jul 2024 07:38:42 +0800
Subject: [PATCH] Update main.py

---
 main.py | 222 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 211 insertions(+), 11 deletions(-)

diff --git a/main.py b/main.py
index ec0502e2..fcbb0865 100644
--- a/main.py
+++ b/main.py
@@ -229,19 +229,219 @@ def __init__(self):
         self.storage_path_by_date = os.path.join(SERVER_DIR_STORAGE, self.update_time)
         self.storage_path_docs = SERVER_PATH_DOCS
         self.storage_path_readme = SERVER_PATH_README
+
+    def _generate_yaml_front_matter(self, paper: dict, editor_name: str) -> str:
+        """Build the YAML front matter block for one paper's blog post.
+
+        :param paper: paper record; reads ``title``, ``keywords``, ``authors``.
+        :param editor_name: value written to the ``author`` field.
+        :return: the dumped YAML wrapped in ``---`` delimiter lines.
+        """
+        # strftime always yields "YYYY-MM-DD HH:MM:SS"; str(dt).split('.')[0]
+        # only did so when the microsecond field happened to be non-zero.
+        post_pubdate = datetime.now(TIME_ZONE_CN).strftime("%Y-%m-%d %H:%M:%S")
+        cover_url = (
+            "https://www.apple.com.cn/newsroom/images/product/homepod/standard/"
+            "Apple-HomePod-hero-230118_big.jpg.large_2x.jpg"
+        )
+
+        front_matter = {
+            "layout": "../../layouts/MarkdownPost.astro",
+            "title": paper["title"],
+            "pubDate": post_pubdate,
+            "description": "",
+            "author": editor_name,
+            "cover": {"url": cover_url, "square": cover_url, "alt": "cover"},
+            "tags": paper["keywords"],
+            "theme": "light",
+            "featured": True,
+            # NOTE(review): both keyword strings below look like template
+            # placeholders -- confirm whether they should carry real tags.
+            "meta": [
+                {"name": "author", "content": paper["authors"]},
+                {"name": "keywords", "content": "key3, key4"},
+            ],
+            "keywords": "key1, key2, key3",
+        }
+
+        yaml_front_matter = yaml.safe_dump(front_matter, default_flow_style=False)
+        return f"---\n{yaml_front_matter}---\n"
+
+    def _generate_markdown_content(self, paper: dict, pdf_link: str) -> str:
+        """Render the Markdown body of one paper's post.
+
+        :param paper: paper record; reads ``title``, ``publish_time``,
+            ``authors``, ``id``, ``abstract`` and ``QA_md_contents``.
+        :param pdf_link: text placed under the ``download`` heading.
+        :return: the Markdown text.
+        """
+        return (
+            f"# title: {paper['title']} \n"
+            f"## publish date: \n{paper['publish_time']} \n"
+            f"## authors: \n {paper['authors']} \n"
+            f"## paper id\n"
+            f"{paper['id']}\n"
+            f"## download\n"
+            f"{pdf_link}\n"
+            f"## abstracts:\n"
+            f"{paper['abstract']}\n"
+            f"## QA:\n"
+            f"{paper['QA_md_contents']}\n"
+        )
+
+    def _generate_markdown_table_content(self, paper: dict, tags=None):
+        """Write one paper out as a Markdown post and record its tags.
+
+        Mutates ``paper`` in place (``publish_time`` is wrapped in ``**``;
+        ``keywords`` and ``QA_md_contents`` are filled in), writes the post to
+        ``SERVER_PATH_STORAGE_MD.format(postname)`` and dumps the
+        merged tag list to ``data.json``.
+
+        :param paper: paper record; reads ``publish_time``, ``keywords``,
+            ``id``, ``paper_url`` and ``title`` here, plus the keys used by
+            the two ``_generate_*`` helpers.
+        :param tags: optional extra tags (``to_markdown`` passes topic and
+            subtopic); ``None`` means no extra tags.
+        :return: ``None`` -- no table row is produced.
+        """
+        # Order-preserving de-duplication that also makes tags=None safe.
+        extra_tags = list(dict.fromkeys(tags or []))
+
+        paper['publish_time'] = f"**{paper['publish_time']}**"
+        # Fall back to the caller's tags only when the paper has no keywords.
+        # The previous check was inverted and ended up calling set(None).
+        if not paper['keywords']:
+            paper['keywords'] = extra_tags
+
+        qa_md_link = f"https://github.com/taesiri/ArXivQA/blob/main/papers/{paper['id']}.md"
+        paper['QA_md_contents'] = ToolBox.handle_md(qa_md_link)
+        if paper['QA_md_contents'] is None:
+            print('gen realtime')
+            paper['QA_md_contents'] = 'coming soon'
+        # https://huggingface.co/spaces/taesiri/ClaudeReadsArxiv
+        # https://github.com/Nipun1212/Claude_api
+        pdf_link = self._set_markdown_hyperlink(text=paper['id'], link=paper['paper_url'])
+
+        # NOTE(review): editor_name is not defined inside this method; unless
+        # it exists at module level this call raises NameError -- confirm.
+        yaml_front_matter = self._generate_yaml_front_matter(paper, editor_name)
+        markdown_content = self._generate_markdown_content(paper, pdf_link)
+        paper_contents = f"{yaml_front_matter}\n{markdown_content}"
+
+        postname = self._check_for_illegal_char(paper['title']).replace(' ', '_')
+        # An Astro post whose file name starts with "__" ends in a 404, so
+        # strip the leading underscores only and keep any "__" inside the name.
+        if postname.startswith('__'):
+            postname = postname.lstrip('_')
+        paper_path_appleblog = SERVER_PATH_STORAGE_MD.format(postname)
+
+        if not os.path.exists(SERVER_DIR_STORAGE):
+            os.makedirs(SERVER_DIR_STORAGE)
+            print(f"Directory '{SERVER_DIR_STORAGE}' was created.")
+        else:
+            print(f"Directory '{SERVER_DIR_STORAGE}' already exists.")
+
+        with open(paper_path_appleblog, "w", encoding="utf8") as f:
+            f.write(paper_contents)
+
+        # .dirname() is not a method of str or pathlib.Path; use os.path.dirname().
+        tags_path = os.path.join(os.path.dirname(SERVER_DIR_STORAGE), 'tags.json')
+        data = {}
+        if os.path.exists(tags_path):
+            # The encoding belongs to open(), not json.load(); the with-block
+            # also closes the handle the old code leaked.
+            with open(tags_path, encoding='utf8') as f:
+                data = json.load(f)
+        merged = data.get('tags', []) + paper['keywords'] + extra_tags
+        data['tags'] = list(dict.fromkeys(merged))
+
+        # NOTE(review): tags are read from tags.json but written to data.json
+        # in the working directory -- confirm that is the intended target.
+        with open('data.json', 'w', encoding='utf-8') as file:
+            json.dump(data, file, ensure_ascii=False, indent=2)
 
-    def to_markdown(self, context):
-        # Mock implementation of to_markdown
-        return {
-            "hook": context["topic"],
-            "content": f"# {context['topic']} - {context['subtopic']}\n\n" +
-                       f"**Publish Time:** {context['paper']['publish_time']}\n" +
-                       f"**Title:** {context['paper']['title']}\n" +
-                       f"**Authors:** {context['paper']['authors']}\n" +
-                       f"**PDF Link:** {context['paper']['paper_url']}\n" +
-                       f"**Code Repo:** {context['paper']['repo']}\n" +
-                       f"**Abstract:** {context['paper']['abstract']}\n"
+    @staticmethod
+    def _set_style_to(style: str = "center"):
+        """Return the Markdown table alignment cell for ``style``."""
+        return " :---: " if style == "center" else " --- "
+
+    # -------------------
+    # Public API
+    # -------------------
+    def storage(self, template: str, obj_: str = "database"):
+        """
+        Archive a rendered Markdown template to disk.
+
+        @param template: Markdown text to write.
+        @param obj_: "database" writes to self.storage_path_by_date, "docs" to
+            self.storage_path_docs; "readme" or any other value overwrites
+            self.storage_path_readme.
+        @return: None
+        """
+        path_factory = {
+            'database': self.storage_path_by_date,
+            'readme': self.storage_path_readme,
+            'docs': self.storage_path_docs
         }
+        # Unknown targets fall back to the README path.
+        path_ = path_factory.get(obj_, path_factory['readme'])
+        with open(path_, "w", encoding="utf8") as f:
+            # Same bytes as writing item by item, in a single call.
+            f.writelines(template)
+
+    def generate_markdown_template(self, content: str):
+        """Prepend the project banner to the rendered topic sections.
+
+        The ``repo`` environment variable supplies the repository path used
+        for the title and for the GitHub links.
+
+        :param content: Markdown appended after the banner.
+        :return: the complete Markdown document as one string.
+        """
+        # NOTE(review): the mock generate_markdown_template left in place right
+        # after these additions has the same name; if both sit in one class
+        # the later definition wins -- confirm and remove the mock.
+        # Fall back to "" so an unset variable no longer fails on None.split().
+        repo_slug = os.getenv('repo') or ''
+        repo_name = repo_slug.split('/')[-1].replace('-', ' ')
+        repo_url = "https://github.com/" + repo_slug
+
+        _project = f"# arxiv-daily latest papers around {repo_name}\n"
+        _pin = f"Automated deployment @ {self.update_time} Asia/Shanghai\n"
+        _tos = f"> Welcome to contribute! Add your topics and keywords in " \
+               f"[`topic.yml`]({repo_url}/blob/main/database/topic.yml).\n"
+        _tos += f"> You can also view historical data through the " \
+                f"[storage]({repo_url}/blob/main/database/storage).\n"
+
+        return _project + _pin + _tos + content
+
+    def to_markdown(self, context: dict) -> dict:
+        """Render one topic/subtopic section of the report.
+
+        :param context: mapping with ``fields`` (column titles), ``topic``,
+            ``subtopic`` and ``paper`` (a dict whose values are paper records).
+        :return: ``{"hook": topic heading, "content": subtopic section}``.
+        """
+        _fields = context["fields"]
+        _topic = context["topic"]
+        _subtopic = context["subtopic"]
+        _paper_obj = context["paper"]
+
+        _topic_md = f"\n## {_topic}\n"
+        _subtopic_md = f"\n### {_subtopic}\n"
+        _fields_md = f"|{'|'.join(_fields)}|\n"
+        _style_md = f"|{'|'.join(self._set_style_to('center') for _ in _fields)}|\n"
+        rows = [
+            self._generate_markdown_table_content(paper, tags=[_topic, _subtopic])
+            for paper in _paper_obj.values()
+        ]
+        # _generate_markdown_table_content returns None, and "".join() raises
+        # TypeError on None, so only real row strings are joined.
+        table_lines = "".join(row for row in rows if isinstance(row, str))
+
+        _content_md = _subtopic_md + _fields_md + _style_md + table_lines
+
+        return {"hook": _topic_md, "content": _content_md}
 
     def generate_markdown_template(self, content):
         # Mock implementation of generate_markdown_template