Skip to content

Commit

Permalink
style: adjust PyLint style
Browse files (browse the repository at this point in the history)
  • Loading branch information
0x5844 committed Nov 25, 2023
1 parent e047469 commit d39b2dc
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 12 deletions.
6 changes: 6 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
{
"editor.codeActionsOnSave": {
"source.fixAll": true,
"source.organizeImports": "always",
},
"cSpell.words": [
"Kaggle",
"maxsplit",
"ngram",
"ozlerhakan",
"phishingemails",
"rtatman",
Expand Down
20 changes: 12 additions & 8 deletions phishnet.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
"""
This module uses GPT-2 from Hugging Face's transformers library to generate text based on a prompt.
"""

from transformers import GPT2Tokenizer, GPT2LMHeadModel

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

prompt_text = "Urgent: Your account has been compromised. Please click the link immediately to protect your information:"
PROMPT_TEXT = "Urgent: Your account has been compromised"

inputs = tokenizer.encode(prompt_text, add_special_tokens=False, return_tensors='pt')
inputs = tokenizer.encode(PROMPT_TEXT, add_special_tokens=False, return_tensors='pt')
outputs = model.generate(
inputs,
max_length=100,
num_return_sequences=1,
temperature=0.7,
top_p=0.9,
do_sample=True,
inputs,
max_length=100,
num_return_sequences=1,
temperature=0.7,
top_p=0.9,
do_sample=True,
no_repeat_ngram_size=2
)

Expand Down
14 changes: 10 additions & 4 deletions scripts/download_base_datasets.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
"""
This module downloads various datasets from Kaggle and saves them locally.
"""

import os
import requests
from tqdm import tqdm
from kaggle.api.kaggle_api_extended import KaggleApi

def download_file(url, filename):
    """
    Downloads a file from the given URL and saves it to the specified filename.

    The response body is streamed to disk in 1 KB chunks while a tqdm
    progress bar reports how many chunks have been written.

    Args:
        url: Address of the file to fetch with an HTTP GET request.
        filename: Path of the local file to write. It is opened in binary
            write mode, so any existing content is replaced.
    """
    chunk_size = 1024
    # The timeout keeps the request from waiting forever on a stalled server.
    with requests.get(url, stream=True, timeout=10) as r:
        # Content-Length is optional (e.g. chunked transfer encoding). Without
        # it the bar runs with an unknown total instead of int(None) raising.
        content_length = r.headers.get('content-length')
        if content_length is not None:
            # Ceiling division so a trailing partial chunk is counted too.
            total_chunks = -(-int(content_length) // chunk_size)
        else:
            total_chunks = None
        with open(filename, 'wb') as f:
            for chunk in tqdm(r.iter_content(chunk_size=chunk_size), total=total_chunks, unit='KB', desc=f'Downloading {filename}'):
                if chunk:
                    f.write(chunk)

# Create the output directory. exist_ok=True makes this a no-op when the
# directory is already present and avoids the check-then-create race of a
# separate os.path.exists() test.
os.makedirs('datasets', exist_ok=True)

Expand All @@ -30,10 +36,10 @@ def download_file(url, filename):

# Download Kaggle datasets
for dataset in kaggle_datasets:
dataset_key = dataset.split('/')[-1]
dataset_key = dataset.rsplit('/', maxsplit=1)[-1]
dataset_path = f'datasets/{dataset_key}'
if not os.path.exists(dataset_path):
print(f'Trying to download {dataset_key}...')
api.dataset_download_files(dataset, path='datasets', unzip=True, quiet=False)

print("Datasets downloaded and renamed successfully.")
print("Datasets downloaded and renamed successfully.")

0 comments on commit d39b2dc

Please sign in to comment.