Skip to content

Commit

Permalink
updating gitloader to match the php code
Browse files Browse the repository at this point in the history
  • Loading branch information
codebanesr authored and codebanesr committed Aug 15, 2023
1 parent 4bb3f22 commit aeca0cc
Show file tree
Hide file tree
Showing 10 changed files with 160 additions and 153 deletions.
8 changes: 4 additions & 4 deletions dj_backend_server/api/data_sources/codebase_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
from api.utils import get_embeddings
from langchain.document_loaders import GitLoader
from api.utils import init_vector_store
from api.interfaces import StoreOptions

# https://python.langchain.com/docs/integrations/document_loaders/git
@csrf_exempt
def codebase_handler(repo_path: str, namespace: str):
try:
loader = GitLoader(repo_path=repo_path, branch="main", recursive=True, unknown="warn")
folder_path = f"website_data_sources/{namespace}"
loader = GitLoader(repo_path=folder_path, clone_url=repo_path, branch="master")

raw_docs = loader.load()

Expand All @@ -19,11 +21,9 @@ def codebase_handler(repo_path: str, namespace: str):
text_splitter = RecursiveCharacterTextSplitter(separators=["\n"], chunk_size=1000, chunk_overlap=200,length_function=len)
docs = text_splitter.split_documents(raw_docs)

print('Split documents')

embeddings = get_embeddings()

init_vector_store(docs, embeddings, namespace=namespace)
init_vector_store(docs, embeddings, options=StoreOptions(namespace))

print('Indexed documents. all done!')
except Exception as e:
Expand Down
76 changes: 38 additions & 38 deletions dj_backend_server/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ version: '3.9'
services:
mysql:
restart: unless-stopped
platform: linux/x86_64
platform: linux/arm64/v8 # for mac
image: "mysql:8"
ports:
- "3307:3306"
Expand All @@ -28,26 +28,26 @@ services:
networks:
- openchat_network

web:
build:
context: .
dockerfile: Dockerfile
ports:
- "8000:8000"
# volumes:
# - .:/app
depends_on:
- mysql
env_file:
- .env.docker
command: >
sh -c "python manage.py sync_models &&
python manage.py runserver 0.0.0.0:8000"
networks:
- openchat_network
dns:
- 8.8.8.8
- 8.8.4.4
# web:
# build:
# context: .
# dockerfile: Dockerfile
# ports:
# - "8000:8000"
# # volumes:
# # - .:/app
# depends_on:
# - mysql
# env_file:
# - .env.docker
# command: >
# sh -c "python manage.py sync_models &&
# python manage.py runserver 0.0.0.0:8000"
# networks:
# - openchat_network
# dns:
# - 8.8.8.8
# - 8.8.4.4

adminer:
image: adminer
Expand All @@ -60,23 +60,23 @@ services:
networks:
- openchat_network

celery_worker:
build:
context: .
dockerfile: Dockerfile
container_name: myproject_celery
# volumes:
# - .:/app
depends_on:
- web
env_file:
- .env.docker
command: celery -A dj_backend_server worker --loglevel=info
networks:
- openchat_network
dns:
- 8.8.8.8
- 8.8.4.4
# celery_worker:
# build:
# context: .
# dockerfile: Dockerfile
# container_name: myproject_celery
# # volumes:
# # - .:/app
# depends_on:
# - web
# env_file:
# - .env.docker
# command: celery -A dj_backend_server worker --loglevel=info
# networks:
# - openchat_network
# dns:
# - 8.8.8.8
# - 8.8.4.4


redis:
Expand Down
61 changes: 28 additions & 33 deletions dj_backend_server/web/listeners/ingest_codebase_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,44 +7,39 @@
from web.signals.codebase_datasource_was_created import codebase_data_source_added
from django.core.exceptions import ObjectDoesNotExist
from django.utils.timezone import now
from web.signals.codebase_datasource_was_created import codebase_data_source_added

class IngestCodebaseDataSource:
def handle(self, event):
if not isinstance(event, codebase_data_source_added):
return

bot_id = event.get_chatbot_id()
codebase_data_source_id = event.get_codebase_data_source_id()

try:
datasouce = CodebaseDataSource.objects.get(id=codebase_data_source_id)
except ObjectDoesNotExist:
return
@codebase_data_source_added.connect
def ingest_codebase_data_source(sender, chatbot_id, data_source_id, **kwargs):
try:
datasource = CodebaseDataSource.objects.get(id=data_source_id)
except ObjectDoesNotExist:
return

repo = datasouce.get_repository()
repo = datasource.repository

request_body = {
'type': 'codebase',
'repo': repo,
'namespace': bot_id,
}
request_body = {
'type': 'codebase',
'repo': repo,
'namespace': str(chatbot_id),
}

try:
# Call to ingest service endpoint
url = "http://localhost:3000/api/ingest" # Replace with the actual URL
response = requests.post(url, json=request_body)
try:
# Call to ingest service endpoint
url = "http://localhost:8000/api/ingest/" # Replace with the actual URL
response = requests.post(url, json=request_body)

datasouce.ingested_at = now()
datasource.ingested_at = now()

if response.status_code != 200:
datasouce.ingestion_status = IngestStatusType.FAILED
else:
datasouce.ingestion_status = IngestStatusType.SUCCESS
if response.status_code != 200:
datasource.ingestion_status = IngestStatusType.FAILED
else:
datasource.ingestion_status = IngestStatusType.SUCCESS

datasouce.save()
datasource.save()

except RequestException as e:
datasouce.ingested_at = now()
datasouce.ingestion_status = IngestStatusType.FAILED
datasouce.save()
return
except RequestException as e:
datasource.ingested_at = now()
datasource.ingestion_status = IngestStatusType.FAILED
datasource.save()
return
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.3 on 2023-08-15 00:35

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter ``codebasedatasource.chatbot_id`` to a nullable ``UUIDField``."""

    # This migration is ordered after the ``web`` app's initial migration.
    dependencies = [("web", "0001_initial")]

    operations = [
        migrations.AlterField(
            model_name="codebasedatasource",
            name="chatbot_id",
            field=models.UUIDField(null=True),
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.3 on 2023-08-15 00:37

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter ``codebasedatasource.chatbot_id`` to a nullable 36-char ``CharField``."""

    # This migration is ordered after migration 0002 of the ``web`` app, which
    # altered the same field.
    dependencies = [("web", "0002_alter_codebasedatasource_chatbot_id")]

    operations = [
        migrations.AlterField(
            model_name="codebasedatasource",
            name="chatbot_id",
            field=models.CharField(max_length=36, null=True),
        ),
    ]
29 changes: 1 addition & 28 deletions dj_backend_server/web/models/chatbot_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,39 +3,12 @@
from web.models.chatbot import Chatbot

class ChatbotSetting(models.Model):
id = models.CharField(max_length=36, primary_key=True)
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
chatbot_id = models.CharField(max_length=36, null=True)
name = models.CharField(max_length=255)
value = models.CharField(max_length=255)
created_at = models.DateTimeField(auto_now_add=True, null=True)
updated_at = models.DateTimeField(auto_now=True, null=True)

def set_chatbot_id(self, chatbot_id):
self.chatbot_id = chatbot_id

def set_name(self, name):
self.name = name

def set_value(self, value):
self.value = value

def get_id(self):
return self.id

def set_id(self, _id):
self.id = _id

def get_chatbot_id(self):
return self.chatbot_id

def get_name(self):
return self.name

def get_value(self):
return self.value

def chatbot(self):
return self.chatbot # Replace with the related name of the Chatbot model (if defined)

class Meta:
db_table = 'chatbot_settings' # Replace 'chatbot_setting' with the actual table name in the database
43 changes: 1 addition & 42 deletions dj_backend_server/web/models/codebase_data_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,46 +10,5 @@ class CodebaseDataSource(models.Model):
ingested_at = models.DateTimeField()
ingestion_status = models.CharField(max_length=50)

def set_id(self, _id):
self.id = _id

def get_id(self):
return self.id

def get_repository(self):
return self.repository

def set_repository(self, repository):
self.repository = repository

def get_chatbot_id(self):
return self.chatbot_id

def set_chatbot_id(self, chatbot_id):
self.chatbot_id = chatbot_id

def get_ingested_at(self):
return self.ingested_at

def set_ingested_at(self, ingested_at):
self.ingested_at = ingested_at

def get_ingestion_status(self):
return self.ingestion_status

def set_ingestion_status(self, ingestion_status):
self.ingestion_status = ingestion_status

def get_created_at(self):
return self.created_at

def get_updated_at(self):
return self.updated_at

class Meta:
db_table = 'codebase_data_sources' # Replace 'codebase_data_source' with the actual table name in the database

def save(self, *args, **kwargs):
if not self.ingested_at:
self.ingested_at = timezone.now()
super().save(*args, **kwargs)
db_table = 'codebase_data_sources' # Replace 'codebase_data_source' with the actual table name in the database
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ <h1 class="text-3xl text-slate-800 font-bold mb-6">GitHub Repo information ✨</
</div>
</div>
{% endif %}
<form action="{% url 'onboarding.codebase.create' %}" method="POST">
<form action="{% url 'onboarding.codebase.create' %}" method="POST" id="codebaseCreate">
{% csrf_token %}
<div class="space-y-4 mb-8">
<!-- Company Name -->
Expand All @@ -91,7 +91,7 @@ <h1 class="text-3xl text-slate-800 font-bold mb-6">GitHub Repo information ✨</
<div class="flex items-center justify-between">
<a class="text-sm underline hover:no-underline" href="{% url 'onboarding.data-source' %}">&lt;-
Back</a>
<button type="submit" class="btn bg-indigo-500 hover:bg-indigo-600 text-white ml-auto">Next Step
<button form="codebaseCreate" type="submit" class="btn bg-indigo-500 hover:bg-indigo-600 text-white ml-auto">Next Step
-&gt;
</button>
</div>
Expand Down
38 changes: 38 additions & 0 deletions dj_backend_server/web/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,41 @@ def get_session_id(request, bot_id):

session_id = request.COOKIES.get(cookie_name)
return session_id



import re
import random
import string

def generate_chatbot_name(repo_url, name=None):
    """Return a chatbot name, deriving one from the Git repository URL if needed.

    Parameters:
        repo_url (str): The Git repository URL. May be empty or ``None``.
        name (str, optional): An explicitly provided name (default is None).
            Any value other than ``None`` is returned unchanged.

    Returns:
        str: ``name`` when it is provided; otherwise
        ``"chatbot-url-<last URL segment>-<5 random ASCII letters>"``.
    """
    # An explicit name always wins, so there is no need to parse the URL
    # (and no way for a malformed URL to break this path).
    if name is not None:
        return name

    # Strip trailing slashes before taking the last path segment so that
    # "https://host/org/repo/" yields "repo" rather than failing on an
    # empty tail. An empty/None URL contributes an empty segment.
    last_part_of_git_url = (
        repo_url.rstrip("/").rsplit("/", 1)[-1] if repo_url else ""
    )

    # The random suffix keeps auto-generated names for the same repo distinct.
    random_suffix = "".join(random.choices(string.ascii_letters, k=5))
    return f"chatbot-url-{last_part_of_git_url}-{random_suffix}"

# Example usage. Guarded so that importing this module does not run the demo
# or print to stdout; it only executes when the file is run as a script.
if __name__ == "__main__":
    git_url = "your-git-url-here"
    provided_name = "provided-name"
    chatbot_name = generate_chatbot_name(git_url, provided_name)
    print(chatbot_name)
Loading

0 comments on commit aeca0cc

Please sign in to comment.