def _search(self, term: str) -> None:
    """Print the pages most similar to ``term``.

    Delegates to ``self.analyzer.search_similar_content(term)`` and prints
    one entry per result (page number, similarity score to two decimals and
    the first 200 characters of the excerpt), or a "no results" message when
    the analyzer returns nothing.

    Args:
        term: Free-text query typed by the user.
    """
    results = self.analyzer.search_similar_content(term)
    if not results:
        print(f"No results found for '{term}'")
        return

    # NOTE(review): the glyph after the newline looks like a mis-decoded
    # emoji; it is kept byte-for-byte here -- restore it from the original
    # source if available.
    print(f"\n๐ Search results for '{term}':")
    for result in results:
        print(
            f"\n Page {result['page_number']} "
            f"(Similarity: {result['similarity_score']:.2f})"
        )
        print(f" Excerpt: {result['excerpt'][:200]}...")
def _show_case_studies(self) -> None:
    """Print the first five case studies held by the analyzer.

    Each entry of ``self.analyzer.case_studies`` is read as a mapping with
    ``'title'`` and ``'description'`` keys; the description is cut to its
    first 200 characters.
    """
    # NOTE(review): the glyph after the newline looks like a mis-decoded
    # emoji; it is kept byte-for-byte here -- restore it from the original
    # source if available.
    print("\n๐ CASE STUDIES:")
    for i, case in enumerate(self.analyzer.case_studies[:5], 1):
        print(f"\n{i}. {case['title']}")
        print(f" {case['description'][:200]}...")
def extract_key_concepts(self) -> List[Dict]:
    """Extract and rank key urban planning concepts.

    Counts case-insensitive substring occurrences of a fixed list of
    planning terms in ``self.full_text``, keeps the 20 most frequent terms
    and attaches up to two sentences of ``self.full_text`` that mention each
    one.

    Returns:
        A list of dicts with keys ``'term'``, ``'frequency'`` and
        ``'context'`` (a list of at most two sentences), ordered from most
        to least frequent. The same list is stored on ``self.key_concepts``.
    """
    # Urban planning specific terminology
    planning_terms = [
        'zoning', 'land use', 'transportation', 'infrastructure',
        'sustainability', 'urban design', 'smart growth', 'new urbanism',
        'gentrification', 'affordable housing', 'public space',
        'transit-oriented development', 'mixed-use', 'walkability',
        'green infrastructure', 'climate resilience', 'urban renewal',
        'community engagement', 'comprehensive plan', 'subdivision',
        'environmental impact', 'historic preservation', 'urban sprawl',
        'density', 'parking', 'complete streets', 'placemaking',
    ]

    # Lower-case the document once instead of once per term.
    lowered_text = self.full_text.lower()

    # Count frequencies of planning terms (substring matches, so e.g.
    # 'zoning' is also counted inside 'rezoning').
    concept_counts = Counter()
    for term in planning_terms:
        count = lowered_text.count(term)
        if count > 0:
            concept_counts[term] = count

    ranked = concept_counts.most_common(20)

    # Sentence-split once, and only when at least one term matched; the
    # split does not depend on the concept being looked up.
    sentences = sent_tokenize(self.full_text) if ranked else []
    lowered_sentences = [(sentence, sentence.lower()) for sentence in sentences]

    # Extract context for each concept
    concepts = []
    for concept, count in ranked:
        # planning_terms are already lower-case, so compare directly.
        context = [
            sentence
            for sentence, lowered in lowered_sentences
            if concept in lowered
        ][:2]
        concepts.append({
            'term': concept,
            'frequency': count,
            'context': context,
        })

    self.key_concepts = concepts
    return concepts
def generate_study_questions(self) -> List[Dict]:
    """Generate study questions based on key concepts and sections.

    Builds three groups of questions, in this order:

    * one ``'concept'`` question for each of the first 10 entries of
      ``self.key_concepts``;
    * one ``'section'`` question for each of the first 5 entries of
      ``self.sections`` whose text is longer than 100 characters;
    * a single ``'comparative'`` question when ``self.case_studies`` holds
      at least two entries, comparing the first two by title.

    Returns:
        A list of dicts. Every dict has ``'type'``, ``'question'`` and
        ``'hint'``; concept questions add ``'related_concept'`` and section
        questions add ``'related_section'``.
    """
    questions = []

    # Generate questions from key concepts
    for concept in self.key_concepts[:10]:
        questions.append({
            'type': 'concept',
            'question': (
                f"What are the key principles and applications of "
                f"{concept['term']} in urban planning?"
            ),
            'related_concept': concept['term'],
            'hint': (
                f"Review section discussing {concept['term']} "
                f"(mentioned {concept['frequency']} times)"
            ),
        })

    # Generate questions from sections; the length filter is applied after
    # taking the first five, so fewer than five questions may result.
    for section_name, section_text in list(self.sections.items())[:5]:
        if len(section_text) > 100:
            questions.append({
                'type': 'section',
                'question': (
                    f"Summarize the main arguments presented in "
                    f"'{section_name}' regarding urban planning approaches."
                ),
                'related_section': section_name,
                'hint': "Focus on the key definitions and examples provided",
            })

    # Add comparative questions
    if len(self.case_studies) >= 2:
        first_title = self.case_studies[0]['title']
        second_title = self.case_studies[1]['title']
        questions.append({
            'type': 'comparative',
            'question': (
                f"Compare and contrast the urban planning approaches in "
                f"'{first_title}' vs '{second_title}'."
            ),
            'hint': "Consider differences in context, implementation, and outcomes",
        })

    return questions
def _show_questions(self) -> None:
    """Print the study questions produced by the analyzer.

    Calls ``self.analyzer.generate_study_questions()`` and prints each
    question, numbered from 1, followed by its hint.
    """
    questions = self.analyzer.generate_study_questions()

    # NOTE(review): the glyphs before "STUDY QUESTIONS" and "Hint" look like
    # mis-decoded emoji; they are kept byte-for-byte here -- restore them
    # from the original source if available.
    print("\nโ STUDY QUESTIONS:")
    for i, q in enumerate(questions, 1):
        print(f"\n{i}. {q['question']}")
        print(f" ๐ก Hint: {q['hint']}")
def search_similar_content(self, query: str, top_k: int = 3) -> List[Dict]:
    """Search for content similar to query using TF-IDF.

    Every entry of ``self.pages_text`` is treated as one document (its
    ``'text'`` value); the query is vectorized together with the pages and
    compared to each page by cosine similarity.

    Args:
        query: Free-text search string.
        top_k: Maximum number of pages to return. Values <= 0 yield no
            results.

    Returns:
        Up to ``top_k`` dicts with keys ``'page_number'``,
        ``'similarity_score'`` (float) and ``'excerpt'`` (first 500
        characters of the page text), best match first. Pages with a
        similarity of 0 are omitted. An empty list is returned when there
        are no pages, the query is blank, or no usable vocabulary exists.
    """
    # Guard the degenerate inputs up front: a slice of [-0:] would return
    # *every* page, and the similarity computation needs at least one page.
    if top_k <= 0 or not self.pages_text or not query.strip():
        return []

    # Prepare documents (each page as a document), with the query last
    documents = [page['text'] for page in self.pages_text]
    documents.append(query)

    # Create TF-IDF matrix
    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = vectorizer.fit_transform(documents)
    except ValueError:
        # Raised when the vocabulary is empty (e.g. only stop words);
        # nothing can match, so report "no results" instead of crashing.
        return []

    # Calculate similarity of the query (last row) against every page
    scores = cosine_similarity(tfidf_matrix[-1:], tfidf_matrix[:-1])[0]

    # Get top similar pages: argsort is ascending, so take the tail and
    # reverse it to get best-first order.
    best_first = scores.argsort()[-top_k:][::-1]

    return [
        {
            'page_number': self.pages_text[idx]['page_num'],
            'similarity_score': float(scores[idx]),
            'excerpt': self.pages_text[idx]['text'][:500],
        }
        for idx in best_first
        if scores[idx] > 0
    ]