source: telemeta/models/query.py @ 2f57f0e

cremcrem2devdev2diademsfeature/breadcrumbsfeature/ts-0.5feature/ts-0.5.4feature/writecacheformagenericinstru_searchlamlam2mapsv3mergenlivemultiproductionrelease/1.4.4sabiodsecurityserversocialstoragetelecastertest
Last change on this file since 2f57f0e was 2f57f0e, checked in by yomguy <yomguy@…>, 2 years ago

get pattern search and title search working for corpus and fonds

  • Property mode set to 100644
File size: 18.2 KB
Line 
1# -*- coding: utf-8 -*-
2# Copyright (C) 2007-2010 Samalyse SARL
3# Copyright (C) 2010-2011 Parisson SARL
4#
5# This software is a computer program whose purpose is to backup, analyse,
6# transcode and stream any audio content with its metadata over a web frontend.
7#
8# This software is governed by the CeCILL  license under French law and
9# abiding by the rules of distribution of free software.  You can  use,
10# modify and/ or redistribute the software under the terms of the CeCILL
11# license as circulated by CEA, CNRS and INRIA at the following URL
12# "http://www.cecill.info".
13#
14# As a counterpart to the access to the source code and  rights to copy,
15# modify and redistribute granted by the license, users are provided only
16# with a limited warranty  and the software's author,  the holder of the
17# economic rights,  and the successive licensors  have only  limited
18# liability.
19#
20# In this respect, the user's attention is drawn to the risks associated
21# with loading,  using,  modifying and/or developing or reproducing the
22# software by the user in light of its specific status of free software,
23# that may mean  that it is complicated to manipulate,  and  that  also
24# therefore means  that it is reserved for developers  and  experienced
25# professionals having in-depth computer knowledge. Users are therefore
26# encouraged to load and test the software's suitability as regards their
27# requirements in conditions enabling the security of their systems and/or
28# data to be ensured and,  more generally, to use and operate it in the
29# same conditions as regards security.
30#
31# The fact that you are presently reading this means that you have had
32# knowledge of the CeCILL license and that you accept its terms.
33#
34# Authors: Olivier Guilyardi <olivier@samalyse.com>
35#          David LIPSZYC <davidlipszyc@gmail.com>
36#          Guillaume Pellerin <yomguy@parisson.com>
37
38from django.conf import settings
39from django.db.models import Q, Max, Min
40from telemeta.models.core import *
41from telemeta.util.unaccent import unaccent, unaccent_icmp
42from telemeta.models.enum import EthnicGroup
43import re
44
# Database engine path taken from the default Django database settings.
# The virtual() methods below test it for 'sqlite3' and skip their
# IF()-based extra SELECT expressions on that backend.
engine = settings.DATABASES['default']['ENGINE']
46
class MediaItemQuerySet(CoreQuerySet):
    "Base class for all media item query sets"

    def quick_search(self, pattern):
        """Perform a quick search on code, old code, title, comment and
        collector name.

        Surrounding whitespace is stripped from the pattern first.
        """
        pattern = pattern.strip()

        q = (Q(code__contains=pattern) |
             Q(old_code__contains=pattern) |
             word_search_q('title', pattern) |
             word_search_q('comment', pattern) |
             self.by_fuzzy_collector_q(pattern))

        return self.filter(q)

    def without_collection(self):
        "Find items which do not belong to any collection"
        return self.extra(
            where=["collection_id NOT IN (SELECT id FROM media_collections)"])

    def by_public_id(self, public_id):
        "Find items by public_id"
        return self.filter(public_id=public_id)

    def by_recording_date(self, from_date, to_date=None):
        """Find items by recording date.

        With only from_date, match items whose recording period contains
        that date. With both dates, match items whose recording period
        overlaps the [from_date, to_date] range.
        """
        if to_date is None:
            return self.filter(recorded_from_date__lte=from_date,
                               recorded_to_date__gte=from_date)

        # An item overlaps the range when one of its bounds falls inside it,
        # or when its recording period encloses the whole range. Without the
        # last clause an item recorded over 1950-1960 would be found by a
        # single-date search for 1952 but not by a 1952-1955 range search.
        return self.filter(
            Q(recorded_from_date__range=(from_date, to_date)) |
            Q(recorded_to_date__range=(from_date, to_date)) |
            Q(recorded_from_date__lte=from_date,
              recorded_to_date__gte=to_date))

    def by_title(self, pattern):
        "Find items by title"
        # to (sort of) sync with models.media.MediaItem.get_title():
        # untitled items are matched on their collection title instead
        return self.filter(word_search_q("title", pattern) |
                           (Q(title="") & word_search_q("collection__title", pattern)))

    def by_publish_year(self, from_year, to_year=None):
        """Find items by publishing year of their collection.

        When to_year is omitted the search is restricted to from_year.
        """
        if to_year is None:
            to_year = from_year
        return self.filter(collection__year_published__range=(from_year, to_year))

    def by_change_time(self, from_time=None, until_time=None):
        "Find items by last change time"
        # _by_change_time is not defined in this class; it is expected to be
        # provided by the base query set.
        return self._by_change_time('item', from_time, until_time)

    def by_location(self, location):
        "Find items by location"
        return self.filter(location__in=location.apparented())

    @staticmethod
    def __name_cmp(obj1, obj2):
        # Comparison function on the `name` attribute, delegated to
        # unaccent_icmp; used to order locations in countries().
        return unaccent_icmp(obj1.name, obj2.name)

    def locations(self):
        """Return the locations referenced by these items: the items' own
        locations, those locations' current locations, and the ancestor
        locations recorded in LocationRelation.
        """
        # Imported locally, as elsewhere in this module.
        from telemeta.models import Location, LocationRelation
        own = self.values('location')
        current = self.values('location__current_location')
        ancestors = LocationRelation.objects.filter(
            location__in=own).values('ancestor_location')
        return Location.objects.filter(
            Q(pk__in=own) | Q(pk__in=ancestors) | Q(pk__in=current))

    def countries(self, group_by_continent=False):
        """Return the distinct countries of these items, sorted by name.

        With group_by_continent set, return instead a list of dicts of the
        form {'continent': <continent>, 'countries': [<country>, ...]},
        sorted by continent name, each country list sorted by name.
        """
        countries = []
        from telemeta.models import Location
        location_ids = self.filter(location__isnull=False) \
                           .values_list('location', flat=True).distinct()
        for location_id in location_ids:
            location = Location.objects.get(pk=location_id)
            for l in location.countries():
                country = l.current_location
                if country not in countries:
                    countries.append(country)

        # NOTE: list.sort() with a positional comparison function is
        # Python 2 only, like the rest of this module.
        if not group_by_continent:
            countries.sort(self.__name_cmp)
            return countries

        grouped = {}
        for country in countries:
            for continent in country.continents():
                grouped.setdefault(continent, []).append(country)

        continents = list(grouped.keys())
        continents.sort(self.__name_cmp)
        ordered = []
        for continent in continents:
            grouped[continent].sort(self.__name_cmp)
            ordered.append({'continent': continent,
                            'countries': grouped[continent]})

        return ordered

    def virtual(self, *args):
        """Return a query set enriched with the requested virtual fields.

        Supported names are 'apparent_collector' and 'country_or_continent'.
        The extra SELECT expressions are skipped when the database engine is
        sqlite3, so the fields are not added on that backend.

        Raises Exception for any other field name.
        """
        qs = self
        related = []
        from telemeta.models import Location
        for f in args:
            if f == 'apparent_collector':
                if 'sqlite3' not in engine:
                    related.append('collection')
                    qs = qs.extra(select={f:
                        'IF(collector_from_collection, '
                            'IF(media_collections.collector_is_creator, '
                               'media_collections.creator, '
                               'media_collections.collector),'
                            'media_items.collector)'})
            elif f == 'country_or_continent':
                related.append('location')
                if 'sqlite3' not in engine:
                    qs = qs.extra(select={f:
                        'IF(locations.type = ' + str(Location.COUNTRY) + ' '
                        'OR locations.type = ' + str(Location.CONTINENT) + ','
                        'locations.name, '
                        '(SELECT l2.name FROM location_relations AS r INNER JOIN locations AS l2 '
                        'ON r.ancestor_location_id = l2.id '
                        'WHERE r.location_id = media_items.location_id AND l2.type = ' + str(Location.COUNTRY) + ' LIMIT 1))'
                    })
            else:
                raise Exception("Unsupported virtual field: %s" % f)

        if related:
            # The raw SQL above references the joined tables by name, so the
            # related models must be part of the query.
            qs = qs.select_related(*related)

        return qs

    def ethnic_groups(self):
        "Return the ethnic groups referenced by these items"
        ids = self.filter(ethnic_group__isnull=False).values('ethnic_group')
        return EthnicGroup.objects.filter(pk__in=ids)

    @staticmethod
    def by_fuzzy_collector_q(pattern):
        """Build the Q object matching pattern against the collection
        creator, the collection collector and the item collector.
        """
        return (word_search_q('collection__creator', pattern) |
                word_search_q('collection__collector', pattern) |
                word_search_q('collector', pattern))

    def by_fuzzy_collector(self, pattern):
        "Find items by creator or collector name"
        return self.filter(self.by_fuzzy_collector_q(pattern))

    def sound(self):
        "Find items whose file path contains a '/'"
        return self.filter(file__contains='/')
202
203
class MediaItemManager(CoreManager):
    "Manager exposing the MediaItemQuerySet API for media items"

    def get_query_set(self):
        "Build a MediaItemQuerySet bound to the managed model"
        return MediaItemQuerySet(self.model)

    def enriched(self):
        "Items with the apparent_collector and country_or_continent virtual fields"
        items = self.get_query_set()
        return items.virtual('apparent_collector', 'country_or_continent')

    # Each method below forwards its arguments to the query set method of
    # the same name and reuses that method's docstring.

    def quick_search(self, *args, **kwargs):
        items = self.get_query_set()
        return items.quick_search(*args, **kwargs)
    quick_search.__doc__ = MediaItemQuerySet.quick_search.__doc__

    def without_collection(self, *args, **kwargs):
        items = self.get_query_set()
        return items.without_collection(*args, **kwargs)
    without_collection.__doc__ = MediaItemQuerySet.without_collection.__doc__

    def by_recording_date(self, *args, **kwargs):
        items = self.get_query_set()
        return items.by_recording_date(*args, **kwargs)
    by_recording_date.__doc__ = MediaItemQuerySet.by_recording_date.__doc__

    def by_title(self, *args, **kwargs):
        items = self.get_query_set()
        return items.by_title(*args, **kwargs)
    by_title.__doc__ = MediaItemQuerySet.by_title.__doc__

    def by_publish_year(self, *args, **kwargs):
        items = self.get_query_set()
        return items.by_publish_year(*args, **kwargs)
    by_publish_year.__doc__ = MediaItemQuerySet.by_publish_year.__doc__

    def by_change_time(self, *args, **kwargs):
        items = self.get_query_set()
        return items.by_change_time(*args, **kwargs)
    by_change_time.__doc__ = MediaItemQuerySet.by_change_time.__doc__

    def by_location(self, *args, **kwargs):
        items = self.get_query_set()
        return items.by_location(*args, **kwargs)
    by_location.__doc__ = MediaItemQuerySet.by_location.__doc__

    def sound(self, *args, **kwargs):
        items = self.get_query_set()
        return items.sound(*args, **kwargs)
    sound.__doc__ = MediaItemQuerySet.sound.__doc__
246
247
class MediaCollectionQuerySet(CoreQuerySet):
    "Query set for media collections"

    def quick_search(self, pattern):
        """Perform a quick search on creator, collector and every char or
        text field of the collection model.

        Surrounding whitespace is stripped from the pattern first.
        """
        from telemeta.models.media import MediaCollection
        pattern = pattern.strip()
        mod = MediaCollection()
        fields = mod.to_dict()
        keys = fields.keys()
        q = self.by_fuzzy_collector_q(pattern)
        for field in keys:
            # Field kind is detected from the field's string representation.
            field_str = str(mod._meta.get_field(field))
            if 'CharField' in field_str or 'TextField' in field_str:
                q = q | word_search_q(field, pattern)
        return self.filter(q)

    def by_location(self, location):
        "Find collections by location"
        return self.filter(items__location__in=location.apparented()).distinct()

    def by_recording_year(self, from_year, to_year=None):
        """Find collections by recording year.

        With only from_year, match collections whose recording period
        contains that year. With both years, match collections whose
        recording period overlaps the [from_year, to_year] range.
        """
        if to_year is None:
            return self.filter(recorded_from_year__lte=from_year,
                               recorded_to_year__gte=from_year)

        # A collection overlaps the range when one of its bounds falls
        # inside it, or when its recording period encloses the whole range.
        # Without the last clause a collection recorded over 1950-1960 would
        # match a search for 1952 but not a search for 1952-1955.
        return self.filter(
            Q(recorded_from_year__range=(from_year, to_year)) |
            Q(recorded_to_year__range=(from_year, to_year)) |
            Q(recorded_from_year__lte=from_year,
              recorded_to_year__gte=to_year))

    def by_publish_year(self, from_year, to_year=None):
        """Find collections by publishing year.

        When to_year is omitted the search is restricted to from_year.
        """
        if to_year is None:
            to_year = from_year
        return self.filter(year_published__range=(from_year, to_year))

    def by_ethnic_group(self, group):
        "Find collections by ethnic group"
        return self.filter(items__ethnic_group=group).distinct()

    def by_change_time(self, from_time=None, until_time=None):
        "Find collections between two dates"
        # _by_change_time is not defined in this class; it is expected to be
        # provided by the base query set.
        return self._by_change_time('collection', from_time, until_time)

    def virtual(self, *args):
        """Return a query set enriched with the requested virtual fields.

        Only 'apparent_collector' is supported. Its extra SELECT expression
        is skipped when the database engine is sqlite3.

        Raises Exception for any other field name.
        """
        qs = self
        for f in args:
            if f == 'apparent_collector':
                if 'sqlite3' not in engine:
                    qs = qs.extra(select={f: 'IF(media_collections.collector_is_creator, '
                                         'media_collections.creator, media_collections.collector)'})
            else:
                raise Exception("Unsupported virtual field: %s" % f)

        return qs

    def recording_year_range(self):
        """Return (year_min, year_max) over the recording years.

        Zero or missing years are ignored. If only one bound can be
        determined it is used for both; if neither can, both are None.
        """
        from_max = self.aggregate(Max('recorded_from_year'))['recorded_from_year__max']
        to_max = self.aggregate(Max('recorded_to_year'))['recorded_to_year__max']

        from_min = self.filter(recorded_from_year__gt=0).aggregate(
            Min('recorded_from_year'))['recorded_from_year__min']
        to_min = self.filter(recorded_to_year__gt=0).aggregate(
            Min('recorded_to_year'))['recorded_to_year__min']

        # Aggregates are None when there is no matching row. Drop those (and
        # zero years) before comparing: min(None, 1950) is None on Python 2,
        # which would discard the valid minimum, and raises on Python 3.
        maxima = [year for year in (from_max, to_max) if year]
        minima = [year for year in (from_min, to_min) if year]
        year_max = max(maxima) if maxima else None
        year_min = min(minima) if minima else None

        if not year_max:
            year_max = year_min
        elif not year_min:
            year_min = year_max

        return year_min, year_max

    def publishing_year_range(self):
        """Return (year_min, year_max) over the publishing years.

        The minimum ignores years that are not greater than zero.
        """
        year_max = self.aggregate(Max('year_published'))['year_published__max']
        year_min = self.filter(year_published__gt=0).aggregate(
            Min('year_published'))['year_published__min']

        return year_min, year_max

    @staticmethod
    def by_fuzzy_collector_q(pattern):
        "Build the Q object matching pattern against creator and collector"
        return word_search_q('creator', pattern) | word_search_q('collector', pattern)

    def by_fuzzy_collector(self, pattern):
        "Find collections by creator or collector name"
        return self.filter(self.by_fuzzy_collector_q(pattern))

    def sound(self):
        "Find collections having at least one item whose file path contains a '/'"
        return self.filter(items__file__contains='/').distinct()
333
334
class MediaCollectionManager(CoreManager):
    "Manager exposing the MediaCollectionQuerySet API for collections"

    def get_query_set(self):
        "Build a MediaCollectionQuerySet bound to the managed model"
        return MediaCollectionQuerySet(self.model)

    def enriched(self):
        "Collections with the apparent_collector virtual field"
        collections = self.get_query_set()
        return collections.virtual('apparent_collector')

    # Each method below forwards its arguments to the query set method of
    # the same name and reuses that method's docstring.

    def quick_search(self, *args, **kwargs):
        collections = self.get_query_set()
        return collections.quick_search(*args, **kwargs)
    quick_search.__doc__ = MediaCollectionQuerySet.quick_search.__doc__

    def by_location(self, *args, **kwargs):
        collections = self.get_query_set()
        return collections.by_location(*args, **kwargs)
    by_location.__doc__ = MediaCollectionQuerySet.by_location.__doc__

    def by_recording_year(self, *args, **kwargs):
        collections = self.get_query_set()
        return collections.by_recording_year(*args, **kwargs)
    by_recording_year.__doc__ = MediaCollectionQuerySet.by_recording_year.__doc__

    def by_publish_year(self, *args, **kwargs):
        collections = self.get_query_set()
        return collections.by_publish_year(*args, **kwargs)
    by_publish_year.__doc__ = MediaCollectionQuerySet.by_publish_year.__doc__

    def by_ethnic_group(self, *args, **kwargs):
        collections = self.get_query_set()
        return collections.by_ethnic_group(*args, **kwargs)
    by_ethnic_group.__doc__ = MediaCollectionQuerySet.by_ethnic_group.__doc__

    def by_change_time(self, *args, **kwargs):
        collections = self.get_query_set()
        return collections.by_change_time(*args, **kwargs)
    by_change_time.__doc__ = MediaCollectionQuerySet.by_change_time.__doc__

    @staticmethod
    def __name_cmp(obj1, obj2):
        # Comparison function on the `name` attribute, delegated to
        # unaccent_icmp. Not referenced anywhere in this class.
        return unaccent_icmp(obj1.name, obj2.name)

    def sound(self, *args, **kwargs):
        collections = self.get_query_set()
        return collections.sound(*args, **kwargs)
    sound.__doc__ = MediaCollectionQuerySet.sound.__doc__
377
378
class LocationQuerySet(CoreQuerySet):
    "Query set for locations, with lookup by flattened name"

    # Class-level cache of the flat name -> location id dict built by
    # flatname_map(); shared by every instance of this class.
    __flatname_map = None

    def by_flatname(self, flatname):
        """Find the location registered under the given flat name.

        Raises KeyError if the flat name is not in the flat name map.
        """
        ids_by_flatname = self.flatname_map()
        return self.filter(pk=ids_by_flatname[flatname])

    def flatname_map(self):
        """Return a dict mapping flat names to country and continent ids.

        A flat name is the lower-cased result of unaccent() on the location
        name, with every character outside a-z replaced by an underscore.
        The dict is built once and cached on the class. An empty dict is
        not treated as cached and is rebuilt on the next call.
        """
        cached = self.__class__.__flatname_map
        if cached:
            return cached

        # NOTE(review): the cache is filled from whatever query set this is
        # called on, so a call on an already filtered query set would cache
        # a partial map. Confirm callers only go through the manager.
        flatnames = {}
        locations = self.filter(Q(type=self.model.COUNTRY) | Q(type=self.model.CONTINENT))
        for location in locations:
            flatname = unaccent(location.name).lower()
            flatname = re.sub('[^a-z]', '_', flatname)
            # Keep colliding names distinct by prefixing underscores.
            while flatname in flatnames:
                flatname = '_' + flatname
            flatnames[flatname] = location.id

        self.__class__.__flatname_map = flatnames
        return flatnames

    def current(self):
        "Find the locations referenced as current location within this query set"
        return self.filter(id__in=self.values_list('current_location_id', flat=True)).distinct()
404
class LocationManager(CoreManager):
    "Manager exposing the LocationQuerySet API for locations"

    def get_query_set(self):
        "Build a LocationQuerySet bound to the managed model"
        return LocationQuerySet(self.model)

    # Each method below forwards its arguments to the query set method of
    # the same name and reuses that method's docstring.

    def by_flatname(self, *args, **kwargs):
        locations = self.get_query_set()
        return locations.by_flatname(*args, **kwargs)
    by_flatname.__doc__ = LocationQuerySet.by_flatname.__doc__

    def flatname_map(self, *args, **kwargs):
        locations = self.get_query_set()
        return locations.flatname_map(*args, **kwargs)
    flatname_map.__doc__ = LocationQuerySet.flatname_map.__doc__
418
419
class MediaCorpusQuerySet(CoreQuerySet):
    "Query set for media corpus resources"

    def quick_search(self, pattern):
        "Search the code and every char or text field for the stripped pattern"
        from telemeta.models.media import MediaCorpus
        corpus = MediaCorpus()
        pattern = pattern.strip()
        query = Q(code__contains=pattern)

        for name in corpus.to_dict().keys():
            # Field kind is detected from the field's string representation.
            description = str(corpus._meta.get_field(name))
            is_textual = 'CharField' in description or 'TextField' in description
            if not is_textual:
                continue
            query = query | word_search_q(name, pattern)

        return self.filter(query)
438
439
class MediaCorpusManager(CoreManager):
    "Manager exposing the MediaCorpusQuerySet API for corpus resources"

    def get_query_set(self):
        "Build a MediaCorpusQuerySet bound to the managed model"
        return MediaCorpusQuerySet(self.model)

    # Forwards to the query set method of the same name and reuses its
    # docstring.
    def quick_search(self, *args, **kwargs):
        corpus_set = self.get_query_set()
        return corpus_set.quick_search(*args, **kwargs)
    quick_search.__doc__ = MediaCorpusQuerySet.quick_search.__doc__
450
451
class MediaFondsQuerySet(CoreQuerySet):
    "Query set for media fonds resources"

    def quick_search(self, pattern):
        "Search the code and every char or text field for the stripped pattern"
        from telemeta.models.media import MediaFonds
        fonds = MediaFonds()
        pattern = pattern.strip()
        query = Q(code__contains=pattern)

        for name in fonds.to_dict().keys():
            # Field kind is detected from the field's string representation.
            description = str(fonds._meta.get_field(name))
            is_textual = 'CharField' in description or 'TextField' in description
            if not is_textual:
                continue
            query = query | word_search_q(name, pattern)

        return self.filter(query)
468
469
class MediaFondsManager(CoreManager):
    "Manager exposing the MediaFondsQuerySet API for fonds resources"

    def get_query_set(self):
        "Build a MediaFondsQuerySet bound to the managed model"
        return MediaFondsQuerySet(self.model)

    # Forwards to the query set method of the same name and reuses its
    # docstring.
    def quick_search(self, *args, **kwargs):
        fonds_set = self.get_query_set()
        return fonds_set.quick_search(*args, **kwargs)
    quick_search.__doc__ = MediaFondsQuerySet.quick_search.__doc__
Note: See TracBrowser for help on using the repository browser.